Edgewall Software

source: branches/stable/0.6.x/genshi/template/interpolation.py

Last change on this file was 1214, checked in by hodgestar, 11 years ago

Merge r1213 from trunk (fix infinite loop in interpolation brace matching caused by a change in 2.7).

  • Property svn:eol-style set to native
File size: 5.1 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007-2009 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14"""String interpolation routines, i.e. the splitting up a given text into some
15parts that are literal strings, and others that are Python expressions.
16"""
17
18from itertools import chain
19import os
20import re
21from tokenize import PseudoToken
22
23from genshi.core import TEXT
24from genshi.template.base import TemplateSyntaxError, EXPR
25from genshi.template.eval import Expression
26
27__all__ = ['interpolate']
28__docformat__ = 'restructuredtext en'
29
# Characters allowed as the first character of a short-form ``$name``
# expression, and the characters allowed after the first one.
NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NAMECHARS = NAMESTART + '.0123456789'
# Character that introduces an expression in template text.
PREFIX = '$'

# Matches one Python token at a time while scanning a ``${...}`` expression.
# The first alternative consumes a complete triple-quoted string (groups 1
# and 2); the second is the tokenizer's own pseudo-token pattern, whose
# outermost group therefore becomes group 3.
#
# DOTALL is passed as a flag argument rather than as a trailing inline
# ``(?s)``: global inline flags that are not at the start of the pattern are
# rejected by newer versions of the ``re`` module.
token_re = re.compile('%s|%s' % (
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken
), re.DOTALL)
38
39
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print('%s %r' % (kind, data))
    TEXT 'hey '
    EXPR Expression('foo')
    TEXT 'bar'

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :return: a generator yielding ``(kind, data, pos)`` tuples, where `kind`
             is `TEXT` or `EXPR` and `pos` is a ``(filepath, lineno, offset)``
             tuple
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Mutable so that `lex` (which receives the same list) sees the current
    # position when it needs to report an error.
    pos = [filepath, lineno, offset]

    textbuf = []
    textpos = None
    # The trailing (True, '') sentinel flushes any buffered literal text once
    # the lexer is exhausted, without producing an expression of its own.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, ''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                # Only the expression compilation is guarded, so that the
                # translation to TemplateSyntaxError is limited to errors in
                # the expression source itself.
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                except SyntaxError as err:
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
                yield EXPR, expr, tuple(pos)
        else:
            textbuf.append(chunk)
            if textpos is None:
                # Literal text is reported at the position of its first chunk.
                textpos = tuple(pos)

        if '\n' in chunk:
            # Split on '\n' (the same character tested above) rather than
            # using splitlines(): splitlines() drops the empty segment after
            # a trailing newline, so a chunk such as "hey\n" would not advance
            # the line counter at all.
            lines = chunk.split('\n')
            pos[1] += len(lines) - 1
            # NOTE(review): the column is accumulated here rather than reset
            # after a line break; kept as in the original -- confirm intent.
            pos[2] += len(lines[-1])
        else:
            pos[2] += len(chunk)
94
95
def lex(text, textpos, filepath):
    """Split `text` into literal and expression chunks.

    This is a generator yielding ``(is_expr, chunk)`` tuples: `is_expr` is
    ``False`` for literal text and ``True`` for the source of an expression
    (given either as ``${...}`` or in the short ``$name`` form). A doubled
    prefix (``$$``) produces a single literal prefix character.

    :param text: the text to split
    :param textpos: a sequence whose items after the first are passed on as
                    the position arguments of `TemplateSyntaxError`; it is
                    read at the time the error is raised
    :param filepath: the file path reported in syntax errors
    :raise TemplateSyntaxError: when the braces of a ``${...}`` expression
                                cannot be matched up
    """
    length = len(text)
    cursor = 0      # start of the text that has not been emitted yet
    marker = 0      # index of the most recently located prefix character
    after_escape = False

    while True:
        # Right after an escape the search resumes behind the second prefix
        # character, so that it is not interpreted as a new prefix.
        search_from = marker + 2 if after_escape else cursor
        after_escape = False
        marker = text.find(PREFIX, search_from)
        if marker < 0 or marker == length - 1:
            # No prefix left, or a lone prefix as the very last character.
            break
        following = text[marker + 1]

        if following == '{':
            # Full form: ${ ... }
            if marker > cursor:
                yield False, text[cursor:marker]
            cursor = marker + 2
            depth = 1
            while depth:
                match = token_re.match(text, cursor)
                if match is None or not match.group():
                    # Without a match, or with an empty one, the scan can
                    # make no progress and the braces will never balance.
                    raise TemplateSyntaxError('invalid syntax',  filepath,
                                              *textpos[1:])
                cursor = match.end()
                tok_start, tok_end = match.regs[3]
                token = text[tok_start:tok_end]
                if token == '{':
                    depth += 1
                elif token == '}':
                    depth -= 1
            # Everything between the opening "${" and the closing "}".
            yield True, text[marker + 2:cursor - 1]

        elif following in NAMESTART:
            # Short form: $name, extended over as many name characters as
            # follow the prefix.
            if marker > cursor:
                yield False, text[cursor:marker]
            cursor = marker + 1
            while cursor < length and text[cursor] in NAMECHARS:
                cursor += 1
            yield True, text[marker + 1:cursor].strip()

        elif following == PREFIX:
            # Escaped prefix: emit the text before it and keep the second
            # prefix character as the start of the next literal chunk.
            if marker > cursor:
                yield False, text[cursor:marker]
            after_escape = True
            cursor = marker + 1

        else:
            # A prefix followed by anything else is ordinary text.
            yield False, text[cursor:marker + 1]
            cursor = marker + 1

    if cursor < length:
        yield False, text[cursor:]
Note: See TracBrowser for help on using the repository browser.