Edgewall Software

source: trunk/genshi/template/interpolation.py

Last change on this file was 1213, checked in by hodgestar, 11 years ago

Stop trying to lex for matching braces in interpolation if the token_re matches an empty string. This fixes the infinite loop triggered by the test suite as a result of the change to Python 2.7 described in http://bugs.python.org/issue16152 and reported in http://genshi.edgewall.org/ticket/540.

  • Property svn:eol-style set to native
File size: 5.1 KB
RevLine 
[499]1# -*- coding: utf-8 -*-
2#
[1077]3# Copyright (C) 2007-2009 Edgewall Software
[499]4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14"""String interpolation routines, i.e. the splitting up a given text into some
15parts that are literal strings, and others that are Python expressions.
16"""
17
18from itertools import chain
19import os
[856]20import re
21from tokenize import PseudoToken
[499]22
23from genshi.core import TEXT
24from genshi.template.base import TemplateSyntaxError, EXPR
25from genshi.template.eval import Expression
26
27__all__ = ['interpolate']
[517]28__docformat__ = 'restructuredtext en'
[499]29
# Characters that may follow PREFIX to start a bare (brace-less) reference.
NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
# Characters that may continue a bare reference once it has started.
NAMECHARS = NAMESTART + '.0123456789'
# Marker that introduces an expression in the text being lexed.
PREFIX = '$'

# Matches either a triple-quoted string literal (first alternative) or one
# pseudo-token as defined by the standard `tokenize` module.  `lex()` reads
# group 3 of each match to recognise the '{' and '}' tokens.
#
# DOTALL is passed as a compile flag rather than as a trailing inline
# ``(?s)``: global inline flags that are not at the very start of a pattern
# are deprecated since Python 3.6 and rejected with an error since 3.11.
token_re = re.compile('%s|%s' % (
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken
), re.DOTALL)
38
[1082]39
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print('%s %r' % (kind, data))
    TEXT 'hey '
    EXPR Expression('foo')
    TEXT 'bar'

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "lenient", "strict"
                   (the default), or a custom lookup class
    :return: an iterator over ``(kind, data, pos)`` events, where ``kind`` is
             `TEXT` or `EXPR`
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Running (filepath, lineno, offset) position.  It is a list so that it
    # can be advanced in place; `lex` receives the same object and reads it
    # when it has to report an error.
    pos = [filepath, lineno, offset]

    # Adjacent literal chunks are buffered and emitted as a single TEXT event.
    textbuf = []
    textpos = None
    # The trailing ``(True, '')`` sentinel is an empty "expression" whose only
    # effect is to flush whatever literal text is still buffered.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, ''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                except SyntaxError as err:
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
                yield EXPR, expr, tuple(pos)
        else:
            textbuf.append(chunk)
            if textpos is None:
                # Position of the first chunk of the buffered text run.
                textpos = tuple(pos)

        # Advance the running position past the chunk just processed.
        # NOTE(review): after a newline the column is incremented by the
        # length of the last line rather than reset to it, and a chunk that
        # ends in a newline does not count that final line break because
        # splitlines() drops it -- confirm whether this is intended.
        if '\n' in chunk:
            lines = chunk.splitlines()
            pos[1] += len(lines) - 1
            pos[2] += len(lines[-1])
        else:
            pos[2] += len(chunk)
94
[1082]95
def lex(text, textpos, filepath):
    """Split `text` into literal chunks and expression source chunks.

    This is a generator yielding ``(is_expr, chunk)`` pairs: ``is_expr`` is
    ``False`` for literal text and ``True`` for the source of an expression
    introduced by `PREFIX`, either as ``${...}`` or as a bare name.  A doubled
    prefix is an escape: the first prefix character is dropped and the second
    one is emitted as part of the following literal text.

    :param text: the string to split
    :param textpos: a sequence whose items after the first are passed on as
                    positional arguments to `TemplateSyntaxError`
    :param filepath: the file path reported in `TemplateSyntaxError`
    :raise TemplateSyntaxError: when the braces of a ``${...}`` expression
                                cannot be matched up
    """
    end = len(text)
    pos = 0         # start of the text that has not been emitted yet
    scan_from = 0   # where the search for the next prefix resumes

    while True:
        offset = text.find(PREFIX, scan_from)
        # No prefix left, or a prefix as the very last character: whatever
        # remains is plain text and is emitted after the loop.
        if offset < 0 or offset == end - 1:
            break
        following = text[offset + 1]

        if following == '{':
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 2
            depth = 1
            # Consume tokens until the brace opened by "${" is closed again.
            while depth:
                match = token_re.match(text, pos)
                if match is None or not match.group():
                    # if there isn't a match or the match is the empty
                    # string, we're not going to match up braces ever
                    raise TemplateSyntaxError('invalid syntax',  filepath,
                                              *textpos[1:])
                pos = match.end()
                token_start, token_end = match.regs[3]
                token = text[token_start:token_end]
                if token == '{':
                    depth += 1
                elif token == '}':
                    depth -= 1
            # Everything between "${" and the matching "}".
            yield True, text[offset + 2:pos - 1]

        elif following in NAMESTART:
            if offset > pos:
                yield False, text[pos:offset]
            # Skip the prefix, then swallow every character allowed in a name.
            pos = offset + 1
            while pos < end and text[pos] in NAMECHARS:
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif following == PREFIX:
            if offset > pos:
                yield False, text[pos:offset]
            # Keep the second prefix character as literal text, but resume
            # scanning behind it so that it is not taken for a new prefix.
            pos = offset + 1
            scan_from = offset + 2
            continue

        else:
            # A prefix followed by anything else is ordinary text.
            yield False, text[pos:offset + 1]
            pos = offset + 1

        scan_from = pos

    if pos < end:
        yield False, text[pos:]
Note: See TracBrowser for help on using the repository browser.