Edgewall Software

source: branches/stable/0.5.x/genshi/template/interpolation.py

Last change on this file was 998, checked in by cmlenz, 15 years ago

Ported [914], [970], and [971] to 0.5.x branch.

  • Property svn:eol-style set to native
File size: 4.9 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007-2008 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
"""String interpolation routines, i.e. splitting a given text up into parts
that are literal strings and parts that are Python expressions.
"""
17
18from itertools import chain
19import os
20import re
21from tokenize import PseudoToken
22
23from genshi.core import TEXT
24from genshi.template.base import TemplateSyntaxError, EXPR
25from genshi.template.eval import Expression
26
# Names exported by a star-import of this module.
__all__ = ['interpolate']
# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
29
# Character that introduces an expression (``$name`` or ``${...}``).
PREFIX = '$'

# Characters that may follow the prefix to start a ``$name`` expression.
NAMESTART = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '_'
)

# Characters that may continue a ``$name`` expression: everything in
# NAMESTART, plus the dot and the decimal digits.
NAMECHARS = NAMESTART + '.' + '0123456789'
33
# Matches a single token inside a ``${...}`` expression: either a complete
# triple-quoted string literal (first alternative, groups 1 and 2), or
# whatever `tokenize.PseudoToken` matches (second alternative). `lex` reads
# group 3, which is the first group contributed by `PseudoToken`; it is
# unset when the triple-quoted alternative matched.
#
# The inline ``(?s)`` flag (DOTALL, so that ``.`` also matches the newlines
# inside a triple-quoted string) must be the very first thing in the pattern:
# a global inline flag anywhere else is deprecated since Python 3.6 and is
# rejected with an ``re.error`` as of Python 3.11.
token_re = re.compile('(?s)%s|%s' % (
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken
))
38
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print kind, repr(data)
    TEXT u'hey '
    EXPR Expression('foo')
    TEXT u'bar'

    Adjacent literal chunks are merged into a single `TEXT` event, which
    carries the position of the first chunk that went into it.

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :return: a generator of ``(kind, data, pos)`` tuples, where `kind` is
             either `TEXT` or `EXPR` and `pos` is a
             ``(filepath, lineno, offset)`` tuple
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Imported locally: only needed to fetch the active exception in a way
    # that is valid syntax on both Python 2 and Python 3.
    import sys

    # A mutable list rather than a tuple: the same object is handed to `lex`,
    # which reads the then-current line and offset from it when it raises.
    pos = [filepath, lineno, offset]

    textbuf = []
    textpos = None
    # The trailing ``(True, '')`` sentinel acts like an empty expression: it
    # flushes any text still buffered without producing an `EXPR` event.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, u''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                # Only the compilation is guarded; the `yield` stays outside
                # so that the handler cannot see anything but compile errors.
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                except SyntaxError:
                    err = sys.exc_info()[1]
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
                yield EXPR, expr, tuple(pos)
        else:
            textbuf.append(chunk)
            if textpos is None:
                textpos = tuple(pos)

        # Advance the running position past this chunk. Newlines are counted
        # directly (``splitlines()`` would drop a trailing one and leave the
        # line number one short), and after a newline the column restarts at
        # the number of characters following the last newline.
        # NOTE(review): the chunks produced by `lex` do not include the
        # ``$``, ``${`` and ``}`` delimiters, so the column is approximate on
        # lines that contain expressions.
        newlines = chunk.count('\n')
        if newlines:
            pos[1] += newlines
            pos[2] = len(chunk) - chunk.rfind('\n') - 1
        else:
            pos[2] += len(chunk)
93
def lex(text, textpos, filepath):
    """Split a text into literal chunks and expression chunks.

    This generator scans `text` for the ``$`` prefix and yields
    ``(is_expr, chunk)`` tuples, where `is_expr` is ``True`` when `chunk` is
    the source of an expression and ``False`` when it is literal text:

    * ``${...}`` yields the text between the braces as an expression; nested
      braces and string literals are stepped over token by token using
      `token_re`
    * ``$name`` yields the run of `NAMECHARS` characters that follows a
      `NAMESTART` character as an expression
    * ``$$`` is an escape: the first ``$`` is dropped and the second one is
      emitted as part of the following literal text
    * a ``$`` followed by anything else, or at the very end of the text, is
      literal text

    :param text: the text to split
    :param textpos: a ``[filepath, lineno, offset]`` sequence; its line number
                    and offset are read at the moment an error is raised
    :param filepath: the path reported in syntax errors
    :raise TemplateSyntaxError: if the tokens following a ``${`` do not lead
                                up to the matching ``}``
    """
    offset = pos = 0
    end = len(text)
    escaped = False

    while True:
        if escaped:
            # The previous iteration saw ``$$`` at `offset`; resume the search
            # behind the second ``$`` so it is not taken as a prefix itself.
            offset = text.find(PREFIX, offset + 2)
            escaped = False
        else:
            offset = text.find(PREFIX, pos)
        # No further prefix, or a prefix that is the last character.
        if offset < 0 or offset == end - 1:
            break
        next_char = text[offset + 1]

        if next_char == '{':
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 2
            level = 1
            while level:
                match = token_re.match(text, pos)
                # An empty match makes no progress. On Python versions whose
                # `PseudoToken` can match at the end of the input this would
                # otherwise loop forever on an unterminated ``${``.
                if match is None or not match.group():
                    raise TemplateSyntaxError('invalid syntax',  filepath,
                                              *textpos[1:])
                pos = match.end()
                # Group 3 holds the token matched by `PseudoToken`; it is
                # unset (giving an empty slice) for triple-quoted strings.
                tstart, tend = match.regs[3]
                token = text[tstart:tend]
                if token == '{':
                    level += 1
                elif token == '}':
                    level -= 1
            # `pos` is now just behind the closing brace.
            yield True, text[offset + 2:pos - 1]

        elif next_char in NAMESTART:
            if offset > pos:
                yield False, text[pos:offset]
                pos = offset
            pos += 1
            while pos < end:
                char = text[pos]
                if char not in NAMECHARS:
                    break
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif next_char == PREFIX:
            # `escaped` is always False here (it is reset above), so no
            # additional check is needed.
            if offset > pos:
                yield False, text[pos:offset]
            escaped = True
            # Skip the first ``$``; the second becomes literal text.
            pos = offset + 1

        else:
            # A lone prefix: emit it together with the text leading up to it.
            yield False, text[pos:offset + 1]
            pos = offset + 1

    if pos < end:
        yield False, text[pos:]
Note: See TracBrowser for help on using the repository browser.