Edgewall Software

source: branches/stable/0.5.x/genshi/filters/transform.py

Last change on this file was 1001, checked in by cmlenz, 15 years ago

Ported [1000] to stable branch.

  • Property svn:eol-style set to native
File size: 46.1 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14"""A filter for functional-style transformations of markup streams.
15
16The `Transformer` filter provides a variety of transformations that can be
17applied to parts of streams that match given XPath expressions. These
18transformations can be chained to achieve results that would be comparatively
19tedious to achieve by writing stream filters by hand. The approach of chaining
20node selection and transformation has been inspired by the `jQuery`_ Javascript
21library.
22
23 .. _`jQuery`: http://jquery.com/
24
25For example, the following transformation removes the ``<title>`` element from
26the ``<head>`` of the input document:
27
28>>> from genshi.builder import tag
29>>> html = HTML('''<html>
30...  <head><title>Some Title</title></head>
31...  <body>
32...    Some <em>body</em> text.
33...  </body>
34... </html>''')
35>>> print html | Transformer('body/em').map(unicode.upper, TEXT) \\
36...                                    .unwrap().wrap(tag.u)
37<html>
38  <head><title>Some Title</title></head>
39  <body>
40    Some <u>BODY</u> text.
41  </body>
42</html>
43
44The ``Transformer`` supports a large number of useful transformations out of the
45box, but custom transformations can be added easily.
46
47:since: version 0.5
48"""
49
50import re
51import sys
52
53from genshi.builder import Element
54from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
55from genshi.path import Path
56
57__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
58           'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
59
60
class TransformMark(str):
    """String subclass used for the marks attached to stream events.

    Instances are cached per value: constructing a mark twice with the same
    value returns the very same object, so marks can be compared with ``is``.
    """
    __slots__ = []
    _instances = {}

    def __new__(cls, val):
        """Return the cached mark for `val`, creating it on first use."""
        cache = cls._instances
        try:
            return cache[val]
        except KeyError:
            mark = cache[val] = str.__new__(cls, val)
            return mark
68
69
# Every mark below is a `TransformMark`. That class hands back one shared
# instance per value, which is why the transformations in this module compare
# marks by identity (e.g. ``mark is ENTER``) rather than by string equality.
ENTER = TransformMark('ENTER')
"""Stream augmentation mark indicating that a selected element is being
entered."""

INSIDE = TransformMark('INSIDE')
"""Stream augmentation mark indicating that processing is currently inside a
selected element."""

OUTSIDE = TransformMark('OUTSIDE')
"""Stream augmentation mark indicating that a match occurred outside a selected
element."""

ATTR = TransformMark('ATTR')
"""Stream augmentation mark indicating a selected element attribute."""

EXIT = TransformMark('EXIT')
"""Stream augmentation mark indicating that a selected element is being
exited."""

BREAK = TransformMark('BREAK')
"""Stream augmentation mark indicating a break between two otherwise contiguous
blocks of marked events.

This is used primarily by the cut() transform to provide later transforms with
an opportunity to operate on the cut buffer.
"""
96
97
class PushBackStream(object):
    """Allows a single event to be pushed back onto the stream and re-consumed.

    Only one event can be pending at a time; it is handed out by the iterator
    before any further event is taken from the underlying stream.
    """
    def __init__(self, stream):
        """Wrap `stream`.

        :param stream: any iterable of events
        """
        self.stream = iter(stream)
        self.peek = None

    def push(self, event):
        """Push `event` back so that it is the next one produced.

        :param event: the event to re-deliver; must not be `None`, because
                      `None` is what marks the push-back slot as empty
        """
        assert self.peek is None
        self.peek = event

    def __iter__(self):
        """Yield the pushed-back event if there is one, else the next event of
        the underlying stream, until both are exhausted.
        """
        stream = self.stream
        while True:
            if self.peek is not None:
                pending = self.peek
                self.peek = None
                yield pending
                continue
            # Pull exactly one event per pass so that the push-back slot is
            # re-checked after every event handed to the consumer. A one-shot
            # ``for`` is used instead of ``stream.next()`` so the code does
            # not depend on the Python 2-only iterator method.
            for event in stream:
                yield event
                break
            else:
                # The underlying stream is exhausted. Finish with a plain
                # return: re-raising StopIteration inside a generator is an
                # error on modern Python (PEP 479). If the consumer pushed an
                # event back in the meantime, loop once more to deliver it.
                if self.peek is None:
                    return
122
123
class Transformer(object):
    """Stream filter that applies a chain of transformations to a stream.

    Events to be transformed are selected with an XPath expression, and the
    chained transformations then operate on the selected events. Internally
    every event travels as a ``(mark, (kind, data, pos))`` pair, where the mark
    is one of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.

    `ENTER` and `EXIT` mark the `START` and `END` events of a selected XML/HTML
    element, and `INSIDE` marks every event contained in it. A non-element
    match outside a `START`/`END` container (e.g. ``text()``) is given an
    `OUTSIDE` mark.

    >>> html = HTML('<html><head><title>Some Title</title></head>'
    ...             '<body>Some <em>body</em> text.</body></html>')

    Transformations act on selected stream events matching an XPath expression.
    Here's an example of removing some markup (the title, in this case)
    selected by an expression:

    >>> print html | Transformer('head/title').remove()
    <html><head/><body>Some <em>body</em> text.</body></html>

    Inserted content can be passed in the form of a string, or a markup event
    stream, which includes streams generated programmatically via the
    `builder` module:

    >>> from genshi.builder import tag
    >>> print html | Transformer('body').prepend(tag.h1('Document Title'))
    <html><head><title>Some Title</title></head><body><h1>Document
    Title</h1>Some <em>body</em> text.</body></html>

    Each XPath expression determines the set of tags that will be acted upon by
    subsequent transformations. In this example we select the ``<title>`` text,
    copy it into a buffer, then select the ``<body>`` element and paste the
    copied text into the body as ``<h1>`` enclosed text:

    >>> buffer = StreamBuffer()
    >>> print html | Transformer('head/title/text()').copy(buffer) \\
    ...     .end().select('body').prepend(tag.h1(buffer))
    <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
    <em>body</em> text.</body></html>

    Transformations can also be assigned and reused, although care must be
    taken when using buffers, to ensure that buffers are cleared between
    transforms:

    >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
    >>> print html | emphasis
    <html><head><title>Some Title</title></head><body>Some <em
    class="emphasis">body</em> text.</body></html>
    """

    __slots__ = ['transforms']

    def __init__(self, path='.'):
        """Construct a new transformation filter.

        The chain starts out with a single selection of `path`.

        :param path: an XPath expression (as string) or a `Path` instance
        """
        self.transforms = [SelectTransformation(path)]

    def __call__(self, stream, keep_marks=False):
        """Run the stream through every transformation in the chain.

        Each incoming event is first paired with an `OUTSIDE` mark; the marked
        stream is then passed through the transformations in order.

        :param stream: the event stream to filter
        :param keep_marks: Do not strip transformer selection marks from the
                           stream. Useful for testing.
        :return: the transformed stream
        :rtype: `Stream`
        """
        marked = self._mark(stream)
        for transformation in self.transforms:
            marked = transformation(marked)
        if keep_marks:
            result = marked
        else:
            result = self._unmark(marked)
        return Stream(result, serializer=getattr(stream, 'serializer', None))

    def apply(self, function):
        """Apply a transformation to the stream.

        Transformations can be chained, similar to stream filters. Any callable
        accepting a marked stream can be used as a transform. Passing another
        `Transformer` appends all of its transformations. The receiver is left
        untouched; a new `Transformer` holding the extended chain is returned.

        As an example, here is a simple `TEXT` event upper-casing transform:

        >>> def upper(stream):
        ...     for mark, (kind, data, pos) in stream:
        ...         if mark and kind is TEXT:
        ...             yield mark, (kind, data.upper(), pos)
        ...         else:
        ...             yield mark, (kind, data, pos)
        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
        >>> print short_stream | Transformer('.//em/text()').apply(upper)
        <body>Some <em>TEST</em> text</body>

        :param function: a callable taking a marked stream, or a `Transformer`
        :rtype: `Transformer`
        """
        chained = Transformer()
        chained.transforms = list(self.transforms)
        if isinstance(function, Transformer):
            chained.transforms += function.transforms
        else:
            chained.transforms.append(function)
        return chained

    #{ Selection operations

    def select(self, path):
        """Mark events matching the given XPath expression, within the current
        selection.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print html | Transformer().select('.//em').trace()
        (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName(u'em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName(u'body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param path: an XPath expression (as string) or a `Path` instance
        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(SelectTransformation(path))

    def invert(self):
        """Invert selection so that marked events become unmarked, and vice
        versa.

        Specifically, all marks are converted to null marks, and all null marks
        are converted to OUTSIDE marks.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print html | Transformer('//em').invert().trace()
        ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        (None, ('START', (QName(u'em'), Attrs()), (None, 1, 11)))
        (None, ('TEXT', u'test', (None, 1, 15)))
        (None, ('END', QName(u'em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :rtype: `Transformer`
        """
        return self.apply(InvertTransformation())

    def end(self):
        """End current selection, allowing all events to be selected.

        Example:

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print html | Transformer('//em').end().trace()
        ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        ('OUTSIDE', ('START', (QName(u'em'), Attrs()), (None, 1, 11)))
        ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('OUTSIDE', ('END', QName(u'em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(EndTransformation())

    #{ Deletion operations

    def empty(self):
        """Empty selected elements of all content.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').empty()
        <html><head><title>Some Title</title></head><body>Some <em/>
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(EmptyTransformation())

    def remove(self):
        """Remove selection from the stream.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').remove()
        <html><head><title>Some Title</title></head><body>Some
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(RemoveTransformation())

    #{ Direct element operations

    def unwrap(self):
        """Remove outermost enclosing elements from selection.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').unwrap()
        <html><head><title>Some Title</title></head><body>Some body
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(UnwrapTransformation())

    def wrap(self, element):
        """Wrap selection in an element.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').wrap('strong')
        <html><head><title>Some Title</title></head><body>Some
        <strong><em>body</em></strong> text.</body></html>

        :param element: either a tag name (as string) or an `Element` object
        :rtype: `Transformer`
        """
        return self.apply(WrapTransformation(element))

    #{ Content insertion operations

    def replace(self, content):
        """Replace selection with content.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//title/text()').replace('New Title')
        <html><head><title>New Title</title></head><body>Some <em>body</em>
        text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(ReplaceTransformation(content))

    def before(self, content):
        """Insert content before selection.

        In this example we insert the word 'emphasised' before the <em> opening
        tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').before('emphasised ')
        <html><head><title>Some Title</title></head><body>Some emphasised
        <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(BeforeTransformation(content))

    def after(self, content):
        """Insert content after selection.

        Here, we insert some text after the </em> closing tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em').after(' rock')
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        rock text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AfterTransformation(content))

    def prepend(self, content):
        """Insert content after the ENTER event of the selection.

        Inserting some new text at the start of the <body>:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//body').prepend('Some new body text. ')
        <html><head><title>Some Title</title></head><body>Some new body text.
        Some <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(PrependTransformation(content))

    def append(self, content):
        """Insert content before the END event of the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//body').append(' Some new body text.')
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        text. Some new body text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AppendTransformation(content))

    #{ Attribute manipulation

    def attr(self, name, value):
        """Add, replace or delete an attribute on selected elements.

        If `value` evaluates to `None` the attribute will be deleted from the
        element:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
        ...             '</html>')
        >>> print html | Transformer('body/em').attr('class', None)
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        <em>text</em>.</body></html>

        Otherwise the attribute will be set to `value`:

        >>> print html | Transformer('body/em').attr('class', 'emphasis')
        <html><head><title>Some Title</title></head><body>Some <em
        class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>

        If `value` is a callable it will be called with the attribute name and
        the `START` event for the matching element. Its return value will then
        be used to set the attribute:

        >>> def print_attr(name, event):
        ...     attrs = event[1][1]
        ...     print attrs
        ...     return attrs.get(name)
        >>> print html | Transformer('body/em').attr('class', print_attr)
        Attrs([(QName(u'class'), u'before')])
        Attrs()
        <html><head><title>Some Title</title></head><body>Some <em
        class="before">body</em> <em>text</em>.</body></html>

        :param name: the name of the attribute
        :param value: the value that should be set for the attribute.
        :rtype: `Transformer`
        """
        return self.apply(AttrTransformation(name, value))

    #{ Buffer operations

    def copy(self, buffer, accumulate=False):
        """Copy selection into buffer.

        The buffer is replaced by each *contiguous* selection before being passed
        to the next transformation. If accumulate=True, further selections will
        be appended to the buffer rather than replacing it.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('head/title/text()').copy(buffer) \\
        ...     .end().select('body').prepend(tag.h1(buffer))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>

        This example illustrates that only a single contiguous selection will
        be buffered:

        >>> print html | Transformer('head/title/text()').copy(buffer) \\
        ...     .end().select('body/em').copy(buffer).end().select('body') \\
        ...     .prepend(tag.h1(buffer))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>
        >>> print buffer
        <em>body</em>

        Element attributes can also be copied for later use:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body><em>Some</em> <em class="before">body</em>'
        ...             '<em>text</em>.</body></html>')
        >>> buffer = StreamBuffer()
        >>> def apply_attr(name, entry):
        ...     return list(buffer)[0][1][1].get('class')
        >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\
        ...     .end().buffer().select('body/em[not(@class)]') \\
        ...     .attr('class', apply_attr)
        <html><head><title>Some Title</title></head><body><em
        class="before">Some</em> <em class="before">body</em><em
        class="before">text</em>.</body></html>

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its contents
        :rtype: `Transformer`
        :note: Copy (and cut) copy each individual selected object into the
               buffer before passing to the next transform. For example, the
               XPath ``*|text()`` will select all elements and text, each
               instance of which will be copied to the buffer individually
               before passing to the next transform. This has implications for
               how ``StreamBuffer`` objects can be used, so some
               experimentation may be required.
        """
        return self.apply(CopyTransformation(buffer, accumulate))

    def cut(self, buffer, accumulate=False):
        """Copy selection into buffer and remove the selection from the stream.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('.//em/text()').cut(buffer) \\
        ...     .end().select('.//em').after(tag.h1(buffer))
        <html><head><title>Some Title</title></head><body>Some
        <em/><h1>body</h1> text.</body></html>

        Specifying accumulate=True, appends all selected intervals onto the
        buffer. Combining this with the .buffer() operation allows us operate
        on all copied events rather than per-segment. See the documentation on
        buffer() for more information.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its contents
        :rtype: `Transformer`
        :note: this transformation will buffer the entire input stream
        """
        return self.apply(CutTransformation(buffer, accumulate))

    def buffer(self):
        """Buffer the entire stream (can consume a considerable amount of
        memory).

        The marked stream is materialised with ``list`` before it is handed to
        the next transformation.

        Useful in conjunction with copy(accumulate=True) and
        cut(accumulate=True) to ensure that all marked events in the entire
        stream are copied to the buffer before further transformations are
        applied.

        For example, to move all <note> elements inside a <notes> tag at the
        top of the document:

        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
        ...            'text <note>two</note>.</body></doc>')
        >>> buffer = StreamBuffer()
        >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\
        ...     .end().buffer().select('notes').prepend(buffer)
        <doc><notes><note>one</note><note>two</note></notes><body>Some  text
        .</body></doc>

        :rtype: `Transformer`
        """
        return self.apply(list)

    #{ Miscellaneous operations

    def filter(self, filter):
        """Apply a normal stream filter to the selection. The filter is called
        once for each contiguous block of marked events.

        >>> from genshi.filters.html import HTMLSanitizer
        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
        ...             '</script> and some more text</body></html>')
        >>> print html | Transformer('body/*').filter(HTMLSanitizer())
        <html><body>Some text and some more text</body></html>

        :param filter: The stream filter to apply.
        :rtype: `Transformer`
        """
        return self.apply(FilterTransformation(filter))

    def map(self, function, kind):
        """Applies a function to the ``data`` element of events of ``kind`` in
        the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...               '<body>Some <em>body</em> text.</body></html>')
        >>> print html | Transformer('head/title').map(unicode.upper, TEXT)
        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
        text.</body></html>

        :param function: the function to apply
        :param kind: the kind of event the function should be applied to
        :rtype: `Transformer`
        """
        return self.apply(MapTransformation(function, kind))

    def substitute(self, pattern, replace, count=1):
        """Replace text matching a regular expression.

        Refer to the documentation for ``re.sub()`` for details.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b>\\n'
        ...             '<i>some italicised text</i></body></html>')
        >>> print html | Transformer('body/b').substitute('(?i)some', 'SOME')
        <html><body>Some text, some more text and <b>SOME bold text</b>
        <i>some italicised text</i></body></html>
        >>> tags = tag.html(tag.body('Some text, some more text and\\n',
        ...      Markup('<b>some bold text</b>')))
        >>> print tags.generate() | Transformer('body').substitute(
        ...     '(?i)some', 'SOME')
        <html><body>SOME text, some more text and
        <b>SOME bold text</b></body></html>

        :param pattern: A regular expression object or string.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        :rtype: `Transformer`
        """
        return self.apply(SubstituteTransformation(pattern, replace, count))

    def rename(self, name):
        """Rename matching elements.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b></body></html>')
        >>> print html | Transformer('body/b').rename('strong')
        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>

        :param name: the new name for the selected elements
        :rtype: `Transformer`
        """
        return self.apply(RenameTransformation(name))

    def trace(self, prefix='', fileobj=None):
        """Print events as they pass through the transform.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print html | Transformer('em').trace()
        (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName(u'em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName(u'body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param prefix: a string to prefix each event with in the output
        :param fileobj: the writable file-like object to write to; defaults to
                        the standard output stream
        :rtype: `Transformer`
        """
        return self.apply(TraceTransformation(prefix, fileobj=fileobj))

    # Internal methods

    def _mark(self, stream):
        """Pair every event of `stream` with an `OUTSIDE` mark."""
        for item in stream:
            yield OUTSIDE, item

    def _unmark(self, stream):
        """Strip the marks again, yielding only the bare events.

        Events whose kind is `None`, `ATTR` or `BREAK` are dropped instead of
        being passed on.
        """
        for mark, event in stream:
            kind = event[0]
            if kind is None or kind is ATTR or kind is BREAK:
                continue
            yield event
690
691
class SelectTransformation(object):
    """Select and mark events that match an XPath expression."""

    def __init__(self, path):
        """Create selection.

        :param path: an XPath expression (as string) or a `Path` object
        """
        if not isinstance(path, Path):
            path = Path(path)
        self.path = path

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Events whose incoming mark is `None` are passed through untouched;
        all other events are run through the path test and re-marked
        according to its result.

        :param stream: the marked event stream to filter
        """
        # No namespace prefixes or variables are supplied to the path test;
        # the same two empty mappings are reused for every call instead of
        # allocating fresh ones per event.
        namespaces = {}
        variables = {}
        test = self.path.test()
        # The nested loop below must continue from the current position, so
        # make sure we hold an iterator rather than a re-iterable sequence.
        stream = iter(stream)
        for mark, event in stream:
            if mark is None:
                yield mark, event
                continue
            result = test(event, namespaces, variables)
            # XXX This is effectively genshi.core._ensure() for transform
            # streams.
            if result is True:
                if event[0] is START:
                    yield ENTER, event
                    # Consume the whole element: everything up to the
                    # matching END is INSIDE, the matching END itself is EXIT.
                    depth = 1
                    for mark, subevent in stream:
                        kind = subevent[0]
                        if kind is START:
                            depth += 1
                        elif kind is END:
                            depth -= 1
                        if depth == 0:
                            yield EXIT, subevent
                        else:
                            yield INSIDE, subevent
                        # Keep the path test's internal state in step with
                        # the events consumed here.
                        test(subevent, namespaces, variables, updateonly=True)
                        if depth == 0:
                            break
                else:
                    yield OUTSIDE, event
            elif isinstance(result, Attrs):
                # XXX  Selected *attributes* are emitted as an extra event of
                # kind ATTR (ahead of the unmarked element event) to indicate
                # they are not really part of the stream.
                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
                yield None, event
            elif isinstance(result, tuple):
                yield OUTSIDE, result
            elif result:
                # XXX Assume everything else is "text"?
                yield None, (TEXT, unicode(result), (None, -1, -1))
            else:
                yield None, event
749
750
class InvertTransformation(object):
    """Invert selection so that marked events become unmarked, and vice versa.

    Specifically, every event arriving with a mark leaves with a null mark,
    and every event arriving with a null mark leaves marked `OUTSIDE`.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for old_mark, event in stream:
            yield (None if old_mark else OUTSIDE), event
768
769
class EndTransformation(object):
    """End the current selection.

    Whatever mark an event arrives with is discarded; every event is
    re-emitted carrying the OUTSIDE mark.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator of ``(OUTSIDE, event)`` pairs
        """
        for _incoming_mark, event in stream:
            yield OUTSIDE, event
780
781
class EmptyTransformation(object):
    """Empty selected elements of all content.

    The ENTER and EXIT events of a selected element are kept; every event
    between them is dropped.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter; it is consumed by
                       two nested loops and must therefore be an iterator
        :return: a generator of ``(mark, event)`` pairs
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Swallow the element's content, emitting only the closing EXIT.
            for mark, event in stream:
                if mark is EXIT:
                    yield mark, event
                    break
797
798
class RemoveTransformation(object):
    """Remove selection from the stream.

    Only events whose mark is ``None`` survive; anything carrying a mark of
    any kind is dropped.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator of the unmarked ``(None, event)`` pairs
        """
        for mark, event in stream:
            if mark is not None:
                # Part of the selection: drop it.
                continue
            yield None, event
810
811
class UnwrapTransformation(object):
    """Remove outermost enclosing elements from selection.

    Events marked ENTER or EXIT are dropped; all other events, marked or
    not, pass through unchanged.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs without the ENTER
                 and EXIT events
        """
        for mark, event in stream:
            if mark in (ENTER, EXIT):
                continue
            yield mark, event
823
824
class WrapTransformation(object):
    """Wrap selection in an element."""

    def __init__(self, element):
        """Create the transform.

        :param element: the wrapper; either an `Element` instance, which is
                        used as-is, or any other value, which is passed to
                        the `Element` constructor
        """
        if not isinstance(element, Element):
            element = Element(element)
        self.element = element

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter; it is consumed by
                       two nested loops and must therefore be an iterator
        :return: a generator of ``(mark, event)`` pairs in which each
                 selection is surrounded by the wrapper's events, emitted
                 with a null mark
        """
        for mark, event in stream:
            if not mark:
                yield mark, event
                continue

            # All generated events except the last open the wrapper; the
            # last one closes it.
            wrapper = list(self.element.generate())
            for opening in wrapper[:-1]:
                yield None, opening
            yield mark, event

            first_mark = mark
            # True when no look-ahead event is left over to be emitted after
            # the wrapper has been closed.
            consumed = False
            for mark, event in stream:
                if first_mark is ENTER and mark is EXIT:
                    yield mark, event
                    consumed = True
                    break
                if not mark:
                    # First unmarked event: the selection is over.
                    break
                yield mark, event
            else:
                # The stream ran out while still inside the selection.
                consumed = True

            yield None, wrapper[-1]
            if not consumed:
                yield mark, event
858
859
class TraceTransformation(object):
    """Print events as they pass through the transform."""

    def __init__(self, prefix='', fileobj=None):
        """Trace constructor.

        :param prefix: text to prefix each traced line with.
        :param fileobj: the writable file-like object to write to; only its
                        ``write()`` method is used. ``sys.stdout`` is used
                        when this is ``None``.
        """
        self.prefix = prefix
        # Compare against None explicitly: a file-like object may well be
        # falsy (e.g. a buffer defining __len__ that is still empty) and must
        # not be silently swapped for stdout.
        if fileobj is None:
            fileobj = sys.stdout
        self.fileobj = fileobj

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Each item is written to the trace target as one line consisting of
        the prefix followed by ``str(item)``, and is then passed on
        unchanged.

        :param stream: the marked event stream to filter
        :return: a generator yielding the items of `stream` unchanged
        """
        for event in stream:
            self.fileobj.write(self.prefix + str(event) + '\n')
            yield event
880
881
class FilterTransformation(object):
    """Apply a normal stream filter to the selection. The filter is called once
    for each selection."""

    def __init__(self, filter):
        """Create the transform.

        :param filter: The stream filter to apply.
        """
        self.filter = filter

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        A selection is either one element (the events from ENTER up to and
        including EXIT) or a contiguous run of OUTSIDE events. The events of
        each selection are collected in a list, passed to the filter, and the
        filter's output is emitted with the OUTSIDE mark. All other events
        pass through unchanged.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        def flush(queue):
            # Run the filter over the queued events and empty the queue.
            if queue:
                for event in self.filter(queue):
                    yield OUTSIDE, event
                del queue[:]

        # The event that terminates a run of OUTSIDE events has to be looked
        # at again by the main loop: it may be the ENTER of an element that
        # directly follows the run. Emitting it as-is would let that whole
        # element bypass the filter, so it is pushed back instead.
        stream = PushBackStream(stream)
        queue = []
        for mark, event in stream:
            if mark is ENTER:
                queue.append(event)
                for mark, event in stream:
                    queue.append(event)
                    if mark is EXIT:
                        break
                for queue_event in flush(queue):
                    yield queue_event
            elif mark is OUTSIDE:
                queue.append(event)
                for mark, event in stream:
                    if mark is not OUTSIDE:
                        stream.push((mark, event))
                        break
                    queue.append(event)
                for queue_event in flush(queue):
                    yield queue_event
            else:
                yield mark, event
931
932
class MapTransformation(object):
    """Apply a function to the `data` element of events of ``kind`` in the
    selection.
    """

    def __init__(self, function, kind):
        """Create the transform.

        :param function: the function to apply; the function must take one
                         argument, the `data` element of each selected event
        :param kind: the stream event ``kind`` to apply the `function` to;
                     ``None`` applies it to selected events of every kind
        """
        self.kind = kind
        self.function = function

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, (kind, data, pos))`` pairs in which
                 `data` has been replaced by ``function(data)`` for marked
                 events of the configured kind
        """
        for mark, (kind, data, pos) in stream:
            applies = mark and self.kind in (None, kind)
            if applies:
                data = self.function(data)
            yield mark, (kind, data, pos)
958
959
class SubstituteTransformation(object):
    """Replace text matching a regular expression.

    Refer to the documentation for ``re.sub()`` for details.
    """
    def __init__(self, pattern, replace, count=0):
        """Create the transform.

        :param pattern: A regular expression object, or string. A string is
                        compiled with ``re.compile()``.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        """
        if not isinstance(pattern, basestring):
            self.pattern = pattern
        else:
            self.pattern = re.compile(pattern)
        self.replace = replace
        self.count = count

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Only marked TEXT events are rewritten; everything else passes through
        unchanged.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, (kind, data, pos))`` pairs
        """
        for mark, (kind, data, pos) in stream:
            if mark is None or kind is not TEXT:
                yield mark, (kind, data, pos)
                continue
            substituted = self.pattern.sub(self.replace, data, self.count)
            if isinstance(data, Markup):
                # Keep `Markup` input as `Markup` after the substitution.
                substituted = Markup(substituted)
            yield mark, (kind, substituted, pos)
992
993
class RenameTransformation(object):
    """Rename matching elements."""
    def __init__(self, name):
        """Create the transform.

        :param name: New element name; it is converted to a `QName`.
        """
        self.name = QName(name)

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The data of an ENTER event becomes ``(new name, original second
        item)`` and the data of an EXIT event becomes the new name. Events
        with any other mark are passed on unchanged.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, (kind, data, pos))`` pairs
        """
        for mark, (kind, data, pos) in stream:
            if mark is ENTER:
                renamed = (self.name, data[1])
            elif mark is EXIT:
                renamed = self.name
            else:
                renamed = data
            yield mark, (kind, renamed, pos)
1014
1015
class InjectorTransformation(object):
    """Abstract base class for transformations that inject content into a
    stream.

    Subclasses implement ``__call__`` and use ``_inject()`` to obtain the
    events to insert:

    >>> class Top(InjectorTransformation):
    ...     def __call__(self, stream):
    ...         for event in self._inject():
    ...             yield event
    ...         for event in stream:
    ...             yield event
    >>> html = HTML('<body>Some <em>test</em> text</body>')
    >>> print html | Transformer('.//em').apply(Top('Prefix '))
    Prefix <body>Some <em>test</em> text</body>
    """
    def __init__(self, content):
        """Create a new injector.

        :param content: An iterable of Genshi stream events, or a string to be
                        injected. A callable is also accepted; it is called
                        without arguments on every injection and its result
                        is injected.
        """
        self.content = content

    def _inject(self):
        """Generate the content as unmarked ``(None, event)`` pairs."""
        payload = self.content
        if hasattr(payload, '__call__'):
            payload = payload()
        for event in _ensure(payload):
            yield None, event
1044
1045
class ReplaceTransformation(InjectorTransformation):
    """Replace selection with content."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected where a selection starts, and the events of
        that selection are then dropped.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is None:
                yield mark, event
                continue
            first = mark
            for injected in self._inject():
                yield injected
            # Discard the remainder of the selection.
            for mark, event in stream:
                if first is ENTER:
                    # An element selection ends with (and includes) its EXIT.
                    if mark is EXIT:
                        break
                    continue
                if mark != first:
                    # First event past the selection: let the outer loop
                    # process it.
                    stream.push((mark, event))
                    break
1069
1070
class BeforeTransformation(InjectorTransformation):
    """Insert content before selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected immediately before the first event of each
        selection; the selection itself is passed on unchanged.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is None:
                yield mark, event
                continue
            first = mark
            for injected in self._inject():
                yield injected
            yield mark, event
            # Pass the rest of the selection through so that the content is
            # injected once per selection rather than once per event.
            for mark, event in stream:
                if first is not ENTER and mark != first:
                    # First event past the selection: let the outer loop
                    # process it.
                    stream.push((mark, event))
                    break
                yield mark, event
                if first is ENTER and mark is EXIT:
                    break
1095
1096
class AfterTransformation(InjectorTransformation):
    """Insert content after selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Each selection is passed on unchanged and the content is injected
        right after its last event.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        stream = PushBackStream(stream)
        for mark, event in stream:
            yield mark, event
            if not mark:
                continue
            first = mark
            for mark, event in stream:
                if first is not ENTER and mark != first:
                    # First event past the selection: let the outer loop
                    # process it.
                    stream.push((mark, event))
                    break
                yield mark, event
                if first is ENTER and mark is EXIT:
                    break
            for injected in self._inject():
                yield injected
1119
1120
class PrependTransformation(InjectorTransformation):
    """Prepend content to the inside of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected directly after every ENTER event.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            for injected in self._inject():
                yield injected
1134
1135
class AppendTransformation(InjectorTransformation):
    """Append content after the content of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected directly before the EXIT event of every
        selected element.

        :param stream: The marked event stream to filter; it is consumed by
                       two nested loops and must therefore be an iterator
        :return: a generator of ``(mark, event)`` pairs
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Pass the element's content through, holding back its EXIT.
            for mark, event in stream:
                if mark is EXIT:
                    break
                yield mark, event
            for injected in self._inject():
                yield injected
            # Emit the event the inner loop stopped on.
            yield mark, event
1154
1155
class AttrTransformation(object):
    """Set an attribute on selected elements."""

    def __init__(self, name, value):
        """Construct transform.

        :param name: name of the attribute that should be set
        :param value: the value to set; ``None`` removes the attribute. A
                      callable is called as ``value(name, event)`` for each
                      selected element and its result is used as the value.
        """
        self.value = value
        self.name = name

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Only ENTER events are modified; all other events pass through
        unchanged.

        :param stream: The marked event stream to filter
        :return: a generator of ``(mark, (kind, data, pos))`` pairs
        """
        is_dynamic = hasattr(self.value, '__call__')
        for mark, (kind, data, pos) in stream:
            if mark is not ENTER:
                yield mark, (kind, data, pos)
                continue
            if is_dynamic:
                new_value = self.value(self.name, (kind, data, pos))
            else:
                new_value = self.value
            if new_value is None:
                # A value of None means "remove the attribute".
                attrs = data[1] - [QName(self.name)]
            else:
                attrs = data[1] | [(QName(self.name), new_value)]
            yield mark, (kind, (data[0], attrs), pos)
1186
1187
1188
class StreamBuffer(Stream):
    """Stream event buffer used for cut and copy transformations."""

    def __init__(self):
        """Create the buffer, initially holding no events."""
        Stream.__init__(self, [])

    def append(self, event):
        """Add an event to the end of the buffer.

        :param event: the markup event to add
        """
        self.events.append(event)

    def reset(self):
        """Empty the buffer of events.

        The event list is cleared in place rather than replaced.
        """
        self.events[:] = []
1206
1207
class CopyTransformation(object):
    """Copy selected events into a buffer for later insertion."""

    def __init__(self, buffer, accumulate=False):
        """Create the copy transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if false (the default) the buffer is emptied here
                           and again before each selection is copied; if
                           true, events are only ever appended to it
        """
        self.accumulate = accumulate
        self.buffer = buffer
        if not accumulate:
            buffer.reset()

    def __call__(self, stream):
        """Apply the transformation to the marked stream.

        The stream itself passes through unchanged; the events of each
        selection are additionally appended to the buffer.

        :param stream: the marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        stream = PushBackStream(stream)

        for mark, event in stream:
            if not mark:
                yield mark, event
                continue
            if not self.accumulate:
                self.buffer.reset()
            # Collect the whole selection before emitting any of it.
            selection = [(mark, event)]
            self.buffer.append(event)
            first = mark
            for mark, event in stream:
                if first is not ENTER and mark != first:
                    # First event past the selection: let the outer loop
                    # process it.
                    stream.push((mark, event))
                    break
                selection.append((mark, event))
                self.buffer.append(event)
                if first is ENTER and mark is EXIT:
                    break
            for marked_event in selection:
                yield marked_event
1248
1249
class CutTransformation(object):
    """Cut selected events into a buffer for later insertion and remove the
    selection.
    """

    def __init__(self, buffer, accumulate=False):
        """Create the cut transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if false (the default) the buffer is emptied
                           before each selection is cut; if true, events are
                           only ever appended to it
        """
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Marked events are moved into the buffer instead of being emitted.
        When attributes are selected, the selected names are removed from
        the START event that follows them. In non-accumulating mode a BREAK
        event is emitted when a selection ends and the buffer tests true,
        provided no unmarked event has been emitted since that selection.

        :param stream: the marked event stream to filter
        :return: a generator of ``(mark, event)`` pairs
        """
        # Names of the attributes cut from the START event currently being
        # processed.
        attributes = []
        stream = PushBackStream(stream)
        # True once an unmarked event has been emitted since the last
        # selection.
        broken = False
        if not self.accumulate:
            self.buffer.reset()
        for mark, event in stream:
            if mark:
                # Send a BREAK event if there was no other event sent between
                if not self.accumulate:
                    if not broken and self.buffer:
                        yield BREAK, (BREAK, None, None)
                    self.buffer.reset()
                self.buffer.append(event)
                start = mark
                if mark is ATTR:
                    attributes.extend([name for name, _ in event[1][1]])
                for mark, event in stream:
                    if start is mark is ATTR:
                        attributes.extend([name for name, _ in event[1][1]])
                    # Handle non-element contiguous selection
                    if start is not ENTER and mark != start:
                        # Operating on the attributes of a START event
                        if start is ATTR:
                            kind, data, pos = event
                            assert kind is START
                            data = (data[0], data[1] - attributes)
                            # Start afresh for the next element. This must
                            # stay a list: setting it to None made the next
                            # attribute selection fail on `.extend()`.
                            attributes = []
                            stream.push((mark, (kind, data, pos)))
                        else:
                            stream.push((mark, event))
                        break
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                broken = False
            else:
                broken = True
                yield mark, event
        if not broken and self.buffer:
            yield BREAK, (BREAK, None, None)
Note: See TracBrowser for help on using the repository browser.