Edgewall Software

source: branches/stable/0.6.x/genshi/filters/transform.py

Last change on this file was 1080, checked in by cmlenz, 14 years ago

Avoid unicode literals in reprs of QName and Namespace when not necessary.

  • Property svn:eol-style set to native
File size: 46.2 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007-2009 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14"""A filter for functional-style transformations of markup streams.
15
16The `Transformer` filter provides a variety of transformations that can be
17applied to parts of streams that match given XPath expressions. These
18transformations can be chained to achieve results that would be comparatively
19tedious to achieve by writing stream filters by hand. The approach of chaining
20node selection and transformation has been inspired by the `jQuery`_ Javascript
21library.
22
23 .. _`jQuery`: http://jquery.com/
24
25For example, the following transformation removes the ``<title>`` element from
26the ``<head>`` of the input document:
27
28>>> from genshi.builder import tag
29>>> html = HTML('''<html>
30...  <head><title>Some Title</title></head>
31...  <body>
32...    Some <em>body</em> text.
33...  </body>
34... </html>''')
35>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
36...                                    .unwrap().wrap(tag.u))
37<html>
38  <head><title>Some Title</title></head>
39  <body>
40    Some <u>BODY</u> text.
41  </body>
42</html>
43
44The ``Transformer`` supports a large number of useful transformations out of the
45box, but custom transformations can be added easily.
46
47:since: version 0.5
48"""
49
50import re
51import sys
52
53from genshi.builder import Element
54from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
55from genshi.path import Path
56
57__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
58           'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
59
60
class TransformMark(str):
    """String subclass used to tag events in a transformation stream.

    Instances are cached by value: asking for a mark whose value has already
    been used returns the object created the first time, so two marks with
    the same value are the very same object.
    """
    __slots__ = []
    # Every mark created so far, keyed by the value it was created from.
    _instances = {}

    def __new__(cls, val):
        known = cls._instances
        if val not in known:
            known[val] = str.__new__(cls, val)
        return known[val]
68
69
# The marks below are module-level singletons. ``TransformMark`` hands back
# the same object for a given value, which is what allows the transformations
# in this module to compare marks by identity (e.g. ``mark is ENTER``).
ENTER = TransformMark('ENTER')
"""Stream augmentation mark indicating that a selected element is being
entered."""

INSIDE = TransformMark('INSIDE')
"""Stream augmentation mark indicating that processing is currently inside a
selected element."""

OUTSIDE = TransformMark('OUTSIDE')
"""Stream augmentation mark indicating that a match occurred outside a selected
element."""

ATTR = TransformMark('ATTR')
"""Stream augmentation mark indicating a selected element attribute."""

EXIT = TransformMark('EXIT')
"""Stream augmentation mark indicating that a selected element is being
exited."""

BREAK = TransformMark('BREAK')
"""Stream augmentation mark indicating a break between two otherwise contiguous
blocks of marked events.

This is used primarily by the cut() transform to provide later transforms with
an opportunity to operate on the cut buffer.
"""
96
97
class PushBackStream(object):
    """Allows a single event to be pushed back onto the stream and re-consumed.

    The wrapped stream is consumed lazily. While iterating, the consumer may
    call `push()` with one event; that event is then produced again before
    any further event is pulled from the underlying stream.
    """
    def __init__(self, stream):
        """Wrap `stream`.

        :param stream: any iterable of events
        """
        self.stream = iter(stream)
        # The pushed-back event, or None when nothing is pending.
        self.peek = None

    def push(self, event):
        """Push `event` back so that it is the next one produced.

        Only one event may be pending at a time.

        :param event: the event to re-deliver
        """
        assert self.peek is None
        self.peek = event

    def __iter__(self):
        """Iterate over the stream, re-delivering pushed-back events.

        Plain ``for`` iteration is used instead of calling the iterator's
        ``next`` method and re-raising ``StopIteration``: this works on every
        Python version and never lets ``StopIteration`` escape from the
        generator body.
        """
        # An event pushed before iteration starts is delivered first.
        while self.peek is not None:
            pending, self.peek = self.peek, None
            yield pending
        for event in self.stream:
            yield event
            # The consumer may have pushed an event back while we were
            # suspended; deliver it (repeatedly, if it is pushed back again)
            # before advancing the underlying stream.
            while self.peek is not None:
                pending, self.peek = self.peek, None
                yield pending
122
123
class Transformer(object):
    """Stream filter that can apply a variety of different transformations to
    a stream.

    Events to be transformed are selected with an XPath expression, and the
    chained transformations then operate on the events matched by that
    expression. Each marked event is in the form (mark, (kind, data, pos)),
    where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.

    The first three marks match `START` and `END` events, and any events
    contained `INSIDE` any selected XML/HTML element. A non-element match
    outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
    mark.

    >>> html = HTML('<html><head><title>Some Title</title></head>'
    ...             '<body>Some <em>body</em> text.</body></html>')

    Transformations act on selected stream events matching an XPath expression.
    Here's an example of removing some markup (the title, in this case)
    selected by an expression:

    >>> print(html | Transformer('head/title').remove())
    <html><head/><body>Some <em>body</em> text.</body></html>

    Inserted content can be passed in the form of a string, or a markup event
    stream, which includes streams generated programmatically via the
    `builder` module:

    >>> from genshi.builder import tag
    >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
    <html><head><title>Some Title</title></head><body><h1>Document
    Title</h1>Some <em>body</em> text.</body></html>

    Each XPath expression determines the set of tags that will be acted upon by
    subsequent transformations. In this example we select the ``<title>`` text,
    copy it into a buffer, then select the ``<body>`` element and paste the
    copied text into the body as ``<h1>`` enclosed text:

    >>> buffer = StreamBuffer()
    >>> print(html | Transformer('head/title/text()').copy(buffer)
    ...     .end().select('body').prepend(tag.h1(buffer)))
    <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
    <em>body</em> text.</body></html>

    Transformations can also be assigned and reused, although care must be
    taken when using buffers, to ensure that buffers are cleared between
    transforms:

    >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
    >>> print(html | emphasis)
    <html><head><title>Some Title</title></head><body>Some <em
    class="emphasis">body</em> text.</body></html>
    """

    __slots__ = ['transforms']

    def __init__(self, path='.'):
        """Construct a new transformation filter.

        The filter starts out with a single selection for `path`.

        :param path: an XPath expression (as string) or a `Path` instance
        """
        initial_selection = SelectTransformation(path)
        self.transforms = [initial_selection]

    def __call__(self, stream, keep_marks=False):
        """Run the stream through every transformation in the chain.

        Each event of the incoming stream is first paired with an `OUTSIDE`
        mark; the marked stream is then passed through the transformations in
        order.

        :param stream: the event stream to filter
        :param keep_marks: Do not strip transformer selection marks from the
                           stream. Useful for testing.
        :return: the transformed stream
        :rtype: `Stream`
        """
        marked = self._mark(stream)
        for transformation in self.transforms:
            marked = transformation(marked)
        if keep_marks:
            events = marked
        else:
            events = self._unmark(marked)
        # Carry over the serializer of the input stream, if it has one.
        serializer = getattr(stream, 'serializer', None)
        return Stream(events, serializer=serializer)

    def apply(self, function):
        """Apply a transformation to the stream.

        Transformations can be chained, similar to stream filters. Any callable
        accepting a marked stream can be used as a transform. Passing another
        `Transformer` appends all of its transformations. The receiver is left
        untouched; a new `Transformer` holding the extended chain is returned.

        As an example, here is a simple `TEXT` event upper-casing transform:

        >>> def upper(stream):
        ...     for mark, (kind, data, pos) in stream:
        ...         if mark and kind is TEXT:
        ...             yield mark, (kind, data.upper(), pos)
        ...         else:
        ...             yield mark, (kind, data, pos)
        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
        >>> print(short_stream | Transformer('.//em/text()').apply(upper))
        <body>Some <em>TEST</em> text</body>
        """
        if isinstance(function, Transformer):
            additions = function.transforms
        else:
            additions = [function]
        chained = Transformer()
        # Work on a copy so that the chain of this instance is not modified.
        links = self.transforms[:]
        links.extend(additions)
        chained.transforms = links
        return chained

    #{ Selection operations

    def select(self, path):
        """Mark events matching the given XPath expression, within the current
        selection.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer().select('.//em').trace())
        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName('em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param path: an XPath expression (as string) or a `Path` instance
        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(SelectTransformation(path))

    def invert(self):
        """Invert selection so that marked events become unmarked, and vice
        versa.

        Specifically, all marks are converted to null marks, and all null marks
        are converted to OUTSIDE marks.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('//em').invert().trace())
        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
        (None, ('TEXT', u'test', (None, 1, 15)))
        (None, ('END', QName('em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :rtype: `Transformer`
        """
        return self.apply(InvertTransformation())

    def end(self):
        """End current selection, allowing all events to be selected.

        Example:

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('//em').end().trace())
        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(EndTransformation())

    #{ Deletion operations

    def empty(self):
        """Empty selected elements of all content.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').empty())
        <html><head><title>Some Title</title></head><body>Some <em/>
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(EmptyTransformation())

    def remove(self):
        """Remove selection from the stream.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').remove())
        <html><head><title>Some Title</title></head><body>Some
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(RemoveTransformation())

    #{ Direct element operations

    def unwrap(self):
        """Remove outermost enclosing elements from selection.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').unwrap())
        <html><head><title>Some Title</title></head><body>Some body
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(UnwrapTransformation())

    def wrap(self, element):
        """Wrap selection in an element.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').wrap('strong'))
        <html><head><title>Some Title</title></head><body>Some
        <strong><em>body</em></strong> text.</body></html>

        :param element: either a tag name (as string) or an `Element` object
        :rtype: `Transformer`
        """
        return self.apply(WrapTransformation(element))

    #{ Content insertion operations

    def replace(self, content):
        """Replace selection with content.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//title/text()').replace('New Title'))
        <html><head><title>New Title</title></head><body>Some <em>body</em>
        text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(ReplaceTransformation(content))

    def before(self, content):
        """Insert content before selection.

        In this example we insert the word 'emphasised' before the <em> opening
        tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').before('emphasised '))
        <html><head><title>Some Title</title></head><body>Some emphasised
        <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(BeforeTransformation(content))

    def after(self, content):
        """Insert content after selection.

        Here, we insert some text after the </em> closing tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').after(' rock'))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        rock text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AfterTransformation(content))

    def prepend(self, content):
        """Insert content after the ENTER event of the selection.

        Inserting some new text at the start of the <body>:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//body').prepend('Some new body text. '))
        <html><head><title>Some Title</title></head><body>Some new body text.
        Some <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(PrependTransformation(content))

    def append(self, content):
        """Insert content before the END event of the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//body').append(' Some new body text.'))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        text. Some new body text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AppendTransformation(content))

    #{ Attribute manipulation

    def attr(self, name, value):
        """Add, replace or delete an attribute on selected elements.

        If `value` evaluates to `None` the attribute will be deleted from the
        element:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
        ...             '</html>')
        >>> print(html | Transformer('body/em').attr('class', None))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        <em>text</em>.</body></html>

        Otherwise the attribute will be set to `value`:

        >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
        <html><head><title>Some Title</title></head><body>Some <em
        class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>

        If `value` is a callable it will be called with the attribute name and
        the `START` event for the matching element. Its return value will then
        be used to set the attribute:

        >>> def print_attr(name, event):
        ...     attrs = event[1][1]
        ...     print(attrs)
        ...     return attrs.get(name)
        >>> print(html | Transformer('body/em').attr('class', print_attr))
        Attrs([(QName('class'), u'before')])
        Attrs()
        <html><head><title>Some Title</title></head><body>Some <em
        class="before">body</em> <em>text</em>.</body></html>

        :param name: the name of the attribute
        :param value: the value that should be set for the attribute.
        :rtype: `Transformer`
        """
        return self.apply(AttrTransformation(name, value))

    #{ Buffer operations

    def copy(self, buffer, accumulate=False):
        """Copy selection into buffer.

        The buffer is replaced by each *contiguous* selection before being passed
        to the next transformation. If accumulate=True, further selections will
        be appended to the buffer rather than replacing it.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('head/title/text()').copy(buffer)
        ...     .end().select('body').prepend(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>

        This example illustrates that only a single contiguous selection will
        be buffered:

        >>> print(html | Transformer('head/title/text()').copy(buffer)
        ...     .end().select('body/em').copy(buffer).end().select('body')
        ...     .prepend(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>
        >>> print(buffer)
        <em>body</em>

        Element attributes can also be copied for later use:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body><em>Some</em> <em class="before">body</em>'
        ...             '<em>text</em>.</body></html>')
        >>> buffer = StreamBuffer()
        >>> def apply_attr(name, entry):
        ...     return list(buffer)[0][1][1].get('class')
        >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
        ...     .end().buffer().select('body/em[not(@class)]')
        ...     .attr('class', apply_attr))
        <html><head><title>Some Title</title></head><body><em
        class="before">Some</em> <em class="before">body</em><em
        class="before">text</em>.</body></html>


        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its contents
        :rtype: `Transformer`
        :note: Copy (and cut) copy each individual selected object into the
               buffer before passing to the next transform. For example, the
               XPath ``*|text()`` will select all elements and text, each
               instance of which will be copied to the buffer individually
               before passing to the next transform. This has implications for
               how ``StreamBuffer`` objects can be used, so some
               experimentation may be required.

        """
        return self.apply(CopyTransformation(buffer, accumulate))

    def cut(self, buffer, accumulate=False):
        """Copy selection into buffer and remove the selection from the stream.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em/text()').cut(buffer)
        ...     .end().select('.//em').after(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body>Some
        <em/><h1>body</h1> text.</body></html>

        Specifying accumulate=True appends all selected intervals onto the
        buffer. Combining this with the .buffer() operation allows us to
        operate on all copied events rather than per-segment. See the
        documentation on buffer() for more information.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its contents
        :rtype: `Transformer`
        :note: this transformation will buffer the entire input stream
        """
        return self.apply(CutTransformation(buffer, accumulate))

    def buffer(self):
        """Buffer the entire stream (can consume a considerable amount of
        memory).

        Useful in conjunction with copy(accumulate=True) and
        cut(accumulate=True) to ensure that all marked events in the entire
        stream are copied to the buffer before further transformations are
        applied.

        For example, to move all <note> elements inside a <notes> tag at the
        top of the document:

        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
        ...            'text <note>two</note>.</body></doc>')
        >>> buffer = StreamBuffer()
        >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
        ...     .end().buffer().select('notes').prepend(buffer))
        <doc><notes><note>one</note><note>two</note></notes><body>Some  text
        .</body></doc>

        """
        # ``list`` acts as the transformation: it drains the marked stream
        # into a list, which is then handed to the next transformation.
        return self.apply(list)

    #{ Miscellaneous operations

    def filter(self, filter):
        """Apply a normal stream filter to the selection. The filter is called
        once for each contiguous block of marked events.

        >>> from genshi.filters.html import HTMLSanitizer
        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
        ...             '</script> and some more text</body></html>')
        >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
        <html><body>Some text and some more text</body></html>

        :param filter: The stream filter to apply.
        :rtype: `Transformer`
        """
        return self.apply(FilterTransformation(filter))

    def map(self, function, kind):
        """Applies a function to the ``data`` element of events of ``kind`` in
        the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...               '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
        text.</body></html>

        :param function: the function to apply
        :param kind: the kind of event the function should be applied to
        :rtype: `Transformer`
        """
        return self.apply(MapTransformation(function, kind))

    def substitute(self, pattern, replace, count=1):
        """Replace text matching a regular expression.

        Refer to the documentation for ``re.sub()`` for details.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b>\\n'
        ...             '<i>some italicised text</i></body></html>')
        >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
        <html><body>Some text, some more text and <b>SOME bold text</b>
        <i>some italicised text</i></body></html>
        >>> tags = tag.html(tag.body('Some text, some more text and\\n',
        ...      Markup('<b>some bold text</b>')))
        >>> print(tags.generate() | Transformer('body').substitute(
        ...     '(?i)some', 'SOME'))
        <html><body>SOME text, some more text and
        <b>SOME bold text</b></body></html>

        :param pattern: A regular expression object or string.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        :rtype: `Transformer`
        """
        return self.apply(SubstituteTransformation(pattern, replace, count))

    def rename(self, name):
        """Rename matching elements.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b></body></html>')
        >>> print(html | Transformer('body/b').rename('strong'))
        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>

        :param name: the new name for the matching elements
        :rtype: `Transformer`
        """
        return self.apply(RenameTransformation(name))

    def trace(self, prefix='', fileobj=None):
        """Print events as they pass through the transform.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('em').trace())
        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName('em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param prefix: a string to prefix each event with in the output
        :param fileobj: the writable file-like object to write to; defaults to
                        the standard output stream
        :rtype: `Transformer`
        """
        return self.apply(TraceTransformation(prefix, fileobj=fileobj))

    # Internal methods

    def _mark(self, stream):
        """Pair every event of `stream` with an `OUTSIDE` mark."""
        for plain_event in stream:
            yield OUTSIDE, plain_event

    def _unmark(self, stream):
        """Strip the marks from `stream`, yielding the bare events.

        Events whose kind is `None`, `ATTR` or `BREAK` are dropped instead of
        being yielded.
        """
        for _mark, event in stream:
            kind = event[0]
            if kind is None or kind is ATTR or kind is BREAK:
                continue
            yield event
690
691
class SelectTransformation(object):
    """Select and mark events that match an XPath expression."""

    def __init__(self, path):
        """Create selection.

        :param path: an XPath expression (as string) or a `Path` object
        """
        if not isinstance(path, Path):
            path = Path(path)
        self.path = path

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Events carrying a `None` mark are passed through untouched; every
        other event is run through the path test and re-marked according to
        the result.

        :param stream: the marked event stream to filter
        """
        namespaces = {}
        variables = {}
        test = self.path.test()
        # The outer and the inner loop below must advance the *same* iterator,
        # so that the events of a selected element are consumed exactly once.
        stream = iter(stream)
        for mark, event in stream:
            if mark is None:
                yield mark, event
                continue
            result = test(event, namespaces, variables)
            # XXX This is effectively genshi.core._ensure() for transform
            # streams.
            if result is True:
                if event[0] is START:
                    yield ENTER, event
                    # Mark everything up to the matching END event. Nested
                    # iteration is used rather than calling the iterator's
                    # ``next`` method directly, so a stream that ends in the
                    # middle of the element simply ends this generator instead
                    # of leaking StopIteration out of it.
                    depth = 1
                    for _submark, subevent in stream:
                        if subevent[0] is START:
                            depth += 1
                        elif subevent[0] is END:
                            depth -= 1
                        if depth == 0:
                            yield EXIT, subevent
                        else:
                            yield INSIDE, subevent
                        # Keep the path test's state in step with the events
                        # that were consumed here.
                        test(subevent, namespaces, variables, updateonly=True)
                        if depth == 0:
                            break
                else:
                    yield OUTSIDE, event
            elif isinstance(result, Attrs):
                # XXX  Selected *attributes* are emitted as an extra event
                # whose "kind" is ATTR, to indicate they are not really part
                # of the stream. The event they were selected from follows,
                # unmarked.
                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
                yield None, event
            elif isinstance(result, tuple):
                # The test produced an event of its own; select that instead.
                yield OUTSIDE, result
            elif result:
                # XXX Assume everything else is "text"?
                yield None, (TEXT, unicode(result), (None, -1, -1))
            else:
                yield None, event
750
751
class InvertTransformation(object):
    """Invert selection so that marked events become unmarked, and vice versa.

    Specifically, every event that arrives with a mark leaves with a null
    mark, and every event that arrives with a null mark leaves with an
    OUTSIDE mark.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            yield (None if mark else OUTSIDE), event
769
770
class EndTransformation(object):
    """End the current selection.

    Whatever mark an event carried on input is discarded; every event is
    re-emitted with the ``OUTSIDE`` mark.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator of ``(OUTSIDE, event)`` pairs
        """
        for _mark, event in stream:
            yield OUTSIDE, event
781
782
class EmptyTransformation(object):
    """Empty selected elements of all content.

    The ``ENTER`` and ``EXIT`` events of a selected element are kept, while
    everything that arrives between them is dropped.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter; it is consumed by
                       two nested loops, so it needs to be a one-shot
                       iterator rather than a re-iterable sequence
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Swallow the element's content; only its EXIT event survives.
            for inner_mark, inner_event in stream:
                if inner_mark is EXIT:
                    yield inner_mark, inner_event
                    break
798
799
class RemoveTransformation(object):
    """Remove selection from the stream.

    Only events whose mark is ``None`` are passed through; every marked
    event is discarded.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator over the unmarked ``(mark, event)`` pairs
        """
        for mark, event in stream:
            if mark is not None:
                # Part of the selection: drop it.
                continue
            yield mark, event
811
812
class UnwrapTransformation(object):
    """Remove outermost enclosing elements from selection.

    Events marked ``ENTER`` or ``EXIT`` are dropped; all other events pass
    through with their mark unchanged.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark in (ENTER, EXIT):
                # The enclosing start/end tag of a selected element.
                continue
            yield mark, event
824
825
class WrapTransformation(object):
    """Wrap selection in an element."""

    def __init__(self, element):
        """Create the transform.

        :param element: either an `Element` instance, which is used as is,
                        or any other value, which is passed to the
                        `Element` constructor
        """
        if not isinstance(element, Element):
            element = Element(element)
        self.element = element

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The events generated by the wrapping element, except the last one,
        are emitted before each selection, and the last one is emitted after
        it. All wrapper events carry a null mark.

        :param stream: the marked event stream to filter; it is consumed by
                       two nested loops, so it needs to be a one-shot
                       iterator
        """
        for mark, event in stream:
            if not mark:
                yield mark, event
                continue

            wrapper = list(self.element.generate())
            for opening in wrapper[:-1]:
                yield None, opening
            yield mark, event

            first = mark
            # The unmarked event that terminated a non-element selection, if
            # any; it has to be re-emitted after the wrapper is closed.
            leftover = None
            for mark, event in stream:
                if first is ENTER and mark is EXIT:
                    yield mark, event
                    break
                if not mark:
                    leftover = (mark, event)
                    break
                yield mark, event

            yield None, wrapper[-1]
            if leftover is not None:
                yield leftover
859
860
class TraceTransformation(object):
    """Print events as they pass through the transform."""

    def __init__(self, prefix='', fileobj=None):
        """Trace constructor.

        :param prefix: text to prefix each traced line with.
        :param fileobj: the writable file-like object to write to; when it
                        is ``None``, ``sys.stdout`` (as bound at construction
                        time) is used
        """
        self.prefix = prefix
        # Compare against None explicitly: a file-like object may be falsy
        # (for example when it defines ``__len__`` and is still empty), and
        # must not be silently replaced by stdout in that case.
        if fileobj is None:
            fileobj = sys.stdout
        self.fileobj = fileobj

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Each item of the stream is written to the file object on a line of
        its own, preceded by the prefix, and then passed through unchanged.

        :param stream: the marked event stream to filter
        """
        for event in stream:
            self.fileobj.write('%s%s\n' % (self.prefix, event))
            yield event
881
882
class FilterTransformation(object):
    """Apply a normal stream filter to the selection. The filter is called once
    for each selection."""

    def __init__(self, filter):
        """Create the transform.

        :param filter: The stream filter to apply.
        """
        self.filter = filter

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The events of each selection are collected in a queue and handed to
        the filter as a list; whatever the filter produces is emitted with
        the ``OUTSIDE`` mark. Events that are not part of a selection are
        passed through unchanged.

        :param stream: The marked event stream to filter
        """
        def flush(queue):
            # Run the filter over the queued events, then empty the queue so
            # that it can be reused for the next selection.
            if queue:
                for event in self.filter(queue):
                    yield OUTSIDE, event
                del queue[:]

        queue = []
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is ENTER:
                # Element selection: everything up to and including EXIT.
                queue.append(event)
                for mark, event in stream:
                    queue.append(event)
                    if mark is EXIT:
                        break
                for queue_event in flush(queue):
                    yield queue_event
            elif mark is OUTSIDE:
                # Non-element selection: a run of contiguous OUTSIDE events.
                queue.append(event)
                for mark, event in stream:
                    if mark is not OUTSIDE:
                        # Hand the terminating event back to the outer loop
                        # instead of emitting it directly; if it is an ENTER
                        # event, the element it opens must be filtered as a
                        # selection of its own rather than passed through.
                        stream.push((mark, event))
                        break
                    queue.append(event)
                for queue_event in flush(queue):
                    yield queue_event
            else:
                yield mark, event
932
933
class MapTransformation(object):
    """Apply a function to the `data` element of events of ``kind`` in the
    selection.
    """

    def __init__(self, function, kind):
        """Create the transform.

        :param function: the function to apply; it is called with a single
                         argument, the `data` element of a selected event,
                         and its return value replaces that element
        :param kind: the stream event ``kind`` to apply the `function` to;
                     ``None`` matches events of every kind
        """
        self.kind = kind
        self.function = function

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            kind, data, pos = event
            # Only marked events of the requested kind are transformed.
            if mark and self.kind in (None, kind):
                data = self.function(data)
            yield mark, (kind, data, pos)
959
960
class SubstituteTransformation(object):
    """Replace text matching a regular expression.

    Refer to the documentation for ``re.sub()`` for details.
    """
    def __init__(self, pattern, replace, count=0):
        """Create the transform.

        :param pattern: A regular expression object, or string. A string is
                        compiled with ``re.compile()``.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        """
        if isinstance(pattern, basestring):
            pattern = re.compile(pattern)
        self.pattern = pattern
        self.replace = replace
        self.count = count

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Only marked ``TEXT`` events are rewritten; everything else passes
        through untouched.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            kind, data, pos = event
            if mark is not None and kind is TEXT:
                substituted = self.pattern.sub(self.replace, data, self.count)
                # Keep `Markup` text as `Markup` after the substitution.
                if isinstance(data, Markup):
                    substituted = Markup(substituted)
                data = substituted
            yield mark, (kind, data, pos)
993
994
class RenameTransformation(object):
    """Rename matching elements."""
    def __init__(self, name):
        """Create the transform.

        :param name: New element name; it is converted to a `QName`.
        """
        self.name = QName(name)

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            kind, data, pos = event
            if mark is EXIT:
                # The data of the closing event is replaced by the new name.
                data = self.name
            elif mark is ENTER:
                # The opening event keeps its second data item and only gets
                # the new name.
                data = (self.name, data[1])
            yield mark, (kind, data, pos)
1015
1016
class InjectorTransformation(object):
    """Abstract base class for transformations that inject content into a
    stream.

    Subclasses implement ``__call__`` and use ``_inject()`` to obtain the
    events to insert.

    >>> class Top(InjectorTransformation):
    ...     def __call__(self, stream):
    ...         for event in self._inject():
    ...             yield event
    ...         for event in stream:
    ...             yield event
    >>> html = HTML('<body>Some <em>test</em> text</body>')
    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
    Prefix <body>Some <em>test</em> text</body>
    """
    def __init__(self, content):
        """Create a new injector.

        :param content: An iterable of Genshi stream events, or a string to be
                        injected. A callable is also accepted; it is invoked
                        without arguments on every injection to produce the
                        content.
        """
        self.content = content

    def _inject(self):
        """Generate the content to inject as unmarked events.

        :return: a generator of ``(None, event)`` pairs
        """
        source = self.content
        if hasattr(source, '__call__'):
            # Content factories are evaluated anew for every injection.
            source = source()
        for event in _ensure(source):
            yield None, event
1045
1046
class ReplaceTransformation(InjectorTransformation):
    """Replace selection with content."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        events = PushBackStream(stream)
        for mark, event in events:
            if mark is None:
                yield mark, event
                continue

            first = mark
            for injected in self._inject():
                yield injected

            # Discard the remainder of the selection that was just replaced.
            for mark, event in events:
                if first is ENTER:
                    # Element selection: it ends with its EXIT event.
                    if mark is EXIT:
                        break
                elif mark != first:
                    # Other selections end at the first event with a
                    # different mark, which still has to be processed.
                    events.push((mark, event))
                    break
1070
1071
class BeforeTransformation(InjectorTransformation):
    """Insert content before selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        events = PushBackStream(stream)
        for mark, event in events:
            if mark is None:
                yield mark, event
                continue

            first = mark
            for injected in self._inject():
                yield injected
            yield mark, event

            # Emit the remainder of the selection so that content is only
            # injected once per selection.
            for mark, event in events:
                if mark != first and first is not ENTER:
                    # End of a non-element selection: reprocess this event.
                    events.push((mark, event))
                    break
                yield mark, event
                if first is ENTER and mark is EXIT:
                    break
1096
1097
class AfterTransformation(InjectorTransformation):
    """Insert content after selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        events = PushBackStream(stream)
        for mark, event in events:
            yield mark, event
            if not mark:
                continue

            first = mark
            # Pass the remainder of the selection through before injecting.
            for mark, event in events:
                if first is not ENTER and mark != first:
                    # End of a non-element selection: reprocess this event
                    # once the content has been injected.
                    events.push((mark, event))
                    break
                yield mark, event
                if first is ENTER and mark is EXIT:
                    break

            for injected in self._inject():
                yield injected
1120
1121
class PrependTransformation(InjectorTransformation):
    """Prepend content to the inside of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected directly after every ``ENTER`` event.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            for injected in self._inject():
                yield injected
1135
1136
class AppendTransformation(InjectorTransformation):
    """Append content after the content of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        The content is injected directly before the ``EXIT`` event of every
        selected element.

        :param stream: The marked event stream to filter; it is consumed by
                       two nested loops, so it needs to be a one-shot
                       iterator
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Pass the element's content through, holding back its EXIT
            # event until the content has been injected.
            for mark, event in stream:
                if mark is EXIT:
                    break
                yield mark, event
            for injected in self._inject():
                yield injected
            yield mark, event
1155
1156
class AttrTransformation(object):
    """Set an attribute on selected elements."""

    def __init__(self, name, value):
        """Construct transform.

        :param name: name of the attribute that should be set
        :param value: the value to set; a callable is invoked with the
                      attribute name and the event, and its result is used
                      as the value. A value of ``None`` removes the attribute.
        """
        self.value = value
        self.name = name

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        # Whether the value is callable is determined once per stream.
        compute_value = hasattr(self.value, '__call__')
        for mark, event in stream:
            kind, data, pos = event
            if mark is ENTER:
                value = self.value
                if compute_value:
                    value = value(self.name, (kind, data, pos))
                current = data[1]
                qname = QName(self.name)
                if value is None:
                    # No value: take the attribute off the element.
                    updated = current - [qname]
                else:
                    updated = current | [(qname, value)]
                data = (data[0], updated)
            yield mark, (kind, data, pos)
1187
1188
1189
class StreamBuffer(Stream):
    """Stream event buffer used for cut and copy transformations."""

    def __init__(self):
        """Create the buffer, initialised as a stream over an empty list."""
        Stream.__init__(self, [])

    def append(self, event):
        """Add an event to the buffer.

        :param event: the markup event to add
        """
        # `events` is presumably the list handed to `Stream.__init__` above;
        # verify against the `Stream` class.
        self.events.append(event)

    def reset(self):
        """Empty the buffer of events.

        The event list is cleared in place rather than replaced.
        """
        self.events[:] = []
1207
1208
class CopyTransformation(object):
    """Copy selected events into a buffer for later insertion."""

    def __init__(self, buffer, accumulate=False):
        """Create the copy transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if false (the default), the buffer is emptied
                           here and again before each selection is copied;
                           if true, selections are appended to whatever the
                           buffer already contains
        """
        self.accumulate = accumulate
        self.buffer = buffer
        if not accumulate:
            buffer.reset()

    def __call__(self, stream):
        """Apply the transformation to the marked stream.

        The stream itself is passed through unchanged; the events of a
        selection are emitted once the whole selection has been copied.

        :param stream: the marked event stream to filter
        """
        events = PushBackStream(stream)

        for mark, event in events:
            if not mark:
                yield mark, event
                continue

            if not self.accumulate:
                self.buffer.reset()
            selection = [(mark, event)]
            self.buffer.append(event)
            first = mark
            for mark, event in events:
                if first is not ENTER and mark != first:
                    # End of a non-element selection: reprocess this event.
                    events.push((mark, event))
                    break
                selection.append((mark, event))
                self.buffer.append(event)
                if first is ENTER and mark is EXIT:
                    break
            for item in selection:
                yield item
1249
1250
class CutTransformation(object):
    """Cut selected events into a buffer for later insertion and remove the
    selection.
    """

    def __init__(self, buffer, accumulate=False):
        """Create the cut transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if false (the default), the buffer is emptied
                           before each selection is cut; if true, selections
                           are appended to whatever the buffer already
                           contains
        """
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Selected events are moved into the buffer and not emitted. When
        attributes are selected, they are removed from the ``START`` event
        that follows the selection.

        :param stream: the marked event stream to filter
        """
        # Names of the selected attributes that still have to be stripped
        # from the upcoming START event.
        attributes = []
        stream = PushBackStream(stream)
        # True while the most recently handled event was an unselected one.
        broken = False
        if not self.accumulate:
            self.buffer.reset()
        for mark, event in stream:
            if mark:
                # Send a BREAK event if there was no other event sent between
                if not self.accumulate:
                    if not broken and self.buffer:
                        yield BREAK, (BREAK, None, None)
                    self.buffer.reset()
                self.buffer.append(event)
                start = mark
                if mark is ATTR:
                    attributes.extend([name for name, _ in event[1][1]])
                for mark, event in stream:
                    if start is mark is ATTR:
                        attributes.extend([name for name, _ in event[1][1]])
                    # Handle non-element contiguous selection
                    if start is not ENTER and mark != start:
                        # Operating on the attributes of a START event
                        if start is ATTR:
                            kind, data, pos = event
                            assert kind is START
                            data = (data[0], data[1] - attributes)
                            # Start over with an empty list (not None), so
                            # that a later attribute selection in the same
                            # stream can be collected with extend() again.
                            attributes = []
                            stream.push((mark, (kind, data, pos)))
                        else:
                            stream.push((mark, event))
                        break
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                broken = False
            else:
                broken = True
                yield mark, event
        if not broken and self.buffer:
            yield BREAK, (BREAK, None, None)
Note: See TracBrowser for help on using the repository browser.