1 | # -*- coding: utf-8 -*- |
---|
2 | # |
---|
3 | # Copyright (C) 2007-2009 Edgewall Software |
---|
4 | # All rights reserved. |
---|
5 | # |
---|
6 | # This software is licensed as described in the file COPYING, which |
---|
7 | # you should have received as part of this distribution. The terms |
---|
8 | # are also available at http://genshi.edgewall.org/wiki/License. |
---|
9 | # |
---|
10 | # This software consists of voluntary contributions made by many |
---|
11 | # individuals. For the exact contribution history, see the revision |
---|
12 | # history and logs, available at http://genshi.edgewall.org/log/. |
---|
13 | |
---|
14 | """A filter for functional-style transformations of markup streams. |
---|
15 | |
---|
16 | The `Transformer` filter provides a variety of transformations that can be |
---|
17 | applied to parts of streams that match given XPath expressions. These |
---|
18 | transformations can be chained to achieve results that would be comparatively |
---|
19 | tedious to achieve by writing stream filters by hand. The approach of chaining |
---|
20 | node selection and transformation has been inspired by the `jQuery`_ Javascript |
---|
21 | library. |
---|
22 | |
---|
23 | .. _`jQuery`: http://jquery.com/ |
---|
24 | |
---|
25 | For example, the following transformation upper-cases the text of the ``<em>`` |
---|
26 | element in the ``<body>`` and replaces the ``<em>`` tag itself with ``<u>``: |
---|
27 | |
---|
28 | >>> from genshi.builder import tag |
---|
29 | >>> html = HTML('''<html> |
---|
30 | ... <head><title>Some Title</title></head> |
---|
31 | ... <body> |
---|
32 | ... Some <em>body</em> text. |
---|
33 | ... </body> |
---|
34 | ... </html>''') |
---|
35 | >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) |
---|
36 | ... .unwrap().wrap(tag.u)) |
---|
37 | <html> |
---|
38 | <head><title>Some Title</title></head> |
---|
39 | <body> |
---|
40 | Some <u>BODY</u> text. |
---|
41 | </body> |
---|
42 | </html> |
---|
43 | |
---|
44 | The ``Transformer`` supports a large number of useful transformations out of the |
---|
45 | box, but custom transformations can be added easily. |
---|
46 | |
---|
47 | :since: version 0.5 |
---|
48 | """ |
---|
49 | |
---|
50 | import re |
---|
51 | import sys |
---|
52 | |
---|
53 | from genshi.builder import Element |
---|
54 | from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup |
---|
55 | from genshi.path import Path |
---|
56 | |
---|
57 | __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', |
---|
58 | 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK'] |
---|
59 | |
---|
60 | |
---|
class TransformMark(str):
    """A mark on a transformation stream.

    Marks are interned: constructing a mark for a value that has been seen
    before returns the very same object, so marks can be compared with
    ``is``.
    """
    __slots__ = []
    # Cache of every mark created so far, keyed by the constructor argument.
    _instances = {}

    def __new__(cls, val):
        registry = cls._instances
        try:
            return registry[val]
        except KeyError:
            mark = str.__new__(cls, val)
            registry[val] = mark
            return mark
---|
68 | |
---|
69 | |
---|
# Marks attached to events while they travel through a transformation chain.
# Every marked event has the form ``(mark, (kind, data, pos))``.  Each mark is
# an interned `TransformMark`, so transformations compare marks by identity.
# `ATTR` doubles as the event *kind* of the synthetic events that
# `SelectTransformation` emits for selected attributes.

ENTER = TransformMark('ENTER')
"""Stream augmentation mark indicating that a selected element is being
entered."""

INSIDE = TransformMark('INSIDE')
"""Stream augmentation mark indicating that processing is currently inside a
selected element."""

OUTSIDE = TransformMark('OUTSIDE')
"""Stream augmentation mark indicating that a match occurred outside a selected
element."""

ATTR = TransformMark('ATTR')
"""Stream augmentation mark indicating a selected element attribute."""

EXIT = TransformMark('EXIT')
"""Stream augmentation mark indicating that a selected element is being
exited."""

BREAK = TransformMark('BREAK')
"""Stream augmentation mark indicating a break between two otherwise contiguous
blocks of marked events.

This is used primarily by the cut() transform to provide later transforms with
an opportunity to operate on the cut buffer.
"""
---|
96 | |
---|
97 | |
---|
class PushBackStream(object):
    """Allows a single event to be pushed back onto the stream and re-consumed.

    The wrapped iterable is consumed lazily.  While iterating, the consumer
    may call `push()` with the event it has just received (or any other
    event); that event is then produced again before the next event of the
    underlying stream.
    """

    def __init__(self, stream):
        """Wrap `stream`.

        :param stream: any iterable of events
        """
        self.stream = iter(stream)
        # The pushed-back event, or None when nothing is pending.
        self.peek = None

    def push(self, event):
        """Push `event` back so that it is the next event produced.

        Only one event can be pending at a time.  Because ``None`` is used
        to signal "nothing pending", pushing ``None`` has no effect.

        :param event: the event to re-deliver
        """
        assert self.peek is None
        self.peek = event

    def __iter__(self):
        """Iterate over the stream, replaying pushed-back events first."""
        # An event may have been pushed before iteration (re)started.
        while self.peek is not None:
            pushed = self.peek
            self.peek = None
            yield pushed
        # A plain ``for`` loop is used instead of calling ``.next()`` and
        # re-raising StopIteration: it works with both the Python 2 and the
        # Python 3 iterator protocol and never lets StopIteration escape
        # from the generator body (a RuntimeError under PEP 479).
        for event in self.stream:
            yield event
            # The consumer may push back while we are suspended, possibly
            # again after receiving the replayed event.
            while self.peek is not None:
                pushed = self.peek
                self.peek = None
                yield pushed
---|
122 | |
---|
123 | |
---|
class Transformer(object):
    """Stream filter that can apply a variety of different transformations to
    a stream.

    This is achieved by selecting the events to be transformed using XPath,
    then applying the transformations to the events matched by the path
    expression. Each marked event is in the form (mark, (kind, data, pos)),
    where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.

    The first three marks match `START` and `END` events, and any events
    contained `INSIDE` any selected XML/HTML element. A non-element match
    outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
    mark. Selected attributes are additionally reported through synthetic
    events carrying the `ATTR` mark, and `BREAK` separates otherwise
    contiguous blocks of marked events.

    >>> html = HTML('<html><head><title>Some Title</title></head>'
    ...             '<body>Some <em>body</em> text.</body></html>')

    Transformations act on selected stream events matching an XPath expression.
    Here's an example of removing some markup (the title, in this case)
    selected by an expression:

    >>> print(html | Transformer('head/title').remove())
    <html><head/><body>Some <em>body</em> text.</body></html>

    Inserted content can be passed in the form of a string, or a markup event
    stream, which includes streams generated programmatically via the
    `builder` module:

    >>> from genshi.builder import tag
    >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
    <html><head><title>Some Title</title></head><body><h1>Document
    Title</h1>Some <em>body</em> text.</body></html>

    Each XPath expression determines the set of tags that will be acted upon by
    subsequent transformations. In this example we select the ``<title>`` text,
    copy it into a buffer, then select the ``<body>`` element and paste the
    copied text into the body as ``<h1>`` enclosed text:

    >>> buffer = StreamBuffer()
    >>> print(html | Transformer('head/title/text()').copy(buffer)
    ...     .end().select('body').prepend(tag.h1(buffer)))
    <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
    <em>body</em> text.</body></html>

    Transformations can also be assigned and reused, although care must be
    taken when using buffers, to ensure that buffers are cleared between
    transforms:

    >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
    >>> print(html | emphasis)
    <html><head><title>Some Title</title></head><body>Some <em
    class="emphasis">body</em> text.</body></html>
    """

    __slots__ = ['transforms']

    def __init__(self, path='.'):
        """Construct a new transformation filter.

        :param path: an XPath expression (as string) or a `Path` instance
        """
        self.transforms = [SelectTransformation(path)]

    def __call__(self, stream, keep_marks=False):
        """Apply the transformation chain to an event stream.

        The incoming events are first marked `OUTSIDE`, then passed through
        every transformation in order, and finally stripped of their marks
        again.

        :param stream: the (unmarked) event stream to filter
        :param keep_marks: Do not strip transformer selection marks from the
                           stream. Useful for testing.
        :return: the transformed stream
        :rtype: `Stream`
        """
        marked = self._mark(stream)
        for link in self.transforms:
            marked = link(marked)
        if not keep_marks:
            marked = self._unmark(marked)
        # Carry over the serializer of the input, if it has one.
        return Stream(marked,
                      serializer=getattr(stream, 'serializer', None))

    def apply(self, function):
        """Apply a transformation to the stream.

        Transformations can be chained, similar to stream filters. Any callable
        accepting a marked stream can be used as a transform.

        As an example, here is a simple `TEXT` event upper-casing transform:

        >>> def upper(stream):
        ...     for mark, (kind, data, pos) in stream:
        ...         if mark and kind is TEXT:
        ...             yield mark, (kind, data.upper(), pos)
        ...         else:
        ...             yield mark, (kind, data, pos)
        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
        >>> print(short_stream | Transformer('.//em/text()').apply(upper))
        <body>Some <em>TEST</em> text</body>

        :param function: a callable taking a marked stream, or another
                         `Transformer` whose transformations are appended
        :return: a new transformer; this instance is left unmodified
        :rtype: `Transformer`
        """
        # Create the copy without running __init__: the constructor would
        # build a default '.' selection only for it to be thrown away by the
        # assignment below, on every single chained call.
        transformer = Transformer.__new__(Transformer)
        transformer.transforms = self.transforms[:]
        if isinstance(function, Transformer):
            transformer.transforms.extend(function.transforms)
        else:
            transformer.transforms.append(function)
        return transformer

    #{ Selection operations

    def select(self, path):
        """Mark events matching the given XPath expression, within the current
        selection.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer().select('.//em').trace())
        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName('em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param path: an XPath expression (as string) or a `Path` instance
        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(SelectTransformation(path))

    def invert(self):
        """Invert selection so that marked events become unmarked, and vice
        versa.

        Specifically, all marks are converted to null marks, and all null marks
        are converted to OUTSIDE marks.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('//em').invert().trace())
        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
        (None, ('TEXT', u'test', (None, 1, 15)))
        (None, ('END', QName('em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :rtype: `Transformer`
        """
        return self.apply(InvertTransformation())

    def end(self):
        """End current selection, allowing all events to be selected.

        Example:

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('//em').end().trace())
        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
        ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :return: the stream augmented by transformation marks
        :rtype: `Transformer`
        """
        return self.apply(EndTransformation())

    #{ Deletion operations

    def empty(self):
        """Empty selected elements of all content.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').empty())
        <html><head><title>Some Title</title></head><body>Some <em/>
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(EmptyTransformation())

    def remove(self):
        """Remove selection from the stream.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').remove())
        <html><head><title>Some Title</title></head><body>Some
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(RemoveTransformation())

    #{ Direct element operations

    def unwrap(self):
        """Remove outermost enclosing elements from selection.

        Example:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').unwrap())
        <html><head><title>Some Title</title></head><body>Some body
        text.</body></html>

        :rtype: `Transformer`
        """
        return self.apply(UnwrapTransformation())

    def wrap(self, element):
        """Wrap selection in an element.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').wrap('strong'))
        <html><head><title>Some Title</title></head><body>Some
        <strong><em>body</em></strong> text.</body></html>

        :param element: either a tag name (as string) or an `Element` object
        :rtype: `Transformer`
        """
        return self.apply(WrapTransformation(element))

    #{ Content insertion operations

    def replace(self, content):
        """Replace selection with content.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//title/text()').replace('New Title'))
        <html><head><title>New Title</title></head><body>Some <em>body</em>
        text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(ReplaceTransformation(content))

    def before(self, content):
        """Insert content before selection.

        In this example we insert the word 'emphasised' before the <em> opening
        tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').before('emphasised '))
        <html><head><title>Some Title</title></head><body>Some emphasised
        <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(BeforeTransformation(content))

    def after(self, content):
        """Insert content after selection.

        Here, we insert some text after the </em> closing tag:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em').after(' rock'))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        rock text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AfterTransformation(content))

    def prepend(self, content):
        """Insert content after the ENTER event of the selection.

        Inserting some new text at the start of the <body>:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//body').prepend('Some new body text. '))
        <html><head><title>Some Title</title></head><body>Some new body text.
        Some <em>body</em> text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(PrependTransformation(content))

    def append(self, content):
        """Insert content before the END event of the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//body').append(' Some new body text.'))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        text. Some new body text.</body></html>

        :param content: Either a callable, an iterable of events, or a string
                        to insert.
        :rtype: `Transformer`
        """
        return self.apply(AppendTransformation(content))

    #{ Attribute manipulation

    def attr(self, name, value):
        """Add, replace or delete an attribute on selected elements.

        If `value` evaluates to `None` the attribute will be deleted from the
        element:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
        ...             '</html>')
        >>> print(html | Transformer('body/em').attr('class', None))
        <html><head><title>Some Title</title></head><body>Some <em>body</em>
        <em>text</em>.</body></html>

        Otherwise the attribute will be set to `value`:

        >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
        <html><head><title>Some Title</title></head><body>Some <em
        class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>

        If `value` is a callable it will be called with the attribute name and
        the `START` event for the matching element. Its return value will then
        be used to set the attribute:

        >>> def print_attr(name, event):
        ...     attrs = event[1][1]
        ...     print(attrs)
        ...     return attrs.get(name)
        >>> print(html | Transformer('body/em').attr('class', print_attr))
        Attrs([(QName('class'), u'before')])
        Attrs()
        <html><head><title>Some Title</title></head><body>Some <em
        class="before">body</em> <em>text</em>.</body></html>

        :param name: the name of the attribute
        :param value: the value that should be set for the attribute.
        :rtype: `Transformer`
        """
        return self.apply(AttrTransformation(name, value))

    #{ Buffer operations

    def copy(self, buffer, accumulate=False):
        """Copy selection into buffer.

        The buffer is replaced by each *contiguous* selection before being passed
        to the next transformation. If accumulate=True, further selections will
        be appended to the buffer rather than replacing it.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('head/title/text()').copy(buffer)
        ...     .end().select('body').prepend(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>

        This example illustrates that only a single contiguous selection will
        be buffered:

        >>> print(html | Transformer('head/title/text()').copy(buffer)
        ...     .end().select('body/em').copy(buffer).end().select('body')
        ...     .prepend(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body><h1>Some
        Title</h1>Some <em>body</em> text.</body></html>
        >>> print(buffer)
        <em>body</em>

        Element attributes can also be copied for later use:

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body><em>Some</em> <em class="before">body</em>'
        ...             '<em>text</em>.</body></html>')
        >>> buffer = StreamBuffer()
        >>> def apply_attr(name, entry):
        ...     return list(buffer)[0][1][1].get('class')
        >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
        ...     .end().buffer().select('body/em[not(@class)]')
        ...     .attr('class', apply_attr))
        <html><head><title>Some Title</title></head><body><em
        class="before">Some</em> <em class="before">body</em><em
        class="before">text</em>.</body></html>


        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its previous content
        :rtype: `Transformer`
        :note: Copy (and cut) copy each individual selected object into the
               buffer before passing to the next transform. For example, the
               XPath ``*|text()`` will select all elements and text, each
               instance of which will be copied to the buffer individually
               before passing to the next transform. This has implications for
               how ``StreamBuffer`` objects can be used, so some
               experimentation may be required.

        """
        return self.apply(CopyTransformation(buffer, accumulate))

    def cut(self, buffer, accumulate=False):
        """Copy selection into buffer and remove the selection from the stream.

        >>> from genshi.builder import tag
        >>> buffer = StreamBuffer()
        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...             '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('.//em/text()').cut(buffer)
        ...     .end().select('.//em').after(tag.h1(buffer)))
        <html><head><title>Some Title</title></head><body>Some
        <em/><h1>body</h1> text.</body></html>

        Specifying accumulate=True, appends all selected intervals onto the
        buffer. Combining this with the .buffer() operation allows us operate
        on all copied events rather than per-segment. See the documentation on
        buffer() for more information.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, append each selection to the buffer
                           instead of replacing its previous content
        :rtype: `Transformer`
        :note: this transformation will buffer the entire input stream
        """
        return self.apply(CutTransformation(buffer, accumulate))

    def buffer(self):
        """Buffer the entire stream (can consume a considerable amount of
        memory).

        Useful in conjunction with copy(accumulate=True) and
        cut(accumulate=True) to ensure that all marked events in the entire
        stream are copied to the buffer before further transformations are
        applied.

        For example, to move all <note> elements inside a <notes> tag at the
        top of the document:

        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
        ...             'text <note>two</note>.</body></doc>')
        >>> buffer = StreamBuffer()
        >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
        ...     .end().buffer().select('notes').prepend(buffer))
        <doc><notes><note>one</note><note>two</note></notes><body>Some text
        .</body></doc>

        :rtype: `Transformer`
        """
        # ``list`` consumes the whole marked stream before the next
        # transformation sees its first event.
        return self.apply(list)

    #{ Miscellaneous operations

    def filter(self, filter):
        """Apply a normal stream filter to the selection. The filter is called
        once for each contiguous block of marked events.

        >>> from genshi.filters.html import HTMLSanitizer
        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
        ...             '</script> and some more text</body></html>')
        >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
        <html><body>Some text and some more text</body></html>

        :param filter: The stream filter to apply.
        :rtype: `Transformer`
        """
        return self.apply(FilterTransformation(filter))

    def map(self, function, kind):
        """Applies a function to the ``data`` element of events of ``kind`` in
        the selection.

        >>> html = HTML('<html><head><title>Some Title</title></head>'
        ...               '<body>Some <em>body</em> text.</body></html>')
        >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
        text.</body></html>

        :param function: the function to apply
        :param kind: the kind of event the function should be applied to
        :rtype: `Transformer`
        """
        return self.apply(MapTransformation(function, kind))

    def substitute(self, pattern, replace, count=1):
        """Replace text matching a regular expression.

        Refer to the documentation for ``re.sub()`` for details.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b>\\n'
        ...             '<i>some italicised text</i></body></html>')
        >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
        <html><body>Some text, some more text and <b>SOME bold text</b>
        <i>some italicised text</i></body></html>
        >>> tags = tag.html(tag.body('Some text, some more text and\\n',
        ...      Markup('<b>some bold text</b>')))
        >>> print(tags.generate() | Transformer('body').substitute(
        ...     '(?i)some', 'SOME'))
        <html><body>SOME text, some more text and
        <b>SOME bold text</b></body></html>

        :param pattern: A regular expression object or string.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        :rtype: `Transformer`
        """
        return self.apply(SubstituteTransformation(pattern, replace, count))

    def rename(self, name):
        """Rename matching elements.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b></body></html>')
        >>> print(html | Transformer('body/b').rename('strong'))
        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>

        :param name: the new name for the matching elements
        :rtype: `Transformer`
        """
        return self.apply(RenameTransformation(name))

    def trace(self, prefix='', fileobj=None):
        """Print events as they pass through the transform.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('em').trace())
        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName('em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param prefix: a string to prefix each event with in the output
        :param fileobj: the writable file-like object to write to; defaults to
                        the standard output stream
        :rtype: `Transformer`
        """
        return self.apply(TraceTransformation(prefix, fileobj=fileobj))

    # Internal methods

    def _mark(self, stream):
        """Turn a plain event stream into a marked stream.

        Every event starts out marked `OUTSIDE`, which makes the complete
        input available to the first selection.
        """
        for event in stream:
            yield OUTSIDE, event

    def _unmark(self, stream):
        """Strip the marks again, yielding plain events.

        Events whose kind is ``None``, `ATTR` or `BREAK` are dropped, as
        they only exist for the benefit of the transformations.
        """
        for mark, event in stream:
            kind = event[0]
            if not (kind is None or kind is ATTR or kind is BREAK):
                yield event
---|
690 | |
---|
691 | |
---|
class SelectTransformation(object):
    """Select and mark events that match an XPath expression.

    Events arriving with a mark of ``None`` are passed through untouched; all
    other events are re-marked according to the result of the path test.
    """

    def __init__(self, path):
        """Create selection.

        :param path: an XPath expression (as string) or a `Path` object
        """
        if not isinstance(path, Path):
            path = Path(path)
        self.path = path

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        :return: a generator of re-marked ``(mark, event)`` tuples
        """
        namespaces = {}
        variables = {}
        test = self.path.test()
        stream = iter(stream)
        # Bound method used below to pull the events of a matched element
        # from the very same iterator the outer loop is consuming.
        next = stream.next
        for mark, event in stream:
            # Unmarked events are outside the current selection scope and
            # are never tested against the path.
            if mark is None:
                yield mark, event
                continue
            result = test(event, namespaces, variables)
            # XXX This is effectively genshi.core._ensure() for transform
            # streams.
            if result is True:
                if event[0] is START:
                    # A whole element matched: mark its START as ENTER, its
                    # matching END as EXIT and everything between as INSIDE.
                    yield ENTER, event
                    depth = 1
                    while depth > 0:
                        mark, subevent = next()
                        # Track nesting so that the END belonging to the
                        # matched START (depth back at 0) can be identified.
                        if subevent[0] is START:
                            depth += 1
                        elif subevent[0] is END:
                            depth -= 1
                        if depth == 0:
                            yield EXIT, subevent
                        else:
                            yield INSIDE, subevent
                        # NOTE(review): presumably keeps the path matcher's
                        # internal state in sync for events consumed here
                        # without evaluating a match -- confirm against Path.
                        test(subevent, namespaces, variables, updateonly=True)
                else:
                    # A matching non-element event (e.g. a text node).
                    yield OUTSIDE, event
            elif isinstance(result, Attrs):
                # XXX Selected *attributes* are emitted as a synthetic event
                # of kind ATTR (marked ATTR) ahead of the element's own
                # event, to indicate they are not really part of the stream.
                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
                yield None, event
            elif isinstance(result, tuple):
                # The test produced a replacement event; select it instead
                # of the original one.
                yield OUTSIDE, result
            elif result:
                # XXX Assume everything else is "text"?
                yield None, (TEXT, unicode(result), (None, -1, -1))
            else:
                # No match: the event leaves the selection.
                yield None, event
---|
750 | |
---|
751 | |
---|
class InvertTransformation(object):
    """Invert selection so that marked events become unmarked, and vice versa.

    Specifically, every event carrying a (truthy) mark is given the null mark
    ``None``, and every event with a null mark is given the ``OUTSIDE`` mark.
    """

    def __call__(self, stream):
        """Flip the selection state of each event in the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if not mark:
                yield OUTSIDE, event
            else:
                yield None, event
---|
769 | |
---|
770 | |
---|
class EndTransformation(object):
    """End the current selection.

    Every event, whatever its previous mark, is re-marked ``OUTSIDE``.
    """

    def __call__(self, stream):
        """Re-mark all events of the marked stream as ``OUTSIDE``.

        :param stream: the marked event stream to filter
        """
        for _, event in stream:
            yield OUTSIDE, event
---|
781 | |
---|
782 | |
---|
class EmptyTransformation(object):
    """Empty selected elements of all content."""

    def __call__(self, stream):
        """Drop everything between the ``ENTER`` and ``EXIT`` of a selection.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Swallow the element's content; output resumes with its EXIT.
            for inner_mark, inner_event in stream:
                if inner_mark is EXIT:
                    yield inner_mark, inner_event
                    break
---|
798 | |
---|
799 | |
---|
class RemoveTransformation(object):
    """Remove selection from the stream."""

    def __call__(self, stream):
        """Keep only the events whose mark is ``None``.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark is not None:
                # Any mark other than None means the event is selected.
                continue
            yield None, event
---|
811 | |
---|
812 | |
---|
class UnwrapTransformation(object):
    """Remove outermost enclosing elements from selection."""

    def __call__(self, stream):
        """Drop the ``ENTER`` and ``EXIT`` events, keeping everything else.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark in (ENTER, EXIT):
                continue
            yield mark, event
---|
824 | |
---|
825 | |
---|
class WrapTransformation(object):
    """Wrap selection in an element."""

    def __init__(self, element):
        """Create the transform.

        :param element: an `Element` instance to wrap the selection in, or
                        any value accepted by the `Element` constructor
                        (presumably a tag name -- confirm against `Element`)
        """
        if isinstance(element, Element):
            self.element = element
        else:
            self.element = Element(element)

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark:
                # Generate the wrapper's events afresh for each selection:
                # all but the last go before the selection, the last one
                # (emitted below) goes after it.
                element = list(self.element.generate())
                for prefix in element[:-1]:
                    yield None, prefix
                yield mark, event
                start = mark
                # `stopped` records whether the event that ended the inner
                # loop has already been dealt with (yielded, or there was
                # none because the stream ran out).
                stopped = False
                for mark, event in stream:
                    if start is ENTER and mark is EXIT:
                        # End of the selected element: emit it inside the
                        # wrapper and finish.
                        yield mark, event
                        stopped = True
                        break
                    if not mark:
                        # First unmarked event ends the selection; it must be
                        # emitted only after the wrapper has been closed.
                        break
                    yield mark, event
                else:
                    # Stream exhausted: nothing is left to emit afterwards.
                    stopped = True
                yield None, element[-1]
                if not stopped:
                    yield mark, event
            else:
                yield mark, event
---|
859 | |
---|
860 | |
---|
class TraceTransformation(object):
    """Print events as they pass through the transform."""

    def __init__(self, prefix='', fileobj=None):
        """Set up the tracer.

        :param prefix: text to prefix each traced line with.
        :param fileobj: the writable file-like object to write to; when
                        omitted (or falsy), ``sys.stdout`` is used
        """
        if not fileobj:
            fileobj = sys.stdout
        self.prefix = prefix
        self.fileobj = fileobj

    def __call__(self, stream):
        """Write one line per marked event, then pass the event on unchanged.

        :param stream: the marked event stream to filter
        """
        for marked_event in stream:
            line = '%s%s\n' % (self.prefix, marked_event)
            self.fileobj.write(line)
            yield marked_event
---|
881 | |
---|
882 | |
---|
class FilterTransformation(object):
    """Apply a normal stream filter to the selection. The filter is called once
    for each selection."""

    def __init__(self, filter):
        """Create the transform.

        :param filter: The stream filter to apply; it is called with a list
                       of plain (unmarked) events and must return an iterable
                       of events.
        """
        self.filter = filter

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        def flush(queue):
            # Run the queued selection through the filter, re-mark the
            # results as OUTSIDE, then empty the queue in place so the same
            # list can be reused for the next selection.
            if queue:
                for event in self.filter(queue):
                    yield OUTSIDE, event
                del queue[:]

        queue = []
        for mark, event in stream:
            if mark is ENTER:
                # Selected element: collect everything up to and including
                # its EXIT event, then filter it as one unit.
                queue.append(event)
                for mark, event in stream:
                    queue.append(event)
                    if mark is EXIT:
                        break
                for queue_event in flush(queue):
                    yield queue_event
            elif mark is OUTSIDE:
                # Contiguous run of OUTSIDE events: collect the whole run and
                # filter it as one unit.
                stopped = False
                queue.append(event)
                for mark, event in stream:
                    if mark is not OUTSIDE:
                        break
                    queue.append(event)
                else:
                    # The stream ran out; no terminating event is pending.
                    stopped = True
                for queue_event in flush(queue):
                    yield queue_event
                if not stopped:
                    # Emit the event that ended the run, after the filtered
                    # output, with its original mark.
                    yield mark, event
            else:
                yield mark, event
        # Flush whatever is still queued, e.g. an element whose EXIT never
        # arrived before the stream ended.
        for queue_event in flush(queue):
            yield queue_event
---|
932 | |
---|
933 | |
---|
class MapTransformation(object):
    """Apply a function to the `data` element of events of ``kind`` in the
    selection.
    """

    def __init__(self, function, kind):
        """Create the transform.

        :param function: the function to apply; it receives the `data`
                         element of each selected event and its return value
                         replaces that element
        :param kind: the stream event ``kind`` to apply the `function` to;
                     ``None`` applies it to selected events of every kind
        """
        self.kind = kind
        self.function = function

    def __call__(self, stream):
        """Map the function over the data of matching selected events.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            if mark and self.kind in (None, kind):
                data = self.function(data)
            yield mark, (kind, data, pos)
---|
959 | |
---|
960 | |
---|
class SubstituteTransformation(object):
    """Replace text matching a regular expression.

    Refer to the documentation for ``re.sub()`` for details.
    """
    def __init__(self, pattern, replace, count=0):
        """Create the transform.

        :param pattern: A regular expression object, or string (which is
                        compiled with ``re.compile()``).
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        """
        if not isinstance(pattern, basestring):
            compiled = pattern
        else:
            compiled = re.compile(pattern)
        self.pattern = compiled
        self.replace = replace
        self.count = count

    def __call__(self, stream):
        """Run the substitution over every marked ``TEXT`` event.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            if mark is None or kind is not TEXT:
                yield mark, (kind, data, pos)
                continue
            substituted = self.pattern.sub(self.replace, data, self.count)
            # Keep markup as markup so that it is not escaped later on.
            if isinstance(data, Markup):
                substituted = Markup(substituted)
            yield mark, (kind, substituted, pos)
---|
993 | |
---|
994 | |
---|
class RenameTransformation(object):
    """Rename matching elements."""
    def __init__(self, name):
        """Create the transform.

        :param name: New element name; it is converted to a `QName`.
        """
        self.name = QName(name)

    def __call__(self, stream):
        """Swap the tag name in the ``ENTER`` and ``EXIT`` events.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            if mark is EXIT:
                # The data of the closing event is just the tag name.
                data = self.name
            elif mark is ENTER:
                # The data of the opening event is (name, attrs); keep attrs.
                data = (self.name, data[1])
            yield mark, (kind, data, pos)
---|
1015 | |
---|
1016 | |
---|
class InjectorTransformation(object):
    """Abstract base class for transformations that inject content into a
    stream.

    Subclasses implement ``__call__`` and use ``_inject()`` to obtain the
    content as unmarked events.

    >>> class Top(InjectorTransformation):
    ...     def __call__(self, stream):
    ...         for event in self._inject():
    ...             yield event
    ...         for event in stream:
    ...             yield event
    >>> html = HTML('<body>Some <em>test</em> text</body>')
    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
    Prefix <body>Some <em>test</em> text</body>
    """
    def __init__(self, content):
        """Create a new injector.

        :param content: An iterable of Genshi stream events, or a string to be
                        injected. A callable is also accepted; it is called
                        without arguments each time content is injected and
                        its result is used instead.
        """
        self.content = content

    def _inject(self):
        """Yield the content to inject as ``(None, event)`` pairs."""
        source = self.content
        if hasattr(source, '__call__'):
            source = source()
        for injected in _ensure(source):
            yield None, injected
---|
1045 | |
---|
1046 | |
---|
class ReplaceTransformation(InjectorTransformation):
    """Replace selection with content."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        # Wrapped so that the event which terminates a selection can be
        # handed back to the outer loop (assumes PushBackStream re-delivers
        # pushed items before continuing with the underlying stream).
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is not None:
                start = mark
                # Emit the replacement content in place of the selection...
                for subevent in self._inject():
                    yield subevent
                # ...and discard the remaining events of the selection.
                for mark, event in stream:
                    if start is ENTER:
                        # Element selection: drop everything up to and
                        # including its EXIT event.
                        if mark is EXIT:
                            break
                    elif mark != start:
                        # Other selections end at the first event carrying a
                        # different mark; that event is not part of the
                        # selection, so give it back.
                        stream.push((mark, event))
                        break
            else:
                yield mark, event
---|
1070 | |
---|
1071 | |
---|
class BeforeTransformation(InjectorTransformation):
    """Insert content before selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        # Wrapped so that the event which terminates a selection can be
        # handed back to the outer loop (assumes PushBackStream re-delivers
        # pushed items before continuing with the underlying stream).
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is not None:
                start = mark
                # The injected content goes first, then the selection itself.
                for subevent in self._inject():
                    yield subevent
                yield mark, event
                # Pass the rest of the selection through so the content is
                # injected only once per selection.
                for mark, event in stream:
                    if mark != start and start is not ENTER:
                        # A non-element selection ends at the first event
                        # with a different mark; let the outer loop handle it.
                        stream.push((mark, event))
                        break
                    yield mark, event
                    if start is ENTER and mark is EXIT:
                        break
            else:
                yield mark, event
---|
1096 | |
---|
1097 | |
---|
class AfterTransformation(InjectorTransformation):
    """Insert content after selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        # Wrapped so that the event which terminates a selection can be
        # handed back to the outer loop (assumes PushBackStream re-delivers
        # pushed items before continuing with the underlying stream).
        stream = PushBackStream(stream)
        for mark, event in stream:
            yield mark, event
            if mark:
                start = mark
                # Pass the rest of the selection through unchanged.
                for mark, event in stream:
                    if start is not ENTER and mark != start:
                        # A non-element selection ends at the first event
                        # with a different mark; let the outer loop handle it.
                        stream.push((mark, event))
                        break
                    yield mark, event
                    if start is ENTER and mark is EXIT:
                        break
                # The selection is complete: append the injected content.
                for subevent in self._inject():
                    yield subevent
---|
1120 | |
---|
1121 | |
---|
class PrependTransformation(InjectorTransformation):
    """Prepend content to the inside of selected elements."""

    def __call__(self, stream):
        """Inject the content right after each ``ENTER`` event.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            for injected in self._inject():
                yield injected
---|
1135 | |
---|
1136 | |
---|
class AppendTransformation(InjectorTransformation):
    """Append content after the content of selected elements."""

    def __call__(self, stream):
        """Inject the content just before the ``EXIT`` event of each selected
        element.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Forward the element's content, holding back its EXIT event.
            for mark, event in stream:
                if mark is EXIT:
                    break
                yield mark, event
            for injected in self._inject():
                yield injected
            # Finally emit the event the inner loop stopped on.
            yield mark, event
---|
1155 | |
---|
1156 | |
---|
class AttrTransformation(object):
    """Set an attribute on selected elements."""

    def __init__(self, name, value):
        """Construct transform.

        :param name: name of the attribute that should be set
        :param value: the value to set, or a callable invoked as
                      ``value(name, event)`` for each selected element that
                      returns the value; a resulting value of ``None``
                      removes the attribute
        """
        self.value = value
        self.name = name

    def __call__(self, stream):
        """Update the attributes carried by every ``ENTER`` event.

        :param stream: The marked event stream to filter
        """
        value_is_callable = hasattr(self.value, '__call__')
        for mark, (kind, data, pos) in stream:
            if mark is not ENTER:
                yield mark, (kind, data, pos)
                continue
            if value_is_callable:
                new_value = self.value(self.name, (kind, data, pos))
            else:
                new_value = self.value
            if new_value is not None:
                attrs = data[1] | [(QName(self.name), new_value)]
            else:
                attrs = data[1] - [QName(self.name)]
            yield mark, (kind, (data[0], attrs), pos)
---|
1187 | |
---|
1188 | |
---|
1189 | |
---|
class StreamBuffer(Stream):
    """Stream event buffer used for cut and copy transformations."""

    def __init__(self):
        """Create the buffer, starting out with no events."""
        Stream.__init__(self, [])

    def append(self, event):
        """Add an event to the end of the buffer.

        :param event: the markup event to add
        """
        self.events.append(event)

    def reset(self):
        """Empty the buffer of events (the event list is cleared in place)."""
        self.events[:] = []
---|
1207 | |
---|
1208 | |
---|
class CopyTransformation(object):
    """Copy selected events into a buffer for later insertion."""

    def __init__(self, buffer, accumulate=False):
        """Create the copy transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, the buffer is never reset, so successive
                           selections pile up in it; otherwise it is reset
                           here and again at the start of each selection
        """
        if not accumulate:
            buffer.reset()
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transformation to the marked stream.

        :param stream: the marked event stream to filter
        """
        # Wrapped so that the event which terminates a selection can be
        # handed back to the outer loop (assumes PushBackStream re-delivers
        # pushed items before continuing with the underlying stream).
        stream = PushBackStream(stream)

        for mark, event in stream:
            if mark:
                if not self.accumulate:
                    self.buffer.reset()
                # The selection is collected completely (and copied into the
                # buffer) before any of it is passed on downstream.
                events = [(mark, event)]
                self.buffer.append(event)
                start = mark
                for mark, event in stream:
                    if start is not ENTER and mark != start:
                        # A non-element selection ends at the first event
                        # with a different mark; let the outer loop handle it.
                        stream.push((mark, event))
                        break
                    events.append((mark, event))
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                for i in events:
                    yield i
            else:
                yield mark, event
---|
1249 | |
---|
1250 | |
---|
class CutTransformation(object):
    """Cut selected events into a buffer for later insertion and remove the
    selection.
    """

    def __init__(self, buffer, accumulate=False):
        """Create the cut transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, the buffer is never reset, so successive
                           selections pile up in it; otherwise it is reset
                           when the transformation starts and again at the
                           start of each selection
        """
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Selected events are moved into the buffer instead of being passed
        on. Selected attributes (``ATTR`` events) are additionally stripped
        from the ``START`` event that follows them.

        :param stream: the marked event stream to filter
        """
        # Names of the selected attributes still to be removed from the next
        # START event.
        attributes = []
        # Wrapped so that the event which terminates a selection can be
        # handed back to the outer loop (assumes PushBackStream re-delivers
        # pushed items before continuing with the underlying stream).
        stream = PushBackStream(stream)
        # True once an unselected event has been passed on since the last
        # selection (or nothing has been cut yet).
        broken = False
        if not self.accumulate:
            self.buffer.reset()
        for mark, event in stream:
            if mark:
                # Send a BREAK event if there was no other event sent between
                if not self.accumulate:
                    if not broken and self.buffer:
                        yield BREAK, (BREAK, None, None)
                    self.buffer.reset()
                self.buffer.append(event)
                start = mark
                if mark is ATTR:
                    attributes.extend([name for name, _ in event[1][1]])
                for mark, event in stream:
                    if start is mark is ATTR:
                        attributes.extend([name for name, _ in event[1][1]])
                    # Handle non-element contiguous selection
                    if start is not ENTER and mark != start:
                        # Operating on the attributes of a START event
                        if start is ATTR:
                            kind, data, pos = event
                            assert kind is START
                            data = (data[0], data[1] - attributes)
                            # Start afresh for the next element. This has to
                            # stay a list: a later ATTR selection in the same
                            # stream extends it again.
                            attributes = []
                            stream.push((mark, (kind, data, pos)))
                        else:
                            stream.push((mark, event))
                        break
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                broken = False
            else:
                broken = True
                yield mark, event
        # A selection at the very end of the stream still needs its BREAK.
        if not broken and self.buffer:
            yield BREAK, (BREAK, None, None)
---|