| [2] | 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | # |
|---|
| [1077] | 3 | # Copyright (C) 2006-2009 Edgewall Software |
|---|
| [2] | 4 | # All rights reserved. |
|---|
| 5 | # |
|---|
| 6 | # This software is licensed as described in the file COPYING, which |
|---|
| 7 | # you should have received as part of this distribution. The terms |
|---|
| [287] | 8 | # are also available at http://genshi.edgewall.org/wiki/License. |
|---|
| [2] | 9 | # |
|---|
| 10 | # This software consists of voluntary contributions made by many |
|---|
| 11 | # individuals. For the exact contribution history, see the revision |
|---|
| [287] | 12 | # history and logs, available at http://genshi.edgewall.org/log/. |
|---|
| [2] | 13 | |
|---|
| 14 | """Core classes for markup processing.""" |
|---|
| 15 | |
|---|
| [970] | 16 | try: |
|---|
| [1079] | 17 | reduce # builtin in Python < 3 |
|---|
| 18 | except NameError: |
|---|
| [970] | 19 | from functools import reduce |
|---|
| [1157] | 20 | import sys |
|---|
| [750] | 21 | from itertools import chain |
|---|
| [254] | 22 | import operator |
|---|
| [2] | 23 | |
|---|
| [1080] | 24 | from genshi.util import plaintext, stripentities, striptags, stringrepr |
|---|
| [485] | 25 | |
|---|
# Names exported by ``from genshi.core import *``
__all__ = [
    'Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', 'QName',
]
# Markup language used by the docstrings in this module
__docformat__ = 'restructuredtext en'
|---|
| [2] | 29 | |
|---|
| 30 | |
|---|
class StreamEventKind(str):
    """String subclass naming one kind of event on a markup stream.

    Instances are interned per value: constructing the class twice with equal
    values hands back the very same object.
    """
    __slots__ = []
    _instances = {}

    def __new__(cls, val):
        registry = cls._instances
        candidate = str.__new__(cls, val)
        # `setdefault` keeps the first instance registered for a value and
        # discards the candidate if one already exists
        return registry.setdefault(val, candidate)
|---|
| [2] | 38 | |
|---|
| [344] | 39 | |
|---|
class Stream(object):
    """An iterable sequence of markup events.

    Iterating over a stream yields its events. Every event is a tuple of the
    form::

      (kind, data, position)

    ``kind`` names the type of event (such as `START`, `END`, `TEXT`, etc),
    the content of ``data`` depends on that kind, and ``position`` is a
    ``(filename, line, offset)`` tuple locating the originating element or
    text in the input. An unknown location is expressed as ``(None, -1, -1)``.

    A stream can also be turned into text: `serialize()` produces an iterator
    over the generated strings, whereas `render()` returns the complete output
    in one piece. Both take parameters that influence the serialization.
    """
    __slots__ = ['events', 'serializer']

    START = StreamEventKind('START') #: a start tag
    END = StreamEventKind('END') #: an end tag
    TEXT = StreamEventKind('TEXT') #: literal text
    XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
    DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
    START_NS = StreamEventKind('START_NS') #: start namespace mapping
    END_NS = StreamEventKind('END_NS') #: end namespace mapping
    START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
    END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
    PI = StreamEventKind('PI') #: processing instruction
    COMMENT = StreamEventKind('COMMENT') #: comment

    def __init__(self, events, serializer=None):
        """Create a stream on top of the given events.

        :param events: a sequence or iterable providing the events
        :param serializer: the serialization method used by default for this
                           stream

        :note: Changed in 0.5: added the `serializer` argument
        """
        self.events = events #: The underlying iterable producing the events
        self.serializer = serializer #: The default serialization method

    def __iter__(self):
        return iter(self.events)

    def __or__(self, function):
        """Apply a filter or serializer to the stream with the ``|`` operator,
        in the manner of pipes in a Unix shell.

        Assume the following stream produced by the `HTML` function:

        >>> from genshi.input import HTML
        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8')
        >>> print(html)
        <p onclick="alert('Whoa')">Hello, world!</p>

        A filter such as the HTML sanitizer can be applied to that stream using
        the pipe notation as follows:

        >>> from genshi.filters import HTMLSanitizer
        >>> sanitizer = HTMLSanitizer()
        >>> print(html | sanitizer)
        <p>Hello, world!</p>

        Any function that takes a stream and produces a stream can act as a
        filter (a stream being anything that iterates over events):

        >>> def uppercase(stream):
        ...     for kind, data, pos in stream:
        ...         if kind is TEXT:
        ...             data = data.upper()
        ...         yield kind, data, pos
        >>> print(html | sanitizer | uppercase)
        <p>HELLO, WORLD!</p>

        Serializers can also be used with this notation:

        >>> from genshi.output import TextSerializer
        >>> output = TextSerializer()
        >>> print(html | sanitizer | uppercase | output)
        HELLO, WORLD!

        Serializers normally belong at the very end of such a "pipeline";
        placing one in the middle may produce unexpected results.

        :param function: the callable object that should be applied as a filter
        :return: the filtered stream
        :rtype: `Stream`
        """
        filtered = _ensure(function(self))
        # The result keeps this stream's default serialization method
        return Stream(filtered, serializer=self.serializer)

    def filter(self, *filters):
        """Return a new stream with the given filters applied in order.

        Each filter must be a callable that takes the stream object as its
        parameter and returns the filtered stream.

        The call::

            stream.filter(filter1, filter2)

        is equivalent to::

            stream | filter1 | filter2

        :param filters: one or more callable objects that should be applied as
                        filters
        :return: the filtered stream
        :rtype: `Stream`
        """
        stream = self
        for function in filters:
            stream = stream | function
        return stream

    def render(self, method=None, encoding=None, out=None, **kwargs):
        """Serialize the stream and return the result as one string.

        Extra keyword arguments are handed on to the serializer, so which ones
        are understood depends on the value of `method`.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :param encoding: how the output string should be encoded; if set to
                         `None`, this method returns a `unicode` object
        :param out: a file-like object that the output should be written to
                    instead of being returned as one big string; note that if
                    this is a file or socket (or similar), the `encoding` must
                    not be `None` (that is, the output must be encoded)
        :return: a `str` or `unicode` object (depending on the `encoding`
                 parameter), or `None` if the `out` parameter is provided
        :rtype: `basestring`

        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        :note: Changed in 0.5: added the `out` parameter
        """
        from genshi.output import encode
        if method is None:
            # Fall back to the stream's own default, and to XML after that
            method = self.serializer or 'xml'
        chunks = self.serialize(method=method, **kwargs)
        return encode(chunks, method=method, encoding=encoding, out=out)

    def select(self, path, namespaces=None, variables=None):
        """Return a new stream consisting of the events that match the given
        XPath expression.

        >>> from genshi import HTML
        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8')
        >>> print(stream.select('elem'))
        <elem>foo</elem><elem>bar</elem>
        >>> print(stream.select('elem/text()'))
        foobar

        Note that the outermost element of the stream becomes the *context
        node* for the XPath test. That means that the expression "doc" would
        not match anything in the example above, because it only tests against
        child elements of the outermost element:

        >>> print(stream.select('doc'))
        <BLANKLINE>

        You can use the "." expression to match the context node itself
        (although that usually makes little sense):

        >>> print(stream.select('.'))
        <doc><elem>foo</elem><elem>bar</elem></doc>

        :param path: a string containing the XPath expression
        :param namespaces: mapping of namespace prefixes used in the path
        :param variables: mapping of variable names to values
        :return: the selected substream
        :rtype: `Stream`
        :raises PathSyntaxError: if the given path expression is invalid or not
                                 supported
        """
        from genshi.path import Path
        compiled = Path(path)
        return compiled.select(self, namespaces, variables)

    def serialize(self, method='xml', **kwargs):
        """Produce the serialized form of the stream piece by piece.

        In contrast to `render()`, the output is not collected into a single
        string; an iterator over the generated strings is returned instead.

        Extra keyword arguments are handed on to the serializer, so which ones
        are understood depends on the value of `method`.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :return: an iterator over the serialization results (`Markup` or
                 `unicode` objects, depending on the serialization method)
        :rtype: ``iterator``
        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        """
        from genshi.output import get_serializer
        if method is None:
            method = self.serializer or 'xml'
        serializer = get_serializer(method, **kwargs)
        return serializer(_ensure(self))

    def __str__(self):
        return self.render()

    def __unicode__(self):
        return self.render(encoding=None)

    def __html__(self):
        return self
|---|
| [2] | 255 | |
|---|
| [861] | 256 | |
|---|
# Module-level shortcuts for the event kinds declared on `Stream`

# element boundaries and character data
START = Stream.START
END = Stream.END
TEXT = Stream.TEXT

# document prolog
XML_DECL = Stream.XML_DECL
DOCTYPE = Stream.DOCTYPE

# namespace mappings
START_NS = Stream.START_NS
END_NS = Stream.END_NS

# CDATA sections
START_CDATA = Stream.START_CDATA
END_CDATA = Stream.END_CDATA

# processing instructions and comments
PI = Stream.PI
COMMENT = Stream.COMMENT
|---|
| 268 | |
|---|
| [1052] | 269 | |
|---|
| [129] | 270 | def _ensure(stream): |
|---|
| 271 | """Ensure that every item on the stream is actually a markup event.""" |
|---|
| [750] | 272 | stream = iter(stream) |
|---|
| 273 | event = stream.next() |
|---|
| 274 | |
|---|
| 275 | # Check whether the iterable is a real markup event stream by examining the |
|---|
| 276 | # first item it yields; if it's not we'll need to do some conversion |
|---|
| 277 | if type(event) is not tuple or len(event) != 3: |
|---|
| 278 | for event in chain([event], stream): |
|---|
| [186] | 279 | if hasattr(event, 'totuple'): |
|---|
| 280 | event = event.totuple() |
|---|
| 281 | else: |
|---|
| 282 | event = TEXT, unicode(event), (None, -1, -1) |
|---|
| [750] | 283 | yield event |
|---|
| 284 | return |
|---|
| 285 | |
|---|
| 286 | # This looks like a markup event stream, so we'll just pass it through |
|---|
| 287 | # unchanged |
|---|
| 288 | yield event |
|---|
| 289 | for event in stream: |
|---|
| [186] | 290 | yield event |
|---|
| [74] | 291 | |
|---|
| [129] | 292 | |
|---|
class Attrs(tuple):
    """Immutable sequence type that stores the attributes of an element.

    Ordering of the attributes is preserved, while access by name is also
    supported.

    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])

    >>> 'href' in attrs
    True
    >>> 'tabindex' in attrs
    False
    >>> attrs.get('title')
    'Foo'

    Instances may not be manipulated directly. Instead, the operators ``|`` and
    ``-`` can be used to produce new instances that have specific attributes
    added, replaced or removed.

    To remove an attribute, use the ``-`` operator. The right hand side can be
    either a string or a set/sequence of strings, identifying the name(s) of
    the attribute(s) to remove:

    >>> attrs - 'title'
    Attrs([('href', '#')])
    >>> attrs - ('title', 'href')
    Attrs()

    The original instance is not modified, but the operator can of course be
    used with an assignment:

    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs -= 'title'
    >>> attrs
    Attrs([('href', '#')])

    To add a new attribute, use the ``|`` operator, where the right hand value
    is an iterable of ``(name, value)`` tuples (which includes `Attrs`
    instances):

    >>> attrs | [('title', 'Bar')]
    Attrs([('href', '#'), ('title', 'Bar')])

    If the attributes already contain an attribute with a given name, the value
    of that attribute is replaced:

    >>> attrs | [('href', 'http://example.org/')]
    Attrs([('href', 'http://example.org/')])
    """
    __slots__ = []

    def __contains__(self, name):
        """Return whether the list includes an attribute with the specified
        name.

        :param name: the name of the attribute to look for
        :return: `True` if the list includes the attribute
        :rtype: `bool`
        """
        for attr, _ in self:
            if attr == name:
                return True
        return False

    def __getitem__(self, i):
        """Return an item or slice of the attributes list.

        A single index gives the ``(name, value)`` tuple at that position; a
        slice gives a new `Attrs` instance.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1]
        ('title', 'Foo')
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        items = tuple.__getitem__(self, i)
        if type(i) is slice:
            # tuple slicing returns a plain tuple; restore the subclass
            return Attrs(items)
        return items

    def __getslice__(self, i, j):
        """Return a slice of the attributes list.

        This hook is only consulted by Python 2 for simple ``[i:j]`` slices.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        return Attrs(tuple.__getslice__(self, i, j))

    def __or__(self, attrs):
        """Return a new instance that contains the attributes in `attrs` in
        addition to any already existing attributes. Any attributes in the new
        set that have a value of `None` are removed.

        Existing attributes keep their position (with the new value if one was
        given); attributes that did not exist yet are appended in the order in
        which they appear in `attrs`.

        :param attrs: an iterable of ``(name, value)`` tuples; one-shot
                      iterators such as generators are accepted
        :return: a new instance with the merged attributes
        :rtype: `Attrs`
        """
        if not isinstance(attrs, (list, tuple)):
            # `attrs` is walked several times below; a one-shot iterator would
            # be exhausted by the first pass, so take a snapshot of it
            attrs = list(attrs)
        remove = set([an for an, av in attrs if av is None])
        replace = dict([(an, av) for an, av in attrs
                        if an in self and av is not None])
        kept = [(sn, replace.get(sn, sv)) for sn, sv in self
                if sn not in remove]
        added = [(an, av) for an, av in attrs
                 if an not in self and an not in remove]
        return Attrs(kept + added)

    def __repr__(self):
        if not self:
            return 'Attrs()'
        return 'Attrs([%s])' % ', '.join([repr(item) for item in self])

    def __sub__(self, names):
        """Return a new instance from which all attributes with a name in
        `names` have been removed.

        :param names: the name of the attribute to remove, or a collection of
                      such names
        :return: a new instance with the attribute(s) removed
        :rtype: `Attrs`
        """
        if isinstance(names, basestring):
            # A lone string is one name, not a collection of characters
            names = (names,)
        return Attrs([(name, val) for name, val in self if name not in names])

    def get(self, name, default=None):
        """Return the value of the attribute with the specified name, or the
        value of the `default` parameter if no such attribute is found.

        :param name: the name of the attribute
        :param default: the value to return when the attribute does not exist
        :return: the attribute value, or the `default` value if that attribute
                 does not exist
        :rtype: `object`
        """
        for attr, value in self:
            if attr == name:
                return value
        return default

    def totuple(self):
        """Return the attributes as a markup event.

        The returned event is a `TEXT` event, the data is the value of all
        attributes joined together.

        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
        ('TEXT', '#Foo', (None, -1, -1))

        :return: a `TEXT` event
        :rtype: `tuple`
        """
        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
|---|
| [2] | 443 | |
|---|
| [82] | 444 | |
|---|
class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.

    Operations that combine a `Markup` object with other values (``+``, ``%``
    and `join()`) escape those other values first and return `Markup` again.
    """
    __slots__ = []

    def __add__(self, other):
        return Markup(unicode.__add__(self, escape(other)))

    def __radd__(self, other):
        return Markup(unicode.__add__(escape(other), self))

    def __mod__(self, args):
        # Escape every value that gets interpolated, whatever the shape of the
        # right hand operand
        if isinstance(args, dict):
            args = dict(zip(args.keys(), map(escape, args.values())))
        elif isinstance(args, (list, tuple)):
            args = tuple(map(escape, args))
        else:
            args = escape(args)
        return Markup(unicode.__mod__(self, args))

    def __mul__(self, num):
        return Markup(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __repr__(self):
        return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))

    def join(self, seq, escape_quotes=True):
        """Return a `Markup` object which is the concatenation of the strings
        in the given sequence, where this `Markup` object is the separator
        between the joined elements.

        Any element in the sequence that is not a `Markup` instance is
        automatically escaped.

        :param seq: the sequence of strings to join
        :param escape_quotes: whether double quote characters in the elements
                              should be escaped
        :return: the joined `Markup` object
        :rtype: `Markup`
        :see: `escape`
        """
        return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
                                          for item in seq]))

    @classmethod
    def escape(cls, text, quotes=True):
        """Create a Markup instance from a string and escape special characters
        it may contain (<, >, & and ").

        >>> escape('"1 < 2"')
        <Markup u'&#34;1 &lt; 2&#34;'>

        If the `quotes` parameter is set to `False`, the " character is left
        as is. Escaping quotes is generally only required for strings that are
        to be used in attribute values.

        >>> escape('"1 < 2"', quotes=False)
        <Markup u'"1 &lt; 2"'>

        A false value produces an empty instance, an instance of exactly this
        class is returned as is, and an object providing an ``__html__()``
        method is converted through that method without further escaping.

        :param text: the text to escape
        :param quotes: if ``True``, double quote characters are escaped in
                       addition to the other special characters
        :return: the escaped `Markup` string
        :rtype: `Markup`
        """
        if not text:
            return cls()
        if type(text) is cls:
            return text
        if hasattr(text, '__html__'):
            return cls(text.__html__())

        # The ampersand has to go first, otherwise the ampersands introduced
        # by the other replacements would be escaped a second time
        text = text.replace('&', '&amp;') \
                   .replace('<', '&lt;') \
                   .replace('>', '&gt;')
        if quotes:
            text = text.replace('"', '&#34;')
        return cls(text)

    def unescape(self):
        """Reverse-escapes &, <, >, and " and returns a `unicode` object.

        >>> Markup('1 &lt; 2').unescape()
        u'1 < 2'

        :return: the unescaped string
        :rtype: `unicode`
        :see: `genshi.core.unescape`
        """
        if not self:
            return ''
        # The ampersand has to go last, so that text such as ``&amp;lt;`` is
        # turned into ``&lt;`` rather than into ``<``
        return unicode(self).replace('&#34;', '"') \
                            .replace('&gt;', '>') \
                            .replace('&lt;', '<') \
                            .replace('&amp;', '&')

    def stripentities(self, keepxmlentities=False):
        """Return a copy of the text with any character or numeric entities
        replaced by the equivalent UTF-8 characters.

        If the `keepxmlentities` parameter is provided and evaluates to `True`,
        the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
        ``&quot;``) are not stripped.

        :param keepxmlentities: whether the core XML entities should be left
                                in place
        :return: a `Markup` instance with entities removed
        :rtype: `Markup`
        :see: `genshi.util.stripentities`
        """
        return Markup(stripentities(self, keepxmlentities=keepxmlentities))

    def striptags(self):
        """Return a copy of the text with all XML/HTML tags removed.

        :return: a `Markup` instance with all tags removed
        :rtype: `Markup`
        :see: `genshi.util.striptags`
        """
        return Markup(striptags(self))


try:
    # Prefer the implementation from the optional speedups extension
    from genshi._speedups import Markup
except ImportError:
    pass # just use the Python implementation


# Module-level shortcut, also used by the `Markup` operators above
escape = Markup.escape
|---|
| 574 | |
|---|
| 575 | |
|---|
def unescape(text):
    """Undo the escaping of &, <, >, and " in a `Markup` object and return
    the result as a `unicode` object.

    >>> unescape(Markup('1 &lt; 2'))
    u'1 < 2'

    Anything that is not a `Markup` instance is handed back untouched:

    >>> unescape('1 < 2')
    '1 < 2'

    :param text: the text to unescape
    :return: the unescaped string
    :rtype: `unicode`
    """
    if isinstance(text, Markup):
        return text.unescape()
    # Only `Markup` is known to contain escaped content
    return text
|---|
| 595 | |
|---|
| 596 | |
|---|
class Namespace(object):
    """Helper for building and testing names that belong to an XML namespace.

    The `QName` of an element or attribute carries its namespace URI enclosed
    in curly braces. Objects of this class produce such names and test them
    for membership.

    A `Namespace` object is instantiated with the namespace URI.

    >>> html = Namespace('http://www.w3.org/1999/xhtml')
    >>> html
    Namespace('http://www.w3.org/1999/xhtml')
    >>> html.uri
    u'http://www.w3.org/1999/xhtml'

    The `Namespace` object can then be used to generate `QName` objects with
    that namespace:

    >>> html.body
    QName('http://www.w3.org/1999/xhtml}body')
    >>> html.body.localname
    u'body'
    >>> html.body.namespace
    u'http://www.w3.org/1999/xhtml'

    The same works using item access notation, which is useful for element or
    attribute names that are not valid Python identifiers:

    >>> html['body']
    QName('http://www.w3.org/1999/xhtml}body')

    A `Namespace` object can also be used to test whether a specific `QName`
    belongs to that namespace using the ``in`` operator:

    >>> qname = html.body
    >>> qname in html
    True
    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
    False
    """
    def __new__(cls, uri):
        # Passing in an instance of exactly this class returns that instance
        if type(uri) is not cls:
            return object.__new__(cls)
        return uri

    def __init__(self, uri):
        self.uri = unicode(uri)

    # Pickling support: the URI is both the constructor argument and the
    # complete state of an instance

    def __getnewargs__(self):
        return (self.uri,)

    def __getstate__(self):
        return self.uri

    def __setstate__(self, uri):
        self.uri = uri

    def __contains__(self, qname):
        return qname.namespace == self.uri

    def __eq__(self, other):
        # Another namespace is compared by URI, anything else is compared
        # against the URI directly
        if isinstance(other, Namespace):
            other = other.uri
        return self.uri == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self.uri)

    def __getitem__(self, name):
        return QName('}'.join((self.uri, name)))
    # Attribute access for names that are not otherwise defined builds a
    # qualified name as well
    __getattr__ = __getitem__

    if sys.version_info[0] != 2:
        def __repr__(self):
            cls_name = type(self).__name__
            return '%s(%r)' % (cls_name, self.uri)
    else:
        # Only use stringrepr in python 2
        def __repr__(self):
            cls_name = type(self).__name__
            return '%s(%s)' % (cls_name, stringrepr(self.uri))

    def __str__(self):
        return self.uri.encode('utf-8')

    def __unicode__(self):
        return self.uri
|---|
| 685 | |
|---|
| 686 | |
|---|
# Namespace of the reserved ``xml`` prefix, as used by attributes such as
# xml:lang and xml:space
XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
|---|
| 689 | |
|---|
| 690 | |
|---|
class QName(unicode):
    """The qualified name of an element or attribute.

    The unicode value of an instance is the qualified name in the form
    ``{namespace-uri}local-name``. In addition, the namespace URI is exposed
    as the `namespace` attribute, and the local name as the `localname`
    attribute.

    >>> qname = QName('foo')
    >>> qname
    QName('foo')
    >>> qname.localname
    u'foo'
    >>> qname.namespace

    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
    >>> qname
    QName('http://www.w3.org/1999/xhtml}body')
    >>> qname.localname
    u'body'
    >>> qname.namespace
    u'http://www.w3.org/1999/xhtml'
    """
    __slots__ = ['namespace', 'localname']

    def __new__(cls, qname):
        """Create the `QName` instance.

        An object that is already an instance of exactly this class is
        returned as is.

        :param qname: the qualified name as a string of the form
                      ``{namespace-uri}local-name``, where the leading curly
                      brace is optional
        """
        if type(qname) is cls:
            return qname

        qname = qname.lstrip('{')
        namespace, brace, localname = qname.partition('}')
        if brace:
            # Namespaced name: the stored value always has the leading brace
            self = unicode.__new__(cls, '{%s' % qname)
            namespace, localname = unicode(namespace), unicode(localname)
            self.namespace = namespace
            self.localname = localname
        else:
            # No closing brace, so the whole string is the local name
            self = unicode.__new__(cls, qname)
            localname = unicode(qname)
            self.namespace = None
            self.localname = localname
        return self

    def __getnewargs__(self):
        return (self.lstrip('{'),)

    if sys.version_info[0] != 2:
        def __repr__(self):
            cls_name = type(self).__name__
            return '%s(%r)' % (cls_name, self.lstrip('{'))
    else:
        # Only use stringrepr in python 2
        def __repr__(self):
            cls_name = type(self).__name__
            return '%s(%s)' % (cls_name, stringrepr(self.lstrip('{')))
|---|