| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | # |
|---|
| 3 | # Copyright (C) 2006-2009 Edgewall Software |
|---|
| 4 | # All rights reserved. |
|---|
| 5 | # |
|---|
| 6 | # This software is licensed as described in the file COPYING, which |
|---|
| 7 | # you should have received as part of this distribution. The terms |
|---|
| 8 | # are also available at http://genshi.edgewall.org/wiki/License. |
|---|
| 9 | # |
|---|
| 10 | # This software consists of voluntary contributions made by many |
|---|
| 11 | # individuals. For the exact contribution history, see the revision |
|---|
| 12 | # history and logs, available at http://genshi.edgewall.org/log/. |
|---|
| 13 | |
|---|
| 14 | """Core classes for markup processing.""" |
|---|
| 15 | |
|---|
| 16 | try: |
|---|
| 17 | reduce # builtin in Python < 3 |
|---|
| 18 | except NameError: |
|---|
| 19 | from functools import reduce |
|---|
| 20 | from itertools import chain |
|---|
| 21 | import operator |
|---|
| 22 | |
|---|
| 23 | from genshi.util import plaintext, stripentities, striptags, stringrepr |
|---|
| 24 | |
|---|
| 25 | __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', |
|---|
| 26 | 'QName'] |
|---|
| 27 | __docformat__ = 'restructuredtext en' |
|---|
| 28 | |
|---|
| 29 | |
|---|
class StreamEventKind(str):
    """String subclass naming one kind of event on a markup stream.

    Instances are interned by value: asking for the same kind twice returns
    the very same object, so kinds can be compared with ``is``.
    """
    __slots__ = []
    _instances = {}  # value -> the single shared instance created for it

    def __new__(cls, val):
        registry = cls._instances
        try:
            return registry[val]
        except KeyError:
            # First request for this value: create it and remember it
            kind = registry[val] = str.__new__(cls, val)
            return kind
|---|
| 37 | |
|---|
| 38 | |
|---|
class Stream(object):
    """An iterable sequence of markup events.

    Iterating over a stream yields three-item tuples::

      (kind, data, position)

    ``kind`` names the type of event (`START`, `END`, `TEXT`, and so on), the
    shape of ``data`` depends on that kind, and ``position`` is a
    ``(filename, line, offset)`` tuple locating the originating element or
    text in the input. When that location is not known, ``position`` is
    ``(None, -1, -1)``.

    A stream can also be turned into text: `serialize()` produces the output
    piece by piece as an iterator over strings, whereas `render()` hands back
    the complete text in one go. Both take parameters that influence how the
    serialization is performed.
    """
    __slots__ = ['events', 'serializer']

    START = StreamEventKind('START') #: a start tag
    END = StreamEventKind('END') #: an end tag
    TEXT = StreamEventKind('TEXT') #: literal text
    XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
    DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
    START_NS = StreamEventKind('START_NS') #: start namespace mapping
    END_NS = StreamEventKind('END_NS') #: end namespace mapping
    START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
    END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
    PI = StreamEventKind('PI') #: processing instruction
    COMMENT = StreamEventKind('COMMENT') #: comment

    def __init__(self, events, serializer=None):
        """Create a stream on top of the given markup events.

        :param events: a sequence or iterable providing the events
        :param serializer: the default serialization method to use for this
                           stream

        :note: Changed in 0.5: added the `serializer` argument
        """
        #: The underlying iterable producing the events
        self.events = events
        #: The default serialization method
        self.serializer = serializer

    def __iter__(self):
        """Return an iterator over the underlying events."""
        return iter(self.events)

    def __or__(self, function):
        """Apply a filter or serializer to the stream using the ``|``
        operator, much like a pipe in a Unix shell.

        Assume the following stream produced by the `HTML` function:

        >>> from genshi.input import HTML
        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
        >>> print(html)
        <p onclick="alert('Whoa')">Hello, world!</p>

        A filter such as the HTML sanitizer can be applied to that stream using
        the pipe notation as follows:

        >>> from genshi.filters import HTMLSanitizer
        >>> sanitizer = HTMLSanitizer()
        >>> print(html | sanitizer)
        <p>Hello, world!</p>

        Filters can be any function that accepts and produces a stream (where
        a stream is anything that iterates over events):

        >>> def uppercase(stream):
        ...     for kind, data, pos in stream:
        ...         if kind is TEXT:
        ...             data = data.upper()
        ...         yield kind, data, pos
        >>> print(html | sanitizer | uppercase)
        <p>HELLO, WORLD!</p>

        Serializers can also be used with this notation:

        >>> from genshi.output import TextSerializer
        >>> output = TextSerializer()
        >>> print(html | sanitizer | uppercase | output)
        HELLO, WORLD!

        Commonly, serializers should be used at the end of the "pipeline";
        using them somewhere in the middle may produce unexpected results.

        :param function: the callable object that should be applied as a filter
        :return: the filtered stream
        :rtype: `Stream`
        """
        filtered = function(self)
        # The result inherits this stream's default serialization method
        return Stream(_ensure(filtered), serializer=self.serializer)

    def filter(self, *filters):
        """Return a new stream with the given filters applied in order.

        Every filter must be a callable that takes the stream as its only
        argument and returns the filtered stream.

        The call::

            stream.filter(filter1, filter2)

        is equivalent to::

            stream | filter1 | filter2

        :param filters: one or more callable objects that should be applied as
                        filters
        :return: the filtered stream
        :rtype: `Stream`
        """
        piped = self
        for each in filters:
            piped = piped | each
        return piped

    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
        """Serialize the stream and return the result as one string.

        Extra keyword arguments are handed on to the serializer, so which ones
        are understood depends on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :param encoding: how the output string should be encoded; if set to
                         `None`, this method returns a `unicode` object
        :param out: a file-like object that the output should be written to
                    instead of being returned as one big string; note that if
                    this is a file or socket (or similar), the `encoding` must
                    not be `None` (that is, the output must be encoded)
        :return: a `str` or `unicode` object (depending on the `encoding`
                 parameter), or `None` if the `out` parameter is provided
        :rtype: `basestring`

        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        :note: Changed in 0.5: added the `out` parameter
        """
        from genshi.output import encode
        if method is None:
            # Use the stream's own default, and XML when it has none
            method = self.serializer or 'xml'
        chunks = self.serialize(method=method, **kwargs)
        return encode(chunks, method=method, encoding=encoding, out=out)

    def select(self, path, namespaces=None, variables=None):
        """Return a new stream holding only the events that match the given
        XPath expression.

        >>> from genshi import HTML
        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
        >>> print(stream.select('elem'))
        <elem>foo</elem><elem>bar</elem>
        >>> print(stream.select('elem/text()'))
        foobar

        Note that the outermost element of the stream becomes the *context
        node* for the XPath test. That means that the expression "doc" would
        not match anything in the example above, because it only tests against
        child elements of the outermost element:

        >>> print(stream.select('doc'))
        <BLANKLINE>

        You can use the "." expression to match the context node itself
        (although that usually makes little sense):

        >>> print(stream.select('.'))
        <doc><elem>foo</elem><elem>bar</elem></doc>

        :param path: a string containing the XPath expression
        :param namespaces: mapping of namespace prefixes used in the path
        :param variables: mapping of variable names to values
        :return: the selected substream
        :rtype: `Stream`
        :raises PathSyntaxError: if the given path expression is invalid or not
                                 supported
        """
        from genshi.path import Path
        xpath = Path(path)
        return xpath.select(self, namespaces, variables)

    def serialize(self, method='xml', **kwargs):
        """Serialize the stream incrementally.

        In contrast to `render()`, which builds one complete string, this
        method returns the serialized output piece by piece.

        Extra keyword arguments are handed on to the serializer, so which ones
        are understood depends on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :return: an iterator over the serialization results (`Markup` or
                 `unicode` objects, depending on the serialization method)
        :rtype: ``iterator``
        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        """
        from genshi.output import get_serializer
        if method is None:
            # Use the stream's own default, and XML when it has none
            method = self.serializer or 'xml'
        serializer = get_serializer(method, **kwargs)
        return serializer(_ensure(self))

    def __str__(self):
        """Return the stream rendered with the default `render()` settings."""
        return self.render()

    def __unicode__(self):
        """Return the stream rendered without encoding the result."""
        return self.render(encoding=None)

    def __html__(self):
        """Return the stream itself; `Markup.escape` looks for this hook."""
        return self
|---|
| 254 | |
|---|
| 255 | |
|---|
# Module-level aliases for the event kinds defined on `Stream`, so that code
# in this module can refer to them by their bare names (e.g. ``TEXT``).
START = Stream.START
END = Stream.END
TEXT = Stream.TEXT
XML_DECL = Stream.XML_DECL
DOCTYPE = Stream.DOCTYPE
START_NS = Stream.START_NS
END_NS = Stream.END_NS
START_CDATA = Stream.START_CDATA
END_CDATA = Stream.END_CDATA
PI = Stream.PI
COMMENT = Stream.COMMENT
|---|
| 267 | |
|---|
| 268 | |
|---|
| 269 | def _ensure(stream): |
|---|
| 270 | """Ensure that every item on the stream is actually a markup event.""" |
|---|
| 271 | stream = iter(stream) |
|---|
| 272 | event = stream.next() |
|---|
| 273 | |
|---|
| 274 | # Check whether the iterable is a real markup event stream by examining the |
|---|
| 275 | # first item it yields; if it's not we'll need to do some conversion |
|---|
| 276 | if type(event) is not tuple or len(event) != 3: |
|---|
| 277 | for event in chain([event], stream): |
|---|
| 278 | if hasattr(event, 'totuple'): |
|---|
| 279 | event = event.totuple() |
|---|
| 280 | else: |
|---|
| 281 | event = TEXT, unicode(event), (None, -1, -1) |
|---|
| 282 | yield event |
|---|
| 283 | return |
|---|
| 284 | |
|---|
| 285 | # This looks like a markup event stream, so we'll just pass it through |
|---|
| 286 | # unchanged |
|---|
| 287 | yield event |
|---|
| 288 | for event in stream: |
|---|
| 289 | yield event |
|---|
| 290 | |
|---|
| 291 | |
|---|
class Attrs(tuple):
    """Immutable sequence type that stores the attributes of an element.

    Ordering of the attributes is preserved, while access by name is also
    supported.

    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])

    >>> 'href' in attrs
    True
    >>> 'tabindex' in attrs
    False
    >>> attrs.get('title')
    'Foo'

    Instances may not be manipulated directly. Instead, the operators ``|`` and
    ``-`` can be used to produce new instances that have specific attributes
    added, replaced or removed.

    To remove an attribute, use the ``-`` operator. The right hand side can be
    either a string or a set/sequence of strings, identifying the name(s) of
    the attribute(s) to remove:

    >>> attrs - 'title'
    Attrs([('href', '#')])
    >>> attrs - ('title', 'href')
    Attrs()

    The original instance is not modified, but the operator can of course be
    used with an assignment:

    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs -= 'title'
    >>> attrs
    Attrs([('href', '#')])

    To add a new attribute, use the ``|`` operator, where the right hand value
    is a sequence of ``(name, value)`` tuples (which includes `Attrs`
    instances):

    >>> attrs | [('title', 'Bar')]
    Attrs([('href', '#'), ('title', 'Bar')])

    If the attributes already contain an attribute with a given name, the value
    of that attribute is replaced:

    >>> attrs | [('href', 'http://example.org/')]
    Attrs([('href', 'http://example.org/')])
    """
    __slots__ = []

    def __contains__(self, name):
        """Return whether the list includes an attribute with the specified
        name.

        :param name: the attribute name to look for
        :return: `True` if the list includes the attribute, `False` otherwise
        :rtype: `bool`
        """
        for attr, _ in self:
            if attr == name:
                return True
        # Return a real bool so that direct calls honour the documented type
        # instead of falling off the end with None
        return False

    def __getitem__(self, i):
        """Return an item or slice of the attributes list.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1]
        ('title', 'Foo')
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        items = tuple.__getitem__(self, i)
        if type(i) is slice:
            # Slices stay `Attrs` instead of decaying to plain tuples
            return Attrs(items)
        return items

    def __getslice__(self, i, j):
        """Return a slice of the attributes list.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        return Attrs(tuple.__getslice__(self, i, j))

    def __or__(self, attrs):
        """Return a new instance that contains the attributes in `attrs` in
        addition to any already existing attributes.

        Attributes whose name already exists keep their position but take the
        new value; attributes with new names are appended in the given order.

        :param attrs: a sequence of ``(name, value)`` tuples
        :return: a new instance with the merged attributes
        :rtype: `Attrs`
        """
        repl = dict([(an, av) for an, av in attrs if an in self])
        return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] +
                     [(an, av) for an, av in attrs if an not in self])

    def __repr__(self):
        if not self:
            return 'Attrs()'
        return 'Attrs([%s])' % ', '.join([repr(item) for item in self])

    def __sub__(self, names):
        """Return a new instance with all attributes with a name in `names`
        removed.

        :param names: the name, or a set/sequence of names, of the attributes
                      to remove
        :return: a new instance with the attributes removed
        :rtype: `Attrs`
        """
        if isinstance(names, basestring):
            # A single name is treated as a one-element sequence
            names = (names,)
        return Attrs([(name, val) for name, val in self if name not in names])

    def get(self, name, default=None):
        """Return the value of the attribute with the specified name, or the
        value of the `default` parameter if no such attribute is found.

        :param name: the name of the attribute
        :param default: the value to return when the attribute does not exist
        :return: the attribute value, or the `default` value if that attribute
                 does not exist
        :rtype: `object`
        """
        for attr, value in self:
            if attr == name:
                return value
        return default

    def totuple(self):
        """Return the attributes as a markup event.

        The returned event is a `TEXT` event, the data is the value of all
        attributes joined together.

        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
        ('TEXT', '#Foo', (None, -1, -1))

        :return: a `TEXT` event
        :rtype: `tuple`
        """
        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
|---|
| 436 | |
|---|
| 437 | |
|---|
class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.

    Operations that combine a `Markup` instance with other values (``+``,
    ``%`` and `join()`) escape those other values first, so the result is
    again safe markup.
    """
    __slots__ = []

    def __add__(self, other):
        """Return ``self + other`` as `Markup`, escaping `other` first."""
        return Markup(unicode.__add__(self, escape(other)))

    def __radd__(self, other):
        """Return ``other + self`` as `Markup`, escaping `other` first."""
        return Markup(unicode.__add__(escape(other), self))

    def __mod__(self, args):
        """Interpolate `args` into the markup, escaping every argument.

        :param args: a dict, a list/tuple, or a single value
        :return: the formatted `Markup` string
        """
        if isinstance(args, dict):
            args = dict(zip(args.keys(), map(escape, args.values())))
        elif isinstance(args, (list, tuple)):
            args = tuple(map(escape, args))
        else:
            args = escape(args)
        return Markup(unicode.__mod__(self, args))

    def __mul__(self, num):
        """Return the markup repeated `num` times, still as `Markup`."""
        return Markup(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __repr__(self):
        return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))

    def join(self, seq, escape_quotes=True):
        """Return a `Markup` object which is the concatenation of the strings
        in the given sequence, where this `Markup` object is the separator
        between the joined elements.

        Any element in the sequence that is not a `Markup` instance is
        automatically escaped.

        :param seq: the sequence of strings to join
        :param escape_quotes: whether double quote characters in the elements
                              should be escaped
        :return: the joined `Markup` object
        :rtype: `Markup`
        :see: `escape`
        """
        return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
                                          for item in seq]))

    @classmethod
    def escape(cls, text, quotes=True):
        """Create a Markup instance from a string and escape special characters
        it may contain (<, >, & and the double quote).

        >>> escape('"1 < 2"')
        <Markup u'&#34;1 &lt; 2&#34;'>

        If the `quotes` parameter is set to `False`, the double quote character
        is left as is. Escaping quotes is generally only required for strings
        that are to be used in attribute values.

        >>> escape('"1 < 2"', quotes=False)
        <Markup u'"1 &lt; 2"'>

        :param text: the text to escape
        :param quotes: if ``True``, double quote characters are escaped in
                       addition to the other special characters
        :return: the escaped `Markup` string
        :rtype: `Markup`
        """
        if not text:
            return cls()
        if type(text) is cls:
            # Already safe markup; escaping again would double-encode it
            return text
        if hasattr(text, '__html__'):
            return Markup(text.__html__())

        # The ampersand must be replaced first, otherwise the ampersands of
        # the entities inserted below would be escaped a second time
        text = text.replace('&', '&amp;') \
                   .replace('<', '&lt;') \
                   .replace('>', '&gt;')
        if quotes:
            text = text.replace('"', '&#34;')
        return cls(text)

    def unescape(self):
        """Reverse-escapes &, <, >, and the double quote and returns a
        `unicode` object.

        >>> Markup('1 &lt; 2').unescape()
        u'1 < 2'

        :return: the unescaped string
        :rtype: `unicode`
        :see: `genshi.core.unescape`
        """
        if not self:
            return ''
        # The ampersand entity is handled last, so that text such as
        # ``&amp;lt;`` comes out as ``&lt;`` and is not unescaped twice
        return unicode(self).replace('&#34;', '"') \
                            .replace('&gt;', '>') \
                            .replace('&lt;', '<') \
                            .replace('&amp;', '&')

    def stripentities(self, keepxmlentities=False):
        """Return a copy of the text with any character or numeric entities
        replaced by the equivalent UTF-8 characters.

        If the `keepxmlentities` parameter is provided and evaluates to `True`,
        the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
        ``&quot;``) are not stripped.

        :param keepxmlentities: whether the core XML entities should be kept
        :return: a `Markup` instance with entities removed
        :rtype: `Markup`
        :see: `genshi.util.stripentities`
        """
        return Markup(stripentities(self, keepxmlentities=keepxmlentities))

    def striptags(self):
        """Return a copy of the text with all XML/HTML tags removed.

        :return: a `Markup` instance with all tags removed
        :rtype: `Markup`
        :see: `genshi.util.striptags`
        """
        return Markup(striptags(self))
|---|
| 558 | |
|---|
| 559 | |
|---|
# Use the `Markup` class from the optional `genshi._speedups` module when it
# can be imported; otherwise the pure-Python class defined above stays bound.
try:
    from genshi._speedups import Markup
except ImportError:
    pass # just use the Python implementation


# Module-level shortcut bound to whichever `Markup` implementation is in use
escape = Markup.escape
|---|
| 567 | |
|---|
| 568 | |
|---|
def unescape(text):
    """Undo the escaping of &, <, >, and the double quote in a `Markup`
    string and return the result as a `unicode` object.

    >>> unescape(Markup('1 &lt; 2'))
    u'1 < 2'

    Anything that is not a `Markup` instance is handed back untouched.

    >>> unescape('1 < 2')
    '1 < 2'

    :param text: the text to unescape
    :return: the unescaped string
    :rtype: `unicode`
    """
    if isinstance(text, Markup):
        return text.unescape()
    return text
|---|
| 588 | |
|---|
| 589 | |
|---|
class Namespace(object):
    """Utility class creating and testing elements with a namespace.

    Internally, namespace URIs are encoded in the `QName` of any element or
    attribute, the namespace URI being enclosed in curly braces. This class
    helps create and test these strings.

    A `Namespace` object is instantiated with the namespace URI.

    >>> html = Namespace('http://www.w3.org/1999/xhtml')
    >>> html
    Namespace('http://www.w3.org/1999/xhtml')
    >>> html.uri
    u'http://www.w3.org/1999/xhtml'

    The `Namespace` object can then be used to generate `QName` objects with
    that namespace:

    >>> html.body
    QName('http://www.w3.org/1999/xhtml}body')
    >>> html.body.localname
    u'body'
    >>> html.body.namespace
    u'http://www.w3.org/1999/xhtml'

    The same works using item access notation, which is useful for element or
    attribute names that are not valid Python identifiers:

    >>> html['body']
    QName('http://www.w3.org/1999/xhtml}body')

    A `Namespace` object can also be used to test whether a specific `QName`
    belongs to that namespace using the ``in`` operator:

    >>> qname = html.body
    >>> qname in html
    True
    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
    False
    """
    def __new__(cls, uri):
        # An object that is already an instance of exactly this class is
        # handed back instead of being wrapped in a new one
        if type(uri) is cls:
            return uri
        return object.__new__(cls)

    def __getnewargs__(self):
        return (self.uri,)

    def __getstate__(self):
        return self.uri

    def __setstate__(self, uri):
        self.uri = uri

    def __init__(self, uri):
        """Initialize the namespace.

        :param uri: the namespace URI
        """
        self.uri = unicode(uri)

    def __contains__(self, qname):
        """Return whether the given `QName` belongs to this namespace."""
        return qname.namespace == self.uri

    def __ne__(self, other):
        return not self == other

    def __eq__(self, other):
        """Compare by URI against another `Namespace`, or against a plain
        value that is compared with the URI directly.
        """
        if isinstance(other, Namespace):
            return self.uri == other.uri
        return self.uri == other

    def __getitem__(self, name):
        """Return the `QName` for `name` in this namespace.

        :param name: the local name of the element or attribute
        :rtype: `QName`
        """
        return QName(self.uri + '}' + name)

    def __getattr__(self, name):
        """Return the `QName` for `name` in this namespace.

        Names of the form ``__xxx__`` are reserved for Python protocols and
        raise `AttributeError`. Otherwise protocol probes such as the lookup
        of ``__deepcopy__`` by `copy.deepcopy` would be answered with a
        `QName`, which is then called and fails. Item access still produces a
        `QName` for such names.

        :param name: the local name of the element or attribute
        :rtype: `QName`
        :raises AttributeError: if `name` starts and ends with two underscores
        """
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self[name]

    def __hash__(self):
        return hash(self.uri)

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))

    def __str__(self):
        return self.uri.encode('utf-8')

    def __unicode__(self):
        return self.uri
|---|
| 673 | |
|---|
| 674 | |
|---|
# The namespace used by attributes such as xml:lang and xml:space
XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
|---|
| 677 | |
|---|
| 678 | |
|---|
class QName(unicode):
    """A qualified element or attribute name.

    The unicode value of an instance is the qualified name of the element or
    attribute, written as ``{namespace-uri}local-name``. The two parts are
    additionally exposed as the `namespace` and `localname` attributes;
    `namespace` is `None` for names that carry no namespace.

    >>> qname = QName('foo')
    >>> qname
    QName('foo')
    >>> qname.localname
    u'foo'
    >>> qname.namespace

    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
    >>> qname
    QName('http://www.w3.org/1999/xhtml}body')
    >>> qname.localname
    u'body'
    >>> qname.namespace
    u'http://www.w3.org/1999/xhtml'
    """
    __slots__ = ['namespace', 'localname']

    def __new__(cls, qname):
        """Create the `QName` instance.

        :param qname: the qualified name as a string of the form
                      ``{namespace-uri}local-name``, where the leading curly
                      brace is optional
        """
        if type(qname) is cls:
            return qname

        qname = qname.lstrip('{')
        uri, brace, local = qname.partition('}')
        if not brace:
            # No closing brace: a plain local name without a namespace
            self = unicode.__new__(cls, qname)
            self.namespace = None
            self.localname = unicode(qname)
            return self

        # The stored value always carries the leading brace
        self = unicode.__new__(cls, '{%s' % qname)
        self.namespace = unicode(uri)
        self.localname = unicode(local)
        return self

    def __getnewargs__(self):
        bare = self.lstrip('{')
        return (bare,)

    def __repr__(self):
        bare = self.lstrip('{')
        return '%s(%s)' % (type(self).__name__, stringrepr(bare))
|---|