Edgewall Software

source: trunk/genshi/core.py

Last change on this file was 1251, checked in by hodgestar, 10 years ago

Revert r1245 (the Python3.4 regression was fixed in http://hg.python.org/cpython/rev/b328f8ccbccf).

  • Property svn:eol-style set to native
File size: 25.1 KB
RevLine 
[2]1# -*- coding: utf-8 -*-
2#
[1077]3# Copyright (C) 2006-2009 Edgewall Software
[2]4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
[287]8# are also available at http://genshi.edgewall.org/wiki/License.
[2]9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
[287]12# history and logs, available at http://genshi.edgewall.org/log/.
[2]13
14"""Core classes for markup processing."""
15
[970]16try:
[1079]17    reduce # builtin in Python < 3
18except NameError:
[970]19    from functools import reduce
[1157]20import sys
[750]21from itertools import chain
[254]22import operator
[2]23
[1080]24from genshi.util import plaintext, stripentities, striptags, stringrepr
[485]25
[459]26__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
27           'QName']
[517]28__docformat__ = 'restructuredtext en'
[2]29
30
class StreamEventKind(str):
    """Interned string subclass that names a kind of markup stream event.

    Creating the class twice with the same value returns the very same
    object, so event kinds can be compared by identity.
    """
    __slots__ = []
    _instances = {}  # value -> the single shared instance for that value

    def __new__(cls, val):
        candidate = str.__new__(cls, val)
        # ``setdefault`` hands back whichever instance was registered first
        # for ``val``; ``candidate`` is only stored when the value is new.
        return cls._instances.setdefault(val, candidate)
[2]38
[344]39
class Stream(object):
    """An iterable of markup events, with helpers to filter and serialize it.

    Every event on the stream is a tuple of the form::

      (kind, data, position)

    ``kind`` is one of the event kinds defined on this class (`START`, `END`,
    `TEXT`, etc), the shape of ``data`` depends on the kind, and ``position``
    is a ``(filename, line, offset)`` tuple locating the original element or
    text in the input, or ``(None, -1, -1)`` when that location is unknown.

    The stream can be turned into text either incrementally with
    `serialize()`, which returns an iterator over the generated strings, or
    in one go with `render()`. Both accept parameters that influence how the
    stream is serialized.
    """
    __slots__ = ['events', 'serializer']

    START = StreamEventKind('START') #: a start tag
    END = StreamEventKind('END') #: an end tag
    TEXT = StreamEventKind('TEXT') #: literal text
    XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
    DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
    START_NS = StreamEventKind('START_NS') #: start namespace mapping
    END_NS = StreamEventKind('END_NS') #: end namespace mapping
    START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
    END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
    PI = StreamEventKind('PI') #: processing instruction
    COMMENT = StreamEventKind('COMMENT') #: comment

    def __init__(self, events, serializer=None):
        """Create a stream on top of a sequence of markup events.

        :param events: a sequence or iterable providing the events
        :param serializer: the default serialization method to use for this
                           stream

        :note: Changed in 0.5: added the `serializer` argument
        """
        self.events = events #: The underlying iterable producing the events
        self.serializer = serializer #: The default serialization method

    def __iter__(self):
        return iter(self.events)

    def __or__(self, function):
        """Apply a filter or serializer to the stream using the "bitwise or"
        operator, giving a syntax that resembles pipes on Unix shells.

        Assume the following stream produced by the `HTML` function:

        >>> from genshi.input import HTML
        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8')
        >>> print(html)
        <p onclick="alert('Whoa')">Hello, world!</p>

        A filter such as the HTML sanitizer can be applied to that stream
        using the pipe notation as follows:

        >>> from genshi.filters import HTMLSanitizer
        >>> sanitizer = HTMLSanitizer()
        >>> print(html | sanitizer)
        <p>Hello, world!</p>

        Filters can be any function that accepts and produces a stream (where
        a stream is anything that iterates over events):

        >>> def uppercase(stream):
        ...     for kind, data, pos in stream:
        ...         if kind is TEXT:
        ...             data = data.upper()
        ...         yield kind, data, pos
        >>> print(html | sanitizer | uppercase)
        <p>HELLO, WORLD!</p>

        Serializers can also be used with this notation:

        >>> from genshi.output import TextSerializer
        >>> output = TextSerializer()
        >>> print(html | sanitizer | uppercase | output)
        HELLO, WORLD!

        Commonly, serializers should be used at the end of the "pipeline";
        using them somewhere in the middle may produce unexpected results.

        :param function: the callable object that should be applied as a
                         filter
        :return: the filtered stream
        :rtype: `Stream`
        """
        filtered = function(self)
        # The result of the callable is normalized into markup events, and
        # the new stream inherits this stream's default serializer.
        return Stream(_ensure(filtered), serializer=self.serializer)

    def filter(self, *filters):
        """Return a new stream with the given filters applied in order.

        Each filter must be a callable that accepts the stream object as
        parameter and returns the filtered stream.

        The call::

            stream.filter(filter1, filter2)

        is equivalent to::

            stream | filter1 | filter2

        :param filters: one or more callable objects that should be applied
                        as filters
        :return: the filtered stream
        :rtype: `Stream`
        """
        result = self
        for apply_filter in filters:
            result = result | apply_filter
        return result

    def render(self, method=None, encoding=None, out=None, **kwargs):
        """Return a string representation of the stream.

        Any additional keyword arguments are passed to the serializer, and
        thus depend on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :param encoding: how the output string should be encoded; if set to
                         `None`, this method returns a `unicode` object
        :param out: a file-like object that the output should be written to
                    instead of being returned as one big string; note that if
                    this is a file or socket (or similar), the `encoding`
                    must not be `None` (that is, the output must be encoded)
        :return: a `str` or `unicode` object (depending on the `encoding`
                 parameter), or `None` if the `out` parameter is provided
        :rtype: `basestring`

        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        :note: Changed in 0.5: added the `out` parameter
        """
        from genshi.output import encode
        chosen = method
        if chosen is None:
            # Fall back to the stream's own default, and to XML after that
            chosen = self.serializer or 'xml'
        chunks = self.serialize(method=chosen, **kwargs)
        return encode(chunks, method=chosen, encoding=encoding, out=out)

    def select(self, path, namespaces=None, variables=None):
        """Return a new stream containing only the events that match the
        given XPath expression.

        >>> from genshi import HTML
        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8')
        >>> print(stream.select('elem'))
        <elem>foo</elem><elem>bar</elem>
        >>> print(stream.select('elem/text()'))
        foobar

        Note that the outermost element of the stream becomes the *context
        node* for the XPath test. That means that the expression "doc" would
        not match anything in the example above, because it only tests
        against child elements of the outermost element:

        >>> print(stream.select('doc'))
        <BLANKLINE>

        You can use the "." expression to match the context node itself
        (although that usually makes little sense):

        >>> print(stream.select('.'))
        <doc><elem>foo</elem><elem>bar</elem></doc>

        :param path: a string containing the XPath expression
        :param namespaces: mapping of namespace prefixes used in the path
        :param variables: mapping of variable names to values
        :return: the selected substream
        :rtype: `Stream`
        :raises PathSyntaxError: if the given path expression is invalid or
                                 not supported
        """
        from genshi.path import Path
        xpath = Path(path)
        return xpath.select(self, namespaces, variables)

    def serialize(self, method='xml', **kwargs):
        """Generate strings corresponding to a specific serialization of the
        stream.

        Unlike the `render()` method, the output is produced incrementally
        through the returned iterator, as opposed to being returned as a
        single string.

        Any additional keyword arguments are passed to the serializer, and
        thus depend on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :return: an iterator over the serialization results (`Markup` or
                 `unicode` objects, depending on the serialization method)
        :rtype: ``iterator``
        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        """
        from genshi.output import get_serializer
        if method is None:
            method = self.serializer or 'xml'
        serializer = get_serializer(method, **kwargs)
        return serializer(_ensure(self))

    def __str__(self):
        return self.render()

    def __unicode__(self):
        return self.render(encoding=None)

    def __html__(self):
        return self
[2]255
[861]256
# Module-level shortcuts for the event kinds defined on `Stream`, so that code
# in this module can write e.g. ``TEXT`` instead of ``Stream.TEXT``.
[74]257START = Stream.START
258END = Stream.END
259TEXT = Stream.TEXT
[558]260XML_DECL = Stream.XML_DECL
[74]261DOCTYPE = Stream.DOCTYPE
262START_NS = Stream.START_NS
263END_NS = Stream.END_NS
[184]264START_CDATA = Stream.START_CDATA
265END_CDATA = Stream.END_CDATA
[74]266PI = Stream.PI
267COMMENT = Stream.COMMENT
268
[1052]269
[129]270def _ensure(stream):
271    """Ensure that every item on the stream is actually a markup event."""
[750]272    stream = iter(stream)
273    event = stream.next()
274
275    # Check whether the iterable is a real markup event stream by examining the
276    # first item it yields; if it's not we'll need to do some conversion
277    if type(event) is not tuple or len(event) != 3:
278        for event in chain([event], stream):
[186]279            if hasattr(event, 'totuple'):
280                event = event.totuple()
281            else:
282                event = TEXT, unicode(event), (None, -1, -1)
[750]283            yield event
284        return
285
286    # This looks like a markup event stream, so we'll just pass it through
287    # unchanged
288    yield event
289    for event in stream:
[186]290        yield event
[74]291
[129]292
class Attrs(tuple):
    """Immutable, ordered collection holding the attributes of an element.

    The attributes are kept as ``(name, value)`` pairs in their original
    order, and can additionally be looked up by name.

    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])

    >>> 'href' in attrs
    True
    >>> 'tabindex' in attrs
    False
    >>> attrs.get('title')
    'Foo'

    Instances may not be manipulated directly. Instead, the operators ``|``
    and ``-`` can be used to produce new instances that have specific
    attributes added, replaced or removed.

    To remove an attribute, use the ``-`` operator. The right hand side can
    be either a string or a set/sequence of strings, identifying the name(s)
    of the attribute(s) to remove:

    >>> attrs - 'title'
    Attrs([('href', '#')])
    >>> attrs - ('title', 'href')
    Attrs()

    The original instance is not modified, but the operator can of course be
    used with an assignment:

    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs -= 'title'
    >>> attrs
    Attrs([('href', '#')])

    To add a new attribute, use the ``|`` operator, where the right hand
    value is a sequence of ``(name, value)`` tuples (which includes `Attrs`
    instances):

    >>> attrs | [('title', 'Bar')]
    Attrs([('href', '#'), ('title', 'Bar')])

    If the attributes already contain an attribute with a given name, the
    value of that attribute is replaced:

    >>> attrs | [('href', 'http://example.org/')]
    Attrs([('href', 'http://example.org/')])
    """
    __slots__ = []

    def __contains__(self, name):
        """Tell whether an attribute with the given name is present.

        :return: `True` if the list includes the attribute
        :rtype: `bool`
        """
        for attr_name, _ in self:
            if attr_name == name:
                return True
        return False

    def __getitem__(self, i):
        """Return an item or slice of the attributes list.

        A single index gives the plain ``(name, value)`` pair, while a slice
        gives a new `Attrs` instance.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1]
        ('title', 'Foo')
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        selected = tuple.__getitem__(self, i)
        if type(i) is not slice:
            return selected
        return Attrs(selected)

    def __getslice__(self, i, j):
        """Return a slice of the attributes list as a new `Attrs` instance.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        selected = tuple.__getslice__(self, i, j)
        return Attrs(selected)

    def __or__(self, attrs):
        """Return a new instance holding the existing attributes merged with
        those in `attrs`. Any attributes in the new set that have a value of
        `None` are removed.

        :return: a new instance with the merged attributes
        :rtype: `Attrs`
        """
        # Names that the right hand side asks to drop
        dropped = set(an for an, av in attrs if av is None)
        # New values for names that are already present
        updated = dict((an, av) for an, av in attrs
                       if an in self and av is not None)
        # Existing attributes keep their position, possibly with a new value
        kept = [(sn, updated.get(sn, sv)) for sn, sv in self
                if sn not in dropped]
        # Names not seen before are appended in the order they were given
        added = [(an, av) for an, av in attrs
                 if an not in self and an not in dropped]
        return Attrs(kept + added)

    def __repr__(self):
        if self:
            return 'Attrs([%s])' % ', '.join(map(repr, self))
        return 'Attrs()'

    def __sub__(self, names):
        """Return a new instance from which all attributes with a name in
        `names` have been removed.

        :param names: the names of the attributes to remove
        :return: a new instance with the attribute removed
        :rtype: `Attrs`
        """
        excluded = names
        if isinstance(excluded, basestring):
            # A lone name is treated as a one-element collection of names
            excluded = (excluded,)
        return Attrs([(key, value) for key, value in self
                      if key not in excluded])

    def get(self, name, default=None):
        """Look up the value of the attribute with the given name, falling
        back to `default` if no such attribute is found.

        :param name: the name of the attribute
        :param default: the value to return when the attribute does not exist
        :return: the attribute value, or the `default` value if that attribute
                 does not exist
        :rtype: `object`
        """
        for attr_name, attr_value in self:
            if attr_name == name:
                return attr_value
        return default

    def totuple(self):
        """Return the attributes as a markup event.

        The returned event is a `TEXT` event, the data is the value of all
        attributes joined together.

        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
        ('TEXT', '#Foo', (None, -1, -1))

        :return: a `TEXT` event
        :rtype: `tuple`
        """
        text = ''.join([attr[1] for attr in self])
        return TEXT, text, (None, -1, -1)
[2]443
[82]444
class Markup(unicode):
    """A string that is marked as safe for inclusion in HTML/XML output, and
    therefore does not need to be escaped.
    """
    __slots__ = []

    def __add__(self, other):
        safe_other = escape(other)
        return Markup(unicode.__add__(self, safe_other))

    def __radd__(self, other):
        safe_other = escape(other)
        return Markup(unicode.__add__(safe_other, self))

    def __mod__(self, args):
        # Every value that gets interpolated is escaped first; the container
        # type (mapping, sequence or single value) is kept as it was.
        if isinstance(args, dict):
            names = args.keys()
            values = map(escape, args.values())
            safe_args = dict(zip(names, values))
        elif isinstance(args, (list, tuple)):
            safe_args = tuple(map(escape, args))
        else:
            safe_args = escape(args)
        return Markup(unicode.__mod__(self, safe_args))

    def __mul__(self, num):
        repeated = unicode.__mul__(self, num)
        return Markup(repeated)
    __rmul__ = __mul__

    def __repr__(self):
        return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))

    def join(self, seq, escape_quotes=True):
        """Concatenate the strings in the given sequence into a new `Markup`
        object, using this `Markup` object as the separator between the
        joined elements.

        Any element in the sequence that is not a `Markup` instance is
        automatically escaped.

        :param seq: the sequence of strings to join
        :param escape_quotes: whether double quote characters in the elements
                              should be escaped
        :return: the joined `Markup` object
        :rtype: `Markup`
        :see: `escape`
        """
        safe_items = [escape(item, quotes=escape_quotes) for item in seq]
        return Markup(unicode.join(self, safe_items))

    @classmethod
    def escape(cls, text, quotes=True):
        """Create a Markup instance from a string and escape special characters
        it may contain (<, >, & and \").

        >>> escape('"1 < 2"')
        <Markup u'&#34;1 &lt; 2&#34;'>

        If the `quotes` parameter is set to `False`, the \" character is left
        as is. Escaping quotes is generally only required for strings that are
        to be used in attribute values.

        >>> escape('"1 < 2"', quotes=False)
        <Markup u'"1 &lt; 2"'>

        :param text: the text to escape
        :param quotes: if ``True``, double quote characters are escaped in
                       addition to the other special characters
        :return: the escaped `Markup` string
        :rtype: `Markup`
        """
        if not text:
            return cls()
        if type(text) is cls:
            # Already safe markup, nothing to do
            return text
        if hasattr(text, '__html__'):
            return cls(text.__html__())

        # The ampersand is replaced first, so that the ampersands introduced
        # by the other entities are not escaped a second time.
        escaped = text
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
            escaped = escaped.replace(char, entity)
        if quotes:
            escaped = escaped.replace('"', '&#34;')
        return cls(escaped)

    def unescape(self):
        """Reverse-escapes &, <, >, and \" and returns a `unicode` object.

        >>> Markup('1 &lt; 2').unescape()
        u'1 < 2'

        :return: the unescaped string
        :rtype: `unicode`
        :see: `genshi.core.unescape`
        """
        if not self:
            return ''
        # The ampersand entity is handled last, so that text such as
        # ``&amp;lt;`` is only unescaped one level.
        plain = unicode(self)
        for entity, char in (('&#34;', '"'), ('&gt;', '>'), ('&lt;', '<'),
                             ('&amp;', '&')):
            plain = plain.replace(entity, char)
        return plain

    def stripentities(self, keepxmlentities=False):
        """Return a copy of the text with any character or numeric entities
        replaced by the equivalent UTF-8 characters.

        If the `keepxmlentities` parameter is provided and evaluates to `True`,
        the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
        ``&quot;``) are not stripped.

        :return: a `Markup` instance with entities removed
        :rtype: `Markup`
        :see: `genshi.util.stripentities`
        """
        stripped = stripentities(self, keepxmlentities=keepxmlentities)
        return Markup(stripped)

    def striptags(self):
        """Return a copy of the text with all XML/HTML tags removed.

        :return: a `Markup` instance with all tags removed
        :rtype: `Markup`
        :see: `genshi.util.striptags`
        """
        stripped = striptags(self)
        return Markup(stripped)
565
566
# If the optional `genshi._speedups` module can be imported, its `Markup`
# replaces the pure-Python class defined above; otherwise that class is kept.
567try:
568    from genshi._speedups import Markup
569except ImportError:
570    pass # just use the Python implementation
571
572
# Module-level alias for whichever `Markup.escape` ended up being bound above.
573escape = Markup.escape
574
575
def unescape(text):
    """Reverse-escapes &, <, >, and \" and returns a `unicode` object.

    >>> unescape(Markup('1 &lt; 2'))
    u'1 < 2'

    Only `Markup` instances are unescaped; any other `text` object is handed
    back unchanged.

    >>> unescape('1 &lt; 2')
    '1 &lt; 2'

    :param text: the text to unescape
    :return: the unescaped string
    :rtype: `unicode`
    """
    if isinstance(text, Markup):
        return text.unescape()
    return text
595
596
class Namespace(object):
    """Utility class for creating and testing elements with a namespace.

    Internally, namespace URIs are encoded in the `QName` of any element or
    attribute, the namespace URI being enclosed in curly braces. This class
    helps create and test these strings.

    A `Namespace` object is instantiated with the namespace URI.

    >>> html = Namespace('http://www.w3.org/1999/xhtml')
    >>> html
    Namespace('http://www.w3.org/1999/xhtml')
    >>> html.uri
    u'http://www.w3.org/1999/xhtml'

    The `Namespace` object can then be used to generate `QName` objects with
    that namespace:

    >>> html.body
    QName('http://www.w3.org/1999/xhtml}body')
    >>> html.body.localname
    u'body'
    >>> html.body.namespace
    u'http://www.w3.org/1999/xhtml'

    The same works using item access notation, which is useful for element or
    attribute names that are not valid Python identifiers:

    >>> html['body']
    QName('http://www.w3.org/1999/xhtml}body')

    A `Namespace` object can also be used to test whether a specific `QName`
    belongs to that namespace using the ``in`` operator:

    >>> qname = html.body
    >>> qname in html
    True
    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
    False
    """
    def __new__(cls, uri):
        # Passing an existing instance of exactly this class hands that same
        # instance back instead of allocating a new object.
        if type(uri) is cls:
            return uri
        return object.__new__(cls)

    def __init__(self, uri):
        self.uri = unicode(uri)

    # Pickle support: the URI is the complete state of a namespace

    def __getnewargs__(self):
        return (self.uri,)

    def __getstate__(self):
        return self.uri

    def __setstate__(self, uri):
        self.uri = uri

    def __contains__(self, qname):
        return qname.namespace == self.uri

    def __eq__(self, other):
        # Another namespace is compared by its URI; anything else is compared
        # against the URI directly.
        if isinstance(other, Namespace):
            other = other.uri
        return self.uri == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self.uri)

    def __getitem__(self, name):
        qualified = self.uri + '}' + name
        return QName(qualified)
    # Attribute access that finds nothing else builds a qualified name too
    __getattr__ = __getitem__

    if sys.version_info[0] == 2:
        # Only use stringrepr in python 2
        def __repr__(self):
            return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
    else:
        def __repr__(self):
            return '%s(%r)' % (type(self).__name__, self.uri)

    def __str__(self):
        return self.uri.encode('utf-8')

    def __unicode__(self):
        return self.uri
685
686
687# The namespace used by attributes such as xml:lang and xml:space
# (a module-level `Namespace` instance; it is not listed in `__all__`)
688XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
689
690
class QName(unicode):
    """A qualified element or attribute name.

    The unicode value of an instance is the qualified name of the element or
    attribute, in the form ``{namespace-uri}local-name``. The namespace URI is
    available through the additional `namespace` attribute, and the local
    name through the `localname` attribute.

    >>> qname = QName('foo')
    >>> qname
    QName('foo')
    >>> qname.localname
    u'foo'
    >>> qname.namespace

    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
    >>> qname
    QName('http://www.w3.org/1999/xhtml}body')
    >>> qname.localname
    u'body'
    >>> qname.namespace
    u'http://www.w3.org/1999/xhtml'
    """
    __slots__ = ['namespace', 'localname']

    def __new__(cls, qname):
        """Create the `QName` instance.

        :param qname: the qualified name as a string of the form
                      ``{namespace-uri}local-name``, where the leading curly
                      brace is optional
        """
        if type(qname) is cls:
            return qname

        stripped = qname.lstrip('{')
        pieces = stripped.split('}', 1)
        if len(pieces) == 1:
            # No closing brace, so the name is not in any namespace
            instance = unicode.__new__(cls, stripped)
            instance.namespace = None
            instance.localname = unicode(stripped)
            return instance

        # The stored value always carries the leading brace, whether or not
        # the caller supplied it.
        instance = unicode.__new__(cls, '{%s' % stripped)
        instance.namespace, instance.localname = map(unicode, pieces)
        return instance

    def __getnewargs__(self):
        return (self.lstrip('{'),)

    if sys.version_info[0] == 2:
        # Only use stringrepr in python 2
        def __repr__(self):
            return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
    else:
        def __repr__(self):
            return '%s(%r)' % (type(self).__name__, self.lstrip('{'))
Note: See TracBrowser for help on using the repository browser.