Edgewall Software

Changeset 1140


Ignore:
Timestamp:
Oct 25, 2010, 12:08:11 AM (13 years ago)
Author:
hodgestar
Message:

Add support for Python 3 to the core Genshi components (genshi.core, genshi.input and genshi.output):

  • Default input and output encodings changed from UTF-8 to None (i.e. meaning unicode strings rather than encoded bytes).
  • Namespace and QName objects no longer call stringrepr in __repr__ on Python 3, since repr() already returns a unicode string there.
  • Track changes to the expat parser in Python 3 (mostly that it accepts bytes instead of strings).
Location:
branches/experimental/py3k/genshi
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/experimental/py3k/genshi/core.py

    r1129 r1140  
    1818except NameError:
    1919    from functools import reduce
     20import sys
    2021from itertools import chain
    2122import operator
     
    9394       
    9495        >>> from genshi.input import HTML
    95         >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
     96        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8')
    9697        >>> print(html)
    9798        <p onclick="alert('Whoa')">Hello, world!</p>
     
    154155        return reduce(operator.or_, (self,) + filters)
    155156
    156     def render(self, method=None, encoding='utf-8', out=None, **kwargs):
     157    def render(self, method=None, encoding=None, out=None, **kwargs):
    157158        """Return a string representation of the stream.
    158159       
     
    188189       
    189190        >>> from genshi import HTML
    190         >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
     191        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8')
    191192        >>> print(stream.select('elem'))
    192193        <elem>foo</elem><elem>bar</elem>
     
    668669        return hash(self.uri)
    669670
    670     def __repr__(self):
    671         return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
     671    if sys.version_info[0] == 2:
     672        # Only use stringrepr in python 2
     673        def __repr__(self):
     674            return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
     675    else:
     676        def __repr__(self):
     677            return '%s(%r)' % (type(self).__name__, self.uri)
    672678
    673679    def __str__(self):
     
    729735        return (self.lstrip('{'),)
    730736
    731     def __repr__(self):
    732         return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
     737    if sys.version_info[0] == 2:
     738        # Only use stringrepr in python 2
     739        def __repr__(self):
     740            return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
     741    else:
     742        def __repr__(self):
     743            return '%s(%r)' % (type(self).__name__, self.lstrip('{'))
  • branches/experimental/py3k/genshi/input.py

    r1082 r1140  
    1919import htmlentitydefs as entities
    2020import HTMLParser as html
    21 from StringIO import StringIO
    2221from xml.parsers import expat
    2322
     
    2524from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
    2625                        END_NS, START_CDATA, END_CDATA, PI, COMMENT
     26from genshi.compat import StringIO, BytesIO
     27
    2728
    2829__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
     
    9192    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
    9293                   entities.name2codepoint.items()]
    93     _external_dtd = '\n'.join(_entitydefs)
     94    _external_dtd = u'\n'.join(_entitydefs).encode('utf-8')
    9495
    9596    def __init__(self, source, filename=None, encoding=None):
     
    109110        parser = expat.ParserCreate(encoding, '}')
    110111        parser.buffer_text = True
    111         parser.returns_unicode = True
     112        # Python 3 does not have returns_unicode
     113        if hasattr(parser, 'returns_unicode'):
     114            parser.returns_unicode = True
    112115        parser.ordered_attributes = True
    113116
     
    147150                    while not done and len(self._queue) == 0:
    148151                        data = self.source.read(bufsize)
    149                         if data == '': # end of data
     152                        if not data: # end of data
    150153                            if hasattr(self, 'expat'):
    151154                                self.expat.Parse('', True)
     
    171174    def _build_foreign(self, context, base, sysid, pubid):
    172175        parser = self.expat.ExternalEntityParserCreate(context)
    173         parser.ParseFile(StringIO(self._external_dtd))
     176        parser.ParseFile(BytesIO(self._external_dtd))
    174177        return 1
    175178
     
    280283    The parsing is initiated by iterating over the parser object:
    281284   
    282     >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
     285    >>> parser = HTMLParser(BytesIO(u'<UL compact><LI>Foo</UL>'.encode('utf-8')), encoding='utf-8')
    283286    >>> for kind, data, pos in parser:
    284287    ...     print('%s %s' % (kind, data))
     
    294297                              'param'])
    295298
    296     def __init__(self, source, filename=None, encoding='utf-8'):
     299    def __init__(self, source, filename=None, encoding=None):
    297300        """Initialize the parser for the given HTML input.
    298301       
     
    321324                    while not done and len(self._queue) == 0:
    322325                        data = self.source.read(bufsize)
    323                         if data == '': # end of data
     326                        if not data: # end of data
    324327                            self.close()
    325328                            done = True
    326329                        else:
     330                            if not isinstance(data, unicode):
     331                                # bytes
     332                                if self.encoding:
     333                                    data = data.decode(self.encoding)
     334                                else:
     335                                    raise UnicodeError("source returned bytes, but no encoding specified")
    327336                            self.feed(data)
    328337                    for kind, data, pos in self._queue:
     
    404413
    405414
    406 def HTML(text, encoding='utf-8'):
     415def HTML(text, encoding=None):
    407416    """Parse the given HTML source and return a markup stream.
    408417   
     
    410419    iterated over multiple times:
    411420   
    412     >>> html = HTML('<body><h1>Foo</h1></body>')
     421    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    413422    >>> print(html)
    414423    <body><h1>Foo</h1></body>
     
    423432                        fails
    424433    """
    425     return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
     434    if isinstance(text, unicode):
     435        return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
     436    return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
    426437
    427438
  • branches/experimental/py3k/genshi/output.py

    r1086 r1140  
    2828
    2929
    30 def encode(iterator, method='xml', encoding='utf-8', out=None):
     30def encode(iterator, method='xml', encoding=None, out=None):
    3131    """Encode serializer output into a string.
    3232   
  • branches/experimental/py3k/genshi/tests/core.py

    r1085 r1140  
    1414import doctest
    1515import pickle
    16 from StringIO import StringIO
    17 try:
    18     from cStringIO import StringIO as cStringIO
    19 except ImportError:
    20     cStringIO = StringIO
    2116import unittest
    2217
     
    2419from genshi.core import Markup, Attrs, Namespace, QName, escape, unescape
    2520from genshi.input import XML, ParseError
     21from genshi.compat import StringIO, BytesIO
    2622
    2723
     
    3026    def test_render_utf8(self):
    3127        xml = XML('<li>Über uns</li>')
    32         self.assertEqual('<li>Über uns</li>', xml.render())
     28        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), xml.render(encoding='utf-8'))
    3329
    3430    def test_render_unicode(self):
    3531        xml = XML('<li>Über uns</li>')
     32        self.assertEqual(u'<li>Über uns</li>', xml.render())
    3633        self.assertEqual(u'<li>Über uns</li>', xml.render(encoding=None))
    3734
    3835    def test_render_ascii(self):
    3936        xml = XML('<li>Über uns</li>')
    40         self.assertEqual('<li>&#220;ber uns</li>', xml.render(encoding='ascii'))
     37        self.assertEqual(u'<li>&#220;ber uns</li>'.encode('ascii'), xml.render(encoding='ascii'))
    4138
    4239    def test_render_output_stream_utf8(self):
    4340        xml = XML('<li>Über uns</li>')
    44         strio = cStringIO()
    45         self.assertEqual(None, xml.render(out=strio))
    46         self.assertEqual('<li>Über uns</li>', strio.getvalue())
     41        strio = BytesIO()
     42        self.assertEqual(None, xml.render(encoding='utf-8', out=strio))
     43        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), strio.getvalue())
    4744
    4845    def test_render_output_stream_unicode(self):
     
    5451    def test_pickle(self):
    5552        xml = XML('<li>Foo</li>')
    56         buf = StringIO()
     53        buf = BytesIO()
    5754        pickle.dump(xml, buf, 2)
    5855        buf.seek(0)
     
    6461
    6562    def test_new_with_encoding(self):
    66         markup = Markup('Döner', encoding='utf-8')
    67         self.assertEquals("<Markup u'D\\xf6ner'>", repr(markup))
     63        markup = Markup(u'Döner'.encode('utf-8'), encoding='utf-8')
     64        # mimic Markup.__repr__ when constructing output for Python 2/3 compatibility
     65        self.assertEquals("<Markup %r>" % u'D\u00f6ner', repr(markup))
    6866
    6967    def test_repr(self):
     
    159157    def test_pickle(self):
    160158        markup = Markup('foo')
    161         buf = StringIO()
     159        buf = BytesIO()
    162160        pickle.dump(markup, buf, 2)
    163161        buf.seek(0)
     
    169167    def test_pickle(self):
    170168        attrs = Attrs([("attr1", "foo"), ("attr2", "bar")])
    171         buf = StringIO()
     169        buf = BytesIO()
    172170        pickle.dump(attrs, buf, 2)
    173171        buf.seek(0)
     
    197195    def test_pickle(self):
    198196        ns = Namespace('http://www.example.org/namespace')
    199         buf = StringIO()
     197        buf = BytesIO()
    200198        pickle.dump(ns, buf, 2)
    201199        buf.seek(0)
     
    210208    def test_pickle(self):
    211209        qname = QName('http://www.example.org/namespace}elem')
    212         buf = StringIO()
     210        buf = BytesIO()
    213211        pickle.dump(qname, buf, 2)
    214212        buf.seek(0)
  • branches/experimental/py3k/genshi/tests/input.py

    r1077 r1140  
    1313
    1414import doctest
    15 from StringIO import StringIO
    1615import sys
    1716import unittest
     
    1918from genshi.core import Attrs, Stream
    2019from genshi.input import XMLParser, HTMLParser, ParseError
     20from genshi.compat import StringIO, BytesIO
    2121
    2222
     
    6060    def test_latin1_encoded(self):
    6161        text = u'<div>\xf6</div>'.encode('iso-8859-1')
    62         events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
     62        events = list(XMLParser(BytesIO(text), encoding='iso-8859-1'))
    6363        kind, data, pos = events[1]
    6464        self.assertEqual(Stream.TEXT, kind)
     
    6969        <div>\xf6</div>
    7070        """.encode('iso-8859-1')
    71         events = list(XMLParser(StringIO(text)))
     71        events = list(XMLParser(BytesIO(text)))
    7272        kind, data, pos = events[2]
    7373        self.assertEqual(Stream.TEXT, kind)
     
    117117
    118118    def test_text_node_pos_single_line(self):
    119         text = '<elem>foo bar</elem>'
     119        text = u'<elem>foo bar</elem>'
    120120        events = list(HTMLParser(StringIO(text)))
    121121        kind, data, pos = events[1]
     
    125125
    126126    def test_text_node_pos_multi_line(self):
    127         text = '''<elem>foo
     127        text = u'''<elem>foo
    128128bar</elem>'''
    129129        events = list(HTMLParser(StringIO(text)))
     
    135135    def test_input_encoding_text(self):
    136136        text = u'<div>\xf6</div>'.encode('iso-8859-1')
    137         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
     137        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
    138138        kind, data, pos = events[1]
    139139        self.assertEqual(Stream.TEXT, kind)
     
    142142    def test_input_encoding_attribute(self):
    143143        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
    144         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
     144        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
    145145        kind, (tag, attrib), pos = events[0]
    146146        self.assertEqual(Stream.START, kind)
     
    155155
    156156    def test_html_entity_in_attribute(self):
    157         text = '<p title="&nbsp;"></p>'
     157        text = u'<p title="&nbsp;"></p>'
    158158        events = list(HTMLParser(StringIO(text)))
    159159        kind, data, pos = events[0]
     
    164164
    165165    def test_html_entity_in_text(self):
    166         text = '<p>&nbsp;</p>'
     166        text = u'<p>&nbsp;</p>'
    167167        events = list(HTMLParser(StringIO(text)))
    168168        kind, data, pos = events[1]
     
    171171
    172172    def test_processing_instruction(self):
    173         text = '<?php echo "Foobar" ?>'
     173        text = u'<?php echo "Foobar" ?>'
    174174        events = list(HTMLParser(StringIO(text)))
    175175        kind, (target, data), pos = events[0]
     
    206206
    207207    def test_processing_instruction_trailing_qmark(self):
    208         text = '<?php echo "Foobar" ??>'
     208        text = u'<?php echo "Foobar" ??>'
    209209        events = list(HTMLParser(StringIO(text)))
    210210        kind, (target, data), pos = events[0]
     
    214214
    215215    def test_out_of_order_tags1(self):
    216         text = '<span><b>Foobar</span></b>'
     216        text = u'<span><b>Foobar</span></b>'
    217217        events = list(HTMLParser(StringIO(text)))
    218218        self.assertEqual(5, len(events))
     
    224224
    225225    def test_out_of_order_tags2(self):
    226         text = '<span class="baz"><b><i>Foobar</span></b></i>'
    227         events = list(HTMLParser(StringIO(text)))
     226        text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8')
     227        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
    228228        self.assertEqual(7, len(events))
    229229        self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
     
    237237
    238238    def test_out_of_order_tags3(self):
    239         text = '<span><b>Foobar</i>'
    240         events = list(HTMLParser(StringIO(text)))
     239        text = u'<span><b>Foobar</i>'.encode('utf-8')
     240        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
    241241        self.assertEqual(5, len(events))
    242242        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
     
    247247
    248248    def test_hex_charref(self):
    249         text = '<span>&#x27;</span>'
     249        text = u'<span>&#x27;</span>'
    250250        events = list(HTMLParser(StringIO(text)))
    251251        self.assertEqual(3, len(events))
  • branches/experimental/py3k/genshi/tests/output.py

    r1085 r1140  
    357357
    358358    def test_html5_doctype(self):
    359         stream = HTML('<html></html>')
     359        stream = HTML(u'<html></html>')
    360360        output = stream.render(XHTMLSerializer, doctype=DocType.HTML5,
    361361                               encoding=None)
     
    428428
    429429    def test_html5_doctype(self):
    430         stream = HTML('<html></html>')
     430        stream = HTML(u'<html></html>')
    431431        output = stream.render(HTMLSerializer, doctype=DocType.HTML5,
    432432                               encoding=None)
Note: See TracChangeset for help on using the changeset viewer.