Changeset 1140
- Timestamp: Oct 25, 2010, 12:08:11 AM (13 years ago)
- Location: branches/experimental/py3k/genshi
- Files: 6 edited
  - core.py (modified) (6 diffs)
  - input.py (modified) (12 diffs)
  - output.py (modified) (1 diff)
  - tests/core.py (modified) (9 diffs)
  - tests/input.py (modified) (16 diffs)
  - tests/output.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
branches/experimental/py3k/genshi/core.py
r1129 r1140 18 18 except NameError: 19 19 from functools import reduce 20 import sys 20 21 from itertools import chain 21 22 import operator … … 93 94 94 95 >>> from genshi.input import HTML 95 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''' )96 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8') 96 97 >>> print(html) 97 98 <p onclick="alert('Whoa')">Hello, world!</p> … … 154 155 return reduce(operator.or_, (self,) + filters) 155 156 156 def render(self, method=None, encoding= 'utf-8', out=None, **kwargs):157 def render(self, method=None, encoding=None, out=None, **kwargs): 157 158 """Return a string representation of the stream. 158 159 … … 188 189 189 190 >>> from genshi import HTML 190 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>' )191 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8') 191 192 >>> print(stream.select('elem')) 192 193 <elem>foo</elem><elem>bar</elem> … … 668 669 return hash(self.uri) 669 670 670 def __repr__(self): 671 return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) 671 if sys.version_info[0] == 2: 672 # Only use stringrepr in python 2 673 def __repr__(self): 674 return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) 675 else: 676 def __repr__(self): 677 return '%s(%r)' % (type(self).__name__, self.uri) 672 678 673 679 def __str__(self): … … 729 735 return (self.lstrip('{'),) 730 736 731 def __repr__(self): 732 return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) 737 if sys.version_info[0] == 2: 738 # Only use stringrepr in python 2 739 def __repr__(self): 740 return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) 741 else: 742 def __repr__(self): 743 return '%s(%r)' % (type(self).__name__, self.lstrip('{')) -
branches/experimental/py3k/genshi/input.py
r1082 r1140 19 19 import htmlentitydefs as entities 20 20 import HTMLParser as html 21 from StringIO import StringIO22 21 from xml.parsers import expat 23 22 … … 25 24 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \ 26 25 END_NS, START_CDATA, END_CDATA, PI, COMMENT 26 from genshi.compat import StringIO, BytesIO 27 27 28 28 29 __all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML'] … … 91 92 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in 92 93 entities.name2codepoint.items()] 93 _external_dtd = '\n'.join(_entitydefs)94 _external_dtd = u'\n'.join(_entitydefs).encode('utf-8') 94 95 95 96 def __init__(self, source, filename=None, encoding=None): … … 109 110 parser = expat.ParserCreate(encoding, '}') 110 111 parser.buffer_text = True 111 parser.returns_unicode = True 112 # Python 3 does not have returns_unicode 113 if hasattr(parser, 'returns_unicode'): 114 parser.returns_unicode = True 112 115 parser.ordered_attributes = True 113 116 … … 147 150 while not done and len(self._queue) == 0: 148 151 data = self.source.read(bufsize) 149 if data == '': # end of data152 if not data: # end of data 150 153 if hasattr(self, 'expat'): 151 154 self.expat.Parse('', True) … … 171 174 def _build_foreign(self, context, base, sysid, pubid): 172 175 parser = self.expat.ExternalEntityParserCreate(context) 173 parser.ParseFile( StringIO(self._external_dtd))176 parser.ParseFile(BytesIO(self._external_dtd)) 174 177 return 1 175 178 … … 280 283 The parsing is initiated by iterating over the parser object: 281 284 282 >>> parser = HTMLParser( StringIO('<UL compact><LI>Foo</UL>'))285 >>> parser = HTMLParser(BytesIO(u'<UL compact><LI>Foo</UL>'.encode('utf-8')), encoding='utf-8') 283 286 >>> for kind, data, pos in parser: 284 287 ... 
print('%s %s' % (kind, data)) … … 294 297 'param']) 295 298 296 def __init__(self, source, filename=None, encoding= 'utf-8'):299 def __init__(self, source, filename=None, encoding=None): 297 300 """Initialize the parser for the given HTML input. 298 301 … … 321 324 while not done and len(self._queue) == 0: 322 325 data = self.source.read(bufsize) 323 if data == '': # end of data326 if not data: # end of data 324 327 self.close() 325 328 done = True 326 329 else: 330 if not isinstance(data, unicode): 331 # bytes 332 if self.encoding: 333 data = data.decode(self.encoding) 334 else: 335 raise UnicodeError("source returned bytes, but no encoding specified") 327 336 self.feed(data) 328 337 for kind, data, pos in self._queue: … … 404 413 405 414 406 def HTML(text, encoding= 'utf-8'):415 def HTML(text, encoding=None): 407 416 """Parse the given HTML source and return a markup stream. 408 417 … … 410 419 iterated over multiple times: 411 420 412 >>> html = HTML('<body><h1>Foo</h1></body>' )421 >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8') 413 422 >>> print(html) 414 423 <body><h1>Foo</h1></body> … … 423 432 fails 424 433 """ 425 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) 434 if isinstance(text, unicode): 435 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) 436 return Stream(list(HTMLParser(BytesIO(text), encoding=encoding))) 426 437 427 438 -
branches/experimental/py3k/genshi/output.py
r1086 r1140 28 28 29 29 30 def encode(iterator, method='xml', encoding= 'utf-8', out=None):30 def encode(iterator, method='xml', encoding=None, out=None): 31 31 """Encode serializer output into a string. 32 32 -
branches/experimental/py3k/genshi/tests/core.py
r1085 r1140 14 14 import doctest 15 15 import pickle 16 from StringIO import StringIO17 try:18 from cStringIO import StringIO as cStringIO19 except ImportError:20 cStringIO = StringIO21 16 import unittest 22 17 … … 24 19 from genshi.core import Markup, Attrs, Namespace, QName, escape, unescape 25 20 from genshi.input import XML, ParseError 21 from genshi.compat import StringIO, BytesIO 26 22 27 23 … … 30 26 def test_render_utf8(self): 31 27 xml = XML('<li>Über uns</li>') 32 self.assertEqual( '<li>Über uns</li>', xml.render())28 self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), xml.render(encoding='utf-8')) 33 29 34 30 def test_render_unicode(self): 35 31 xml = XML('<li>Über uns</li>') 32 self.assertEqual(u'<li>Über uns</li>', xml.render()) 36 33 self.assertEqual(u'<li>Über uns</li>', xml.render(encoding=None)) 37 34 38 35 def test_render_ascii(self): 39 36 xml = XML('<li>Über uns</li>') 40 self.assertEqual( '<li>Über uns</li>', xml.render(encoding='ascii'))37 self.assertEqual(u'<li>Über uns</li>'.encode('ascii'), xml.render(encoding='ascii')) 41 38 42 39 def test_render_output_stream_utf8(self): 43 40 xml = XML('<li>Über uns</li>') 44 strio = cStringIO()45 self.assertEqual(None, xml.render( out=strio))46 self.assertEqual( '<li>Über uns</li>', strio.getvalue())41 strio = BytesIO() 42 self.assertEqual(None, xml.render(encoding='utf-8', out=strio)) 43 self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), strio.getvalue()) 47 44 48 45 def test_render_output_stream_unicode(self): … … 54 51 def test_pickle(self): 55 52 xml = XML('<li>Foo</li>') 56 buf = StringIO()53 buf = BytesIO() 57 54 pickle.dump(xml, buf, 2) 58 55 buf.seek(0) … … 64 61 65 62 def test_new_with_encoding(self): 66 markup = Markup('Döner', encoding='utf-8') 67 self.assertEquals("<Markup u'D\\xf6ner'>", repr(markup)) 63 markup = Markup(u'Döner'.encode('utf-8'), encoding='utf-8') 64 # mimic Markup.__repr__ when constructing output for Python 2/3 compatibility 65 self.assertEquals("<Markup %r>" % 
u'D\u00f6ner', repr(markup)) 68 66 69 67 def test_repr(self): … … 159 157 def test_pickle(self): 160 158 markup = Markup('foo') 161 buf = StringIO()159 buf = BytesIO() 162 160 pickle.dump(markup, buf, 2) 163 161 buf.seek(0) … … 169 167 def test_pickle(self): 170 168 attrs = Attrs([("attr1", "foo"), ("attr2", "bar")]) 171 buf = StringIO()169 buf = BytesIO() 172 170 pickle.dump(attrs, buf, 2) 173 171 buf.seek(0) … … 197 195 def test_pickle(self): 198 196 ns = Namespace('http://www.example.org/namespace') 199 buf = StringIO()197 buf = BytesIO() 200 198 pickle.dump(ns, buf, 2) 201 199 buf.seek(0) … … 210 208 def test_pickle(self): 211 209 qname = QName('http://www.example.org/namespace}elem') 212 buf = StringIO()210 buf = BytesIO() 213 211 pickle.dump(qname, buf, 2) 214 212 buf.seek(0) -
branches/experimental/py3k/genshi/tests/input.py
r1077 r1140 13 13 14 14 import doctest 15 from StringIO import StringIO16 15 import sys 17 16 import unittest … … 19 18 from genshi.core import Attrs, Stream 20 19 from genshi.input import XMLParser, HTMLParser, ParseError 20 from genshi.compat import StringIO, BytesIO 21 21 22 22 … … 60 60 def test_latin1_encoded(self): 61 61 text = u'<div>\xf6</div>'.encode('iso-8859-1') 62 events = list(XMLParser( StringIO(text), encoding='iso-8859-1'))62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1')) 63 63 kind, data, pos = events[1] 64 64 self.assertEqual(Stream.TEXT, kind) … … 69 69 <div>\xf6</div> 70 70 """.encode('iso-8859-1') 71 events = list(XMLParser( StringIO(text)))71 events = list(XMLParser(BytesIO(text))) 72 72 kind, data, pos = events[2] 73 73 self.assertEqual(Stream.TEXT, kind) … … 117 117 118 118 def test_text_node_pos_single_line(self): 119 text = '<elem>foo bar</elem>'119 text = u'<elem>foo bar</elem>' 120 120 events = list(HTMLParser(StringIO(text))) 121 121 kind, data, pos = events[1] … … 125 125 126 126 def test_text_node_pos_multi_line(self): 127 text = '''<elem>foo127 text = u'''<elem>foo 128 128 bar</elem>''' 129 129 events = list(HTMLParser(StringIO(text))) … … 135 135 def test_input_encoding_text(self): 136 136 text = u'<div>\xf6</div>'.encode('iso-8859-1') 137 events = list(HTMLParser( StringIO(text), encoding='iso-8859-1'))137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) 138 138 kind, data, pos = events[1] 139 139 self.assertEqual(Stream.TEXT, kind) … … 142 142 def test_input_encoding_attribute(self): 143 143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') 144 events = list(HTMLParser( StringIO(text), encoding='iso-8859-1'))144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) 145 145 kind, (tag, attrib), pos = events[0] 146 146 self.assertEqual(Stream.START, kind) … … 155 155 156 156 def test_html_entity_in_attribute(self): 157 text = '<p title=" "></p>'157 text = u'<p title=" "></p>' 158 158 
events = list(HTMLParser(StringIO(text))) 159 159 kind, data, pos = events[0] … … 164 164 165 165 def test_html_entity_in_text(self): 166 text = '<p> </p>'166 text = u'<p> </p>' 167 167 events = list(HTMLParser(StringIO(text))) 168 168 kind, data, pos = events[1] … … 171 171 172 172 def test_processing_instruction(self): 173 text = '<?php echo "Foobar" ?>'173 text = u'<?php echo "Foobar" ?>' 174 174 events = list(HTMLParser(StringIO(text))) 175 175 kind, (target, data), pos = events[0] … … 206 206 207 207 def test_processing_instruction_trailing_qmark(self): 208 text = '<?php echo "Foobar" ??>'208 text = u'<?php echo "Foobar" ??>' 209 209 events = list(HTMLParser(StringIO(text))) 210 210 kind, (target, data), pos = events[0] … … 214 214 215 215 def test_out_of_order_tags1(self): 216 text = '<span><b>Foobar</span></b>'216 text = u'<span><b>Foobar</span></b>' 217 217 events = list(HTMLParser(StringIO(text))) 218 218 self.assertEqual(5, len(events)) … … 224 224 225 225 def test_out_of_order_tags2(self): 226 text = '<span class="baz"><b><i>Foobar</span></b></i>'227 events = list(HTMLParser( StringIO(text)))226 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8') 227 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) 228 228 self.assertEqual(7, len(events)) 229 229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), … … 237 237 238 238 def test_out_of_order_tags3(self): 239 text = '<span><b>Foobar</i>'240 events = list(HTMLParser( StringIO(text)))239 text = u'<span><b>Foobar</i>'.encode('utf-8') 240 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) 241 241 self.assertEqual(5, len(events)) 242 242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) … … 247 247 248 248 def test_hex_charref(self): 249 text = '<span>'</span>'249 text = u'<span>'</span>' 250 250 events = list(HTMLParser(StringIO(text))) 251 251 self.assertEqual(3, len(events)) -
branches/experimental/py3k/genshi/tests/output.py
r1085 r1140 357 357 358 358 def test_html5_doctype(self): 359 stream = HTML( '<html></html>')359 stream = HTML(u'<html></html>') 360 360 output = stream.render(XHTMLSerializer, doctype=DocType.HTML5, 361 361 encoding=None) … … 428 428 429 429 def test_html5_doctype(self): 430 stream = HTML( '<html></html>')430 stream = HTML(u'<html></html>') 431 431 output = stream.render(HTMLSerializer, doctype=DocType.HTML5, 432 432 encoding=None)
Note: See TracChangeset for help on using the changeset viewer.
