Changeset 27 for trunk/markup/input.py
- Timestamp: Jun 28, 2006, 10:55:04 AM (17 years ago)
- File: 1 edited
  - trunk/markup/input.py (modified) (9 diffs)
Legend:
- Unmodified (no marker)
- Added (+)
- Removed (-)

trunk/markup/input.py
 r22  r27
  19   19   import HTMLParser as html
  20   20   import htmlentitydefs
  21      - import re
  22   21   from StringIO import StringIO
  23   22
   …    …
  38   37   class XMLParser(object):
  39   38       """Generator-based XML parser based on roughly equivalent code in
  40      -     Kid/ElementTree."""
       39 +     Kid/ElementTree.
       40 +
       41 +     The parsing is initiated by iterating over the parser object:
       42 +
       43 +     >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
       44 +     >>> for kind, data, pos in parser:
       45 +     ...     print kind, data
       46 +     START (u'root', [(u'id', u'2')])
       47 +     START (u'child', [])
       48 +     TEXT Foo
       49 +     END child
       50 +     END root
       51 +     """
  41   52
  42   53       def __init__(self, source, filename=None):
       54 +         """Initialize the parser for the given XML text.
       55 +
       56 +         @param source: the XML text as a file-like object
       57 +         @param filename: the name of the file, if appropriate
       58 +         """
  43   59           self.source = source
  44   60           self.filename = filename
   …    …
  91  107               raise ParseError(msg, self.filename, e.lineno, e.offset)
  92  108
      109 +     def _enqueue(self, kind, data, pos=None):
      110 +         if pos is None:
      111 +             pos = self._getpos()
      112 +         self._queue.append((kind, data, pos))
      113 +
  93  114       def _getpos_unknown(self):
  94  115           return (self.filename or '<string>', -1, -1)
   …    …
  99  120
 100  121       def _handle_start(self, tag, attrib):
 101      -         self._queue.append((Stream.START, (QName(tag), Attributes(attrib.items())),
 102      -                             self._getpos()))
      122 +         self._enqueue(Stream.START, (QName(tag), Attributes(attrib.items())))
 103  123
 104  124       def _handle_end(self, tag):
 105      -         self._queue.append((Stream.END, QName(tag), self._getpos()))
      125 +         self._enqueue(Stream.END, QName(tag))
 106  126
 107  127       def _handle_data(self, text):
 108      -         self._queue.append((Stream.TEXT, text, self._getpos()))
      128 +         self._enqueue(Stream.TEXT, text)
 109  129
 110  130       def _handle_prolog(self, version, encoding, standalone):
 111      -         self._queue.append((Stream.PROLOG, (version, encoding, standalone),
 112      -                             self._getpos()))
      131 +         self._enqueue(Stream.PROLOG, (version, encoding, standalone))
 113  132
 114  133       def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
 115      -         self._queue.append((Stream.DOCTYPE, (name, pubid, sysid), self._getpos()))
      134 +         self._enqueue(Stream.DOCTYPE, (name, pubid, sysid))
 116  135
 117  136       def _handle_start_ns(self, prefix, uri):
 118      -         self._queue.append((Stream.START_NS, (prefix or '', uri), self._getpos()))
      137 +         self._enqueue(Stream.START_NS, (prefix or '', uri))
 119  138
 120  139       def _handle_end_ns(self, prefix):
 121      -         self._queue.append((Stream.END_NS, prefix or '', self._getpos()))
      140 +         self._enqueue(Stream.END_NS, prefix or '')
 122  141
 123  142       def _handle_pi(self, target, data):
 124      -         self._queue.append((Stream.PI, (target, data), self._getpos()))
      143 +         self._enqueue(Stream.PI, (target, data))
 125  144
 126  145       def _handle_comment(self, text):
 127      -         self._queue.append((Stream.COMMENT, text, self._getpos()))
      146 +         self._enqueue(Stream.COMMENT, text)
 128  147
 129  148       def _handle_other(self, text):
   …    …
 132  151               try:
 133  152                   text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
 134      -                 self._queue.append((Stream.TEXT, text, self._getpos()))
      153 +                 self._enqueue(Stream.TEXT, text)
 135  154               except KeyError:
 136  155                   lineno, offset = self._getpos()
   …    …
 148  167       This class provides the same interface for generating stream events as
 149  168       `XMLParser`, and attempts to automatically balance tags.
      169 +
      170 +     The parsing is initiated by iterating over the parser object:
      171 +
      172 +     >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
      173 +     >>> for kind, data, pos in parser:
      174 +     ...     print kind, data
      175 +     START (u'ul', [(u'compact', u'compact')])
      176 +     START (u'li', [])
      177 +     TEXT Foo
      178 +     END li
      179 +     END ul
 150  180       """
 151  181
   …    …
 188  218               raise ParseError(msg, self.filename, e.lineno, e.offset)
 189  219
      220 +     def _enqueue(self, kind, data, pos=None):
      221 +         if pos is None:
      222 +             pos = self._getpos()
      223 +         self._queue.append((kind, data, pos))
      224 +
 190  225       def _getpos(self):
 191  226           lineno, column = self.getpos()
   …    …
 193  228
 194  229       def handle_starttag(self, tag, attrib):
 195      -         pos = self._getpos()
 196      -         self._queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos))
      230 +         fixed_attrib = []
      231 +         for name, value in attrib: # Fixup minimized attributes
      232 +             if value is None:
      233 +                 value = name
      234 +             fixed_attrib.append((name, unicode(value)))
      235 +
      236 +         self._enqueue(Stream.START, (QName(tag), Attributes(fixed_attrib)))
 197  237           if tag in self._EMPTY_ELEMS:
 198      -             self._queue.append((Stream.END, QName(tag), pos))
      238 +             self._enqueue(Stream.END, QName(tag))
 199  239           else:
 200  240               self._open_tags.append(tag)
   …    …
 202  242       def handle_endtag(self, tag):
 203  243           if tag not in self._EMPTY_ELEMS:
 204      -             pos = self._getpos()
 205  244               while self._open_tags:
 206  245                   open_tag = self._open_tags.pop()
 207  246                   if open_tag.lower() == tag.lower():
 208  247                       break
 209      -                 self._queue.append((Stream.END, QName(open_tag), pos))
 210      -             self._queue.append((Stream.END, QName(tag), pos))
      248 +                 self._enqueue(Stream.END, QName(open_tag))
      249 +             self._enqueue(Stream.END, QName(tag))
 211  250
 212  251       def handle_data(self, text):
 213      -         self._queue.append((Stream.TEXT, text, self._getpos()))
      252 +         self._enqueue(Stream.TEXT, text)
 214  253
 215  254       def handle_charref(self, name):
 216      -         self._queue.append((Stream.TEXT, Markup('&#%s;' % name), self._getpos()))
      255 +         self._enqueue(Stream.TEXT, Markup('&#%s;' % name))
 217  256
 218  257       def handle_entityref(self, name):
 219      -         self._queue.append((Stream.TEXT, Markup('&%s;' % name), self._getpos()))
      258 +         self._enqueue(Stream.TEXT, Markup('&%s;' % name))
 220  259
 221  260       def handle_pi(self, data):
 222  261           target, data = data.split(maxsplit=1)
 223  262           data = data.rstrip('?')
 224      -         self._queue.append((Stream.PI, (target.strip(), data.strip()),
 225      -                             self._getpos()))
      263 +         self._enqueue(Stream.PI, (target.strip(), data.strip()))
 226  264
 227  265       def handle_comment(self, text):
 228      -         self._queue.append((Stream.COMMENT, text, self._getpos()))
      266 +         self._enqueue(Stream.COMMENT, text)
 229  267
 230  268
Note: See TracChangeset for help on using the changeset viewer.
