Changeset 22 for trunk/markup/input.py
- Timestamp: Jun 20, 2006, 3:05:37 PM (17 years ago)
- File: 1 edited
trunk/markup/input.py (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
trunk/markup/input.py
r2 r22 25 25 26 26 27 class ParseError(Exception): 28 """Exception raised when fatal syntax errors are found in the input being 29 parsed.""" 30 31 def __init__(self, message, filename='<string>', lineno=-1, offset=-1): 32 Exception.__init__(self, message) 33 self.filename = filename 34 self.lineno = lineno 35 self.offset = offset 36 37 27 38 class XMLParser(object): 28 39 """Generator-based XML parser based on roughly equivalent code in 29 40 Kid/ElementTree.""" 30 41 31 def __init__(self, source ):42 def __init__(self, source, filename=None): 32 43 self.source = source 44 self.filename = filename 33 45 34 46 # Setup the Expat parser … … 49 61 # Location reporting is only support in Python >= 2.4 50 62 if not hasattr(parser, 'CurrentLineNumber'): 51 self. getpos = self._getpos_unknown63 self._getpos = self._getpos_unknown 52 64 53 65 self.expat = parser 54 self. queue = []66 self._queue = [] 55 67 56 68 def __iter__(self): 57 bufsize = 4 * 1024 # 4K 58 done = False 59 while True: 60 while not done and len(self.queue) == 0: 61 data = self.source.read(bufsize) 62 if data == '': # end of data 63 if hasattr(self, 'expat'): 64 self.expat.Parse('', True) 65 del self.expat # get rid of circular references 66 done = True 67 else: 68 self.expat.Parse(data, False) 69 for event in self.queue: 70 yield event 71 self.queue = [] 72 if done: 73 break 69 try: 70 bufsize = 4 * 1024 # 4K 71 done = False 72 while True: 73 while not done and len(self._queue) == 0: 74 data = self.source.read(bufsize) 75 if data == '': # end of data 76 if hasattr(self, 'expat'): 77 self.expat.Parse('', True) 78 del self.expat # get rid of circular references 79 done = True 80 else: 81 self.expat.Parse(data, False) 82 for event in self._queue: 83 yield event 84 self._queue = [] 85 if done: 86 break 87 except expat.ExpatError, e: 88 msg = str(e) 89 if self.filename: 90 msg += ', in ' + self.filename 91 raise ParseError(msg, self.filename, e.lineno, e.offset) 74 92 75 93 def _getpos_unknown(self): 76 
return (-1, -1) 77 78 def getpos(self): 79 return self.expat.CurrentLineNumber, self.expat.CurrentColumnNumber 94 return (self.filename or '<string>', -1, -1) 95 96 def _getpos(self): 97 return (self.filename or '<string>', self.expat.CurrentLineNumber, 98 self.expat.CurrentColumnNumber) 80 99 81 100 def _handle_start(self, tag, attrib): 82 self. queue.append((Stream.START, (QName(tag), Attributes(attrib.items())),83 self. getpos()))101 self._queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), 102 self._getpos())) 84 103 85 104 def _handle_end(self, tag): 86 self. queue.append((Stream.END, QName(tag), self.getpos()))105 self._queue.append((Stream.END, QName(tag), self._getpos())) 87 106 88 107 def _handle_data(self, text): 89 self. queue.append((Stream.TEXT, text, self.getpos()))108 self._queue.append((Stream.TEXT, text, self._getpos())) 90 109 91 110 def _handle_prolog(self, version, encoding, standalone): 92 self. queue.append((Stream.PROLOG, (version, encoding, standalone),93 self. getpos()))111 self._queue.append((Stream.PROLOG, (version, encoding, standalone), 112 self._getpos())) 94 113 95 114 def _handle_doctype(self, name, sysid, pubid, has_internal_subset): 96 self. queue.append((Stream.DOCTYPE, (name, pubid, sysid), self.getpos()))115 self._queue.append((Stream.DOCTYPE, (name, pubid, sysid), self._getpos())) 97 116 98 117 def _handle_start_ns(self, prefix, uri): 99 self. queue.append((Stream.START_NS, (prefix or '', uri), self.getpos()))118 self._queue.append((Stream.START_NS, (prefix or '', uri), self._getpos())) 100 119 101 120 def _handle_end_ns(self, prefix): 102 self. queue.append((Stream.END_NS, prefix or '', self.getpos()))121 self._queue.append((Stream.END_NS, prefix or '', self._getpos())) 103 122 104 123 def _handle_pi(self, target, data): 105 self. 
queue.append((Stream.PI, (target, data), self.getpos()))124 self._queue.append((Stream.PI, (target, data), self._getpos())) 106 125 107 126 def _handle_comment(self, text): 108 self. queue.append((Stream.COMMENT, text, self.getpos()))127 self._queue.append((Stream.COMMENT, text, self._getpos())) 109 128 110 129 def _handle_other(self, text): … … 113 132 try: 114 133 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) 115 self. queue.append((Stream.TEXT, text, self.getpos()))134 self._queue.append((Stream.TEXT, text, self._getpos())) 116 135 except KeyError: 117 lineno, offset = self. getpos()136 lineno, offset = self._getpos() 118 137 raise expat.error("undefined entity %s: line %d, column %d" % 119 138 (text, lineno, offset)) … … 124 143 125 144 126 class HTMLParser(html.HTMLParser ):145 class HTMLParser(html.HTMLParser, object): 127 146 """Parser for HTML input based on the Python `HTMLParser` module. 128 147 … … 135 154 'param']) 136 155 137 def __init__(self, source ):156 def __init__(self, source, filename=None): 138 157 html.HTMLParser.__init__(self) 139 158 self.source = source 140 self.queue = [] 159 self.filename = filename 160 self._queue = [] 141 161 self._open_tags = [] 142 162 143 163 def __iter__(self): 144 bufsize = 4 * 1024 # 4K 145 done = False 146 while True: 147 while not done and len(self.queue) == 0: 148 data = self.source.read(bufsize) 149 if data == '': # end of data 150 self.close() 151 done = True 152 else: 153 self.feed(data) 154 for kind, data, pos in self.queue: 155 yield kind, data, pos 156 self.queue = [] 157 if done: 158 open_tags = self._open_tags 159 open_tags.reverse() 160 for tag in open_tags: 161 yield Stream.END, QName(tag), pos 162 break 164 try: 165 bufsize = 4 * 1024 # 4K 166 done = False 167 while True: 168 while not done and len(self._queue) == 0: 169 data = self.source.read(bufsize) 170 if data == '': # end of data 171 self.close() 172 done = True 173 else: 174 self.feed(data) 175 for kind, data, pos in self._queue: 
176 yield kind, data, pos 177 self._queue = [] 178 if done: 179 open_tags = self._open_tags 180 open_tags.reverse() 181 for tag in open_tags: 182 yield Stream.END, QName(tag), pos 183 break 184 except html.HTMLParseError, e: 185 msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) 186 if self.filename: 187 msg += ', in %s' % self.filename 188 raise ParseError(msg, self.filename, e.lineno, e.offset) 189 190 def _getpos(self): 191 lineno, column = self.getpos() 192 return (self.filename, lineno, column) 163 193 164 194 def handle_starttag(self, tag, attrib): 165 pos = self. getpos()166 self. queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos))195 pos = self._getpos() 196 self._queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) 167 197 if tag in self._EMPTY_ELEMS: 168 self. queue.append((Stream.END, QName(tag), pos))198 self._queue.append((Stream.END, QName(tag), pos)) 169 199 else: 170 200 self._open_tags.append(tag) … … 172 202 def handle_endtag(self, tag): 173 203 if tag not in self._EMPTY_ELEMS: 174 pos = self. getpos()204 pos = self._getpos() 175 205 while self._open_tags: 176 206 open_tag = self._open_tags.pop() 177 207 if open_tag.lower() == tag.lower(): 178 208 break 179 self. queue.append((Stream.END, QName(open_tag), pos))180 self. queue.append((Stream.END, QName(tag), pos))209 self._queue.append((Stream.END, QName(open_tag), pos)) 210 self._queue.append((Stream.END, QName(tag), pos)) 181 211 182 212 def handle_data(self, text): 183 self. queue.append((Stream.TEXT, text, self.getpos()))213 self._queue.append((Stream.TEXT, text, self._getpos())) 184 214 185 215 def handle_charref(self, name): 186 self. queue.append((Stream.TEXT, Markup('&#%s;' % name), self.getpos()))216 self._queue.append((Stream.TEXT, Markup('&#%s;' % name), self._getpos())) 187 217 188 218 def handle_entityref(self, name): 189 self. 
queue.append((Stream.TEXT, Markup('&%s;' % name), self.getpos()))219 self._queue.append((Stream.TEXT, Markup('&%s;' % name), self._getpos())) 190 220 191 221 def handle_pi(self, data): 192 222 target, data = data.split(maxsplit=1) 193 223 data = data.rstrip('?') 194 self. queue.append((Stream.PI, (target.strip(), data.strip()),195 self. getpos()))224 self._queue.append((Stream.PI, (target.strip(), data.strip()), 225 self._getpos())) 196 226 197 227 def handle_comment(self, text): 198 self. queue.append((Stream.COMMENT, text, self.getpos()))228 self._queue.append((Stream.COMMENT, text, self._getpos())) 199 229 200 230
Note: See TracChangeset for help on using the changeset viewer.
