Edgewall Software

Changeset 361 for trunk/genshi/input.py


Ignore:
Timestamp:
Oct 13, 2006, 3:42:38 PM (17 years ago)
Author:
cmlenz
Message:

Fix a bug in the XML parser where HTML entity references inside attribute values were pulled out of the attribute value and instead emitted as a text node just before the associated start tag. Thanks to Hamish Lawson for pointing out the problem.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/genshi/input.py

    r358 r361  
    2222from StringIO import StringIO
    2323
    24 from genshi.core import Attrs, QName, Stream
     24from genshi.core import Attrs, QName, Stream, stripentities
    2525from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
    2626                        START_CDATA, END_CDATA, PI, COMMENT
     
    7171    END root
    7272    """
     73
     74    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
     75                   htmlentitydefs.name2codepoint.items()]
     76    _external_dtd = '\n'.join(_entitydefs)
    7377
    7478    def __init__(self, source, filename=None):
     
    101105        # (in _handle_other)
    102106        parser.DefaultHandler = self._handle_other
     107        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    103108        parser.UseForeignDTD()
     109        parser.ExternalEntityRefHandler = self._build_foreign
    104110
    105111        # Location reporting is only supported in Python >= 2.4
     
    142148        return iter(self.parse())
    143149
     150    def _build_foreign(self, context, base, sysid, pubid):
     151        parser = self.expat.ExternalEntityParserCreate(context)
     152        parser.ParseFile(StringIO(self._external_dtd))
     153        return 1
     154
    144155    def _enqueue(self, kind, data=None, pos=None):
    145156        if pos is None:
     
    293304            if value is None:
    294305                value = name
    295             fixed_attrib.append((name, unicode(value)))
     306            fixed_attrib.append((name, unicode(stripentities(value))))
    296307
    297308        self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
Note: See TracChangeset for help on using the changeset viewer.