class _NamespaceScope(object):
    """Namespace bindings that enter and leave scope together.

    ``bindings`` is a list of ``(prefix, uri)`` pairs in declaration order.
    ``explicit`` is true for a scope opened by a ``START_NS`` event and false
    for the scope opened by an element itself.  ``pending`` stays true until
    the bindings have been considered for ``xmlns`` output.  ``tagprefix``
    records the prefix chosen for the element's own tag, so that the matching
    ``END`` event can reuse exactly the same prefix.
    """

    __slots__ = ('bindings', 'explicit', 'pending', 'tagprefix')

    def __init__(self, bindings=(), explicit=False, pending=True):
        self.bindings = list(bindings)
        self.explicit = explicit
        self.pending = pending
        self.tagprefix = None


class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    Namespaces passed to the constructor (and the ``xml`` namespace) are
    treated as already in scope: they are used to qualify names but no
    ``xmlns`` attribute is ever emitted for them.  Names without a namespace
    are passed through unchanged.  The rewritten attributes of ``START`` and
    ``EMPTY`` events are yielded as a plain list of ``(name, value)`` pairs,
    with the ``xmlns`` declarations after the element's own attributes.

    :param prefixes: optional mapping of namespace URIs to prefixes

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print kind, repr(data)
    START (u'doc', [(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')])
    TEXT u'\n  '
    START (u'two:item', [])
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None):
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)

    def __call__(self, stream):
        """Yield the events of `stream` with qualified names flattened.

        ``START_NS`` and ``END_NS`` events are consumed; every other event is
        yielded, with tag and attribute names of ``START``, ``EMPTY`` and
        ``END`` events replaced by ``prefix:localname`` strings.

        :param stream: iterable of ``(kind, data, pos)`` events
        """
        # The document can be thought of as nested elements.  Namespaces
        # introduced by one element apply to all of its children, so the
        # current namespace context is a stack of scopes.  Every START_NS
        # event opens an "explicit" scope (closed by the matching END_NS) and
        # every element opens its own scope (closed by its END event), which
        # keeps pushes and pops balanced.

        # The bottom scope holds the namespaces given to the constructor.  It
        # is keyed by prefix so that a prefix is bound to a single URI, and it
        # is never pending because those namespaces are implicit.
        implicit = {}
        for uri, prefix in self.prefixes.items():
            implicit[prefix] = uri
        stack = [_NamespaceScope(implicit.items(), pending=False)]

        # Counter for generated prefixes, held in a list so that the nested
        # function below can update it.
        counter = [0]

        def _get_prefix(uri, allow_default=True):
            """Return the innermost usable prefix bound to `uri`, or None.

            A prefix that an inner scope has rebound to a different URI is
            skipped.  With `allow_default` false the empty (default) prefix
            is skipped too, because the default namespace does not apply to
            attributes.
            """
            shadowed = set()
            for scope in reversed(stack):
                for prefix, bound_uri in scope.bindings:
                    if (bound_uri == uri and prefix not in shadowed
                            and (prefix or allow_default)):
                        return prefix
                for prefix, bound_uri in scope.bindings:
                    shadowed.add(prefix)
            return None

        def _is_bound(prefix):
            """Tell whether any scope on the stack binds `prefix`."""
            for scope in stack:
                for bound_prefix, bound_uri in scope.bindings:
                    if bound_prefix == prefix:
                        return True
            return False

        def _new_prefix():
            """Generate an ``nsN`` prefix that is not bound in any scope."""
            while True:
                counter[0] += 1
                candidate = 'ns%d' % counter[0]
                if not _is_bound(candidate):
                    return candidate

        def _pop_element():
            """Remove and return the innermost element scope, if any.

            The bottom scope is never removed, so an unmatched END event
            cannot empty the stack.
            """
            for index in range(len(stack) - 1, 0, -1):
                if not stack[index].explicit:
                    return stack.pop(index)
            return None

        def _pop_declaration(prefix):
            """Remove the innermost START_NS scope that binds `prefix`.

            An END_NS event without a matching START_NS is ignored.
            """
            prefix = prefix or u''
            for index in range(len(stack) - 1, 0, -1):
                scope = stack[index]
                if scope.explicit and (scope.bindings[0][0] or u'') == prefix:
                    del stack[index]
                    return

        def _make_nsattr(prefix, uri):
            """Return the ``xmlns`` attribute that binds `prefix` to `uri`."""
            if prefix:
                return u'xmlns:%s' % prefix, uri
            return u'xmlns', uri

        def _qualify(prefix, name):
            """Return `name` qualified with `prefix`, if there is one."""
            if prefix:
                return u'%s:%s' % (prefix, name)
            return name

        for kind, data, pos in stream:
            if kind is START_NS:
                # Explicit declaration; it stays pending until the START
                # event of the element it belongs to.
                prefix, uri = data
                stack.append(_NamespaceScope([(prefix, uri)], explicit=True))

            elif kind is END_NS:
                _pop_declaration(data)

            elif kind is START or kind is EMPTY:
                tag, attrs = data

                # Every element gets its own scope, even if it ends up empty,
                # so that its END event always has a scope to close.
                element = _NamespaceScope()
                stack.append(element)

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    prefix = _get_prefix(tagns)
                    if prefix is None:
                        # Not in scope yet: make it the default namespace
                        # from this element on.
                        prefix = ''
                        element.bindings.append((prefix, tagns))
                    element.tagprefix = prefix
                    tagname = _qualify(prefix, tagname)

                # Rewrite the element's attributes
                newattrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        attrprefix = _get_prefix(attrns, allow_default=False)
                        if attrprefix is None:
                            # No usable prefix in scope: make one up and
                            # declare it on this element.
                            attrprefix = _new_prefix()
                            element.bindings.append((attrprefix, attrns))
                        attrname = _qualify(attrprefix, attrname)
                    newattrs.append((attrname, value))

                # Split the bindings in scope into those already written out
                # by an enclosing element and those still pending.  The
                # pending ones are about to be handled, so mark them done.
                in_scope = {}
                pending = []
                for scope in stack:
                    if scope.pending:
                        pending.extend(scope.bindings)
                        scope.pending = False
                    else:
                        for prefix, uri in scope.bindings:
                            in_scope[prefix] = uri

                # Emit each pending prefix once, in declaration order, using
                # its innermost binding, and skip declarations that merely
                # repeat what is already in scope.
                effective = dict(pending)
                emitted = set()
                nsattrs = []
                for prefix, uri in pending:
                    if prefix in emitted or effective[prefix] != uri:
                        continue
                    emitted.add(prefix)
                    if in_scope.get(prefix) != uri:
                        nsattrs.append(_make_nsattr(prefix, uri))

                yield kind, (tagname, newattrs + nsattrs), pos

                # An empty tag has no matching END event, so close its scope
                # right away.
                if kind is EMPTY:
                    _pop_element()

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace

                element = _pop_element()
                if tagns:
                    if element is not None:
                        # Reuse the prefix chosen for the START event so that
                        # both tags are spelled identically.
                        prefix = element.tagprefix
                    else:
                        prefix = _get_prefix(tagns)
                    tagname = _qualify(prefix, tagname)

                yield kind, tagname, pos

            else:
                yield kind, data, pos