Index: genshi/__init__.py
===================================================================
--- genshi/__init__.py	(révision 527)
+++ genshi/__init__.py	(copie de travail)
@@ -26,4 +26,4 @@
     pass
 
 from genshi.core import *
-from genshi.input import ParseError, XML, HTML
+from genshi.input import ParseError, XML, HTML, HTML5
Index: genshi/input.py
===================================================================
--- genshi/input.py	(révision 527)
+++ genshi/input.py	(copie de travail)
@@ -25,11 +25,11 @@
 import htmlentitydefs
 from StringIO import StringIO
 
-from genshi.core import Attrs, QName, Stream, stripentities
+from genshi.core import Attrs, Namespace, QName, Stream, stripentities
 from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
                         START_CDATA, END_CDATA, PI, COMMENT
 
-__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
+__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML', 'HTML5Parser', 'HTML5']
 __docformat__ = 'restructuredtext en'
 
 def ET(element):
@@ -426,6 +426,129 @@
     """
     return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
 
+class HTML5Parser(object):
+    """Parser for HTML input based on `html5lib`.
+
+    This class provides the same interface for generating stream events as
+    `XMLParser`.
+
+    The parsing is initiated by iterating over the parser object:
+
+    >>> parser = HTML5Parser(StringIO('<UL compact><LI>Foo</UL>'))
+    >>> for kind, data, pos in parser:
+    ...     print kind, repr(data)
+    START (QName(u'html'), Attrs())
+    START (QName(u'head'), Attrs())
+    END QName(u'head')
+    START (QName(u'body'), Attrs())
+    START (QName(u'ul'), Attrs([(QName(u'compact'), '')]))
+    START (QName(u'li'), Attrs())
+    TEXT u'Foo'
+    END QName(u'li')
+    END QName(u'ul')
+    END QName(u'body')
+    END QName(u'html')
+    """
+
+    # Element and attribute names generated by the parser are looked up in
+    # this namespace.
+    html = Namespace('http://www.w3.org/1999/xhtml')
+
+    def __init__(self, source, filename=None, encoding=None, innerHTML=False):
+        """Initialize the parser for the given HTML input.
+
+        :param source: the HTML text as a file-like object
+        :param filename: the name of the file, if known
+        :param encoding: encoding of the file; ignored if the input is unicode
+        :param innerHTML: whether to parse in innerHTML mode (``True`` is not
+                          yet supported by html5lib)
+        """
+        self.source = source
+        self.filename = filename
+        self.encoding = encoding
+        self.innerHTML = innerHTML
+        # Imported on instantiation, so importing this module does not
+        # itself require html5lib.
+        import html5lib
+        self.parser = html5lib.HTMLParser()
+
+    def parse(self):
+        """Generator that parses the HTML source, yielding markup events.
+
+        :return: a markup event stream
+        """
+        # TODO: Add some basic namespace support, e.g. convert known prefixes
+        # (py:, svg:, mathml:, smil:) to QNames
+        document = self.parser.parse(self.source, encoding=self.encoding,
+                                     innerHTML=self.innerHTML)
+        return self._generate(document)
+
+    def __iter__(self):
+        """Return an iterator over the events produced by `parse`."""
+        return iter(self.parse())
+
+    def _generate(self, element):
+        """Recursively convert a node of the html5lib tree into markup events.
+
+        :param element: the node to convert
+        :return: a generator of ``(kind, data, pos)`` tuples, where `pos` is
+                 always ``(filename, -1, -1)``
+        """
+        from html5lib.treebuilders.simpletree import Document, DocumentType, \
+                                                     CommentNode, TextNode
+
+        pos = (self.filename, -1, -1)
+
+        if isinstance(element, Document):
+            for child in element.childNodes:
+                for event in self._generate(child):
+                    yield event
+
+        elif isinstance(element, DocumentType):
+            yield DOCTYPE, (element.name, None, None), pos
+
+        elif isinstance(element, CommentNode):
+            yield COMMENT, element.data, pos
+
+        elif isinstance(element, TextNode):
+            yield TEXT, element.value, pos
+
+        else: # Element
+            tag_name = self.html[element.name]
+            attrs = Attrs([(self.html[attr], value) for attr, value
+                           in element.attributes.iteritems()])
+            yield START, (tag_name, attrs), pos
+            for child in element.childNodes:
+                for event in self._generate(child):
+                    yield event
+            yield END, tag_name, pos
+
+
+def HTML5(text, encoding=None, strict=False, innerHTML=False):
+    """Parse the given HTML source and return a markup stream.
+
+    Unlike with `HTML5Parser`, the returned stream is reusable, meaning it can be
+    iterated over multiple times:
+
+    >>> html = HTML5('<body><h1>Foo</h1></body>')
+    >>> print html
+    <html xmlns="http://www.w3.org/1999/xhtml"><head/><body><h1>Foo</h1></body></html>
+    >>> print html.select('body/h1')
+    <h1 xmlns="http://www.w3.org/1999/xhtml">Foo</h1>
+    >>> print html.select('body/h1/text()')
+    Foo
+
+    :param text: the HTML source
+    :param encoding: encoding of the source; passed on to `HTML5Parser`
+    :param strict: accepted but currently ignored
+    :param innerHTML: whether to parse in innerHTML mode; passed on to
+                      `HTML5Parser`
+    :return: the parsed XML event stream
+    """
+    parser = HTML5Parser(StringIO(text), encoding=encoding,
+                         innerHTML=innerHTML)
+    return Stream(list(parser))
+
 def _coalesce(stream):
     """Coalesces adjacent TEXT events into a single event."""
     textbuf = []
Index: genshi/output.py
===================================================================
--- genshi/output.py	(révision 527)
+++ genshi/output.py	(copie de travail)
@@ -27,7 +27,7 @@
                         START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
 
 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
-           'TextSerializer']
+           'TextSerializer', 'HTML5Serializer']
 __docformat__ = 'restructuredtext en'
 
 
@@ -53,6 +53,8 @@
         'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
     )
     XHTML = XHTML_STRICT
+    
+    HTML5 = ('html', None, None)
 
 
 class XMLSerializer(object):
@@ -321,6 +323,117 @@
                 yield Markup('<?%s %s?>' % data)
 
 
+class HTML5Serializer(object):
+    """Produces HTML5 text from an event stream.
+
+    Qualified element and attribute names are reduced to their local name.
+    Elements listed in `_EMPTY_ELEMS` are written without an end tag, and text
+    inside elements listed in `_NOESCAPE_ELEMS` is written without escaping.
+    """
+
+    _NOESCAPE_ELEMS = frozenset(['style', 'script', 'xmp', 'iframe', 'noembed',
+                                 'noframes', 'noscript'])
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'bgsound', 'br',
+                              'col', 'embed', 'frame', 'hr', 'img', 'input',
+                              'link', 'meta', 'param', 'spacer', 'wbr'])
+
+    def __init__(self, doctype=DocType.HTML5):
+        """Initialize the HTML5 serializer.
+
+        :param doctype: a ``(name, pubid, sysid)`` tuple that is put in front
+                        of every serialized stream as a DOCTYPE event, or a
+                        false value to not add a DOCTYPE
+        """
+        self.preamble = []
+        if doctype:
+            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
+        self.filters = []
+
+    def __call__(self, stream):
+        """Serialize the given event stream.
+
+        Only the first DOCTYPE event encountered is written; event kinds not
+        handled below are dropped.
+
+        :param stream: the markup event stream to serialize
+        :return: a generator yielding the output piece by piece
+        """
+        empty_elems = self._EMPTY_ELEMS
+        noescape_elems = self._NOESCAPE_ELEMS
+        have_doctype = False
+        # Depth of the open element whose text is written unescaped, if any
+        noescape = None
+        depth = 0
+
+        stream = chain(self.preamble, stream)
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+
+            if kind is START:
+                depth += 1
+                tag, attrib = data
+                if isinstance(tag, QName):
+                    tag = tag.localname
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    if isinstance(attr, QName):
+                        attr = attr.localname
+                    buf += [' ', attr, '="', self.escape(value), '"']
+                buf.append('>')
+                yield Markup(u''.join(buf))
+                if tag in noescape_elems:
+                    noescape = depth
+
+            elif kind is END:
+                if noescape == depth:
+                    noescape = None
+                if isinstance(data, QName):
+                    data = data.localname
+                # Empty elements consist of a start tag only, so no end tag
+                # is generated for them
+                if data not in empty_elems:
+                    yield Markup('</%s>' % data)
+                depth -= 1
+
+            elif kind is TEXT:
+                if noescape:
+                    yield data
+                else:
+                    yield self.escape(data)
+
+            elif kind is COMMENT:
+                yield Markup('<!--%s-->' % data)
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(u''.join(buf), *filter(None, data))
+                have_doctype = True
+
+            elif kind is PI:
+                # This is not valid HTML5 but looks like an SGML PI
+                yield Markup('<?%s %s>' % data)
+
+    def escape(text):
+        """Return `text` as unicode with ampersands, angle brackets and double
+        quotes replaced by the corresponding entities.
+        """
+        return unicode(text).replace('&', '&amp;') \
+                             .replace('<', '&lt;') \
+                             .replace('>', '&gt;') \
+                             .replace('"', '&quot;')
+    escape = staticmethod(escape)
+
+
 class TextSerializer(object):
     """Produces plain text from an event stream.
     
Index: genshi/template/html5.py
===================================================================
--- genshi/template/html5.py	(révision 0)
+++ genshi/template/html5.py	(révision 0)
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Markup templating engine for templates written in HTML."""
+
+from genshi.core import Attrs, QName, Namespace, Stream
+from genshi.core import START, END
+from genshi.input import HTML5Parser
+from genshi.template.markup import MarkupTemplate
+
+class HTML5Template(MarkupTemplate):
+    """Implementation of the template language for HTML-based templates.
+
+    >>> tmpl = HTML5Template('''<ul>
+    ...   <li py:for="item in items">${item}</li>
+    ... </ul>''')
+    >>> print tmpl.generate(items=[1, 2, 3])
+    <html xmlns="http://www.w3.org/1999/xhtml"><head/><body><ul>
+      <li>1</li><li>2</li><li>3</li>
+    </ul></body></html>
+    """
+
+    HTML_NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
+
+    def __init__(self, source, basedir=None, filename=None, loader=None,
+                 encoding=None):
+        """Initialize the template; every argument is passed on unchanged to
+        `MarkupTemplate.__init__`.
+        """
+        MarkupTemplate.__init__(self, source, basedir=basedir, filename=filename,
+                                loader=loader, encoding=encoding)
+
+    def _parse(self, source, encoding):
+        """Parse the template source.
+
+        Unless `source` already is a `Stream`, it is parsed with `HTML5Parser`.
+        The events are then run through `_to_xml` and handed on to
+        `MarkupTemplate._parse`.
+        """
+        if not isinstance(source, Stream):
+            source = HTML5Parser(source, filename=self.filename,
+                                 encoding=encoding)
+            source = Stream(source)
+        source = source | self._to_xml
+        return MarkupTemplate._parse(self, source, encoding)
+
+    def _to_xml(self, stream):
+        """Transform elements or attributes from the HTML namespace (or no
+        namespace) whose name starts with ``py:`` or ``py_`` into XML Template
+        directives.
+
+        Also transforms elements from the HTML namespace (or no namespace) whose
+        name starts with ``xi:`` or ``xi_`` into XInclude elements.
+
+        START and END events are converted by the same rules, so that the two
+        events of an element always carry the same name.
+
+        TODO: support includes using <link> or <script> with a special rel="" or
+        type="" value.
+        """
+        for kind, data, pos in stream:
+            if kind is START:
+                tag, attrs = data
+                tag = self._convert_name(tag, xinclude=True)
+                attrs = Attrs([(self._convert_name(name), value)
+                               for name, value in attrs])
+                yield kind, (tag, attrs), pos
+            elif kind is END:
+                yield kind, self._convert_name(data, xinclude=True), pos
+            else:
+                yield kind, data, pos
+
+    def _convert_name(self, name, xinclude=False):
+        """Return the qualified name that `name` maps to in the template.
+
+        :param name: the name of an element or attribute
+        :param xinclude: whether the ``xi:`` and ``xi_`` prefixes are
+                         recognized in addition to ``py:`` and ``py_``
+        :return: a name in the directive or XInclude namespace if `name` is in
+                 the HTML namespace (or no namespace) and carries one of the
+                 recognized prefixes, otherwise `name` as a `QName`
+        """
+        name = QName(name)
+        if not name.namespace or name in self.HTML_NAMESPACE:
+            prefix, localname = name.localname[:3], name.localname[3:]
+            if prefix in ('py:', 'py_'):
+                return self.DIRECTIVE_NAMESPACE[localname]
+            if xinclude and prefix in ('xi:', 'xi_'):
+                return self.XINCLUDE_NAMESPACE[localname]
+        return name
