From 92c2e32c8f9e2554511960a1809e495c9d68ee25 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 22 Nov 2014 17:37:06 +0100
Subject: [PATCH 01/17] Fix over indentation

---
 html5lib/tests/test_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
index 230cdb42..0f958c94 100644
--- a/html5lib/tests/test_parser.py
+++ b/html5lib/tests/test_parser.py
@@ -68,7 +68,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                            "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
                            "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
     if checkParseErrors:
-            assert len(p.errors) == len(errors), errorMsg2
+        assert len(p.errors) == len(errors), errorMsg2
 
 
 def test_parser():

From d9b1a9f0bf74a102cd9c977c7e5ac38a4af15f74 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 11 Jan 2015 14:46:49 +0000
Subject: [PATCH 02/17] sys.version_info is only a "named tuple"-like obj from
 2.7

This also adds the mock package as a dependency for the testsuite,
as we need it to test our test code.
---
 CHANGES.rst                         |  3 +++
 README.rst                          |  6 ++---
 html5lib/tests/support.py           |  2 +-
 html5lib/tests/test_meta.py         | 41 +++++++++++++++++++++++++++++
 html5lib/treebuilders/etree_lxml.py |  2 +-
 requirements-test.txt               |  1 +
 tox.ini                             |  3 +++
 7 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 html5lib/tests/test_meta.py

diff --git a/CHANGES.rst b/CHANGES.rst
index ed951a3b..8c6865ef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -7,9 +7,12 @@ Change Log
 Released on XXX
 
 * Added ordereddict as a mandatory dependency on Python 2.6.
+
 * Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all`` extras that
   will do the right thing based on the specific interpreter implementation.
 
+* Now requires the ``mock`` package for the testsuite.
+
 
 0.9999999/1.0b8
 ~~~~~~~~~~~~~~~
diff --git a/README.rst b/README.rst
index 9e0a0f74..7c320e0e 100644
--- a/README.rst
+++ b/README.rst
@@ -132,9 +132,9 @@ Please report any bugs on the `issue tracker
 Tests
 -----
 
-Unit tests require the ``nose`` library and can be run using the
-``nosetests`` command in the root directory; ``ordereddict`` is
-required under Python 2.6. All should pass.
+Unit tests require the ``nose`` and ``mock`` libraries and can be run
+using the ``nosetests`` command in the root directory; ``ordereddict``
+is required under Python 2.6. All should pass.
 
 Test data are contained in a separate `html5lib-tests
 <https://github.com/html5lib/html5lib-tests>`_ repository and included
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index dbb735a9..b64d322a 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -128,7 +128,7 @@ def convertData(data):
 def errorMessage(input, expected, actual):
     msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
            (repr(input), repr(expected), repr(actual)))
-    if sys.version_info.major == 2:
+    if sys.version_info[0] == 2:
         msg = msg.encode("ascii", "backslashreplace")
     return msg
 
diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
new file mode 100644
index 00000000..e42eafdb
--- /dev/null
+++ b/html5lib/tests/test_meta.py
@@ -0,0 +1,41 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import six
+from mock import Mock
+
+from . import support
+
+
+def _createReprMock(r):
+    """Creates a mock with a __repr__ returning r
+
+    Also provides __str__ mock with default mock behaviour"""
+    mock = Mock()
+    mock.__repr__ = Mock()
+    mock.__repr__.return_value = r
+    mock.__str__ = Mock(wraps=mock.__str__)
+    return mock
+
+
+def test_errorMessage():
+    # Create mock objects to take repr of
+    input = _createReprMock("1")
+    expected = _createReprMock("2")
+    actual = _createReprMock("3")
+
+    # Run the actual test
+    r = support.errorMessage(input, expected, actual)
+
+    # Assertions!
+    if six.PY2:
+        assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
+    else:
+        assert six.PY3
+        assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
+
+    assert input.__repr__.call_count == 1
+    assert expected.__repr__.call_count == 1
+    assert actual.__repr__.call_count == 1
+    assert not input.__str__.called
+    assert not expected.__str__.called
+    assert not actual.__str__.called
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 2755c485..138b30bd 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -79,7 +79,7 @@ def serializeElement(element, indent=0):
                     next_element = next_element.getnext()
             elif isinstance(element, str) or isinstance(element, bytes):
                 # Text in a fragment
-                assert isinstance(element, str) or sys.version_info.major == 2
+                assert isinstance(element, str) or sys.version_info[0] == 2
                 rv.append("|%s\"%s\"" % (' ' * indent, element))
             else:
                 # Fragment case
diff --git a/requirements-test.txt b/requirements-test.txt
index d5f8088c..8b6ace66 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -3,3 +3,4 @@
 flake8
 nose
 ordereddict # Python 2.6
+mock
diff --git a/tox.ini b/tox.ini
index d00e35dc..683c01e4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,6 +6,7 @@ deps =
   -r{toxinidir}/requirements-optional-cpython.txt
   flake8
   nose
+  mock
 commands =
   {envbindir}/nosetests -q
   {toxinidir}/flake8-run.sh
@@ -21,6 +22,7 @@ deps =
   Genshi
   nose
   six
+  mock
 
 [testenv:py26]
 basepython = python2.6
@@ -28,3 +30,4 @@ deps =
   -r{toxinidir}/requirements-optional-2.6.txt
   flake8
   nose
+  mock

From e4d4b1520d2c34a3f5b1d19a1d0f346d1ba0c19a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 11 Jan 2015 16:01:31 +0000
Subject: [PATCH 03/17] Move where we concatenate tokens to handle
 ignoreErrorOrder

This was causing one of the tokenizer test failures.
---
 html5lib/tests/test_tokenizer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index 6a563c32..4201dfbb 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -109,6 +109,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                 token.pop()
 
     if not ignoreErrorOrder and not ignoreErrors:
+        expectedTokens = concatenateCharacterTokens(expectedTokens)
         return expectedTokens == receivedTokens
     else:
         # Sort the tokens into two groups; non-parse errors and parse errors
@@ -121,6 +122,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                 else:
                     if not ignoreErrors:
                         tokens[tokenType][1].append(token)
+            tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0])
         return tokens["expected"] == tokens["received"]
 
 
@@ -174,7 +176,7 @@ def runTokenizerTest(test):
     warnings.resetwarnings()
     warnings.simplefilter("error")
 
-    expected = concatenateCharacterTokens(test['output'])
+    expected = test['output']
     if 'lastStartTag' not in test:
         test['lastStartTag'] = None
     parser = TokenizerTestParser(test['initialState'],

From 1025014f8011f013f2bf02d974da263d510cf54d Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 24 Nov 2014 01:49:47 +0000
Subject: [PATCH 04/17] Our tokenizer currently never outputs adjacent
 Character tokens; expect this.

---
 html5lib/tests/test_tokenizer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index 4201dfbb..823c6ea6 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -182,7 +182,6 @@ def runTokenizerTest(test):
     parser = TokenizerTestParser(test['initialState'],
                                  test['lastStartTag'])
     tokens = parser.parse(test['input'])
-    tokens = concatenateCharacterTokens(tokens)
     received = normalizeTokens(tokens)
     errorMsg = "\n".join(["\n\nInitial state:",
                           test['initialState'],

From 9ee8a1a811e61596fe4789137c25a470f012ae4a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 11 Jan 2015 16:51:50 +0000
Subject: [PATCH 05/17] Cease supporting DATrie under PyPy.

---
 CHANGES.rst                       | 2 ++
 README.rst                        | 4 ++--
 requirements-optional-cpython.txt | 4 ++++
 requirements-optional.txt         | 4 ----
 setup.py                          | 6 +++---
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 8c6865ef..e99da143 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -13,6 +13,8 @@ Released on XXX
 
 * Now requires the ``mock`` package for the testsuite.
 
+* Cease supporting DATrie under PyPy.
+
 
 0.9999999/1.0b8
 ~~~~~~~~~~~~~~~
diff --git a/README.rst b/README.rst
index 7c320e0e..3d08d758 100644
--- a/README.rst
+++ b/README.rst
@@ -104,8 +104,8 @@ Optional Dependencies
 The following third-party libraries may be used for additional
 functionality:
 
-- ``datrie`` can be used to improve parsing performance (though in
-  almost all cases the improvement is marginal);
+- ``datrie`` can be used under CPython to improve parsing performance
+  (though in almost all cases the improvement is marginal);
 
 - ``lxml`` is supported as a tree format (for both building and
   walking) under CPython (but *not* PyPy where it is known to cause
diff --git a/requirements-optional-cpython.txt b/requirements-optional-cpython.txt
index 35ed3529..e93eda8d 100644
--- a/requirements-optional-cpython.txt
+++ b/requirements-optional-cpython.txt
@@ -3,3 +3,7 @@
 # lxml is supported with its own treebuilder ("lxml") and otherwise
 # uses the standard ElementTree support
 lxml
+
+# DATrie can be used in place of our Python trie implementation for
+# slightly better parsing performance.
+datrie
diff --git a/requirements-optional.txt b/requirements-optional.txt
index c6355270..4e16ea17 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -4,10 +4,6 @@
 # streams.
 genshi
 
-# DATrie can be used in place of our Python trie implementation for
-# slightly better parsing performance.
-datrie
-
 # charade can be used as a fallback in case we are unable to determine
 # the encoding of a document.
 charade
diff --git a/setup.py b/setup.py
index 7af4e292..7b06b45e 100644
--- a/setup.py
+++ b/setup.py
@@ -65,18 +65,18 @@
 
           # A conditional extra will only install these items when the extra is
           # requested and the condition matches.
+          "datrie:python_implementation == 'CPython'": ["datrie"],
           "lxml:python_implementation == 'CPython'": ["lxml"],
 
           # Standard extras, will be installed when the extra is requested.
           "genshi": ["genshi"],
-          "datrie": ["datrie"],
           "charade": ["charade"],
 
           # The all extra combines a standard extra which will be used anytime
           # the all extra is requested, and it extends it with a conditional
           # extra that will be installed whenever the condition matches and the
           # all extra is requested.
-          "all": ["genshi", "datrie", "charade"],
-          "all:python_implementation == 'CPython'": ["lxml"],
+          "all": ["genshi", "charade"],
+          "all:python_implementation == 'CPython'": ["datrie", "lxml"],
       },
       )

From e1d9a5b14ac57a2faefcdb9f12933dc34d392b6e Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 11 Jan 2015 16:52:38 +0000
Subject: [PATCH 06/17] Big tox/Travis CI update to make both test the same set
 of things.

Oh, and this adds PyPy3, while we're at it.

In short: we now test both with and without optional packages in tox
and fix Travis CI to test with optional packages under PyPy.
---
 .travis.yml             |  2 ++
 requirements-install.sh | 10 ++++++----
 tox.ini                 | 29 ++++++-----------------------
 3 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3f045b37..ee65440e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,8 @@ python:
   - "3.4"
   - "3.5"
   - "pypy"
+  - "pypy3"
+
 sudo: false
 
 cache:
diff --git a/requirements-install.sh b/requirements-install.sh
index 5f8ba506..95a688c6 100755
--- a/requirements-install.sh
+++ b/requirements-install.sh
@@ -7,10 +7,12 @@ fi
 
 pip install -r requirements-test.txt
 
-if [[ $USE_OPTIONAL == "true" && $TRAVIS_PYTHON_VERSION != "pypy" ]]; then
-  if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
-    pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-2.6.txt
+if [[ $USE_OPTIONAL == "true" ]]; then
+  if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then
+    pip install -r requirements-optional.txt
+  elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
+    pip install -r requirements-optional-2.6.txt
   else
-    pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-cpython.txt
+    pip install -r requirements-optional-cpython.txt
   fi
 fi
diff --git a/tox.ini b/tox.ini
index 683c01e4..c200855e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,33 +1,16 @@
 [tox]
-envlist = py26,py27,py32,py33,py34,py35,pypy
+envlist = {py26,py27,py32,py33,py34,py35,pypy,pypy3}-{base,optional}
 
 [testenv]
 deps =
-  -r{toxinidir}/requirements-optional-cpython.txt
   flake8
   nose
   mock
+  py26-base: ordereddict
+  py26-optional: -r{toxinidir}/requirements-optional-2.6.txt
+  {py27,py32,py33,py34,py35}-optional: -r{toxinidir}/requirements-optional-cpython.txt
+  {pypy,pypy3}-optional: -r{toxinidir}/requirements-optional.txt
+
 commands =
   {envbindir}/nosetests -q
   {toxinidir}/flake8-run.sh
-install_command =
-  pip install {opts} {packages}
-
-[testenv:pypy]
-# lxml doesn't work and datrie doesn't make sense
-# (it's slower than the pure-python version)
-deps =
-  charade
-  flake8
-  Genshi
-  nose
-  six
-  mock
-
-[testenv:py26]
-basepython = python2.6
-deps =
-  -r{toxinidir}/requirements-optional-2.6.txt
-  flake8
-  nose
-  mock

From 40d007a20b0551017cf7b65f1a379e37ccc9c47a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 14 Apr 2015 23:33:40 +0100
Subject: [PATCH 07/17] Fix the moduleFactoryFactory to cache based on *args
 and **kwargs.

---
 html5lib/tests/test_parser2.py     |  4 ++--
 html5lib/tests/test_treewalkers.py |  4 ++--
 html5lib/utils.py                  | 16 ++++++++++++----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 20bbdf31..01f16eea 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -40,12 +40,12 @@ def test_namespace_html_elements_1_dom(self):
     def test_namespace_html_elements_0_etree(self):
         parser = html5parser.HTMLParser(namespaceHTMLElements=True)
         doc = parser.parse("<html></html>")
-        self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],))
+        self.assertTrue(doc.tag == "{%s}html" % (namespaces["html"],))
 
     def test_namespace_html_elements_1_etree(self):
         parser = html5parser.HTMLParser(namespaceHTMLElements=False)
         doc = parser.parse("<html></html>")
-        self.assertTrue(list(doc)[0].tag == "html")
+        self.assertTrue(doc.tag == "html")
 
     def test_unicode_file(self):
         parser = html5parser.HTMLParser()
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 3be12327..9d3e9571 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -60,7 +60,7 @@ def PullDOMAdapter(node):
     pass
 else:
     treeTypes['ElementTree'] = \
-        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+        {"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
          "walker": treewalkers.getTreeWalker("etree", ElementTree)}
 
 try:
@@ -69,7 +69,7 @@ def PullDOMAdapter(node):
     pass
 else:
     treeTypes['cElementTree'] = \
-        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+        {"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
          "walker": treewalkers.getTreeWalker("etree", ElementTree)}
 
 
diff --git a/html5lib/utils.py b/html5lib/utils.py
index ebad29fb..c196821f 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -91,13 +91,21 @@ def moduleFactory(baseModule, *args, **kwargs):
         else:
             name = b"_%s_factory" % baseModule.__name__
 
-        if name in moduleCache:
-            return moduleCache[name]
-        else:
+        kwargs_tuple = tuple(kwargs.items())
+
+        try:
+            return moduleCache[name][args][kwargs_tuple]
+        except KeyError:
             mod = ModuleType(name)
             objs = factory(baseModule, *args, **kwargs)
             mod.__dict__.update(objs)
-            moduleCache[name] = mod
+            # Key the cache on name, args and kwargs. The nested dicts are
+            # created only when missing: testing the literal strings "name",
+            # "args" and "kwargs" was always true and reset moduleCache[name]
+            # on every miss, throwing away modules cached for other arguments.
+            by_args = moduleCache.setdefault(name, {})
+            by_kwargs = by_args.setdefault(args, {})
+            by_kwargs[kwargs_tuple] = mod
             return mod
 
     return moduleFactory

From f4490bef7e3bbdfc2ece381f2b76122a0d6d7c3e Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 14 Apr 2015 23:00:34 +0100
Subject: [PATCH 08/17] Avoid running tests for cElementTree & ElementTree
 where they're the same.

---
 html5lib/tests/support.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index b64d322a..047c5534 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -21,25 +21,17 @@
 
 # Try whatever etree implementations are available from a list that are
 #"supposed" to work
-try:
-    import xml.etree.ElementTree as ElementTree
-    treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
-except ImportError:
-    try:
-        import elementtree.ElementTree as ElementTree
-        treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
-    except ImportError:
-        pass
+import xml.etree.ElementTree as ElementTree
+treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
 
 try:
     import xml.etree.cElementTree as cElementTree
-    treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
 except ImportError:
-    try:
-        import cElementTree
+    pass
+else:
+    # On Python 3.3 and above cElementTree is an alias, don't run them twice.
+    if cElementTree.Element is not ElementTree.Element:
         treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
-    except ImportError:
-        pass
 
 try:
     import lxml.etree as lxml  # flake8: noqa

From 90e43486a789db04639af9d51a4a0aa51cbb8864 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 15 Apr 2015 01:03:54 +0100
Subject: [PATCH 09/17] Move Genshi tree adapter to be in the public API,
 because sanity.

---
 html5lib/tests/test_treewalkers.py | 52 ++----------------------------
 html5lib/treeadapters/__init__.py  | 12 +++++++
 html5lib/treeadapters/genshi.py    | 50 ++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 49 deletions(-)
 create mode 100644 html5lib/treeadapters/genshi.py

diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 9d3e9571..0e31ff5f 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -13,7 +13,7 @@
 
 from .support import get_data_files, TestData, convertExpected
 
-from html5lib import html5parser, treewalkers, treebuilders, constants
+from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
 
 
 def PullDOMAdapter(node):
@@ -84,59 +84,13 @@ def PullDOMAdapter(node):
 
 
 try:
-    from genshi.core import QName, Attrs
-    from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
+    import genshi  # flake8: noqa
 except ImportError:
     pass
 else:
-    def GenshiAdapter(tree):
-        text = None
-        for token in treewalkers.getTreeWalker("dom")(tree):
-            type = token["type"]
-            if type in ("Characters", "SpaceCharacters"):
-                if text is None:
-                    text = token["data"]
-                else:
-                    text += token["data"]
-            elif text is not None:
-                yield TEXT, text, (None, -1, -1)
-                text = None
-
-            if type in ("StartTag", "EmptyTag"):
-                if token["namespace"]:
-                    name = "{%s}%s" % (token["namespace"], token["name"])
-                else:
-                    name = token["name"]
-                attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
-                               for attr, value in token["data"].items()])
-                yield (START, (QName(name), attrs), (None, -1, -1))
-                if type == "EmptyTag":
-                    type = "EndTag"
-
-            if type == "EndTag":
-                if token["namespace"]:
-                    name = "{%s}%s" % (token["namespace"], token["name"])
-                else:
-                    name = token["name"]
-
-                yield END, QName(name), (None, -1, -1)
-
-            elif type == "Comment":
-                yield COMMENT, token["data"], (None, -1, -1)
-
-            elif type == "Doctype":
-                yield DOCTYPE, (token["name"], token["publicId"],
-                                token["systemId"]), (None, -1, -1)
-
-            else:
-                pass  # FIXME: What to do?
-
-        if text is not None:
-            yield TEXT, text, (None, -1, -1)
-
     treeTypes["genshi"] = \
         {"builder": treebuilders.getTreeBuilder("dom"),
-         "adapter": GenshiAdapter,
+         "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
          "walker": treewalkers.getTreeWalker("genshi")}
 
 import re
diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py
index e69de29b..57d71304 100644
--- a/html5lib/treeadapters/__init__.py
+++ b/html5lib/treeadapters/__init__.py
@@ -0,0 +1,12 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import sax
+
+__all__ = ["sax"]
+
+try:
+    from . import genshi  # flake8: noqa
+except ImportError:
+    pass
+else:
+    __all__.append("genshi")
diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py
new file mode 100644
index 00000000..68a87f13
--- /dev/null
+++ b/html5lib/treeadapters/genshi.py
@@ -0,0 +1,50 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from genshi.core import QName, Attrs
+from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
+
+
+def to_genshi(walker):
+    text = None
+    for token in walker:
+        type = token["type"]
+        if type in ("Characters", "SpaceCharacters"):
+            if text is None:
+                text = token["data"]
+            else:
+                text += token["data"]
+        elif text is not None:
+            yield TEXT, text, (None, -1, -1)
+            text = None
+
+        if type in ("StartTag", "EmptyTag"):
+            if token["namespace"]:
+                name = "{%s}%s" % (token["namespace"], token["name"])
+            else:
+                name = token["name"]
+            attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
+                           for attr, value in token["data"].items()])
+            yield (START, (QName(name), attrs), (None, -1, -1))
+            if type == "EmptyTag":
+                type = "EndTag"
+
+        if type == "EndTag":
+            if token["namespace"]:
+                name = "{%s}%s" % (token["namespace"], token["name"])
+            else:
+                name = token["name"]
+
+            yield END, QName(name), (None, -1, -1)
+
+        elif type == "Comment":
+            yield COMMENT, token["data"], (None, -1, -1)
+
+        elif type == "Doctype":
+            yield DOCTYPE, (token["name"], token["publicId"],
+                            token["systemId"]), (None, -1, -1)
+
+        else:
+            pass  # FIXME: What to do?
+
+    if text is not None:
+        yield TEXT, text, (None, -1, -1)

From 23eb610a13cb730210dc83a90ed7ccf37d51fd65 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 15 Apr 2015 01:18:07 +0100
Subject: [PATCH 10/17] Change the Genshi treeadapter to avoid O(n^2) string
 concat.

---
 html5lib/treeadapters/genshi.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py
index 68a87f13..04e316df 100644
--- a/html5lib/treeadapters/genshi.py
+++ b/html5lib/treeadapters/genshi.py
@@ -5,17 +5,14 @@
 
 
 def to_genshi(walker):
-    text = None
+    text = []
     for token in walker:
         type = token["type"]
         if type in ("Characters", "SpaceCharacters"):
-            if text is None:
-                text = token["data"]
-            else:
-                text += token["data"]
-        elif text is not None:
-            yield TEXT, text, (None, -1, -1)
-            text = None
+            text.append(token["data"])
+        elif text:
+            yield TEXT, "".join(text), (None, -1, -1)
+            text = []
 
         if type in ("StartTag", "EmptyTag"):
             if token["namespace"]:
@@ -46,5 +43,5 @@ def to_genshi(walker):
         else:
             pass  # FIXME: What to do?
 
-    if text is not None:
-        yield TEXT, text, (None, -1, -1)
+    if text:
+        yield TEXT, "".join(text), (None, -1, -1)

From 69ca91644207c74f2de60a237a1d3f55795728b8 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 15 Apr 2015 01:40:23 +0100
Subject: [PATCH 11/17] Remove PullDOM support.

The test harness hasn't ever properly had an adapter that matches
the behaviour of PullDOM, and I have no interest in fixing this, so
let's simply drop support. AFAICT, nobody uses this.
---
 html5lib/tests/test_treewalkers.py | 31 ---------------
 html5lib/treewalkers/__init__.py   |  3 +-
 html5lib/treewalkers/pulldom.py    | 63 ------------------------------
 3 files changed, 1 insertion(+), 96 deletions(-)
 delete mode 100644 html5lib/treewalkers/pulldom.py

diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 0e31ff5f..a42d8299 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -16,40 +16,9 @@
 from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
 
 
-def PullDOMAdapter(node):
-    from xml.dom import Node
-    from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
-
-    if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
-        for childNode in node.childNodes:
-            for event in PullDOMAdapter(childNode):
-                yield event
-
-    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
-        raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
-
-    elif node.nodeType == Node.COMMENT_NODE:
-        yield COMMENT, node
-
-    elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
-        yield CHARACTERS, node
-
-    elif node.nodeType == Node.ELEMENT_NODE:
-        yield START_ELEMENT, node
-        for childNode in node.childNodes:
-            for event in PullDOMAdapter(childNode):
-                yield event
-        yield END_ELEMENT, node
-
-    else:
-        raise NotImplementedError("Node type not supported: " + str(node.nodeType))
-
 treeTypes = {
     "DOM": {"builder": treebuilders.getTreeBuilder("dom"),
             "walker": treewalkers.getTreeWalker("dom")},
-    "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
-                "adapter": PullDOMAdapter,
-                "walker": treewalkers.getTreeWalker("pulldom")},
 }
 
 # Try whatever etree implementations are available from a list that are
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 20b91b11..5414e4bb 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -10,8 +10,7 @@
 
 from __future__ import absolute_import, division, unicode_literals
 
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
-           "pulldom"]
+__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"]
 
 import sys
 
diff --git a/html5lib/treewalkers/pulldom.py b/html5lib/treewalkers/pulldom.py
deleted file mode 100644
index 0b0f515f..00000000
--- a/html5lib/treewalkers/pulldom.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
-    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
-
-from . import _base
-
-from ..constants import voidElements
-
-
-class TreeWalker(_base.TreeWalker):
-    def __iter__(self):
-        ignore_until = None
-        previous = None
-        for event in self.tree:
-            if previous is not None and \
-                    (ignore_until is None or previous[1] is ignore_until):
-                if previous[1] is ignore_until:
-                    ignore_until = None
-                for token in self.tokens(previous, event):
-                    yield token
-                    if token["type"] == "EmptyTag":
-                        ignore_until = previous[1]
-            previous = event
-        if ignore_until is None or previous[1] is ignore_until:
-            for token in self.tokens(previous, None):
-                yield token
-        elif ignore_until is not None:
-            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
-
-    def tokens(self, event, next):
-        type, node = event
-        if type == START_ELEMENT:
-            name = node.nodeName
-            namespace = node.namespaceURI
-            attrs = {}
-            for attr in list(node.attributes.keys()):
-                attr = node.getAttributeNode(attr)
-                attrs[(attr.namespaceURI, attr.localName)] = attr.value
-            if name in voidElements:
-                for token in self.emptyTag(namespace,
-                                           name,
-                                           attrs,
-                                           not next or next[1] is not node):
-                    yield token
-            else:
-                yield self.startTag(namespace, name, attrs)
-
-        elif type == END_ELEMENT:
-            name = node.nodeName
-            namespace = node.namespaceURI
-            if name not in voidElements:
-                yield self.endTag(namespace, name)
-
-        elif type == COMMENT:
-            yield self.comment(node.nodeValue)
-
-        elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
-            for token in self.text(node.nodeValue):
-                yield token
-
-        else:
-            yield self.unknown(type)

From c2321b0234ce5b7555aa080446c872e81c6cb21a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 21 Jul 2015 13:29:32 +0100
Subject: [PATCH 12/17] Update packages even if they're installed on Travis
 already.

---
 requirements-install.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements-install.sh b/requirements-install.sh
index 95a688c6..f823ed37 100755
--- a/requirements-install.sh
+++ b/requirements-install.sh
@@ -5,14 +5,14 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then
   exit 1
 fi
 
-pip install -r requirements-test.txt
+pip install -U -r requirements-test.txt
 
 if [[ $USE_OPTIONAL == "true" ]]; then
   if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then
-    pip install -r requirements-optional.txt
+    pip install -U -r requirements-optional.txt
   elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
-    pip install -r requirements-optional-2.6.txt
+    pip install -U -r requirements-optional-2.6.txt
   else
-    pip install -r requirements-optional-cpython.txt
+    pip install -U -r requirements-optional-cpython.txt
   fi
 fi

From 71ac5580dcd8f2395b8a6de90ed59d93f72f7c67 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 1 Nov 2015 15:08:26 +0900
Subject: [PATCH 13/17] Update requirements files to match setup.py

---
 requirements-install.sh           | 11 ++++-------
 requirements-optional-2.6.txt     |  5 -----
 requirements-optional-cpython.txt |  9 ---------
 requirements-optional.txt         | 12 ++++++++++++
 requirements-test.txt             |  2 +-
 tox.ini                           |  4 +---
 6 files changed, 18 insertions(+), 25 deletions(-)
 delete mode 100644 requirements-optional-2.6.txt
 delete mode 100644 requirements-optional-cpython.txt

diff --git a/requirements-install.sh b/requirements-install.sh
index f823ed37..a8964ea0 100755
--- a/requirements-install.sh
+++ b/requirements-install.sh
@@ -5,14 +5,11 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then
   exit 1
 fi
 
+# Make sure we're running setuptools >= 18.5
+pip install -U pip setuptools
+
 pip install -U -r requirements-test.txt
 
 if [[ $USE_OPTIONAL == "true" ]]; then
-  if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then
-    pip install -U -r requirements-optional.txt
-  elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
-    pip install -U -r requirements-optional-2.6.txt
-  else
-    pip install -U -r requirements-optional-cpython.txt
-  fi
+  pip install -U -r requirements-optional.txt
 fi
diff --git a/requirements-optional-2.6.txt b/requirements-optional-2.6.txt
deleted file mode 100644
index 37557ac4..00000000
--- a/requirements-optional-2.6.txt
+++ /dev/null
@@ -1,5 +0,0 @@
--r requirements-optional-cpython.txt
-
-# Can be used to force attributes to be serialized in alphabetical
-# order.
-ordereddict
diff --git a/requirements-optional-cpython.txt b/requirements-optional-cpython.txt
deleted file mode 100644
index e93eda8d..00000000
--- a/requirements-optional-cpython.txt
+++ /dev/null
@@ -1,9 +0,0 @@
--r requirements-optional.txt
-
-# lxml is supported with its own treebuilder ("lxml") and otherwise
-# uses the standard ElementTree support
-lxml
-
-# DATrie can be used in place of our Python trie implementation for
-# slightly better parsing performance.
-datrie
diff --git a/requirements-optional.txt b/requirements-optional.txt
index 4e16ea17..ac6539cb 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -7,3 +7,15 @@ genshi
 # charade can be used as a fallback in case we are unable to determine
 # the encoding of a document.
 charade
+
+# lxml is supported with its own treebuilder ("lxml") and otherwise
+# uses the standard ElementTree support
+lxml ; platform_python_implementation == 'CPython'
+
+# DATrie can be used in place of our Python trie implementation for
+# slightly better parsing performance.
+datrie ; platform_python_implementation == 'CPython'
+
+# Can be used to force attributes to be serialized in alphabetical
+# order.
+ordereddict ; python_version < '2.7'
diff --git a/requirements-test.txt b/requirements-test.txt
index 8b6ace66..13b91c45 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -2,5 +2,5 @@
 
 flake8
 nose
-ordereddict # Python 2.6
 mock
+ordereddict ; python_version < '2.7'
diff --git a/tox.ini b/tox.ini
index c200855e..2fba06d6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,9 +7,7 @@ deps =
   nose
   mock
   py26-base: ordereddict
-  py26-optional: -r{toxinidir}/requirements-optional-2.6.txt
-  {py27,py32,py33,py34,py35}-optional: -r{toxinidir}/requirements-optional-cpython.txt
-  {pypy,pypy3}-optional: -r{toxinidir}/requirements-optional.txt
+  optional: -r{toxinidir}/requirements-optional.txt
 
 commands =
   {envbindir}/nosetests -q

From 383d1ee7e539f1268ae2e6be3a73c2fe77c76cee Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sun, 11 Jan 2015 22:44:45 +0000
Subject: [PATCH 14/17] Move to py.test!

Also enforce ordering of tests and test files, given that nodeids for
generators depend upon the iteration number, and pytest-expect relies on them.
---
 .travis.yml                        | 2 +-
 README.rst                         | 6 +++---
 html5lib/tests/support.py          | 2 +-
 html5lib/tests/test_parser.py      | 2 +-
 html5lib/tests/test_treewalkers.py | 6 +++---
 pytest.ini                         | 2 ++
 requirements-test.txt              | 3 ++-
 tox.ini                            | 5 +++--
 8 files changed, 16 insertions(+), 12 deletions(-)
 create mode 100644 pytest.ini

diff --git a/.travis.yml b/.travis.yml
index ee65440e..b9a89978 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,7 +38,7 @@ install:
   - bash requirements-install.sh
 
 script:
-  - nosetests
+  - py.test
   - bash flake8-run.sh
 
 after_script:
diff --git a/README.rst b/README.rst
index 3d08d758..1bbcb609 100644
--- a/README.rst
+++ b/README.rst
@@ -132,9 +132,9 @@ Please report any bugs on the `issue tracker
 Tests
 -----
 
-Unit tests require the ``nose`` and ``mock`` libraries and can be run
-using the ``nosetests`` command in the root directory; ``ordereddict``
-is required under Python 2.6. All should pass.
+Unit tests require the ``pytest`` and ``mock`` libraries and can be
+run using the ``py.test`` command in the root directory;
+``ordereddict`` is required under Python 2.6. All should pass.
 
 Test data are contained in a separate `html5lib-tests
 <https://github.com/html5lib/html5lib-tests>`_ repository and included
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index 047c5534..926cb2f2 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -42,7 +42,7 @@
 
 
 def get_data_files(subdirectory, files='*.dat'):
-    return glob.glob(os.path.join(test_dir, subdirectory, files))
+    return sorted(glob.glob(os.path.join(test_dir, subdirectory, files)))
 
 
 class DefaultDict(dict):
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
index 0f958c94..9cda65f8 100644
--- a/html5lib/tests/test_parser.py
+++ b/html5lib/tests/test_parser.py
@@ -90,7 +90,7 @@ def test_parser():
             if errors:
                 errors = errors.split("\n")
 
-            for treeName, treeCls in treeTypes.items():
+            for treeName, treeCls in sorted(treeTypes.items()):
                 for namespaceHTMLElements in (True, False):
                     yield (runParserTest, innerHTML, input, expected, errors, treeCls,
                            namespaceHTMLElements)
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index a42d8299..c79d0b1b 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -87,7 +87,7 @@ def test_all_tokens(self):
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
         ]
-        for treeName, treeCls in treeTypes.items():
+        for treeName, treeCls in sorted(treeTypes.items()):
             p = html5parser.HTMLParser(tree=treeCls["builder"])
             document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
             document = treeCls.get("adapter", lambda x: x)(document)
@@ -130,7 +130,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
 def test_treewalker():
     sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
 
-    for treeName, treeCls in treeTypes.items():
+    for treeName, treeCls in sorted(treeTypes.items()):
         files = get_data_files('tree-construction')
         for filename in files:
             testName = os.path.basename(filename).replace(".dat", "")
@@ -194,6 +194,6 @@ def test_treewalker_six_mix():
          '<link>\n  href="http://example.com/cow"\n  rel="alternate"\n  "Example"')
     ]
 
-    for tree in treeTypes.items():
+    for tree in sorted(treeTypes.items()):
         for intext, attrs, expected in sm_tests:
             yield runTreewalkerEditTest, intext, expected, attrs, tree
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..17209aa1
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = -rXw -p no:doctest
\ No newline at end of file
diff --git a/requirements-test.txt b/requirements-test.txt
index 13b91c45..0580136a 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,7 @@
 -r requirements.txt
 
 flake8
-nose
+pytest
+pytest-expect>=1.0,<2.0
 mock
 ordereddict ; python_version < '2.7'
diff --git a/tox.ini b/tox.ini
index 2fba06d6..e66298d5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,11 +4,12 @@ envlist = {py26,py27,py32,py33,py34,py35,pypy,pypy3}-{base,optional}
 [testenv]
 deps =
   flake8
-  nose
+  pytest
+  pytest-expect>=1.0,<2.0
   mock
   py26-base: ordereddict
   optional: -r{toxinidir}/requirements-optional.txt
 
 commands =
-  {envbindir}/nosetests -q
+  {envbindir}/py.test
   {toxinidir}/flake8-run.sh

From 9a10a4ca7245c04fa7e292da572114137e780575 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 20 Jul 2015 22:29:02 +0100
Subject: [PATCH 15/17] Update tests.

Also add an assertion for a symptom of #217 (without this the
test suite goes into an infinite loop; this doesn't fix the cause,
but it stops the infinite loop from happening!).
---
 .pytest.expect                | Bin 0 -> 44449 bytes
 html5lib/tests/testdata       |   2 +-
 html5lib/treewalkers/etree.py |   1 +
 3 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 .pytest.expect

diff --git a/.pytest.expect b/.pytest.expect
new file mode 100644
index 0000000000000000000000000000000000000000..b0fc7d4c2e055e9de6a2e10e36c64ff70a9d127c
GIT binary patch
literal 44449
zcmbuI&8{9t5r+9$AR(@T3xMgKnVuPQ8|;u3g>V8^jAPjrBy730v*ixB7@nFCC5tad
zdfxWJvX9m7?)tB{s(SwY{OkAMe)f;24<Fxr{q7gfzxw~5{`2#nJ^$wEoA*y&eERI0
zFF$|t_P@XU?eic0_UZ3$_qT8U{QFN&A3uG(fB)>$ho`51e*X3={)gYa|Mu0Z`llcL
z@YSn7^)cG(=U3t4{doP{AFX}=`G=2BAAZ>LyT8|d*Rkj|d)w@q?O4~>DN#o2m))@M
z%ZNAZ+j`m!$F)I~;}P{gef`zbyEp&(5d;4C{ZC-Qc3qkU?b^BB{%EtOcVB${mpAYJ
z`UA7S{|S4wMRTs77ZT~D>b%#4DzNsY+YS3NSQBoz?<)*gJuzQDZ+cM?^Wj!MsiLio
z?)7@#xsyL$*Sv1mg(y1t*vGcpHT^7+Pg%U4dc$>Tm@Id(<5(eJc*rp>ZFSHUE*5Us
zjuXKO7x#6WxN6(IpdCZ=u(wie$HTrH`fM1yKnZr(9oX;&)2s<OyaydgXs~!5UHhWN
zerz4S2DDqfYq`8Q^nK$>p<=CT`M-FvkImY7<r}ucyNte_n&bT(+8VcO$%D?dojFz3
z7MSwu(8Ae{3i(4>Y1<BYe2n1T;yyg{`gZxK=k(bOLZZ!mzr0&%SJ5EM?9NJN!nNI-
z<gqoapzYW^zm}4~P<%%(?UufMn}~R}qb!2l@Y+aSL(Z-3r}B~)l7%C2aXvaGq0iFY
zeev$TT^-6N?p{ZXWF|UdJdW^q<vJfma^I)y50iKuX|`)3D_Qqd>owt7U{g1JF?@t(
z7Qhf18QPrIS{@W^W9ZZF<H#*!J65jh9sI6ABElW5Y5S;z{-OB4VBSX8hwE2y+>@bs
z8Qv#CcWxFP^JJBCN@QGK)s`NtY5Hv2(RJtn`p4Nql^!!Hy&k&`3C1-vRe*WCFfNBy
zm1{4d4=LG>3ClXjMywnIJ$kAD)!2PN0=3}=p`Y;-Ww@PEgVRGmZ{?0)Col8>qnf?=
zzTj*F^Wmv5gV&5oVuIk*=g}BCO_6p~MN!I9tKdE~1EUwNnJ%V`W>4FW1KHAngiqmn
zt2o~a*T{)U2#4vhcblq0^r@WOTS|P}guJv8wxH@t8mi+NR4G3xx~nCdWK=^OB%|bO
zA>wcr)*Y(uetXGq<EucbbmLyZQMj0>8=gw@cpf>4*NRmGZ{blBP)djzGGv`F;y~wK
zx2_V~kcn&ax&<CPv0<;=RZ7hr_lNB0K9lMM^D#v!4QtDTCh0X4F!UPoWY(mF+=-yV
zwX2WVfJyP+p;(WApuk%bN?*mp^spnTqg1#MZnzzl@K_!6S>|~K{YW!;G&o^ynV^bB
zB*>%lxMmyW3_~7{?jcb!Yo7#N)&CG45<i+RZfDhF!kzbC-90|x2y&=9RtS$t4?s>J
zvPB&i_kF^(^}Dd|>;@X5Z4FIs{$l5@_W_;K`vAviM2kEvm*jR9F+%y;MxgOW#o1nU
z>W)q}=rPOVEYN2b%MBOb<v^`m??Zt`8`^&QbPD{2vuTWD0yWr<37_>{NEc*q9vY!g
z0|$bw3fl-kkugrpf>#=!oQ$p9qTeYnsY{{AvE}h~O`87*h_pCwfkCc<VR$nlFJeA@
z%0d*v9otb&Md9LY@v|*-Btf4*4VcKlS!vn?CAw=tRGjsvUrkF2H7^|^^u<FQRBZe{
zTg89g5;P6K(C3<GDsA|@s$Ua6iuN{c7^=&D20`^TQ3NBXMk>S8ju!N+?bV6nRHt7>
zmGF>zuhv1gA)$f}=jn9=-3kx6trsTG9g>29hTt$LtkD*7MWTafrh5HTf*js~(=Nw`
z4=?v(U!E2ZYMr?QU;9+6hEE4T7V+PiJB2hM=6W|ph0Je9^@oHTuN823w-mtHd(%b&
zUg&fV+$;~?7|QgA1Ua?FXk9AfaRv-WnOjUQmM>;b*q?T*Q86!A6D@84%G}X!S?mk7
zXQm{}@Qju<^2yw_Z^O4ZgP^hDLnrY<_v#UJub#f=htWMK=owz_Nb24dMEq?cizK4L
zE{6`G_o9otA?Os0c57%9(&ibsI1h6u*XneLfocxvg=C^rgF-=`(~NVXC4r_i?Tc`E
zd3Z+phNy+sb1iH}iri8)7;TR>977P1`-&*NQ_1-zs^=t#6T;Sf8#?%cXWLb&Z@__i
zPQ^*c08Nf1@(6&cJ4cK)K;Mr%HpdsK33}hR`Brkqn6(|EM<%yJv_|Fr63Ev3zH4S;
z>GXBUlV4UB7)XxF4*}+M7CAyKIK3aBtt}5Dh`b32!r-crX2gqmgJ)vrBYP4<1Bg@<
zygn>`T7ro+Ez)$~C6+~pjbNHNrwGGX?EDsyn<jd@r+h*|29BP=t8|9ZuR`){=r#g}
zbw5$&s1Jsdv-?;)>yQvFlVtzsO8`yo_d^GvoUC{ATDTa}=m-Z~#G8dX3JVWO^IFb)
z+UE3Wf!@8YUq1JxwPhkIq8};IS8W*RI@2Z{7$LveXuitcmvX+LfRUqV86k~fLyub|
zR|_8ATPEV|?Vh`H2AXkVNEp%|N!G@e_tBugJ0znBT9QEwq8R<i$+4b3-=Y07w~@%d
zws#ErvL=8Gf-=)G(Xz9yQMN>?>venLm}D*51fc?c&0g=D#Clqt3qgQGD-&UcCulNJ
z8}t#-Fcj%{O>x+O`KUGmXST@0;8~(-T09701550?#i2N~3&jw8AVJA=-~&pg%_1n7
z-Uk>cx7~naUZBuNErjQh8xGpmxS^AJaO}+3FjI}O31y3xHO;>s1jx)(YupEDIim=^
zC8tQIsuz=!v8;SCPmsck5VX7s29y$a6@5d&A<jUj!oxDEY!BP-fcv5Md9gigf*sut
zV(=mt8zvFM-KH)17d(SRV;6du@_ZDI81q~k08n)V)Q3@JYSL4d)eqP93U$)86^Ddu
z;Snb`4j0o<qa6T^F1Z}^i3iMi{XDl=z$`v;z`4exWhhv02+YC_ogJWi&zsX_reJkW
zfKNQ$HbK(OXSYPpL<0{!$4fn9A+}AZsbftL=3K0Kx-Zz!Po@BIMr34>6K6W40u}Hu
z3%!KVg*3Gp;bo)HJ`;^<`wWJsu|no)@tWS(PF#&CTr7d#a^l$p`onY-LaTCauqeH1
z_A$+?WsH7F?Q5vkvQH?NOfve5wR%{=Sp%$&mSy6TnFw$KoVef7OOPuK7XXIlMr9nn
zABsjceMm43EtMbzT1Y?&0*#qyY^A?Tr`t@j>5CyxI<%uPOA~{tvG>!P9~giJ+WwLS
z<{G&ko`Ad{qKVG@Ad(&?f2%JC(i4R+g|@q2$=G|Tf4Jd3d?PEJRodRr#RBI-E}TB|
z0{!NU;eHJ7T4#uwCN%M?eNB9@gNcE_V^{w~O31={LdoQ@6@6H|O&$;e@@nghLD!<f
z$7FJft`%FjIE@~nJRYH^D^Gyeb>f920DG@dOZkUXSZl2ZBOs4Yl!DgOTk-Aj$OV$)
zO=uA<a(U{)t5`aC14Pnx0>nh`1Nfo$p%={WvsCTJi&PS+_^L8rve9882@zBX-2fty
z%hNi1#sWm6O)PEz*$8h)xh_G~G#kiRCIZtw@j@6g<@$!-JrABF?8urmllK$hUgZKK
za#bNY6cn(5+>v9Ky(zlKro+?nj-^hj!OFDmF<YF+e0)v-J<{UhfiJuVr%cMv_vfTT
z9xxoP!&B&~!(eX5nlSFVzmnrZT4ss$y=e5B@IuGLXQhF*z4<$XuasN`FNLB)yOFmK
zd(?NhY1ai>ncr-ic}Rc?elFSP%-7FaWo?1Mpv6klm#B`-0%tq%$O!4S)(WsZ?NdOl
zbsh<hz&_e7O!QBX?ZUNCcX>0HD?U8#iyc@<wzIo5QRRBL-rqrbc*+;5{bOc`{r(QA
zyCPLBEn>88LI2SGt;IWcMsRn3#(PRg?sHF)r$RZZxg%YC)TbrF#muYX%0hPZV50f*
zUwWqemRBW`kFD$zjX`|qSwkO;MQb-i8l8vocxwDfC^DQ@-;mxQy@pw(+{_<K_8N7)
zujUk=d4n9!z?J=!^L)`8<@&Juo_-KOEuLgKxlsb;;ew70WW)N9q&cKUK`7_j_i&Xu
zf}jB;r{T{0QLN&$Yx#IH=*FO3E59L)SNY<`D-SVwD7WRI9c2cN?SvbgA-^ggf=M>i
z0ssn`la%MKz2#jd@&KN|&AkiXfbc=(S;i(vQfc1KPfF(KnZM7J5dcrEcmr8LPLC-E
zk1L)v7w(h~XOz>Qk_;(J;$pPBpw+~y(CM=y+C{W&U|3RnbTUO0#_@{IEXeI8$xmaF
zSLRYNcz7Z9K0HL4d!O}wFDT9TCLZZZavZupycPuCb*Bvtc7#MZg2$~FuR%10c@4T>
zw+Yico@CQ08)eg+kTc(o<}DIDNO)6H_JFj03=5<b;}jpV!Uty!(I4#*fE6Y}&2{1<
z89b(;{R#~&xh+J%b7@_xisv1T?*;|O*Nmr)Eq6})-Q66DDbXKH^mLg}U+U5j$ZuH*
zU}*Duf`FB3_)``JmY%Zuqm$56t0B)U>Y$$)(cEMMV9uCQ-+!FUCz257hvbIHXQ+h)
zTe%S@xz%2Sfi&+zmx<S8Hl91P?ht2zGC3F8g9m~EyC<kW{L6<xeUWVmN-;Mk???#r
z2{{5g<iAx%Pn73ytB3l$VfN86X-2Nnv?md8*oyG|%>=t>c=HI{&(-Wkcjtf;Cc2q)
zRszhQS7=C>y-1zGT;2WViX_HfWxNbh)c7S;AiSA7Ai$n?CHLpn-9BKsh@V@-+4Oy=
z$Ycbk(a3MMd6WSA2f@Rr0jxOJ6C83L>@bL`<q;T?kR!fomH3^&fGxuOFcx1c6!cCe
zIOY8ca!Qs~%?pu1@$nyttu_vzt`qXzz3*U(Rzr|n3}%OdayNcS!CX8RMf-@>I0_8j
W7489qJM-}!KK86#0hNaB_5T3Zzsjrt

literal 0
HcmV?d00001

diff --git a/html5lib/tests/testdata b/html5lib/tests/testdata
index f6a1b202..6234baea 160000
--- a/html5lib/tests/testdata
+++ b/html5lib/tests/testdata
@@ -1 +1 @@
-Subproject commit f6a1b202de14fc057b196044c5ebef4672be3dd0
+Subproject commit 6234baeabc51f6d51d1cfc2c4e4656bd99531f2b
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 69840c21..73c8e26a 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -129,6 +129,7 @@ def getParentNode(self, node):
                 if not parents:
                     return parent
                 else:
+                    assert list(parents[-1]).count(parent) == 1
                     return parent, list(parents[-1]).index(parent), parents, None
 
     return locals()

From 9337b003fa4465e91c1d9b3271064e34e26d876b Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Feb 2015 01:34:30 +0000
Subject: [PATCH 16/17] Use py.test to generate tests from the data files
 themselves.

---
 .pytest.expect                      | Bin 44449 -> 58861 bytes
 html5lib/tests/conftest.py          |  21 ++++++
 html5lib/tests/support.py           |  11 ++--
 html5lib/tests/test_parser.py       |  96 ----------------------------
 html5lib/tests/tree_construction.py |  94 +++++++++++++++++++++++++++
 pytest.ini                          |   2 +-
 6 files changed, 121 insertions(+), 103 deletions(-)
 create mode 100644 html5lib/tests/conftest.py
 delete mode 100644 html5lib/tests/test_parser.py
 create mode 100644 html5lib/tests/tree_construction.py

diff --git a/.pytest.expect b/.pytest.expect
index b0fc7d4c2e055e9de6a2e10e36c64ff70a9d127c..c88e99b9140f2b24dfcee5e47ea9f9a90794de36 100644
GIT binary patch
literal 58861
zcmcIt%aR<&b+xU9BOLw;?J0FVx~jXiwPZ=w!<IqWL1+Ynm?kwrV88%I6v?fA4O@Re
zN9bd8ZueAW-K@--J|_zsg@WW{XWqxT=RPv)uOI&9>+fECbA0#y?(O|Qefa0c|M&m@
z_zxfc_4w`A$5#(8zWw6m-Rr;o;D;Z6@~4M4ueY!7KL7E<@%_X5^Z&khcy~Ph<>l+I
zu)>eN{_b$Fe>M4shr{!_>#Cd6?-GA}`|{!Ck8OcZUcJ44|M2e5uO6_A%j)&^NBGx=
zLw`8D`swTAo8$e%GyIo_!~M%Q$M;{qe097%{hnO2>Mm>U+QY%E`OVwA+fT-YKO7S4
zPR^7)=(fK({WkHpnRCAT^XK1PHdNKohQ__zp8os#1;2Rv?s)g*{U=|%d-<ga?6P=M
z9S;BY+h04${qp)n&$^yy<Mp?1Uc0q^bNyPA6K4;snps!X`y2I->$cg+f815=_U81z
z*NY*orxtp;2sbjvZDQpY$NN`rZ}0BE3{E%v6S!QpY30+jUAK|{xT!bw&FOy<8_xJZ
zS`-;coCvdZrgk>}NUBX@lTS$XMn;BecXRr6;s-NQIs4or09M_j`}+N#et!LO6IpBq
z8=D|ZvAUXJQNP6xZDNbdBCadh^s0W>+~^@;7Xeaq6fyQSb=TgUewC0zM)MO@A@Zf*
z;PkWWS9|mF;ZK(!0KG>sK!wV#+ryZDas4J-0D+iS+DF@Mb-eGYEjUSeXx*=NoThIf
zR$@oNQf3s%+wO@7&}UtZG-*&)v|e$fYaq6iX@3wRUDeBo(bN^-MkITUkv&4PPF;?C
zPLP;z?XcdQ-l?vQ=nW!6F!ivBn7~FRBiS{=il_d5QTdeBetV;sS_5jMLHQVU{RtRR
z|K<4kS9cGa`U2;%xPuMJQfZ92{rmN>9&|s3|21||Azrdf=}{=TfJvle4*#m%Hch<<
zYfkgqW<b_*iB&x~)9F7Hy;TD#h^+Dt*Dsbi9`N0JZuvQzY6tt202jy10IuvT6*!Rk
z$5metvntS@8-Z9?;Kqd6IzQOKT65|Y>m6{cj7mg=1t6vv1#{*AXyzDhd)`4*%COsX
zGTo@5cIGe{#~eI9_~Yr*>%IMA6;(O}MKTf-rmo(=Qi&+*EcJ}o7dx1okjRwJt38I3
zA@2?=sfur^4W+#3Odxp98Ydg-6Uuw>@b;_Y{oVJ+cVlhn+3-(L8`^F*NHmC?|FWC?
zMeEfJ3nIO%bh-x>7wCTjw<wNRR(Z<-oU$p4$b^8?zFG~rr57**xXzBO=$(Sd4^7I3
zhGODqLSx+qCe*D_?aZ+?xu_3+b#4;Lji?=lvI(edqIM|jpxR{?BmmDR0#+T20Z%3-
zRRxMgu!OYXwsUe@?hOcBDdRmoPZ)2ew3gnjQE_KB0bNfoN*8Zo<ZkiECRtmbl!QHA
z`E_Lj=o^&O_^Ry|IUB*ugL~!E*mBt$elE$LD08GOXU?Tmx%ZnL2m+DA1qo(4CF)MP
zy4|l3P#eS=Z=k`e1pC@yPeeQ&f9sR?$NTqp4|m_(Jv=sA=Q?BXE`r{;i#+}=)wB(1
z2R7%07I`)An8rK*>v|BIvfo)zd2%{J7irfS>mUAvj0*&@A(u>r2H6EH3aWqH^jrOp
z+a480^_Ni>Kw&`4x~hHy7bEIqLHdAjZ?tRLi4pSz|5A^MMm^%2kY!j=l86^UI&kTQ
z7^I$~?GUG#X1GrT5{WGjEqlFF+f?{tt<*dw4Z+G(A=Xw<6oL`jbm~tw-L}!}4>b{j
z9yoPZyTOUdWwxz%db_Qww_M>N9XrPK!p9C(CunD5R;Et`$tUV4?S8M%$zWIkg5<pp
z(*){4tG)pnAR0V@WRAtvatneHzy-uG)zwC5h$d+`0=QZ<cfbJp#i1TcU^V>7fqIY%
z<D?--*z{i_$}x*Y%rt@Uf`y|}h=R2cF0H^=UX+{B;fb7|CH$32`|DKQM66xztad5>
z?cVD8QfX>Hbq|A#gHJ3FaW!%e-A64ziqdF0lotQ*dfko|k<9dRyi`3}y|G~~qCV!}
z$M42#O_moTs(KnUR3U8QH0f|`jue>=-@gyD-U%Gk4_FW_n|cfBu(|W3K9`9Q3e)I1
zD-TK8Srr!`M~}HxZLYSlJJr9@hE~<twKm^bK6{pHgUCnHDH$fMx!H{XC?h5FK%z|r
z*9f(76M?5kxyuz3lM{I#@?AacXnPy?5`P&D8^FMSULTPrF?E5zsN2D8KBf+d0GY<@
z1|_q_a3o3#qAn?5^}3R)oz<-gJxxoUh+sLi76no7t&t-BF>Fqk;<m0qD2q|D9TCCf
zi;%J+z19dWwvb>0Oe0{HwtWnv#{izLYdAC*oRFKL`k;qWc#9si>Za98f0(=HsEmtv
zUVW3e!WU9%-Nm{JW8=(4lQaltlKxlM$C^<9$?V5LTI9tdGPS)yv``SRvJKHRit3k`
znOHiEupiVl$}=Bp?#G!=d1#(>PQB_FD0O1A@t<h70+?5TI^Nj|(5CHnh-bub!6mRi
zCSsWFNVNtT-3LMkhxGsyX@;mvhDC=UH~R9eOdEP!GF1P#-Pm;&!bPKkBaCawMYRCO
z5r<i@kF1?xhe1D<KzpqRO))8Xh+`iiEdT{sV{mJnIGh}@cm(PRuTc#aL-eQy5R<(V
zm5u`glXDV`dHkzn*q!Ck1a^fz=I7%D_^lA_iVNm8_&YnB)J+;wi&2m|=5>~E3h08?
zMgajtA(+1bTZOs#7@3IDUWB_t@ihuX1dyl6sZ|KhyYvgI-n!lG^n|0U8n}9Kp*mr9
zOoz>dm!vZg!<=cMr#m{T%-d#PG9;21;w2^!0+QiSNecG-`aCBsEh|wt&Jo~*3Cv-q
zNHcPgUdefCeLA4+L3)L`eDy|hoM25PNlZQYbYo$x`j4fm2d8kFk3fn~P^<|{#Ay%0
z=|o;_k(>u`iGUF7cUIKZ)aaP%l9<IJLe<;!7r{rIU7e?ya&Edp-E(9)-lodK53FQH
zp9tp)YL1;X+9-Q>I;JJja>T`yu*GU**wJc41PNw{1E(U0K!h=Rbo#6md}8@eyo{j3
zL0F@#CJPb#&0~>Ce!*u~xGod_z+~41f?j+&FyfSg8H@<xGzoZoh-{F^^wPwm8JVPC
zStV+Cbwe-Z<KZ%}Ji@QMq*5GTY=qZV8x&&#+8_`CAGa}qS$<Kdj%OqywW{(cc*_c1
zJj6&M<q2pBzP)Jv1fWOIG7e4zmsj<TNnjNBG^-t|?p{lbux6>87g#tRQ)~|!$nZhl
zx~-*6$SCX}15iv_<aIxi7cX>G{k=V&r2cVT_XYDDJ%--YU&aJAI-f$d3C&vlyeRJK
zTSoU}jagiS$efd?(!?Y<(3{8uGSPTNSi<8IFf9@98|&mNy-2QfAL92DL`T)jwt~Ht
zM^$4<3rtjz6v}|Aa{3KwBgB~XIYbPP-i|rpBr*`iz8RD*iA6R(okHRs6ar0oqX$7^
z>0}}`Y$}YrskV$R5qL&%Q+^JA>%>7^goEUZZ%Wt$&pC8vZp%1miF*_T5ee&LQuh_o
z(D-tHa7`hNunWJU<L!X@4EOY4FiY1b#}NGm4}<}}rOFb_Xu*N`ss780LE}Xm?=#{>
zm7x|C1jR@{O(E)mv4=%fzyX6mIM>?J;8?Q?VR5md&bc5#ye`3T)aH_nlw%o!Z5GQ2
ztBwmy*)@q$dq}V#)a-(UVCP_vTUEBl3X}8YaBKJQo!Wz^Xns6qs9_-$fopqwpC~;B
zEv|j2F-x8tN=5N%rw?=W8$AC{-(8xp-HfW0)OXt!Or_!V^i@(sWJ)t+CHy|{nU9bL
zLs{rb5TKEXVI$55<6{z@a7MUhqAFbQBqQ7`mc9ySxEg`F=<^j?IBNqfZhU|UN)4d#
z`u9Y0M5GINwmgKXaY9(ULjzCZR6W*vvc@oqCOLEGb~yg0x7vy02A4lE!!|D*<ndbE
z`RTvhMvE=l9>vHlPBarU*fKb<7lo>P+&(F~K%!2$#Px}Zp%BS!q*Pc-lqsuXLEUYY
zQZ4eW9Jnz9-WgLEQl-cg+5{wzFNP-H5Vd)80xko}CBg|a+8uH3v3l$e#6X9$h+Y0N
zAp;$0>D$QLO4im^jXsfyx5m(t;C0^XoVMAalBc>B9#+#DH``6g6klui#LA&P4#5Gp
zRC(YXD!ql)H1<j?L7y-FF?l>EHS8f+Q$b>}-P&77L~$iTjTy#9=%KF+Jo4!6S5`HS
zYg9-H38lGw!0%{ce*y#`StsTi!ZrZ=ej8peOrv}T*t7J6KbI9Do+W}H^(c|uD4gv@
zXerv)=E_+gGRj0Es9IpNwMtZRebDYGc{>Q^$T)O5z=l_$ymhIEM*j&@;Jh_i;P|Z6
zn)2l>`8b`u`X~%Q;JOWp90dGCcnefq(G+nTMGt@5ja_CY0N$ZZVthHWG(oy}Ljck(
zZs4wnumQG5M2n6o)<tVPoJifhzeqm}ntJS5XH!j6FgCI$pFGO|7xJ>Dqm=`%HIn=<
z1mIQi1XL3J@8Yl-J|`HqRN$ergGN@;B!GE2*DXkU&aZ6=n9$m7aIN`2r1%qI5@f<7
z_Y-ER_IxY_5yC3o<}QcH*}b#KJj6kQ8kwfpIEIj8)<}QVDSY$DJNZh7vsbi;ktvrl
z+GR^u+l=-kh7vpmzDjdx)mEln7{#+ZB?S_jq$hwl1bBNpZpmbrM8r`gtE4S1TugYL
z0(#M~P^!`=*6;xVXrlONmdJbrpJ`jG{gltsJ*C>>+y{{8jvr!~%2nFVm}hbRYO_Y(
zswZ1h;{4u52-$Z^hhBfaw8-$MG$jpp<k!LLw&a~PR6LPV>vvyL69fb)^1$|yYJ5nI
zP=q2n$7#I{WuIV|<qmJ{NpJO$P4Fd>S|51E18!1P#=d(`)G%fwCRO)l{LEA9Coeh@
z_L;E+R0I-b7bc>o7dB`}EldpPdszTf&+7%7Rbd{tQs6{{mR)1g+j!UwL#;9*L)~*m
z@`<)}AZ#OFja*P<JDq)*R+e^U*OP7&33aA!K8iFs2ja!=&&M>IgRsU=WRE|5tK3|%
zdal&<Fm~;TOmrw>(TrUUt{4u;gh?0J&Lb`}BPF)zo!UNa>CPUusM;GYlN8*g+Zxp(
zIK){v;oI~n#3SZbFRO)@@=K#h+8*r7Y{`|0<S;t7)1hv^&eZ0~VYzYO<^(!apI#3}
z=N;vq{Pzb{4YuB+NTL4mcGv5VoNYV1GDm1VZBLf(#E>9Y{cP$)EU)csB$};2P12;r
z&?3cQgZ3085VXy?#QEFj3%;gW5uRULBj>l?_M#98U=8s(e7V5;8>8y9;ik1C3vp*R
z-bLH33%)7YZE^LS2DQQP5eybAs=Q2P--%uNf#=tQaaxN@)fSQPqT2@=KtM@)XG5Cp
z?N&9Brbh>uU|N|ytvJovSTUkFeAw02S@@6@XRqWISMV%(^*~lmJ-|G;3?XElGYj_y
z#ngi?ye&MPojWCCXaTD2n!|#xtCn9Ee2=qlFt#Q15$$tK#YJ$tQ|Be@H7D%LT8PE_
zn%j4@FVX#w9j?*rx4ZajAp&k9pf5?lW^g;ugC*rLHL{4s!7LWsgt_VH7PgEjMK_6>
z+hx7SLEf>3H$gwXiffEsv?qLw0<|MHfu5vmsWeyJ460!8Jz^>>9e_i&<b6)9I%ITS
z90TWMK3D-$Xbb)XI<%HoW2gwZ9+8uUU4V|RX%)D40IMh`L1Rs3BIV*$ttS(UoGE;V
z$UZAlhHa|;gf|f}&7h1v>wtm}f=g{7iy>?ScJk_~1N(Bxbv>R^WRS>3iE|NQF|ZZ)
z7l)=&II3oKGx{`Hu$Tnd>s5<l_{H^^=~=*&xU*8H3U4SP!;(HaGvuB@VglJDpP8AF
zhm76J%xO2v%c2;{`e<$>ZXz|2y5J!}WTM3F$AGog^^FZk8ynGq63;hs%qs*~SYA%=
z&#(8W40+1;ix{KIERM7zL7&yEwMPv}jg4qg>tlj=O|qa}Yf)KyRbqgg$R`OVI=Wr5
z^_!w?%(Y<eVIKO&UzWd9gBvFx2}G(Dwlny9B)H-TH=pPCzo1%Y+u2xcu`@44ccMd*
zcINROwLdbWpi4Ll_ey3=ny4zIH)#rrkiNX&Zi_kq_7YI;cH#CBf?4r#v9Xy9i5w?l
zu~;qZ!$MY3a91Af0%S5`=cnlb8^l+QqXCL>YSC+%$w&hb*(i*cX|!rFB$-<-{elij
z=!1)gexhrsZV8`|MGS8elxO)S5h|#~=x4c2ipq#&wm~97jhzNIxZDv~0aJ#1qalJb
zb)m?c5{nV^B@OcPJae+u;vhPEgL$%e@f8D`6j}-;D&)zta<j)&BYY|jQPz5hBQntk
zK8L7DXMG9k_>TSMdCn)`s+^Zuh1zL-Vp42PBqZ3g$S+Z4Y%PInD*)!;`QD&Z?8iV2
zqB=2R?6{4UIa0}vN-=7?HYWONUDUqW>9?f1^IKWP_&DPm!I*XBLZ0P3-MVYe&DJqJ
zG$b$$MD)x;tcDM`4NXMFxBd2d@d=;C(QkfW&H~jBo^~OlCH&P1&7*vf8gtk85;;bM
zk7AJOy<2<CLnVC`E{sb388C4+W>%B8qnr({;}qN4F254}LXXFAj5vI$QpF9fHNa!`
zTJt(`!nG-MmWZ5u&Y!c+KzNszK5D+nXein>`uI2_0g!$-X`;Yn@RNKSbS5h=q6-($
zXnC?kHCnt|38N*l(#S00s=YT>rY09on$bgX&NCwnezU3wo7&#95KR_yFz5pobD<gI
zn1m`Ka|$|`mEcyARa5Y_#0EoXR49*ZHB*c)p8K)St*2m&iAsK}0FO8-F|HI~XvldZ
z?eUzi*Po}rbB!2}Ev_$G#lj=1KNQoE4R*d_{73XbDUI;9pr#6XTHr=22yfPriG)XN
zJ6#LI55y)bF-+7*iybmdnMS81;#~TaHaeH8QV{$zXnpkZXqN;@%j`0x46#Z8(3OK*
z*A=9S_)?_aWpDOYp%;%_ukxsyy=%6JSA@+to_fBh8)cE@48rbeI0%|B2=-RxP$peQ
zHI*{q{K#HK-S&9wkHKgs`5mH#SLsAZ#q`u@gwsF-3q&W;?fPslDSo2Lu(mI;|1r`1
z%3S+TgVYnfY?)8n^$sb)THcU0t2cy;iJeG;W8-zx$wD-KObRixP%Vmk+o0y5Ix@a>
zhul*)>oLmdjxq_xv+QspYJwd_t{3}sSX7r9#tMXq&mBm3d7hd7a%1`8M%x?!24V4^
zFANvq5`0aSJ)jik&Sy^r5B6i)6a<X_Nqm8<9HV1S;2lklAJ)boMxR5r28fP`k06;n
zirB@8fU&!jVTrV)US;fVHXk-|AR;8D<Mq&=ovbOgjN#^S{3(b6D->TGW``E-<fO%;
zBJ(%Y!s;p-;SE$^dU>{p$T!nRF`J!z)5>E9&%r0`CCkA-D<!26MJ56jC`nJC(eW*4
zm-a>!L}G`xLP7G2D>2~(fJptt<-qx;B%)s^-n;!mGHv36aX}@c?JWL_TEulS?NXdR
z+|l+nP)zjTLUhkNAJe~gV1E8$Ln=-SW5gSXQq)7$TFOi$Y`z_kx;Dfcp}mh)4C}Kp
zk@q@PK)95I!Y{#5&RU*a%gcoXbwj8=>OO$#W5GA+?HlA0I|f^i*>Rr;h%X-Aes#RR
z`~LWD{IuV*;h!81&+)Yv?q*_e;5kK9#b0%MI7~51w}-S7-p|0(1y15;6TH=zKTBh^
zzqL&mGDLZWpogItVz0gU?L-GACO#&s<^f){6Wq@Lsuh<tW-nQllXE;)2{)oMq(?}b
z*9~<E*?ab4TH18zqVSOhzsT@=AM9qP7j(fGvtQI+BlQLyR+S}N*$k{IS;RZ3KdRsV
z#8nT@S4I&f?`)=UwVYnW^+k_6s<h9STnHqLIOGyI3lSD~?vS?=b?><?r}7eJ#q$Bf
x<F=WcSB{hQKt;Uzm>HIs;KdaR)oa_0U3e<)Ud5p-Iw;XFNLSlz3N#{fe+M%SwweF{

literal 44449
zcmbuI&8{9t5r+9$AR(@T3xMgKnVuPQ8|;u3g>V8^jAPjrBy730v*ixB7@nFCC5tad
zdfxWJvX9m7?)tB{s(SwY{OkAMe)f;24<Fxr{q7gfzxw~5{`2#nJ^$wEoA*y&eERI0
zFF$|t_P@XU?eic0_UZ3$_qT8U{QFN&A3uG(fB)>$ho`51e*X3={)gYa|Mu0Z`llcL
z@YSn7^)cG(=U3t4{doP{AFX}=`G=2BAAZ>LyT8|d*Rkj|d)w@q?O4~>DN#o2m))@M
z%ZNAZ+j`m!$F)I~;}P{gef`zbyEp&(5d;4C{ZC-Qc3qkU?b^BB{%EtOcVB${mpAYJ
z`UA7S{|S4wMRTs77ZT~D>b%#4DzNsY+YS3NSQBoz?<)*gJuzQDZ+cM?^Wj!MsiLio
z?)7@#xsyL$*Sv1mg(y1t*vGcpHT^7+Pg%U4dc$>Tm@Id(<5(eJc*rp>ZFSHUE*5Us
zjuXKO7x#6WxN6(IpdCZ=u(wie$HTrH`fM1yKnZr(9oX;&)2s<OyaydgXs~!5UHhWN
zerz4S2DDqfYq`8Q^nK$>p<=CT`M-FvkImY7<r}ucyNte_n&bT(+8VcO$%D?dojFz3
z7MSwu(8Ae{3i(4>Y1<BYe2n1T;yyg{`gZxK=k(bOLZZ!mzr0&%SJ5EM?9NJN!nNI-
z<gqoapzYW^zm}4~P<%%(?UufMn}~R}qb!2l@Y+aSL(Z-3r}B~)l7%C2aXvaGq0iFY
zeev$TT^-6N?p{ZXWF|UdJdW^q<vJfma^I)y50iKuX|`)3D_Qqd>owt7U{g1JF?@t(
z7Qhf18QPrIS{@W^W9ZZF<H#*!J65jh9sI6ABElW5Y5S;z{-OB4VBSX8hwE2y+>@bs
z8Qv#CcWxFP^JJBCN@QGK)s`NtY5Hv2(RJtn`p4Nql^!!Hy&k&`3C1-vRe*WCFfNBy
zm1{4d4=LG>3ClXjMywnIJ$kAD)!2PN0=3}=p`Y;-Ww@PEgVRGmZ{?0)Col8>qnf?=
zzTj*F^Wmv5gV&5oVuIk*=g}BCO_6p~MN!I9tKdE~1EUwNnJ%V`W>4FW1KHAngiqmn
zt2o~a*T{)U2#4vhcblq0^r@WOTS|P}guJv8wxH@t8mi+NR4G3xx~nCdWK=^OB%|bO
zA>wcr)*Y(uetXGq<EucbbmLyZQMj0>8=gw@cpf>4*NRmGZ{blBP)djzGGv`F;y~wK
zx2_V~kcn&ax&<CPv0<;=RZ7hr_lNB0K9lMM^D#v!4QtDTCh0X4F!UPoWY(mF+=-yV
zwX2WVfJyP+p;(WApuk%bN?*mp^spnTqg1#MZnzzl@K_!6S>|~K{YW!;G&o^ynV^bB
zB*>%lxMmyW3_~7{?jcb!Yo7#N)&CG45<i+RZfDhF!kzbC-90|x2y&=9RtS$t4?s>J
zvPB&i_kF^(^}Dd|>;@X5Z4FIs{$l5@_W_;K`vAviM2kEvm*jR9F+%y;MxgOW#o1nU
z>W)q}=rPOVEYN2b%MBOb<v^`m??Zt`8`^&QbPD{2vuTWD0yWr<37_>{NEc*q9vY!g
z0|$bw3fl-kkugrpf>#=!oQ$p9qTeYnsY{{AvE}h~O`87*h_pCwfkCc<VR$nlFJeA@
z%0d*v9otb&Md9LY@v|*-Btf4*4VcKlS!vn?CAw=tRGjsvUrkF2H7^|^^u<FQRBZe{
zTg89g5;P6K(C3<GDsA|@s$Ua6iuN{c7^=&D20`^TQ3NBXMk>S8ju!N+?bV6nRHt7>
zmGF>zuhv1gA)$f}=jn9=-3kx6trsTG9g>29hTt$LtkD*7MWTafrh5HTf*js~(=Nw`
z4=?v(U!E2ZYMr?QU;9+6hEE4T7V+PiJB2hM=6W|ph0Je9^@oHTuN823w-mtHd(%b&
zUg&fV+$;~?7|QgA1Ua?FXk9AfaRv-WnOjUQmM>;b*q?T*Q86!A6D@84%G}X!S?mk7
zXQm{}@Qju<^2yw_Z^O4ZgP^hDLnrY<_v#UJub#f=htWMK=owz_Nb24dMEq?cizK4L
zE{6`G_o9otA?Os0c57%9(&ibsI1h6u*XneLfocxvg=C^rgF-=`(~NVXC4r_i?Tc`E
zd3Z+phNy+sb1iH}iri8)7;TR>977P1`-&*NQ_1-zs^=t#6T;Sf8#?%cXWLb&Z@__i
zPQ^*c08Nf1@(6&cJ4cK)K;Mr%HpdsK33}hR`Brkqn6(|EM<%yJv_|Fr63Ev3zH4S;
z>GXBUlV4UB7)XxF4*}+M7CAyKIK3aBtt}5Dh`b32!r-crX2gqmgJ)vrBYP4<1Bg@<
zygn>`T7ro+Ez)$~C6+~pjbNHNrwGGX?EDsyn<jd@r+h*|29BP=t8|9ZuR`){=r#g}
zbw5$&s1Jsdv-?;)>yQvFlVtzsO8`yo_d^GvoUC{ATDTa}=m-Z~#G8dX3JVWO^IFb)
z+UE3Wf!@8YUq1JxwPhkIq8};IS8W*RI@2Z{7$LveXuitcmvX+LfRUqV86k~fLyub|
zR|_8ATPEV|?Vh`H2AXkVNEp%|N!G@e_tBugJ0znBT9QEwq8R<i$+4b3-=Y07w~@%d
zws#ErvL=8Gf-=)G(Xz9yQMN>?>venLm}D*51fc?c&0g=D#Clqt3qgQGD-&UcCulNJ
z8}t#-Fcj%{O>x+O`KUGmXST@0;8~(-T09701550?#i2N~3&jw8AVJA=-~&pg%_1n7
z-Uk>cx7~naUZBuNErjQh8xGpmxS^AJaO}+3FjI}O31y3xHO;>s1jx)(YupEDIim=^
zC8tQIsuz=!v8;SCPmsck5VX7s29y$a6@5d&A<jUj!oxDEY!BP-fcv5Md9gigf*sut
zV(=mt8zvFM-KH)17d(SRV;6du@_ZDI81q~k08n)V)Q3@JYSL4d)eqP93U$)86^Ddu
z;Snb`4j0o<qa6T^F1Z}^i3iMi{XDl=z$`v;z`4exWhhv02+YC_ogJWi&zsX_reJkW
zfKNQ$HbK(OXSYPpL<0{!$4fn9A+}AZsbftL=3K0Kx-Zz!Po@BIMr34>6K6W40u}Hu
z3%!KVg*3Gp;bo)HJ`;^<`wWJsu|no)@tWS(PF#&CTr7d#a^l$p`onY-LaTCauqeH1
z_A$+?WsH7F?Q5vkvQH?NOfve5wR%{=Sp%$&mSy6TnFw$KoVef7OOPuK7XXIlMr9nn
zABsjceMm43EtMbzT1Y?&0*#qyY^A?Tr`t@j>5CyxI<%uPOA~{tvG>!P9~giJ+WwLS
z<{G&ko`Ad{qKVG@Ad(&?f2%JC(i4R+g|@q2$=G|Tf4Jd3d?PEJRodRr#RBI-E}TB|
z0{!NU;eHJ7T4#uwCN%M?eNB9@gNcE_V^{w~O31={LdoQ@6@6H|O&$;e@@nghLD!<f
z$7FJft`%FjIE@~nJRYH^D^Gyeb>f920DG@dOZkUXSZl2ZBOs4Yl!DgOTk-Aj$OV$)
zO=uA<a(U{)t5`aC14Pnx0>nh`1Nfo$p%={WvsCTJi&PS+_^L8rve9882@zBX-2fty
z%hNi1#sWm6O)PEz*$8h)xh_G~G#kiRCIZtw@j@6g<@$!-JrABF?8urmllK$hUgZKK
za#bNY6cn(5+>v9Ky(zlKro+?nj-^hj!OFDmF<YF+e0)v-J<{UhfiJuVr%cMv_vfTT
z9xxoP!&B&~!(eX5nlSFVzmnrZT4ss$y=e5B@IuGLXQhF*z4<$XuasN`FNLB)yOFmK
zd(?NhY1ai>ncr-ic}Rc?elFSP%-7FaWo?1Mpv6klm#B`-0%tq%$O!4S)(WsZ?NdOl
zbsh<hz&_e7O!QBX?ZUNCcX>0HD?U8#iyc@<wzIo5QRRBL-rqrbc*+;5{bOc`{r(QA
zyCPLBEn>88LI2SGt;IWcMsRn3#(PRg?sHF)r$RZZxg%YC)TbrF#muYX%0hPZV50f*
zUwWqemRBW`kFD$zjX`|qSwkO;MQb-i8l8vocxwDfC^DQ@-;mxQy@pw(+{_<K_8N7)
zujUk=d4n9!z?J=!^L)`8<@&Juo_-KOEuLgKxlsb;;ew70WW)N9q&cKUK`7_j_i&Xu
zf}jB;r{T{0QLN&$Yx#IH=*FO3E59L)SNY<`D-SVwD7WRI9c2cN?SvbgA-^ggf=M>i
z0ssn`la%MKz2#jd@&KN|&AkiXfbc=(S;i(vQfc1KPfF(KnZM7J5dcrEcmr8LPLC-E
zk1L)v7w(h~XOz>Qk_;(J;$pPBpw+~y(CM=y+C{W&U|3RnbTUO0#_@{IEXeI8$xmaF
zSLRYNcz7Z9K0HL4d!O}wFDT9TCLZZZavZupycPuCb*Bvtc7#MZg2$~FuR%10c@4T>
zw+Yico@CQ08)eg+kTc(o<}DIDNO)6H_JFj03=5<b;}jpV!Uty!(I4#*fE6Y}&2{1<
z89b(;{R#~&xh+J%b7@_xisv1T?*;|O*Nmr)Eq6})-Q66DDbXKH^mLg}U+U5j$ZuH*
zU}*Duf`FB3_)``JmY%Zuqm$56t0B)U>Y$$)(cEMMV9uCQ-+!FUCz257hvbIHXQ+h)
zTe%S@xz%2Sfi&+zmx<S8Hl91P?ht2zGC3F8g9m~EyC<kW{L6<xeUWVmN-;Mk???#r
z2{{5g<iAx%Pn73ytB3l$VfN86X-2Nnv?md8*oyG|%>=t>c=HI{&(-Wkcjtf;Cc2q)
zRszhQS7=C>y-1zGT;2WViX_HfWxNbh)c7S;AiSA7Ai$n?CHLpn-9BKsh@V@-+4Oy=
z$Ycbk(a3MMd6WSA2f@Rr0jxOJ6C83L>@bL`<q;T?kR!fomH3^&fGxuOFcx1c6!cCe
zIOY8ca!Qs~%?pu1@$nyttu_vzt`qXzz3*U(Rzr|n3}%OdayNcS!CX8RMf-@>I0_8j
W7489qJM-}!KK86#0hNaB_5T3Zzsjrt

diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
new file mode 100644
index 00000000..b6f0a1cd
--- /dev/null
+++ b/html5lib/tests/conftest.py
@@ -0,0 +1,21 @@
+import os.path
+
+from .tree_construction import TreeConstructionFile
+
+_dir = os.path.abspath(os.path.dirname(__file__))
+_testdata = os.path.join(_dir, "testdata")
+_tree_construction = os.path.join(_testdata, "tree-construction")
+
+
+def pytest_collectstart():
+    """check to see if the git submodule has been init'd"""
+    pass
+
+
+def pytest_collect_file(path, parent):
+    dir = os.path.abspath(path.dirname)
+    if dir == _tree_construction:
+        if path.basename == "template.dat":
+            return
+        if path.ext == ".dat":
+            return TreeConstructionFile(path, parent)
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index 926cb2f2..56e09c81 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -27,16 +27,18 @@
 try:
     import xml.etree.cElementTree as cElementTree
 except ImportError:
-    pass
+    treeTypes['cElementTree'] = None
 else:
     # On Python 3.3 and above cElementTree is an alias, don't run them twice.
-    if cElementTree.Element is not ElementTree.Element:
+    if cElementTree.Element is ElementTree.Element:
+        treeTypes['cElementTree'] = None
+    else:
         treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
 
 try:
     import lxml.etree as lxml  # flake8: noqa
 except ImportError:
-    pass
+    treeTypes['lxml'] = None
 else:
     treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
 
@@ -63,9 +65,6 @@ def __init__(self, filename, newTestHeading="data", encoding="utf8"):
         self.encoding = encoding
         self.newTestHeading = newTestHeading
 
-    def __del__(self):
-        self.f.close()
-
     def __iter__(self):
         data = DefaultDict(None)
         key = None
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
deleted file mode 100644
index 9cda65f8..00000000
--- a/html5lib/tests/test_parser.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import os
-import sys
-import traceback
-import warnings
-import re
-
-warnings.simplefilter("error")
-
-from .support import get_data_files
-from .support import TestData, convert, convertExpected, treeTypes
-from html5lib import html5parser, constants
-
-# Run the parse error checks
-checkParseErrors = False
-
-# XXX - There should just be one function here but for some reason the testcase
-# format differs from the treedump format by a single space character
-
-
-def convertTreeDump(data):
-    return "\n".join(convert(3)(data).split("\n")[1:])
-
-namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
-
-
-def runParserTest(innerHTML, input, expected, errors, treeClass,
-                  namespaceHTMLElements):
-    with warnings.catch_warnings(record=True) as caughtWarnings:
-        warnings.simplefilter("always")
-        p = html5parser.HTMLParser(tree=treeClass,
-                                   namespaceHTMLElements=namespaceHTMLElements)
-
-        try:
-            if innerHTML:
-                document = p.parseFragment(input, innerHTML)
-            else:
-                document = p.parse(input)
-        except:
-            errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                                  "\nTraceback:", traceback.format_exc()])
-            assert False, errorMsg
-
-    otherWarnings = [x for x in caughtWarnings
-                     if not issubclass(x.category, constants.DataLossWarning)]
-    assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
-    if len(caughtWarnings):
-        return
-
-    output = convertTreeDump(p.tree.testSerializer(document))
-
-    expected = convertExpected(expected)
-    if namespaceHTMLElements:
-        expected = namespaceExpected(r"\1<html \2>", expected)
-
-    errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                          "\nReceived:", output])
-    assert expected == output, errorMsg
-
-    errStr = []
-    for (line, col), errorcode, datavars in p.errors:
-        assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
-        errStr.append("Line: %i Col: %i %s" % (line, col,
-                                               constants.E[errorcode] % datavars))
-
-    errorMsg2 = "\n".join(["\n\nInput:", input,
-                           "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
-                           "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
-    if checkParseErrors:
-        assert len(p.errors) == len(errors), errorMsg2
-
-
-def test_parser():
-    sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
-    files = get_data_files('tree-construction')
-
-    for filename in files:
-        testName = os.path.basename(filename).replace(".dat", "")
-        if testName in ("template",):
-            continue
-
-        tests = TestData(filename, "data")
-
-        for index, test in enumerate(tests):
-            input, errors, innerHTML, expected = [test[key] for key in
-                                                  ('data', 'errors',
-                                                   'document-fragment',
-                                                   'document')]
-            if errors:
-                errors = errors.split("\n")
-
-            for treeName, treeCls in sorted(treeTypes.items()):
-                for namespaceHTMLElements in (True, False):
-                    yield (runParserTest, innerHTML, input, expected, errors, treeCls,
-                           namespaceHTMLElements)
diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
new file mode 100644
index 00000000..c1125387
--- /dev/null
+++ b/html5lib/tests/tree_construction.py
@@ -0,0 +1,94 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import warnings
+import re
+
+import pytest
+
+from .support import TestData, convert, convertExpected, treeTypes
+from html5lib import html5parser, constants
+
+
+class TreeConstructionFile(pytest.File):  # collector for a single tree-construction .dat file
+    def collect(self):  # yields one ParserTest per (test, treebuilder, namespaced?) combination
+        tests = TestData(str(self.fspath), "data")
+        for i, test in enumerate(tests):
+            for treeName, treeClass in sorted(treeTypes.items()):  # builders visited in name order
+                for namespaceHTMLElements in (True, False):
+                    if namespaceHTMLElements:
+                        nodeid = "%d::%s::namespaced" % (i, treeName)
+                    else:
+                        nodeid = "%d::%s::void-namespace" % (i, treeName)
+                    item = ParserTest(nodeid, self,
+                                      test, treeClass, namespaceHTMLElements)
+                    item.add_marker(getattr(pytest.mark, treeName))  # marker named after the treebuilder
+                    if namespaceHTMLElements:
+                        item.add_marker(pytest.mark.namespaced)
+                    if treeClass is None:  # support.treeTypes stores None for builders that are not run
+                        item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
+                    yield item
+
+
+def convertTreeDump(data):  # applies convert(3) to data and drops the first line of the result
+    return "\n".join(convert(3)(data).split("\n")[1:])
+
+namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub  # bound sub() for "<name>" at line start
+
+
+class ParserTest(pytest.Item):  # one tree-construction case run against one treebuilder setting
+    def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
+        super(ParserTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test  # read in runtest via 'data', 'document-fragment', 'document', 'errors'
+        self.treeClass = treeClass
+        self.namespaceHTMLElements = namespaceHTMLElements
+
+    def runtest(self):  # parse the input and compare the serialized tree with the expected dump
+        p = html5parser.HTMLParser(tree=self.treeClass,
+                                   namespaceHTMLElements=self.namespaceHTMLElements)
+
+        input = self.test['data']
+        fragmentContainer = self.test['document-fragment']
+        expected = self.test['document']
+        expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else []
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")  # warnings are raised as exceptions inside this block
+            try:
+                if fragmentContainer:
+                    document = p.parseFragment(input, fragmentContainer)
+                else:
+                    document = p.parse(input)
+            except constants.DataLossWarning:  # catchable here because of the "error" filter above
+                pytest.skip("data loss warning")
+
+        output = convertTreeDump(p.tree.testSerializer(document))
+
+        expected = convertExpected(expected)
+        if self.namespaceHTMLElements:  # "<name>" at line start becomes "<html name>"
+            expected = namespaceExpected(r"\1<html \2>", expected)
+
+        errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
+                              "\nReceived:", output])
+        assert expected == output, errorMsg
+
+        errStr = []
+        for (line, col), errorcode, datavars in p.errors:
+            assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
+            errStr.append("Line: %i Col: %i %s" % (line, col,
+                                                   constants.E[errorcode] % datavars))
+
+        errorMsg2 = "\n".join(["\n\nInput:", input,
+                               "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors),
+                               "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])  # only used below
+        if False:  # we're currently not testing parse errors
+            assert len(p.errors) == len(expectedErrors), errorMsg2
+
+    def repr_failure(self, excinfo):  # report failures with a trimmed traceback in "short" style
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
diff --git a/pytest.ini b/pytest.ini
index 17209aa1..6875cc7d 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-addopts = -rXw -p no:doctest
\ No newline at end of file
+addopts = -rXw -p no:doctest

From 082c042082c78779ea47c746c77535944eec957e Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 25 Nov 2015 17:52:47 +0000
Subject: [PATCH 17/17] Add AUTHORS.rst and test files to manifest.

---
 MANIFEST.in | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 1edd0b7d..4b3ffe3e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,10 @@
 include LICENSE
+include AUTHORS.rst
 include CHANGES.rst
 include README.rst
 include requirements*.txt
+include .pytest.expect
+include tox.ini
+include pytest.ini
 graft html5lib/tests/testdata
 recursive-include html5lib/tests *.py