From 92c2e32c8f9e2554511960a1809e495c9d68ee25 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sat, 22 Nov 2014 17:37:06 +0100 Subject: [PATCH 01/17] Fix over indentation --- html5lib/tests/test_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py index 230cdb42..0f958c94 100644 --- a/html5lib/tests/test_parser.py +++ b/html5lib/tests/test_parser.py @@ -68,7 +68,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass, "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors), "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) if checkParseErrors: - assert len(p.errors) == len(errors), errorMsg2 + assert len(p.errors) == len(errors), errorMsg2 def test_parser(): From d9b1a9f0bf74a102cd9c977c7e5ac38a4af15f74 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 11 Jan 2015 14:46:49 +0000 Subject: [PATCH 02/17] sys.version_info is only a "named tuple"-like obj from 2.7 This also adds the mock package as a dependency for the testsuite, as we need it to test our test code. --- CHANGES.rst | 3 +++ README.rst | 6 ++--- html5lib/tests/support.py | 2 +- html5lib/tests/test_meta.py | 41 +++++++++++++++++++++++++++++ html5lib/treebuilders/etree_lxml.py | 2 +- requirements-test.txt | 1 + tox.ini | 3 +++ 7 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 html5lib/tests/test_meta.py diff --git a/CHANGES.rst b/CHANGES.rst index ed951a3b..8c6865ef 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,9 +7,12 @@ Change Log Released on XXX * Added ordereddict as a mandatory dependency on Python 2.6. + * Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all`` extras that will do the right thing based on the specific interpreter implementation. +* Now requires the ``mock`` package for the testsuite. + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 9e0a0f74..7c320e0e 100644 --- a/README.rst +++ b/README.rst @@ -132,9 +132,9 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``nose`` library and can be run using the -``nosetests`` command in the root directory; ``ordereddict`` is -required under Python 2.6. All should pass. +Unit tests require the ``nose`` and ``mock`` libraries and can be run +using the ``nosetests`` command in the root directory; ``ordereddict`` +is required under Python 2.6. All should pass. Test data are contained in a separate `html5lib-tests `_ repository and included diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index dbb735a9..b64d322a 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -128,7 +128,7 @@ def convertData(data): def errorMessage(input, expected, actual): msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" % (repr(input), repr(expected), repr(actual))) - if sys.version_info.major == 2: + if sys.version_info[0] == 2: msg = msg.encode("ascii", "backslashreplace") return msg diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py new file mode 100644 index 00000000..e42eafdb --- /dev/null +++ b/html5lib/tests/test_meta.py @@ -0,0 +1,41 @@ +from __future__ import absolute_import, division, unicode_literals + +import six +from mock import Mock + +from . import support + + +def _createReprMock(r): + """Creates a mock with a __repr__ returning r + + Also provides __str__ mock with default mock behaviour""" + mock = Mock() + mock.__repr__ = Mock() + mock.__repr__.return_value = r + mock.__str__ = Mock(wraps=mock.__str__) + return mock + + +def test_errorMessage(): + # Create mock objects to take repr of + input = _createReprMock("1") + expected = _createReprMock("2") + actual = _createReprMock("3") + + # Run the actual test + r = support.errorMessage(input, expected, actual) + + # Assertions! + if six.PY2: + assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == r + else: + assert six.PY3 + assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == r + + assert input.__repr__.call_count == 1 + assert expected.__repr__.call_count == 1 + assert actual.__repr__.call_count == 1 + assert not input.__str__.called + assert not expected.__str__.called + assert not actual.__str__.called diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 2755c485..138b30bd 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -79,7 +79,7 @@ def serializeElement(element, indent=0): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info.major == 2 + assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case diff --git a/requirements-test.txt b/requirements-test.txt index d5f8088c..8b6ace66 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,3 +3,4 @@ flake8 nose ordereddict # Python 2.6 +mock diff --git a/tox.ini b/tox.ini index d00e35dc..683c01e4 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ deps = -r{toxinidir}/requirements-optional-cpython.txt flake8 nose + mock commands = {envbindir}/nosetests -q {toxinidir}/flake8-run.sh @@ -21,6 +22,7 @@ deps = Genshi nose six + mock [testenv:py26] basepython = python2.6 @@ -28,3 +30,4 @@ deps = -r{toxinidir}/requirements-optional-2.6.txt flake8 nose + mock From e4d4b1520d2c34a3f5b1d19a1d0f346d1ba0c19a Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 11 Jan 2015 16:01:31 +0000 Subject: [PATCH 03/17] Move where we concatenate tokens to handle ignoreErrorOrder This was causing one of the tokenizer test failures. --- html5lib/tests/test_tokenizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py index 6a563c32..4201dfbb 100644 --- a/html5lib/tests/test_tokenizer.py +++ b/html5lib/tests/test_tokenizer.py @@ -109,6 +109,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, token.pop() if not ignoreErrorOrder and not ignoreErrors: + expectedTokens = concatenateCharacterTokens(expectedTokens) return expectedTokens == receivedTokens else: # Sort the tokens into two groups; non-parse errors and parse errors @@ -121,6 +122,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, else: if not ignoreErrors: tokens[tokenType][1].append(token) + tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0]) return tokens["expected"] == tokens["received"] @@ -174,7 +176,7 @@ def runTokenizerTest(test): warnings.resetwarnings() warnings.simplefilter("error") - expected = concatenateCharacterTokens(test['output']) + expected = test['output'] if 'lastStartTag' not in test: test['lastStartTag'] = None parser = TokenizerTestParser(test['initialState'], From 1025014f8011f013f2bf02d974da263d510cf54d Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 24 Nov 2014 01:49:47 +0000 Subject: [PATCH 04/17] Our tokenizer currently never outputs adjacent Character tokens; expect this. --- html5lib/tests/test_tokenizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py index 4201dfbb..823c6ea6 100644 --- a/html5lib/tests/test_tokenizer.py +++ b/html5lib/tests/test_tokenizer.py @@ -182,7 +182,6 @@ def runTokenizerTest(test): parser = TokenizerTestParser(test['initialState'], test['lastStartTag']) tokens = parser.parse(test['input']) - tokens = concatenateCharacterTokens(tokens) received = normalizeTokens(tokens) errorMsg = "\n".join(["\n\nInitial state:", test['initialState'], From 9ee8a1a811e61596fe4789137c25a470f012ae4a Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 11 Jan 2015 16:51:50 +0000 Subject: [PATCH 05/17] Cease supporting DATrie under PyPy. --- CHANGES.rst | 2 ++ README.rst | 4 ++-- requirements-optional-cpython.txt | 4 ++++ requirements-optional.txt | 4 ---- setup.py | 6 +++--- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8c6865ef..e99da143 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,6 +13,8 @@ Released on XXX * Now requires the ``mock`` package for the testsuite. +* Cease supporting DATrie under PyPy. + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 7c320e0e..3d08d758 100644 --- a/README.rst +++ b/README.rst @@ -104,8 +104,8 @@ Optional Dependencies The following third-party libraries may be used for additional functionality: -- ``datrie`` can be used to improve parsing performance (though in - almost all cases the improvement is marginal); +- ``datrie`` can be used under CPython to improve parsing performance + (though in almost all cases the improvement is marginal); - ``lxml`` is supported as a tree format (for both building and walking) under CPython (but *not* PyPy where it is known to cause diff --git a/requirements-optional-cpython.txt b/requirements-optional-cpython.txt index 35ed3529..e93eda8d 100644 --- a/requirements-optional-cpython.txt +++ b/requirements-optional-cpython.txt @@ -3,3 +3,7 @@ # lxml is supported with its own treebuilder ("lxml") and otherwise # uses the standard ElementTree support lxml + +# DATrie can be used in place of our Python trie implementation for +# slightly better parsing performance. +datrie diff --git a/requirements-optional.txt b/requirements-optional.txt index c6355270..4e16ea17 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,10 +4,6 @@ # streams. genshi -# DATrie can be used in place of our Python trie implementation for -# slightly better parsing performance. -datrie - # charade can be used as a fallback in case we are unable to determine # the encoding of a document. charade diff --git a/setup.py b/setup.py index 7af4e292..7b06b45e 100644 --- a/setup.py +++ b/setup.py @@ -65,18 +65,18 @@ # A conditional extra will only install these items when the extra is # requested and the condition matches. + "datrie:python_implementation == 'CPython'": ["datrie"], "lxml:python_implementation == 'CPython'": ["lxml"], # Standard extras, will be installed when the extra is requested. "genshi": ["genshi"], - "datrie": ["datrie"], "charade": ["charade"], # The all extra combines a standard extra which will be used anytime # the all extra is requested, and it extends it with a conditional # extra that will be installed whenever the condition matches and the # all extra is requested. - "all": ["genshi", "datrie", "charade"], - "all:python_implementation == 'CPython'": ["lxml"], + "all": ["genshi", "charade"], + "all:python_implementation == 'CPython'": ["datrie", "lxml"], }, ) From e1d9a5b14ac57a2faefcdb9f12933dc34d392b6e Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 11 Jan 2015 16:52:38 +0000 Subject: [PATCH 06/17] Big tox/Travis CI update to make both test the same set of things. Oh, and this adds PyPy3, while we're at it. In short: we now test both with and without optional packages in tox and fix Travis CI to test with optional packages under PyPy. --- .travis.yml | 2 ++ requirements-install.sh | 10 ++++++---- tox.ini | 29 ++++++----------------------- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3f045b37..ee65440e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,8 @@ python: - "3.4" - "3.5" - "pypy" + - "pypy3" + sudo: false cache: diff --git a/requirements-install.sh b/requirements-install.sh index 5f8ba506..95a688c6 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -7,10 +7,12 @@ fi pip install -r requirements-test.txt -if [[ $USE_OPTIONAL == "true" && $TRAVIS_PYTHON_VERSION != "pypy" ]]; then - if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then - pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-2.6.txt +if [[ $USE_OPTIONAL == "true" ]]; then + if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then + pip install -r requirements-optional.txt + elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then + pip install -r requirements-optional-2.6.txt else - pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-cpython.txt + pip install -r requirements-optional-cpython.txt fi fi diff --git a/tox.ini b/tox.ini index 683c01e4..c200855e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,33 +1,16 @@ [tox] -envlist = py26,py27,py32,py33,py34,py35,pypy +envlist = {py26,py27,py32,py33,py34,py35,pypy,pypy3}-{base,optional} [testenv] deps = - -r{toxinidir}/requirements-optional-cpython.txt flake8 nose mock + py26-base: ordereddict + py26-optional: -r{toxinidir}/requirements-optional-2.6.txt + {py27,py32,py33,py34,py35}-optional: -r{toxinidir}/requirements-optional-cpython.txt + {pypy,pypy3}-optional: -r{toxinidir}/requirements-optional.txt + commands = {envbindir}/nosetests -q {toxinidir}/flake8-run.sh -install_command = - pip install {opts} {packages} - -[testenv:pypy] -# lxml doesn't work and datrie doesn't make sense -# (it's slower than the pure-python version) -deps = - charade - flake8 - Genshi - nose - six - mock - -[testenv:py26] -basepython = python2.6 -deps = - -r{toxinidir}/requirements-optional-2.6.txt - flake8 - nose - mock From 40d007a20b0551017cf7b65f1a379e37ccc9c47a Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Tue, 14 Apr 2015 23:33:40 +0100 Subject: [PATCH 07/17] Fix the moduleFactoryFactory to cache based on *args and **kwargs. --- html5lib/tests/test_parser2.py | 4 ++-- html5lib/tests/test_treewalkers.py | 4 ++-- html5lib/utils.py | 16 ++++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 20bbdf31..01f16eea 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -40,12 +40,12 @@ def test_namespace_html_elements_1_dom(self): def test_namespace_html_elements_0_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=True) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],)) + self.assertTrue(doc.tag == "{%s}html" % (namespaces["html"],)) def test_namespace_html_elements_1_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=False) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "html") + self.assertTrue(doc.tag == "html") def test_unicode_file(self): parser = html5parser.HTMLParser() diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 3be12327..9d3e9571 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -60,7 +60,7 @@ def PullDOMAdapter(node): pass else: treeTypes['ElementTree'] = \ - {"builder": treebuilders.getTreeBuilder("etree", ElementTree), + {"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), "walker": treewalkers.getTreeWalker("etree", ElementTree)} try: @@ -69,7 +69,7 @@ def PullDOMAdapter(node): pass else: treeTypes['cElementTree'] = \ - {"builder": treebuilders.getTreeBuilder("etree", ElementTree), + {"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), "walker": treewalkers.getTreeWalker("etree", ElementTree)} diff --git a/html5lib/utils.py b/html5lib/utils.py index ebad29fb..c196821f 100644 --- a/html5lib/utils.py +++ b/html5lib/utils.py @@ -91,13 +91,21 @@ def moduleFactory(baseModule, *args, **kwargs): else: name = b"_%s_factory" % baseModule.__name__ - if name in moduleCache: - return moduleCache[name] - else: + kwargs_tuple = tuple(kwargs.items()) + + try: + return moduleCache[name][args][kwargs_tuple] + except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) - moduleCache[name] = mod + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory From f4490bef7e3bbdfc2ece381f2b76122a0d6d7c3e Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Tue, 14 Apr 2015 23:00:34 +0100 Subject: [PATCH 08/17] Avoid running tests for cElementTree & ElementTree where they're the same. --- html5lib/tests/support.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index b64d322a..047c5534 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -21,25 +21,17 @@ # Try whatever etree implementations are available from a list that are #"supposed" to work -try: - import xml.etree.ElementTree as ElementTree - treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True) -except ImportError: - try: - import elementtree.ElementTree as ElementTree - treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True) - except ImportError: - pass +import xml.etree.ElementTree as ElementTree +treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True) try: import xml.etree.cElementTree as cElementTree - treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True) except ImportError: - try: - import cElementTree + pass +else: + # On Python 3.3 and above cElementTree is an alias, don't run them twice. + if cElementTree.Element is not ElementTree.Element: treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True) - except ImportError: - pass try: import lxml.etree as lxml # flake8: noqa From 90e43486a789db04639af9d51a4a0aa51cbb8864 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Wed, 15 Apr 2015 01:03:54 +0100 Subject: [PATCH 09/17] Move Genshi tree adapter to be in the public API, because sanity. --- html5lib/tests/test_treewalkers.py | 52 ++---------------------------- html5lib/treeadapters/__init__.py | 12 +++++++ html5lib/treeadapters/genshi.py | 50 ++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 49 deletions(-) create mode 100644 html5lib/treeadapters/genshi.py diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 9d3e9571..0e31ff5f 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -13,7 +13,7 @@ from .support import get_data_files, TestData, convertExpected -from html5lib import html5parser, treewalkers, treebuilders, constants +from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants def PullDOMAdapter(node): @@ -84,59 +84,13 @@ def PullDOMAdapter(node): try: - from genshi.core import QName, Attrs - from genshi.core import START, END, TEXT, COMMENT, DOCTYPE + import genshi # flake8: noqa except ImportError: pass else: - def GenshiAdapter(tree): - text = None - for token in treewalkers.getTreeWalker("dom")(tree): - type = token["type"] - if type in ("Characters", "SpaceCharacters"): - if text is None: - text = token["data"] - else: - text += token["data"] - elif text is not None: - yield TEXT, text, (None, -1, -1) - text = None - - if type in ("StartTag", "EmptyTag"): - if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) - else: - name = token["name"] - attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) - for attr, value in token["data"].items()]) - yield (START, (QName(name), attrs), (None, -1, -1)) - if type == "EmptyTag": - type = "EndTag" - - if type == "EndTag": - if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) - else: - name = token["name"] - - yield END, QName(name), (None, -1, -1) - - elif type == "Comment": - yield COMMENT, token["data"], (None, -1, -1) - - elif type == "Doctype": - yield DOCTYPE, (token["name"], token["publicId"], - token["systemId"]), (None, -1, -1) - - else: - pass # FIXME: What to do? - - if text is not None: - yield TEXT, text, (None, -1, -1) - treeTypes["genshi"] = \ {"builder": treebuilders.getTreeBuilder("dom"), - "adapter": GenshiAdapter, + "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)), "walker": treewalkers.getTreeWalker("genshi")} import re diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index e69de29b..57d71304 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import sax + +__all__ = ["sax"] + +try: + from . import genshi # flake8: noqa +except ImportError: + pass +else: + __all__.append("genshi") diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py new file mode 100644 index 00000000..68a87f13 --- /dev/null +++ b/html5lib/treeadapters/genshi.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName, Attrs +from genshi.core import START, END, TEXT, COMMENT, DOCTYPE + + +def to_genshi(walker): + text = None + for token in walker: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + if text is None: + text = token["data"] + else: + text += token["data"] + elif text is not None: + yield TEXT, text, (None, -1, -1) + text = None + + if type in ("StartTag", "EmptyTag"): + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) + for attr, value in token["data"].items()]) + yield (START, (QName(name), attrs), (None, -1, -1)) + if type == "EmptyTag": + type = "EndTag" + + if type == "EndTag": + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + + yield END, QName(name), (None, -1, -1) + + elif type == "Comment": + yield COMMENT, token["data"], (None, -1, -1) + + elif type == "Doctype": + yield DOCTYPE, (token["name"], token["publicId"], + token["systemId"]), (None, -1, -1) + + else: + pass # FIXME: What to do? + + if text is not None: + yield TEXT, text, (None, -1, -1) From 23eb610a13cb730210dc83a90ed7ccf37d51fd65 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Wed, 15 Apr 2015 01:18:07 +0100 Subject: [PATCH 10/17] Change the Genshi treeadapter to avoid O(n^2) string concat. --- html5lib/treeadapters/genshi.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 68a87f13..04e316df 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -5,17 +5,14 @@ def to_genshi(walker): - text = None + text = [] for token in walker: type = token["type"] if type in ("Characters", "SpaceCharacters"): - if text is None: - text = token["data"] - else: - text += token["data"] - elif text is not None: - yield TEXT, text, (None, -1, -1) - text = None + text.append(token["data"]) + elif text: + yield TEXT, "".join(text), (None, -1, -1) + text = [] if type in ("StartTag", "EmptyTag"): if token["namespace"]: @@ -46,5 +43,5 @@ def to_genshi(walker): else: pass # FIXME: What to do? - if text is not None: - yield TEXT, text, (None, -1, -1) + if text: + yield TEXT, "".join(text), (None, -1, -1) From 69ca91644207c74f2de60a237a1d3f55795728b8 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Wed, 15 Apr 2015 01:40:23 +0100 Subject: [PATCH 11/17] Remove PullDOM support. The test harness hasn't ever properly had an adapter that matches the behaviour of PullDOM, and I have no interest in fixing this, so let's simply drop support. AFAICT, nobody uses this. --- html5lib/tests/test_treewalkers.py | 31 --------------- html5lib/treewalkers/__init__.py | 3 +- html5lib/treewalkers/pulldom.py | 63 ------------------------------ 3 files changed, 1 insertion(+), 96 deletions(-) delete mode 100644 html5lib/treewalkers/pulldom.py diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 0e31ff5f..a42d8299 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -16,40 +16,9 @@ from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants -def PullDOMAdapter(node): - from xml.dom import Node - from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS - - if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): - for childNode in node.childNodes: - for event in PullDOMAdapter(childNode): - yield event - - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM") - - elif node.nodeType == Node.COMMENT_NODE: - yield COMMENT, node - - elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - yield CHARACTERS, node - - elif node.nodeType == Node.ELEMENT_NODE: - yield START_ELEMENT, node - for childNode in node.childNodes: - for event in PullDOMAdapter(childNode): - yield event - yield END_ELEMENT, node - - else: - raise NotImplementedError("Node type not supported: " + str(node.nodeType)) - treeTypes = { "DOM": {"builder": treebuilders.getTreeBuilder("dom"), "walker": treewalkers.getTreeWalker("dom")}, - "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"), - "adapter": PullDOMAdapter, - "walker": treewalkers.getTreeWalker("pulldom")}, } # Try whatever etree implementations are available from a list that are diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index 20b91b11..5414e4bb 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -10,8 +10,7 @@ from __future__ import absolute_import, division, unicode_literals -__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree", - "pulldom"] +__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"] import sys diff --git a/html5lib/treewalkers/pulldom.py b/html5lib/treewalkers/pulldom.py deleted file mode 100644 index 0b0f515f..00000000 --- a/html5lib/treewalkers/pulldom.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \ - COMMENT, IGNORABLE_WHITESPACE, CHARACTERS - -from . import _base - -from ..constants import voidElements - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - ignore_until = None - previous = None - for event in self.tree: - if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): - if previous[1] is ignore_until: - ignore_until = None - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = previous[1] - previous = event - if ignore_until is None or previous[1] is ignore_until: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") - - def tokens(self, event, next): - type, node = event - if type == START_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI, attr.localName)] = attr.value - if name in voidElements: - for token in self.emptyTag(namespace, - name, - attrs, - not next or next[1] is not node): - yield token - else: - yield self.startTag(namespace, name, attrs) - - elif type == END_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - if name not in voidElements: - yield self.endTag(namespace, name) - - elif type == COMMENT: - yield self.comment(node.nodeValue) - - elif type in (IGNORABLE_WHITESPACE, CHARACTERS): - for token in self.text(node.nodeValue): - yield token - - else: - yield self.unknown(type) From c2321b0234ce5b7555aa080446c872e81c6cb21a Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Tue, 21 Jul 2015 13:29:32 +0100 Subject: [PATCH 12/17] Update packages even if they're installed on Travis already. --- requirements-install.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-install.sh b/requirements-install.sh index 95a688c6..f823ed37 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -5,14 +5,14 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then exit 1 fi -pip install -r requirements-test.txt +pip install -U -r requirements-test.txt if [[ $USE_OPTIONAL == "true" ]]; then if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then - pip install -r requirements-optional.txt + pip install -U -r requirements-optional.txt elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then - pip install -r requirements-optional-2.6.txt + pip install -U -r requirements-optional-2.6.txt else - pip install -r requirements-optional-cpython.txt + pip install -U -r requirements-optional-cpython.txt fi fi From 71ac5580dcd8f2395b8a6de90ed59d93f72f7c67 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 1 Nov 2015 15:08:26 +0900 Subject: [PATCH 13/17] Update requirements files to match setup.py --- requirements-install.sh | 11 ++++------- requirements-optional-2.6.txt | 5 ----- requirements-optional-cpython.txt | 9 --------- requirements-optional.txt | 12 ++++++++++++ requirements-test.txt | 2 +- tox.ini | 4 +--- 6 files changed, 18 insertions(+), 25 deletions(-) delete mode 100644 requirements-optional-2.6.txt delete mode 100644 requirements-optional-cpython.txt diff --git a/requirements-install.sh b/requirements-install.sh index f823ed37..a8964ea0 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -5,14 +5,11 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then exit 1 fi +# Make sure we're running setuptools >= 18.5 +pip install -U pip setuptools + pip install -U -r requirements-test.txt if [[ $USE_OPTIONAL == "true" ]]; then - if [[ $TRAVIS_PYTHON_VERSION == "pypy" || $TRAVIS_PYTHON_VERSION == "pypy3" ]]; then - pip install -U -r requirements-optional.txt - elif [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then - pip install -U -r requirements-optional-2.6.txt - else - pip install -U -r requirements-optional-cpython.txt - fi + pip install -U -r requirements-optional.txt fi diff --git a/requirements-optional-2.6.txt b/requirements-optional-2.6.txt deleted file mode 100644 index 37557ac4..00000000 --- a/requirements-optional-2.6.txt +++ /dev/null @@ -1,5 +0,0 @@ --r requirements-optional-cpython.txt - -# Can be used to force attributes to be serialized in alphabetical -# order. -ordereddict diff --git a/requirements-optional-cpython.txt b/requirements-optional-cpython.txt deleted file mode 100644 index e93eda8d..00000000 --- a/requirements-optional-cpython.txt +++ /dev/null @@ -1,9 +0,0 @@ --r requirements-optional.txt - -# lxml is supported with its own treebuilder ("lxml") and otherwise -# uses the standard ElementTree support -lxml - -# DATrie can be used in place of our Python trie implementation for -# slightly better parsing performance. -datrie diff --git a/requirements-optional.txt b/requirements-optional.txt index 4e16ea17..ac6539cb 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -7,3 +7,15 @@ genshi # charade can be used as a fallback in case we are unable to determine # the encoding of a document. charade + +# lxml is supported with its own treebuilder ("lxml") and otherwise +# uses the standard ElementTree support +lxml ; platform_python_implementation == 'CPython' + +# DATrie can be used in place of our Python trie implementation for +# slightly better parsing performance. +datrie ; platform_python_implementation == 'CPython' + +# Can be used to force attributes to be serialized in alphabetical +# order. +ordereddict ; python_version < '2.7' diff --git a/requirements-test.txt b/requirements-test.txt index 8b6ace66..13b91c45 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,5 +2,5 @@ flake8 nose -ordereddict # Python 2.6 mock +ordereddict ; python_version < '2.7' diff --git a/tox.ini b/tox.ini index c200855e..2fba06d6 100644 --- a/tox.ini +++ b/tox.ini @@ -7,9 +7,7 @@ deps = nose mock py26-base: ordereddict - py26-optional: -r{toxinidir}/requirements-optional-2.6.txt - {py27,py32,py33,py34,py35}-optional: -r{toxinidir}/requirements-optional-cpython.txt - {pypy,pypy3}-optional: -r{toxinidir}/requirements-optional.txt + optional: -r{toxinidir}/requirements-optional.txt commands = {envbindir}/nosetests -q From 383d1ee7e539f1268ae2e6be3a73c2fe77c76cee Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 11 Jan 2015 22:44:45 +0000 Subject: [PATCH 14/17] Move to py.test! Also enforce ordering of tests and test files, given nodeids for generators depend upon iteration number, and pytest-expect relies on them. --- .travis.yml | 2 +- README.rst | 6 +++--- html5lib/tests/support.py | 2 +- html5lib/tests/test_parser.py | 2 +- html5lib/tests/test_treewalkers.py | 6 +++--- pytest.ini | 2 ++ requirements-test.txt | 3 ++- tox.ini | 5 +++-- 8 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 pytest.ini diff --git a/.travis.yml b/.travis.yml index ee65440e..b9a89978 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ install: - bash requirements-install.sh script: - - nosetests + - py.test - bash flake8-run.sh after_script: diff --git a/README.rst b/README.rst index 3d08d758..1bbcb609 100644 --- a/README.rst +++ b/README.rst @@ -132,9 +132,9 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``nose`` and ``mock`` libraries and can be run -using the ``nosetests`` command in the root directory; ``ordereddict`` -is required under Python 2.6. All should pass. +Unit tests require the ``pytest`` and ``mock`` libraries and can be +run using the ``py.test`` command in the root directory; +``ordereddict`` is required under Python 2.6. All should pass. Test data are contained in a separate `html5lib-tests `_ repository and included diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 047c5534..926cb2f2 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -42,7 +42,7 @@ def get_data_files(subdirectory, files='*.dat'): - return glob.glob(os.path.join(test_dir, subdirectory, files)) + return sorted(glob.glob(os.path.join(test_dir, subdirectory, files))) class DefaultDict(dict): diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py index 0f958c94..9cda65f8 100644 --- a/html5lib/tests/test_parser.py +++ b/html5lib/tests/test_parser.py @@ -90,7 +90,7 @@ def test_parser(): if errors: errors = errors.split("\n") - for treeName, treeCls in treeTypes.items(): + for treeName, treeCls in sorted(treeTypes.items()): for namespaceHTMLElements in (True, False): yield (runParserTest, innerHTML, input, expected, errors, treeCls, namespaceHTMLElements) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index a42d8299..c79d0b1b 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -87,7 +87,7 @@ def test_all_tokens(self): {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'}, {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'} ] - for treeName, treeCls in treeTypes.items(): + for treeName, treeCls in sorted(treeTypes.items()): p = html5parser.HTMLParser(tree=treeCls["builder"]) document = p.parse("a
b
c") document = treeCls.get("adapter", lambda x: x)(document) @@ -130,7 +130,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass): def test_treewalker(): sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n") - for treeName, treeCls in treeTypes.items(): + for treeName, treeCls in sorted(treeTypes.items()): files = get_data_files('tree-construction') for filename in files: testName = os.path.basename(filename).replace(".dat", "") @@ -194,6 +194,6 @@ def test_treewalker_six_mix(): '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] - for tree in treeTypes.items(): + for tree in sorted(treeTypes.items()): for intext, attrs, expected in sm_tests: yield runTreewalkerEditTest, intext, expected, attrs, tree diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..17209aa1 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -rXw -p no:doctest \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt index 13b91c45..0580136a 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,7 @@ -r requirements.txt flake8 -nose +pytest +pytest-expect>=1.0,<2.0 mock ordereddict ; python_version < '2.7' diff --git a/tox.ini b/tox.ini index 2fba06d6..e66298d5 100644 --- a/tox.ini +++ b/tox.ini @@ -4,11 +4,12 @@ envlist = {py26,py27,py32,py33,py34,py35,pypy,pypy3}-{base,optional} [testenv] deps = flake8 - nose + pytest + pytest-expect>=1.0,<2.0 mock py26-base: ordereddict optional: -r{toxinidir}/requirements-optional.txt commands = - {envbindir}/nosetests -q + {envbindir}/py.test {toxinidir}/flake8-run.sh From 9a10a4ca7245c04fa7e292da572114137e780575 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 20 Jul 2015 22:29:02 +0100 Subject: [PATCH 15/17] Update tests. Also add an assertion for symptom of #217 (without this the testsuite goes into an infinite loop; this doesn't fix the cause but it avoids the infinite loop happening!). --- .pytest.expect | Bin 0 -> 44449 bytes html5lib/tests/testdata | 2 +- html5lib/treewalkers/etree.py | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .pytest.expect diff --git a/.pytest.expect b/.pytest.expect new file mode 100644 index 0000000000000000000000000000000000000000..b0fc7d4c2e055e9de6a2e10e36c64ff70a9d127c GIT binary patch literal 44449 zcmbuI&8{9t5r+9$AR(@T3xMgKnVuPQ8|;u3g>V8^jAPjrBy730v*ixB7@nFCC5tad zdfxWJvX9m7?)tB{s(SwY{OkAMe)f;24$ho`51e*X3={)gYa|Mu0Z`llcL z@YSn7^)cG(=U3t4{doP{AFX}=`G=2BAAZ>LyT8|d*Rkj|d)w@q?O4~>DN#o2m))@M z%ZNAZ+j`m!$F)I~;}P{gef`zbyEp&(5d;4C{ZC-Qc3qkU?b^BB{%EtOcVB${mpAYJ z`UA7S{|S4wMRTs77ZT~D>b%#4DzNsY+YS3NSQBoz?<)*gJuzQDZ+cM?^Wj!MsiLio z?)7@#xsyL$*Sv1mg(y1t*vGcpHT^7+Pg%U4dc$>Tm@Id(<5(eJc*rp>ZFSHUE*5Us zjuXKO7x#6WxN6(IpdCZ=u(wie$HTrH`fM1yKnZr(9oX;&)2sp<=CT`M-FvkImY7Y1owt7U{g1JF?@t( z7Qhf18QPrIS{@W^W9ZZF@bs z8Qv#CcWxFP^JJBCN@QGK)s`NtY5Hv2(RJtn`p4Nql^!!Hy&k&`3C1-vRe*WCFfNBy zm1{4d4=LG>3ClXjMywnIJ$kAD)!2PN0=3}=p`Y;-Ww@PEgVRGmZ{?0)Col8>qnf?= zzTj*F^Wmv5gV&5oVuIk*=g}BCO_6p~MN!I9tKdE~1EUwNnJ%V`W>4FW1KHAngiqmn zt2o~a*T{)U2#4vhcblq0^r@WOTS|P}guJv8wxH@t8mi+NR4G3xx~nCdWK=^OB%|bO zA>wcr)*Y(uetXGq8=gw@cpf>4*NRmGZ{blBP)djzGGv`F;y~wK zx2_V~kcn&ax&|~K{YW!;G&o^ynV^bB zB*>%lxMmyW3_~7{?jcb!Yo7#N)&CG45J zvPB&i_kF^(^}Dd|>;@X5Z4FIs{$l5@_W_;K`vAviM2kEvm*jR9F+%y;MxgOW#o1nU z>W)q}=rPOVEYN2b%MBOb{NEc*q9vY!g z0|$bw3fl-kkugrpf>#=!oQ$p9qTeYnsY{{AvE}h~O`87*h_pCwfkCcS8ju!N+?bV6nRHt7> zmGF>zuhv1gA)$f}=jn9=-3kx6trsTG9g>29hTt$LtkD*7MWTafrh5HTf*js~(=Nw` z4=?v(U!E2ZYMr?QU;9+6hEE4T7V+PiJB2hM=6W|ph0Je9^@oHTuN823w-mtHd(%b& zUg&fV+$;~?7|QgA1Ua?FXk9AfaRv-WnOjUQmM>;b*q?T*Q86!A6D@84%G}X!S?mk7 zXQm{}@Qju<^2yw_Z^O4ZgP^hDLnrY<_v#UJub#f=htWMK=owz_Nb24dMEq?cizK4L zE{6`G_o9otA?Os0c57%9(&ibsI1h6u*XneLfocxvg=C^rgF-=`(~NVXC4r_i?Tc`E zd3Z+phNy+sb1iH}iri8)7;TR>977P1`-&*NQ_1-zs^=t#6T;Sf8#?%cXWLb&Z@__i zPQ^*c08Nf1@(6&cJ4cK)K;Mr%HpdsK33}hR`Brkqn6(|EM<%yJv_|Fr63Ev3zH4S; z>GXBUlV4UB7)XxF4*}+M7CAyKIK3aBtt}5Dh`b32!r-crX2gqmgJ)vrBYP4<1Bg@< zygn>`T7ro+Ez)$~C6+~pjbNHNrwGGX?EDsynyQvFlVtzsO8`yo_d^GvoUC{ATDTa}=m-Z~#G8dX3JVWO^IFb) z+UE3Wf!@8YUq1JxwPhkIq8};IS8W*RI@2Z{7$LveXuitcmvX+LfRUqV86k~fLyub| zR|_8ATPEV|?Vh`H2AXkVNEp%|N!G@e_tBugJ0znBT9QEwq8R?>venLm}D*51fc?c&0g=D#Clqt3qgQGD-&UcCulNJ z8}t#-Fcj%{O>x+O`KUGmXST@0;8~(-T09701550?#i2N~3&jw8AVJA=-~&pg%_1n7 z-Uk>cx7~naUZBuNErjQh8xGpmxS^AJaO}+3FjI}O31y3xHO;>s1jx)(YupEDIim=^ zC8tQIsuz=!v8;SCPmsck5VX7s29y$a6@5d&A6K6W40u}Hu z3%!KVg*3Gp;bo)HJ`;^<`wWJsu|no)@tWS(PF#&CTr7d#a^l$p`onY-LaTCauqeH1 z_A$+?WsH7F?Q5vkvQH?NOfve5wR%{=Sp%$&mSy6TnFw$KoVef7OOPuK7XXIlMr9nn zABsjceMm43EtMbzT1Y?&0*#qyY^A?Tr`t@j>5CyxI<%uPOA~{tvG>!P9~giJ+WwLS z<{G&ko`Ad{qKVG@Ad(&?f2%JC(i4R+g|@q2$=G|Tf4Jd3d?PEJRodRr#RBI-E}TB| z0{!NU;eHJ7T4#uwCN%M?eNB9@gNcE_V^{w~O31={LdoQ@6@6H|O&$;e@@nghLD!f920DG@dOZkUXSZl2ZBOs4Yl!DgOTk-Aj$OV$) zO=uAnh`1Nfo$p%={WvsCTJi&PS+_^L8rve9882@zBX-2fty z%hNi1#sWm6O)PEz*$8h)xh_G~G#kiRCIZtw@j@6g<@$!-JrABF?8urmllK$hUgZKK za#bNY6cn(5+>v9Ky(zlKro+?nj-^hj!OFDmFncr-ic}Rc?elFSP%-7FaWo?1Mpv6klm#B`-0%tq%$O!4S)(WsZ?NdOl zbsh0HD?U8#iyc@88LI2SGt;IWcMsRn3#(PRg?sHF)r$RZZxg%YC)TbrF#muYX%0hPZV50f* zUwWqemRBW`kFD$zjX`|qSwkO;MQb-i8l8vocxwDfC^DQ@-;mxQy@pw(+{_i z0ssn`la%MKz2#jd@&KN|&AkiXfbc=(S;i(vQfc1KPfF(KnZM7J5dcrEcmr8LPLC-E zk1L)v7w(h~XOz>Qk_;(J;$pPBpw+~y(CM=y+C{W&U|3RnbTUO0#_@{IEXeI8$xmaF zSLRYNcz7Z9K0HL4d!O}wFDT9TCLZZZavZupycPuCb*Bvtc7#MZg2$~FuR%10c@4T> zw+Yico@CQ08)eg+kTc(o<}DIDNO)6H_JFj03=5Y$$)(cEMMV9uCQ-+!FUCz257hvbIHXQ+h) zTe%S@xz%2Sfi&+zmx=t>c=HI{&(-Wkcjtf;Cc2q) zRszhQS7=C>y-1zGT;2WViX_HfWxNbh)c7S;AiSA7Ai$n?CHLpn-9BKsh@V@-+4Oy= z$Ycbk(a3MMd6WSA2f@Rr0jxOJ6C83L>@bL`I0_8j W7489qJM-}!KK86#0hNaB_5T3Zzsjrt literal 0 HcmV?d00001 diff --git a/html5lib/tests/testdata b/html5lib/tests/testdata index f6a1b202..6234baea 160000 --- a/html5lib/tests/testdata +++ b/html5lib/tests/testdata @@ -1 +1 @@ -Subproject commit f6a1b202de14fc057b196044c5ebef4672be3dd0 +Subproject commit 6234baeabc51f6d51d1cfc2c4e4656bd99531f2b diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 69840c21..73c8e26a 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -129,6 +129,7 @@ def getParentNode(self, node): if not parents: return parent else: + assert list(parents[-1]).count(parent) == 1 return parent, list(parents[-1]).index(parent), parents, None return locals() From 9337b003fa4465e91c1d9b3271064e34e26d876b Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 23 Feb 2015 01:34:30 +0000 Subject: [PATCH 16/17] Use py.test to generate tests from the data files themselves. --- .pytest.expect | Bin 44449 -> 58861 bytes html5lib/tests/conftest.py | 21 ++++++ html5lib/tests/support.py | 11 ++-- html5lib/tests/test_parser.py | 96 ---------------------------- html5lib/tests/tree_construction.py | 94 +++++++++++++++++++++++++++ pytest.ini | 2 +- 6 files changed, 121 insertions(+), 103 deletions(-) create mode 100644 html5lib/tests/conftest.py delete mode 100644 html5lib/tests/test_parser.py create mode 100644 html5lib/tests/tree_construction.py diff --git a/.pytest.expect b/.pytest.expect index b0fc7d4c2e055e9de6a2e10e36c64ff70a9d127c..c88e99b9140f2b24dfcee5e47ea9f9a90794de36 100644 GIT binary patch literal 58861 zcmcIt%aR<&b+xU9BOLw;?J0FVx~jXiwPZ=w!+fECbA0#y?(O|Qefa0c|M&m@ z_zxfc_4w`A$5#(8zWw6m-Rr;o;D;Z6@~4M4ueY!7KL7E<@%_X5^Z&khcy~Ph<>l+I zu)>eN{_b$Fe>M4shr{!_>#Cd6?-GA}`|{!Ck8OcZUcJ44|M2e5uO6_A%j)&^NBGx= zLw`8D`swTAo8$e%GyIo_!~M%Q$M;{qe097%{hnO2>Mm>U+QY%E`OVwA+fT-YKO7S4 zPR^7)=(fK({WkHpnRCAT^XK1PHdNKohQ__zp8os#1;2Rv?s)g*{U=|%d-$cg+f815=_U81z z*NY*orxtp;2sbjvZDQpY$NN`rZ}0BE3{E%v6S!QpY30+jUAK|{xT!bw&FOy<8_xJZ zS`-;coCvdZrgk>}NUBX@lTS$XMn;BecXRr6;s-NQIs4or09M_j`}+N#et!LO6IpBq z8=D|ZvAUXJQNP6xZDNbdBCadh^s0W>+~^@;7Xeaq6fyQSb=TgUewC0zM)MO@A@Zf* z;PkWWS9|mF;ZK(!0KG>sK!wV#+ryZDas4J-0D+iS+DF@Mb-eGYEjUSeXx*=NoThIf zR$@oNQf3s%+wO@7&}UtZG-*&)v|e$fYaq6iX@3wRUDeBo(bN^-MkITUkv&4PPF;?C zPLP;z?XcdQ-l?vQ=nW!6F!ivBn7~FRBiS{=il_d5QTdeBetV;sS_5jMLHQVU{RtRR z|K<4kS9cGa`U2;%xPuMJQfZ92{rmN>9&|s3|21||Azrdf=}{=TfJvle4*#m%Hch<< zYfkgqWm6{cj7mg=1t6vv1#{*AXyzDhd)`4*%COsX zGTo@5cIGe{#~eI9_~Yr*>%IMA6;(O}MKTf-rmo(=Qi&+*EcJ}o7dx1okjRwJt38I3 zA@2?=sfur^4W+#3Odxp98Ydg-6Uuw>@b;_Y{oVJ+cVlhn+3-(L8`^F*NHmC?|FWC? zMeEfJ3nIO%bh-x>7wCTjwv|BIvfo)zd2%{J7irfS>mUAvj0*&@A(u>r2H6EH3aWqH^jrOp z+a480^_Ni>Kw&`4x~hHy7bEIqLHdAjZ?tRLi4pSz|5A^MMm^%2kY!j=l86^UI&kTQ z7^I$~?GUG#X1GrT5{WGjEqlFF+f?{tt<*dw4Z+G(A=Xw<6oL`jbm~tw-L}!}4>b{j z9yoPZyTOUdWwxz%db_Qww_M>N9XrPK!p9C(CunD5R;Et`$tUV4?S8M%$zWIkg57fqIY% z z?cVD8QfX>Hbq|A#gHJ3FaW!%e-A64ziqdF0lotQ*dfko|k<9dRyi`3}y|G~~qCV!} z$M42#O_moTs(KnUR3U8QH0f|`jue>=-@gyD-U%Gk4_FW_n|cfBu(|W3K9`9Q3e)I1 zD-TK8Srr!`M~}HxZLYSlJJr9@hE~Fqk;0Oe0{HwtWnv#{izLYdAC*oRFKL`k;qWc#9si>Za98f0(=HsEmtv zUVW3e!WU9%-Nm{JW8=(4lQaltlKxlM$C^<9$?V5LTI9tdGPS)yv``SRvJKHRit3k` znOHiEupiVl$}=Bp?#G!=d1#(>PQB_FD0O1A@t~E3h08? zMgajtA(+1bTZOs#7@3IDUWB_t@ihuX1dyl6sZ|KhyYvgI-n!lG^n|0U8n}9Kp*mr9 zOoz>dm!vZg!<=cMr#m{T%-d#PG9;21;w2^!0+QiSNecG-`aCBsEh|wt&Jo~*3Cv-q zNHcPgUdefCeLA4+L3)L`eDy|hoM25PNlZQYbYo$x`j4fm2d8kFk3fn~P^<|{#Ay%0 z=|o;_k(>u`iGUF7cUIKZ)aaP%l9I;JJja>T`yu*GU**wJc41PNw{1E(U0K!h=Rbo#6md}8@eyo{j3 zL0F@#CJPb#&0~>Ce!*u~xGod_z+~41f?j+&FyfSg8H@87g#tRQ)~|!$nZhl zx~-*6$SCX}15iv_G{k=V&r2cVT_XYDDJ%--YU&aJAI-f$d3C&vlyeRJK zTSoU}jagiS$efd?(!?Y<(3{8uGSPTNSi<8IFf9@98|&mNy-2QfAL92DL`T)jwt~Ht zM^$4<3rtjz6v}|Aa{3KwBgB~XIYbPP-i|rpBr*`iz8RD*iA6R(okHRs6ar0oqX$7^ z>0}}`Y$}YrskV$R5qL&%Q+^JA>%>7^goEUZZ%Wt$&pC8vZp%1miF*_T5ee&LQuh_o z(D-tHa7`hNunWJU zm7x|C1jR@{O(E)mv4=%fzyX6mIM>?J;8?Q?VR5md&bc5#ye`3T)aH_nlw%o!Z5GQ2 ztBwmy*)@q$dq}V#)a-(UVCP_vTUEBl3X}8YaBKJQo!Wz^Xns6qs9_-$fopqwpC~;B zEv|j2F-x8tN=5N%rw?=W8$AC{-(8xp-HfW0)OXt!Or_!V^i@(sWJ)t+CHy|{nU9bL zLs{rb5TKEXVI$55<6{z@a7MUhqAFbQBqQ7`mc9ySxEg`F=<^j?IBNqfZhU|UN)4d# z`u9Y0M5GINwmgKXaY9(ULjzCZR6W*vvc@oqCOLEGb~yg0x7vy02A4lE!!|D*vHlPBarU*fKb<7lo>P+&(F~K%!2$#Px}Zp%BS!q*Pc-lqsuXLEUYY zQZ4eW9Jnz9-WgLEQl-cg+5{wzFNP-H5Vd)80xko}CBg|a+8uH3v3l$e#6X9$h+Y0N zAp;$0>D$QLO4im^jXsfyx5m(t;C0^XoVMAalBc>B9#+#DH``6g6klui#LA&P4#5Gp zRC(YXD!ql)H1EHS8f+Q$b>}-P&77L~$iTjTy#9=%KF+Jo4!6S5`HS zYg9-H38lGw!0%{ce*y#`StsTi!ZrZ=ej8peOrv}T*t7J6KbI9Do+W}H^(c|uD4gv@ zXerv)=E_+gGRj0Es9IpNwMtZRebDYGc{>Q^$T)O5z=l_$ymhIEM*j&@;Jh_i;P|Z6 zn)2l>`8b`u`X~%Q;JOWp90dGCcnefq(G+nTMGt@5ja_CY0N$ZZVthHWG(oy}Ljck( zZs4wnumQG5M2n6o)Dqm=`%HIn=< z1mIQi1XL3J@8Yl-J|`HqRN$ergGN@;B!GE2*DXkU&aZ6=n9$m7aIN`2r1%qI5@f<7 z_Y-ER_IxY_5yC3o<}QcH*}b#KJj6kQ8kwfpIEIj8)<}QVDSY$DJNZh7vsbi;ktvrl z+GR^u+l=-kh7vpmzDjdx)mEln7{#+ZB?S_jq$hwl1bBNpZpmbrM8r`gtE4S1TugYL z0(#M~P^!`=*6;xVXrlONmdJbrpJ`jG{gltsJ*C>>+y{{8jvr!~%2nFVm}hbRYO_Y( zswZ1h;{4u52-$Z^hhBfaw8-$MG$jppvvyL69fb)^1$|yYJ5nI zP=q2n$7#I{WuIV|S|51E18!1P#=d(`)G%fwCRO)l{LEA9Coeh@ z_L;E+R0I-b7bc>o7dB`}EldpPdszTf&+7%7Rbd{tQs6{{mR)1g+j!UwL#;9*L)~*m z@`<)}AZ#OFja*PIgRsU=WRE|5tK3|% zdal&(TrUUt{4u;gh?0J&Lb`}BPF)zo!UNa>CPUusM;GYlN8*g+Zxp( zIK){v;oI~n#3SZbFRO)@@=K#h+8*r7Y{`|09Y{cP$)EU)csB$};2P12;r z&?3cQgZ3085VXy?#QEFj3%;gW5uRULBj>l?_M#98U=8s(e7V5;8>8y9;ik1C3vp*R z-bLH33%)7YZE^LS2DQQP5eybAs=Q2P--%uNf#=tQaaxN@)fSQPqT2@=KtM@)XG5Cp z?N&9Brbh>uU|N|ytvJovSTUkFeAw02S@@6@XRqWISMV%(^*~lmJ-|G;3?XElGYj_y z#ngi?ye&MPojWCCXaTD2n!|#xtCn9Ee2=qlFt#Q15$$tK#YJ$tQ|Be@H7D%LT8PE_ zn%j4@FVX#w9j?*rx4ZajAp&k9pf5?lW^g;ugC*rLHL{4s!7LWsgt_VH7PgEjMK_6> z+hx7SLEf>3H$gwXiffEsv?qLw0<|MHfu5vmsWeyJ460!8Jz^>>9e_i&wtm}f=g{7iy>?ScJk_~1N(Bxbv>R^WRS>3iE|NQF|ZZ) z7l)=&II3oKGx{`Hu$Tnd>s5ZXz|2y5J!}WTM3F$AGog^^FZk8ynGq63;hs%qs*~SYA%= z&#(8W40+1;ix{KIERM7zL7&yEwMPv}jg4qg>tlj=O|qa}Yf)KyRbqgg$R`OVI=Wr5 z^_!w?%(YYSC+%$w&hb*(i*cX|!rFB$-<-{elij z=!1)gexhrsZV8`|MGS8elxO)S5h|#~=x4c2ipq#&wm~97jhzNIxZDv~0aJ#1qalJb zb)m?c5{nV^B@OcPJae+u;vhPEgL$%e@f8D`6j}-;D&)ztaTSMdCn)`s+^Zuh1zL-Vp42PBqZ3g$S+Z4Y%PInD*)!;`QD&Z?8iV2 zqB=2R?6{4UIa0}vN-=7?HYWONUDUqW>9?f1^IKWP_&DPm!I*XBLZ0P3-MVYe&DJqJ zG$b$$MD)x;tcDM`4NXMFxBd2d@d=;C(QkfW&H~jBo^~OlCH&P1&7*vf8gtk85;;bM zk7AJOy<2%B8qnr({;}qN4F254}LXXFAj5vI$QpF9fHNa!` zTJt(`!nG-MmWZ5u&Y!c+KzNszK5D+nXein>`uI2_0g!$-X`;Yn@RNKSbS5h=q6-($ zXnC?kHCnt|38N*l(#S00s=YT>rY09on$bgX&NCwnezU3wo7_KR_yFz5pobD zhul*)>oLmdjxq_xv+QspYJwd_t{3}sSX7r9#tMXq&mBm3d7hd7a%1`8M%x?!24V4^ zFANvq5`0aSJ)jik&Sy^r5B6i)6aTGW``E-{p$T!nRF`J!z)5>E9&%r0`CCkA-D!L}G`xLP7G2D>2~(fJptt<-qx;B%)s^-n;!mGHv36aX}@c?JWL_TEulS?NXdR z+|l+nP)zjTLUhkNAJe~gV1E8$Ln=-SW5gSXQq)7$TFOi$Y`z_kx;Dfcp}mh)4C}Kp zk@q@PK)95I!Y{#5&RU*a%gcoXbwj8=>OO$#W5GA+?HlA0I|f^i*>Rr;h%X-Aes#RR z`~LWD{IuV*;h!81&+)Yv?q*_e;5kK9#b0%MI7~51w}-S7-p|0(1y15;6TH=zKTBh^ zzqL&mGDLZWpogItVz0gU?L-GACO#&s<^f){6Wq@LsuhHIs;KdaR)oa_0U3e<)Ud5p-Iw;XFNLSlz3N#{fe+M%SwweF{ literal 44449 zcmbuI&8{9t5r+9$AR(@T3xMgKnVuPQ8|;u3g>V8^jAPjrBy730v*ixB7@nFCC5tad zdfxWJvX9m7?)tB{s(SwY{OkAMe)f;24$ho`51e*X3={)gYa|Mu0Z`llcL z@YSn7^)cG(=U3t4{doP{AFX}=`G=2BAAZ>LyT8|d*Rkj|d)w@q?O4~>DN#o2m))@M z%ZNAZ+j`m!$F)I~;}P{gef`zbyEp&(5d;4C{ZC-Qc3qkU?b^BB{%EtOcVB${mpAYJ z`UA7S{|S4wMRTs77ZT~D>b%#4DzNsY+YS3NSQBoz?<)*gJuzQDZ+cM?^Wj!MsiLio z?)7@#xsyL$*Sv1mg(y1t*vGcpHT^7+Pg%U4dc$>Tm@Id(<5(eJc*rp>ZFSHUE*5Us zjuXKO7x#6WxN6(IpdCZ=u(wie$HTrH`fM1yKnZr(9oX;&)2sp<=CT`M-FvkImY7Y1owt7U{g1JF?@t( z7Qhf18QPrIS{@W^W9ZZF@bs z8Qv#CcWxFP^JJBCN@QGK)s`NtY5Hv2(RJtn`p4Nql^!!Hy&k&`3C1-vRe*WCFfNBy zm1{4d4=LG>3ClXjMywnIJ$kAD)!2PN0=3}=p`Y;-Ww@PEgVRGmZ{?0)Col8>qnf?= zzTj*F^Wmv5gV&5oVuIk*=g}BCO_6p~MN!I9tKdE~1EUwNnJ%V`W>4FW1KHAngiqmn zt2o~a*T{)U2#4vhcblq0^r@WOTS|P}guJv8wxH@t8mi+NR4G3xx~nCdWK=^OB%|bO zA>wcr)*Y(uetXGq8=gw@cpf>4*NRmGZ{blBP)djzGGv`F;y~wK zx2_V~kcn&ax&|~K{YW!;G&o^ynV^bB zB*>%lxMmyW3_~7{?jcb!Yo7#N)&CG45J zvPB&i_kF^(^}Dd|>;@X5Z4FIs{$l5@_W_;K`vAviM2kEvm*jR9F+%y;MxgOW#o1nU z>W)q}=rPOVEYN2b%MBOb{NEc*q9vY!g z0|$bw3fl-kkugrpf>#=!oQ$p9qTeYnsY{{AvE}h~O`87*h_pCwfkCcS8ju!N+?bV6nRHt7> zmGF>zuhv1gA)$f}=jn9=-3kx6trsTG9g>29hTt$LtkD*7MWTafrh5HTf*js~(=Nw` z4=?v(U!E2ZYMr?QU;9+6hEE4T7V+PiJB2hM=6W|ph0Je9^@oHTuN823w-mtHd(%b& zUg&fV+$;~?7|QgA1Ua?FXk9AfaRv-WnOjUQmM>;b*q?T*Q86!A6D@84%G}X!S?mk7 zXQm{}@Qju<^2yw_Z^O4ZgP^hDLnrY<_v#UJub#f=htWMK=owz_Nb24dMEq?cizK4L zE{6`G_o9otA?Os0c57%9(&ibsI1h6u*XneLfocxvg=C^rgF-=`(~NVXC4r_i?Tc`E zd3Z+phNy+sb1iH}iri8)7;TR>977P1`-&*NQ_1-zs^=t#6T;Sf8#?%cXWLb&Z@__i zPQ^*c08Nf1@(6&cJ4cK)K;Mr%HpdsK33}hR`Brkqn6(|EM<%yJv_|Fr63Ev3zH4S; z>GXBUlV4UB7)XxF4*}+M7CAyKIK3aBtt}5Dh`b32!r-crX2gqmgJ)vrBYP4<1Bg@< zygn>`T7ro+Ez)$~C6+~pjbNHNrwGGX?EDsynyQvFlVtzsO8`yo_d^GvoUC{ATDTa}=m-Z~#G8dX3JVWO^IFb) z+UE3Wf!@8YUq1JxwPhkIq8};IS8W*RI@2Z{7$LveXuitcmvX+LfRUqV86k~fLyub| zR|_8ATPEV|?Vh`H2AXkVNEp%|N!G@e_tBugJ0znBT9QEwq8R?>venLm}D*51fc?c&0g=D#Clqt3qgQGD-&UcCulNJ z8}t#-Fcj%{O>x+O`KUGmXST@0;8~(-T09701550?#i2N~3&jw8AVJA=-~&pg%_1n7 z-Uk>cx7~naUZBuNErjQh8xGpmxS^AJaO}+3FjI}O31y3xHO;>s1jx)(YupEDIim=^ zC8tQIsuz=!v8;SCPmsck5VX7s29y$a6@5d&A6K6W40u}Hu z3%!KVg*3Gp;bo)HJ`;^<`wWJsu|no)@tWS(PF#&CTr7d#a^l$p`onY-LaTCauqeH1 z_A$+?WsH7F?Q5vkvQH?NOfve5wR%{=Sp%$&mSy6TnFw$KoVef7OOPuK7XXIlMr9nn zABsjceMm43EtMbzT1Y?&0*#qyY^A?Tr`t@j>5CyxI<%uPOA~{tvG>!P9~giJ+WwLS z<{G&ko`Ad{qKVG@Ad(&?f2%JC(i4R+g|@q2$=G|Tf4Jd3d?PEJRodRr#RBI-E}TB| z0{!NU;eHJ7T4#uwCN%M?eNB9@gNcE_V^{w~O31={LdoQ@6@6H|O&$;e@@nghLD!f920DG@dOZkUXSZl2ZBOs4Yl!DgOTk-Aj$OV$) zO=uAnh`1Nfo$p%={WvsCTJi&PS+_^L8rve9882@zBX-2fty z%hNi1#sWm6O)PEz*$8h)xh_G~G#kiRCIZtw@j@6g<@$!-JrABF?8urmllK$hUgZKK za#bNY6cn(5+>v9Ky(zlKro+?nj-^hj!OFDmFncr-ic}Rc?elFSP%-7FaWo?1Mpv6klm#B`-0%tq%$O!4S)(WsZ?NdOl zbsh0HD?U8#iyc@88LI2SGt;IWcMsRn3#(PRg?sHF)r$RZZxg%YC)TbrF#muYX%0hPZV50f* zUwWqemRBW`kFD$zjX`|qSwkO;MQb-i8l8vocxwDfC^DQ@-;mxQy@pw(+{_i z0ssn`la%MKz2#jd@&KN|&AkiXfbc=(S;i(vQfc1KPfF(KnZM7J5dcrEcmr8LPLC-E zk1L)v7w(h~XOz>Qk_;(J;$pPBpw+~y(CM=y+C{W&U|3RnbTUO0#_@{IEXeI8$xmaF zSLRYNcz7Z9K0HL4d!O}wFDT9TCLZZZavZupycPuCb*Bvtc7#MZg2$~FuR%10c@4T> zw+Yico@CQ08)eg+kTc(o<}DIDNO)6H_JFj03=5Y$$)(cEMMV9uCQ-+!FUCz257hvbIHXQ+h) zTe%S@xz%2Sfi&+zmx=t>c=HI{&(-Wkcjtf;Cc2q) zRszhQS7=C>y-1zGT;2WViX_HfWxNbh)c7S;AiSA7Ai$n?CHLpn-9BKsh@V@-+4Oy= z$Ycbk(a3MMd6WSA2f@Rr0jxOJ6C83L>@bL`I0_8j W7489qJM-}!KK86#0hNaB_5T3Zzsjrt diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py new file mode 100644 index 00000000..b6f0a1cd --- /dev/null +++ b/html5lib/tests/conftest.py @@ -0,0 +1,21 @@ +import os.path + +from .tree_construction import TreeConstructionFile + +_dir = os.path.abspath(os.path.dirname(__file__)) +_testdata = os.path.join(_dir, "testdata") +_tree_construction = os.path.join(_testdata, "tree-construction") + + +def pytest_collectstart(): + """check to see if the git submodule has been init'd""" + pass + + +def pytest_collect_file(path, parent): + dir = os.path.abspath(path.dirname) + if dir == _tree_construction: + if path.basename == "template.dat": + return + if path.ext == ".dat": + return TreeConstructionFile(path, parent) diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 926cb2f2..56e09c81 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -27,16 +27,18 @@ try: import xml.etree.cElementTree as cElementTree except ImportError: - pass + treeTypes['cElementTree'] = None else: # On Python 3.3 and above cElementTree is an alias, don't run them twice. - if cElementTree.Element is not ElementTree.Element: + if cElementTree.Element is ElementTree.Element: + treeTypes['cElementTree'] = None + else: treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True) try: import lxml.etree as lxml # flake8: noqa except ImportError: - pass + treeTypes['lxml'] = None else: treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml") @@ -63,9 +65,6 @@ def __init__(self, filename, newTestHeading="data", encoding="utf8"): self.encoding = encoding self.newTestHeading = newTestHeading - def __del__(self): - self.f.close() - def __iter__(self): data = DefaultDict(None) key = None diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py deleted file mode 100644 index 9cda65f8..00000000 --- a/html5lib/tests/test_parser.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import os -import sys -import traceback -import warnings -import re - -warnings.simplefilter("error") - -from .support import get_data_files -from .support import TestData, convert, convertExpected, treeTypes -from html5lib import html5parser, constants - -# Run the parse error checks -checkParseErrors = False - -# XXX - There should just be one function here but for some reason the testcase -# format differs from the treedump format by a single space character - - -def convertTreeDump(data): - return "\n".join(convert(3)(data).split("\n")[1:]) - -namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub - - -def runParserTest(innerHTML, input, expected, errors, treeClass, - namespaceHTMLElements): - with warnings.catch_warnings(record=True) as caughtWarnings: - warnings.simplefilter("always") - p = html5parser.HTMLParser(tree=treeClass, - namespaceHTMLElements=namespaceHTMLElements) - - try: - if innerHTML: - document = p.parseFragment(input, innerHTML) - else: - document = p.parse(input) - except: - errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, - "\nTraceback:", traceback.format_exc()]) - assert False, errorMsg - - otherWarnings = [x for x in caughtWarnings - if not issubclass(x.category, constants.DataLossWarning)] - assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings] - if len(caughtWarnings): - return - - output = convertTreeDump(p.tree.testSerializer(document)) - - expected = convertExpected(expected) - if namespaceHTMLElements: - expected = namespaceExpected(r"\1", expected) - - errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, - "\nReceived:", output]) - assert expected == output, errorMsg - - errStr = [] - for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) - errStr.append("Line: %i Col: %i %s" % (line, col, - constants.E[errorcode] % datavars)) - - errorMsg2 = "\n".join(["\n\nInput:", input, - "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors), - "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) - if checkParseErrors: - assert len(p.errors) == len(errors), errorMsg2 - - -def test_parser(): - sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n") - files = get_data_files('tree-construction') - - for filename in files: - testName = os.path.basename(filename).replace(".dat", "") - if testName in ("template",): - continue - - tests = TestData(filename, "data") - - for index, test in enumerate(tests): - input, errors, innerHTML, expected = [test[key] for key in - ('data', 'errors', - 'document-fragment', - 'document')] - if errors: - errors = errors.split("\n") - - for treeName, treeCls in sorted(treeTypes.items()): - for namespaceHTMLElements in (True, False): - yield (runParserTest, innerHTML, input, expected, errors, treeCls, - namespaceHTMLElements) diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py new file mode 100644 index 00000000..c1125387 --- /dev/null +++ b/html5lib/tests/tree_construction.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import, division, unicode_literals + +import warnings +import re + +import pytest + +from .support import TestData, convert, convertExpected, treeTypes +from html5lib import html5parser, constants + + +class TreeConstructionFile(pytest.File): + def collect(self): + tests = TestData(str(self.fspath), "data") + for i, test in enumerate(tests): + for treeName, treeClass in sorted(treeTypes.items()): + for namespaceHTMLElements in (True, False): + if namespaceHTMLElements: + nodeid = "%d::%s::namespaced" % (i, treeName) + else: + nodeid = "%d::%s::void-namespace" % (i, treeName) + item = ParserTest(nodeid, self, + test, treeClass, namespaceHTMLElements) + item.add_marker(getattr(pytest.mark, treeName)) + if namespaceHTMLElements: + item.add_marker(pytest.mark.namespaced) + if treeClass is None: + item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded")) + yield item + + +def convertTreeDump(data): + return "\n".join(convert(3)(data).split("\n")[1:]) + +namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub + + +class ParserTest(pytest.Item): + def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): + super(ParserTest, self).__init__(name, parent) + self.obj = lambda: 1 # this is to hack around skipif needing a function! + self.test = test + self.treeClass = treeClass + self.namespaceHTMLElements = namespaceHTMLElements + + def runtest(self): + p = html5parser.HTMLParser(tree=self.treeClass, + namespaceHTMLElements=self.namespaceHTMLElements) + + input = self.test['data'] + fragmentContainer = self.test['document-fragment'] + expected = self.test['document'] + expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else [] + + with warnings.catch_warnings(): + warnings.simplefilter("error") + try: + if fragmentContainer: + document = p.parseFragment(input, fragmentContainer) + else: + document = p.parse(input) + except constants.DataLossWarning: + pytest.skip("data loss warning") + + output = convertTreeDump(p.tree.testSerializer(document)) + + expected = convertExpected(expected) + if self.namespaceHTMLElements: + expected = namespaceExpected(r"\1", expected) + + errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, + "\nReceived:", output]) + assert expected == output, errorMsg + + errStr = [] + for (line, col), errorcode, datavars in p.errors: + assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) + errStr.append("Line: %i Col: %i %s" % (line, col, + constants.E[errorcode] % datavars)) + + errorMsg2 = "\n".join(["\n\nInput:", input, + "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors), + "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) + if False: # we're currently not testing parse errors + assert len(p.errors) == len(expectedErrors), errorMsg2 + + def repr_failure(self, excinfo): + traceback = excinfo.traceback + ntraceback = traceback.cut(path=__file__) + excinfo.traceback = ntraceback.filter() + + return excinfo.getrepr(funcargs=True, + showlocals=False, + style="short", tbfilter=False) diff --git a/pytest.ini b/pytest.ini index 17209aa1..6875cc7d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,2 @@ [pytest] -addopts = -rXw -p no:doctest \ No newline at end of file +addopts = -rXw -p no:doctest From 082c042082c78779ea47c746c77535944eec957e Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Wed, 25 Nov 2015 17:52:47 +0000 Subject: [PATCH 17/17] Add AUTHORS.rst and test files to manifest. --- MANIFEST.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 1edd0b7d..4b3ffe3e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,10 @@ include LICENSE +include AUTHORS.rst include CHANGES.rst include README.rst include requirements*.txt +include .pytest.expect +include tox.ini +include pytest.ini graft html5lib/tests/testdata recursive-include html5lib/tests *.py