diff --git a/.pytest.expect b/.pytest.expect new file mode 100644 index 00000000..c88e99b9 Binary files /dev/null and b/.pytest.expect differ diff --git a/.travis.yml b/.travis.yml index 3f045b37..b9a89978 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,8 @@ python: - "3.4" - "3.5" - "pypy" + - "pypy3" + sudo: false cache: @@ -36,7 +38,7 @@ install: - bash requirements-install.sh script: - - nosetests + - py.test - bash flake8-run.sh after_script: diff --git a/CHANGES.rst b/CHANGES.rst index ed951a3b..e99da143 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,9 +7,14 @@ Change Log Released on XXX * Added ordereddict as a mandatory dependency on Python 2.6. + * Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all`` extras that will do the right thing based on the specific interpreter implementation. +* Now requires the ``mock`` package for the testsuite. + +* Cease supporting DATrie under PyPy. + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ diff --git a/MANIFEST.in b/MANIFEST.in index 1edd0b7d..4b3ffe3e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,10 @@ include LICENSE +include AUTHORS.rst include CHANGES.rst include README.rst include requirements*.txt +include .pytest.expect +include tox.ini +include pytest.ini graft html5lib/tests/testdata recursive-include html5lib/tests *.py diff --git a/README.rst b/README.rst index 9e0a0f74..1bbcb609 100644 --- a/README.rst +++ b/README.rst @@ -104,8 +104,8 @@ Optional Dependencies The following third-party libraries may be used for additional functionality: -- ``datrie`` can be used to improve parsing performance (though in - almost all cases the improvement is marginal); +- ``datrie`` can be used under CPython to improve parsing performance + (though in almost all cases the improvement is marginal); - ``lxml`` is supported as a tree format (for both building and walking) under CPython (but *not* PyPy where it is known to cause @@ -132,9 +132,9 @@ Please report any bugs on the `issue tracker Tests ----- -Unit 
# Absolute locations of the test package and of the tree-construction
# fixtures inside the html5lib-tests checkout ("testdata").
_dir = os.path.abspath(os.path.dirname(__file__))
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")


def pytest_collectstart():
    """Collection-start hook; currently a no-op.

    Placeholder for a check that the ``testdata`` git submodule has been
    initialised. No such check is performed yet.
    """
    pass


def pytest_collect_file(path, parent):
    """Turn tree-construction ``.dat`` fixtures into pytest collectors.

    :arg path: the candidate file; its ``dirname``, ``basename`` and ``ext``
        attributes are read
    :arg parent: the parent collector, passed through unchanged

    :returns: a ``TreeConstructionFile`` for every ``*.dat`` file located
        directly in the tree-construction directory except ``template.dat``;
        ``None`` for any other file
    """
    # Only files sitting directly in the tree-construction directory count.
    if os.path.abspath(path.dirname) != _tree_construction:
        return None
    # template.dat is deliberately excluded from collection.
    if path.basename == "template.dat":
        return None
    if path.ext != ".dat":
        return None
    return TreeConstructionFile(path, parent)
def _createReprMock(r):
    """Build a ``Mock`` whose ``__repr__`` returns ``r``.

    ``__str__`` is replaced by a mock wrapping the default ``__str__`` so
    that calls to it can be detected afterwards."""
    fake = Mock()
    fake.__repr__ = Mock(return_value=r)
    fake.__str__ = Mock(wraps=fake.__str__)
    return fake


def test_errorMessage():
    """``support.errorMessage`` must format each argument via ``repr`` exactly
    once, never via ``str``, and return bytes on Python 2 / text on Python 3."""
    # One mock per positional argument: input, expected, actual.
    fakes = [_createReprMock(value) for value in ("1", "2", "3")]

    result = support.errorMessage(*fakes)

    # The misspelling in the expected text mirrors the message produced by
    # support.errorMessage and must stay in sync with it.
    if six.PY2:
        assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == result
    else:
        assert six.PY3
        assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == result

    for fake in fakes:
        assert fake.__repr__.call_count == 1
    for fake in fakes:
        assert not fake.__str__.called
p = html5parser.HTMLParser(tree=treeClass, - namespaceHTMLElements=namespaceHTMLElements) - - try: - if innerHTML: - document = p.parseFragment(input, innerHTML) - else: - document = p.parse(input) - except: - errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, - "\nTraceback:", traceback.format_exc()]) - assert False, errorMsg - - otherWarnings = [x for x in caughtWarnings - if not issubclass(x.category, constants.DataLossWarning)] - assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings] - if len(caughtWarnings): - return - - output = convertTreeDump(p.tree.testSerializer(document)) - - expected = convertExpected(expected) - if namespaceHTMLElements: - expected = namespaceExpected(r"\1", expected) - - errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, - "\nReceived:", output]) - assert expected == output, errorMsg - - errStr = [] - for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) - errStr.append("Line: %i Col: %i %s" % (line, col, - constants.E[errorcode] % datavars)) - - errorMsg2 = "\n".join(["\n\nInput:", input, - "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors), - "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) - if checkParseErrors: - assert len(p.errors) == len(errors), errorMsg2 - - -def test_parser(): - sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n") - files = get_data_files('tree-construction') - - for filename in files: - testName = os.path.basename(filename).replace(".dat", "") - if testName in ("template",): - continue - - tests = TestData(filename, "data") - - for index, test in enumerate(tests): - input, errors, innerHTML, expected = [test[key] for key in - ('data', 'errors', - 'document-fragment', - 'document')] - if errors: - errors = errors.split("\n") - - for treeName, treeCls in treeTypes.items(): - for namespaceHTMLElements in 
(True, False): - yield (runParserTest, innerHTML, input, expected, errors, treeCls, - namespaceHTMLElements) diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 20bbdf31..01f16eea 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -40,12 +40,12 @@ def test_namespace_html_elements_1_dom(self): def test_namespace_html_elements_0_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=True) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],)) + self.assertTrue(doc.tag == "{%s}html" % (namespaces["html"],)) def test_namespace_html_elements_1_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=False) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "html") + self.assertTrue(doc.tag == "html") def test_unicode_file(self): parser = html5parser.HTMLParser() diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py index 6a563c32..823c6ea6 100644 --- a/html5lib/tests/test_tokenizer.py +++ b/html5lib/tests/test_tokenizer.py @@ -109,6 +109,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, token.pop() if not ignoreErrorOrder and not ignoreErrors: + expectedTokens = concatenateCharacterTokens(expectedTokens) return expectedTokens == receivedTokens else: # Sort the tokens into two groups; non-parse errors and parse errors @@ -121,6 +122,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, else: if not ignoreErrors: tokens[tokenType][1].append(token) + tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0]) return tokens["expected"] == tokens["received"] @@ -174,13 +176,12 @@ def runTokenizerTest(test): warnings.resetwarnings() warnings.simplefilter("error") - expected = concatenateCharacterTokens(test['output']) + expected = test['output'] if 'lastStartTag' not in test: test['lastStartTag'] = None parser = TokenizerTestParser(test['initialState'], 
test['lastStartTag']) tokens = parser.parse(test['input']) - tokens = concatenateCharacterTokens(tokens) received = normalizeTokens(tokens) errorMsg = "\n".join(["\n\nInitial state:", test['initialState'], diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 3be12327..c79d0b1b 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -13,43 +13,12 @@ from .support import get_data_files, TestData, convertExpected -from html5lib import html5parser, treewalkers, treebuilders, constants +from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants -def PullDOMAdapter(node): - from xml.dom import Node - from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS - - if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): - for childNode in node.childNodes: - for event in PullDOMAdapter(childNode): - yield event - - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM") - - elif node.nodeType == Node.COMMENT_NODE: - yield COMMENT, node - - elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - yield CHARACTERS, node - - elif node.nodeType == Node.ELEMENT_NODE: - yield START_ELEMENT, node - for childNode in node.childNodes: - for event in PullDOMAdapter(childNode): - yield event - yield END_ELEMENT, node - - else: - raise NotImplementedError("Node type not supported: " + str(node.nodeType)) - treeTypes = { "DOM": {"builder": treebuilders.getTreeBuilder("dom"), "walker": treewalkers.getTreeWalker("dom")}, - "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"), - "adapter": PullDOMAdapter, - "walker": treewalkers.getTreeWalker("pulldom")}, } # Try whatever etree implementations are available from a list that are @@ -60,7 +29,7 @@ def PullDOMAdapter(node): pass else: treeTypes['ElementTree'] = \ - {"builder": treebuilders.getTreeBuilder("etree", ElementTree), + 
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), "walker": treewalkers.getTreeWalker("etree", ElementTree)} try: @@ -69,7 +38,7 @@ def PullDOMAdapter(node): pass else: treeTypes['cElementTree'] = \ - {"builder": treebuilders.getTreeBuilder("etree", ElementTree), + {"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), "walker": treewalkers.getTreeWalker("etree", ElementTree)} @@ -84,59 +53,13 @@ def PullDOMAdapter(node): try: - from genshi.core import QName, Attrs - from genshi.core import START, END, TEXT, COMMENT, DOCTYPE + import genshi # flake8: noqa except ImportError: pass else: - def GenshiAdapter(tree): - text = None - for token in treewalkers.getTreeWalker("dom")(tree): - type = token["type"] - if type in ("Characters", "SpaceCharacters"): - if text is None: - text = token["data"] - else: - text += token["data"] - elif text is not None: - yield TEXT, text, (None, -1, -1) - text = None - - if type in ("StartTag", "EmptyTag"): - if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) - else: - name = token["name"] - attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) - for attr, value in token["data"].items()]) - yield (START, (QName(name), attrs), (None, -1, -1)) - if type == "EmptyTag": - type = "EndTag" - - if type == "EndTag": - if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) - else: - name = token["name"] - - yield END, QName(name), (None, -1, -1) - - elif type == "Comment": - yield COMMENT, token["data"], (None, -1, -1) - - elif type == "Doctype": - yield DOCTYPE, (token["name"], token["publicId"], - token["systemId"]), (None, -1, -1) - - else: - pass # FIXME: What to do? 
- - if text is not None: - yield TEXT, text, (None, -1, -1) - treeTypes["genshi"] = \ {"builder": treebuilders.getTreeBuilder("dom"), - "adapter": GenshiAdapter, + "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)), "walker": treewalkers.getTreeWalker("genshi")} import re @@ -164,7 +87,7 @@ def test_all_tokens(self): {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'}, {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'} ] - for treeName, treeCls in treeTypes.items(): + for treeName, treeCls in sorted(treeTypes.items()): p = html5parser.HTMLParser(tree=treeCls["builder"]) document = p.parse("a
b
class TreeConstructionFile(pytest.File):
    """Collector for a single tree-construction ``.dat`` fixture file."""

    def collect(self):
        """Yield one ``ParserTest`` per (test case, tree builder, namespacing).

        Every item is marked with the tree builder's name; namespaced
        variants additionally get the ``namespaced`` marker, and items whose
        tree builder is ``None`` are marked to be skipped.
        """
        cases = TestData(str(self.fspath), "data")
        for index, case in enumerate(cases):
            for builderName, builder in sorted(treeTypes.items()):
                for namespaced in (True, False):
                    variant = "namespaced" if namespaced else "void-namespace"
                    item = ParserTest("%d::%s::%s" % (index, builderName, variant),
                                      self, case, builder, namespaced)

                    markers = [getattr(pytest.mark, builderName)]
                    if namespaced:
                        markers.append(pytest.mark.namespaced)
                    if builder is None:
                        # A None entry in treeTypes means no tree builder is
                        # available under that name.
                        markers.append(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
                    for marker in markers:
                        item.add_marker(marker)

                    yield item
def to_genshi(walker):
    """Convert a stream of tree-walker tokens into Genshi stream events.

    :arg walker: an iterable of token dicts; each has a ``"type"`` key plus
        the keys read for that type (``"data"``, ``"name"``, ``"namespace"``,
        ``"publicId"``, ``"systemId"``)

    :returns: a generator of ``(kind, data, position)`` tuples, where the
        position is always ``(None, -1, -1)``

    Consecutive ``Characters``/``SpaceCharacters`` tokens are merged into a
    single ``TEXT`` event. An ``EmptyTag`` token produces a ``START`` event
    immediately followed by an ``END`` event. Token types not handled below
    are silently dropped.
    """
    position = (None, -1, -1)
    buffered = []

    for token in walker:
        kind = token["type"]

        if kind in ("Characters", "SpaceCharacters"):
            buffered.append(token["data"])
        elif buffered:
            # Any non-text token flushes the text collected so far.
            yield TEXT, "".join(buffered), position
            buffered = []

        if kind in ("StartTag", "EmptyTag", "EndTag"):
            namespace = token["namespace"]
            if namespace:
                tag = "{%s}%s" % (namespace, token["name"])
            else:
                tag = token["name"]

            if kind != "EndTag":
                # Attribute keys are (namespace, local name) pairs.
                attributes = Attrs([
                    (QName(key[1] if key[0] is None else "{%s}%s" % key), value)
                    for key, value in token["data"].items()
                ])
                yield START, (QName(tag), attributes), position

            if kind != "StartTag":
                yield END, QName(tag), position

        elif kind == "Comment":
            yield COMMENT, token["data"], position

        elif kind == "Doctype":
            doctype = (token["name"], token["publicId"], token["systemId"])
            yield DOCTYPE, doctype, position

        # FIXME: What to do with any other token type? They are ignored.

    if buffered:
        yield TEXT, "".join(buffered), position
import START_ELEMENT, END_ELEMENT, \ - COMMENT, IGNORABLE_WHITESPACE, CHARACTERS - -from . import _base - -from ..constants import voidElements - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - ignore_until = None - previous = None - for event in self.tree: - if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): - if previous[1] is ignore_until: - ignore_until = None - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = previous[1] - previous = event - if ignore_until is None or previous[1] is ignore_until: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") - - def tokens(self, event, next): - type, node = event - if type == START_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI, attr.localName)] = attr.value - if name in voidElements: - for token in self.emptyTag(namespace, - name, - attrs, - not next or next[1] is not node): - yield token - else: - yield self.startTag(namespace, name, attrs) - - elif type == END_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - if name not in voidElements: - yield self.endTag(namespace, name) - - elif type == COMMENT: - yield self.comment(node.nodeValue) - - elif type in (IGNORABLE_WHITESPACE, CHARACTERS): - for token in self.text(node.nodeValue): - yield token - - else: - yield self.unknown(type) diff --git a/html5lib/utils.py b/html5lib/utils.py index ebad29fb..c196821f 100644 --- a/html5lib/utils.py +++ b/html5lib/utils.py @@ -91,13 +91,21 @@ def moduleFactory(baseModule, *args, **kwargs): else: name = b"_%s_factory" % baseModule.__name__ - if name in moduleCache: - return moduleCache[name] - else: + kwargs_tuple = tuple(kwargs.items()) + + try: + return 
moduleCache[name][args][kwargs_tuple] + except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) - moduleCache[name] = mod + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..6875cc7d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -rXw -p no:doctest diff --git a/requirements-install.sh b/requirements-install.sh index 5f8ba506..a8964ea0 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -5,12 +5,11 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then exit 1 fi -pip install -r requirements-test.txt +# Make sure we're running setuptools >= 18.5 +pip install -U pip setuptools -if [[ $USE_OPTIONAL == "true" && $TRAVIS_PYTHON_VERSION != "pypy" ]]; then - if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then - pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-2.6.txt - else - pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-cpython.txt - fi +pip install -U -r requirements-test.txt + +if [[ $USE_OPTIONAL == "true" ]]; then + pip install -U -r requirements-optional.txt fi diff --git a/requirements-optional-2.6.txt b/requirements-optional-2.6.txt deleted file mode 100644 index 37557ac4..00000000 --- a/requirements-optional-2.6.txt +++ /dev/null @@ -1,5 +0,0 @@ --r requirements-optional-cpython.txt - -# Can be used to force attributes to be serialized in alphabetical -# order. 
-ordereddict diff --git a/requirements-optional-cpython.txt b/requirements-optional-cpython.txt deleted file mode 100644 index 35ed3529..00000000 --- a/requirements-optional-cpython.txt +++ /dev/null @@ -1,5 +0,0 @@ --r requirements-optional.txt - -# lxml is supported with its own treebuilder ("lxml") and otherwise -# uses the standard ElementTree support -lxml diff --git a/requirements-optional.txt b/requirements-optional.txt index c6355270..ac6539cb 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,10 +4,18 @@ # streams. genshi -# DATrie can be used in place of our Python trie implementation for -# slightly better parsing performance. -datrie - # charade can be used as a fallback in case we are unable to determine # the encoding of a document. charade + +# lxml is supported with its own treebuilder ("lxml") and otherwise +# uses the standard ElementTree support +lxml ; platform_python_implementation == 'CPython' + +# DATrie can be used in place of our Python trie implementation for +# slightly better parsing performance. +datrie ; platform_python_implementation == 'CPython' + +# Can be used to force attributes to be serialized in alphabetical +# order. +ordereddict ; python_version < '2.7' diff --git a/requirements-test.txt b/requirements-test.txt index d5f8088c..0580136a 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,7 @@ -r requirements.txt flake8 -nose -ordereddict # Python 2.6 +pytest +pytest-expect>=1.0,<2.0 +mock +ordereddict ; python_version < '2.7' diff --git a/setup.py b/setup.py index 7af4e292..7b06b45e 100644 --- a/setup.py +++ b/setup.py @@ -65,18 +65,18 @@ # A conditional extra will only install these items when the extra is # requested and the condition matches. + "datrie:python_implementation == 'CPython'": ["datrie"], "lxml:python_implementation == 'CPython'": ["lxml"], # Standard extras, will be installed when the extra is requested. 
"genshi": ["genshi"], - "datrie": ["datrie"], "charade": ["charade"], # The all extra combines a standard extra which will be used anytime # the all extra is requested, and it extends it with a conditional # extra that will be installed whenever the condition matches and the # all extra is requested. - "all": ["genshi", "datrie", "charade"], - "all:python_implementation == 'CPython'": ["lxml"], + "all": ["genshi", "charade"], + "all:python_implementation == 'CPython'": ["datrie", "lxml"], }, ) diff --git a/tox.ini b/tox.ini index d00e35dc..e66298d5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,30 +1,15 @@ [tox] -envlist = py26,py27,py32,py33,py34,py35,pypy +envlist = {py26,py27,py32,py33,py34,py35,pypy,pypy3}-{base,optional} [testenv] deps = - -r{toxinidir}/requirements-optional-cpython.txt flake8 - nose + pytest + pytest-expect>=1.0,<2.0 + mock + py26-base: ordereddict + optional: -r{toxinidir}/requirements-optional.txt + commands = - {envbindir}/nosetests -q + {envbindir}/py.test {toxinidir}/flake8-run.sh -install_command = - pip install {opts} {packages} - -[testenv:pypy] -# lxml doesn't work and datrie doesn't make sense -# (it's slower than the pure-python version) -deps = - charade - flake8 - Genshi - nose - six - -[testenv:py26] -basepython = python2.6 -deps = - -r{toxinidir}/requirements-optional-2.6.txt - flake8 - nose