Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Sort out the tokenizer and tree-construction tests #240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 4, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,188 changes: 459 additions & 729 deletions .pytest.expect

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions html5lib/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os.path

from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile

_dir = os.path.abspath(os.path.dirname(__file__))
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")


def pytest_collectstart():
Expand All @@ -19,3 +21,6 @@ def pytest_collect_file(path, parent):
return
if path.ext == ".dat":
return TreeConstructionFile(path, parent)
elif dir == _tokenizer:
if path.ext == ".test":
return TokenizerFile(path, parent)
40 changes: 33 additions & 7 deletions html5lib/tests/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,24 @@
os.path.pardir,
os.path.pardir)))

from html5lib import treebuilders
from html5lib import treebuilders, treewalkers, treeadapters
del base_path

# Build a dict of available trees
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
treeTypes = {}

# Try whatever etree implementations are available from a list that are
#"supposed" to work
# DOM impls
treeTypes["DOM"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")
}

# ElementTree impls
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
treeTypes['ElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", ElementTree)
}

try:
import xml.etree.cElementTree as cElementTree
Expand All @@ -33,14 +41,32 @@
if cElementTree.Element is ElementTree.Element:
treeTypes['cElementTree'] = None
else:
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
treeTypes['cElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", cElementTree)
}

try:
import lxml.etree as lxml # flake8: noqa
except ImportError:
treeTypes['lxml'] = None
else:
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
treeTypes['lxml'] = {
"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")
}

# Genshi impls
try:
import genshi # flake8: noqa
except ImportError:
pass
else:
treeTypes["genshi"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
"walker": treewalkers.getTreeWalker("genshi")
}


def get_data_files(subdirectory, files='*.dat'):
Expand Down
161 changes: 28 additions & 133 deletions html5lib/tests/test_treewalkers.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,12 @@
from __future__ import absolute_import, division, unicode_literals

import os
import sys
import unittest
import warnings
from difflib import unified_diff
import pytest

try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
from .support import treeTypes

from .support import get_data_files, TestData, convertExpected

from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
from html5lib import html5parser, treewalkers
from html5lib.filters.lint import Filter as Lint


treeTypes = {
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")},
}

# Try whatever etree implementations are available from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}

try:
import xml.etree.cElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}


try:
import lxml.etree as ElementTree # flake8: noqa
except ImportError:
pass
else:
treeTypes['lxml_native'] = \
{"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")}


try:
import genshi # flake8: noqa
except ImportError:
pass
else:
treeTypes["genshi"] = \
{"builder": treebuilders.getTreeBuilder("dom"),
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
"walker": treewalkers.getTreeWalker("genshi")}

import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)

Expand All @@ -73,80 +17,29 @@ def sortattrs(x):
return "\n".join(lines)


class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for treeName, treeCls in sorted(treeTypes.items()):
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = Lint(treeCls["walker"](document))
for expectedToken, outputToken in zip(expected, output):
self.assertEqual(expectedToken, outputToken)


def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
warnings.resetwarnings()
warnings.simplefilter("error")
try:
p = html5parser.HTMLParser(tree=treeClass["builder"])
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
document = p.parse(input)
except constants.DataLossWarning:
# Ignore testcases we know we don't pass
return

document = treeClass.get("adapter", lambda x: x)(document)
try:
output = treewalkers.pprint(Lint(treeClass["walker"](document)))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
[line + "\n" for line in output.splitlines()],
"Expected", "Received"))
assert expected == output, "\n".join([
"", "Input:", input,
"", "Expected:", expected,
"", "Received:", output,
"", "Diff:", diff,
])
except NotImplementedError:
pass # Amnesty for those that confess...


def test_treewalker():
sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")

def test_all_tokens():
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for treeName, treeCls in sorted(treeTypes.items()):
files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat", "")
if testName in ("template",):
continue

tests = TestData(filename, "data")

for index, test in enumerate(tests):
(input, errors,
innerHTML, expected) = [test[key] for key in ("data", "errors",
"document-fragment",
"document")]
errors = errors.split("\n")
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
if treeCls is None:
continue
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = Lint(treeCls["walker"](document))
for expectedToken, outputToken in zip(expected, output):
assert expectedToken == outputToken


def set_attribute_on_first_child(docfrag, name, value, treeName):
Expand All @@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
Expand All @@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if not output in expected:
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))


Expand Down
Loading