diff --git a/.travis.yml b/.travis.yml index 09ef5985..d05fa2c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: python python: - - "2.6" - "2.7" - "3.3" - "3.4" diff --git a/README.rst b/README.rst index 2ad46090..60ef4adb 100644 --- a/README.rst +++ b/README.rst @@ -90,7 +90,7 @@ More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.6+, CPython 3.3+ and PyPy. To install it, +html5lib works on CPython 2.7, CPython 3.3+ and PyPy. To install it, use: .. code-block:: bash @@ -128,8 +128,8 @@ Tests ----- Unit tests require the ``pytest`` and ``mock`` libraries and can be -run using the ``py.test`` command in the root directory; -``ordereddict`` is required under Python 2.6. All should pass. +run using the ``py.test`` command in the root directory. All should +pass. Test data are contained in a separate `html5lib-tests `_ repository and included diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 79f2331e..84936682 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -40,13 +40,13 @@ else: invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) -non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, - 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, - 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, - 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF]) +non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, + 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, + 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, + 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, + 0x10FFFE, 0x10FFFF} ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 25eece46..a1158bbb 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -13,8 +13,7 @@ def keys(self, prefix=None): if prefix is None: return set(keys) - # Python 2.6: no set comprehensions - return set([x for x in keys if x.startswith(prefix)]) + return {x for x in keys if x.startswith(prefix)} def has_keys_with_prefix(self, prefix): for key in self.keys(): diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 03f0dab7..91252f2c 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,6 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -import sys from types import ModuleType from six import text_type @@ -13,11 +12,9 @@ __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates", "PY27"] + "supports_lone_surrogates"] -PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7 - # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be # caught by the below test. In general this would be any platform # using UTF-16 as its encoding of unicode strings, such as diff --git a/html5lib/constants.py b/html5lib/constants.py index 9e7541d3..977b9c07 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -519,8 +519,8 @@ "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) } -unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in - adjustForeignAttributes.items()]) +unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in + adjustForeignAttributes.items()} spaceCharacters = frozenset([ "\t", @@ -544,8 +544,8 @@ digits = frozenset(string.digits) hexDigits = frozenset(string.hexdigits) -asciiUpper2Lower = dict([(ord(c), ord(c.lower())) - for c in string.ascii_uppercase]) +asciiUpper2Lower = {ord(c): ord(c.lower()) + for c in string.ascii_uppercase} # Heading elements need to be ordered headingElements = ( @@ -2933,7 +2933,7 @@ tokenTypes["EmptyTag"]]) -prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes = {v: k for k, v in namespaces.items()} prefixes["http://www.w3.org/1998/Math/MathML"] = "math" diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 4795baec..5fad28fe 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -2,10 +2,7 @@ from . import base -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict class Filter(base.Filter): diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 2abd63e4..094ac789 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,12 +1,9 @@ from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass, viewkeys, PY3 +from six import with_metaclass, viewkeys import types -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict from . import _inputstream from . import _tokenizer @@ -74,8 +71,8 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa self.tree = tree(namespaceHTMLElements) self.errors = [] - self.phases = dict([(name, cls(self, self.tree)) for name, cls in - getPhases(debug).items()]) + self.phases = {name: cls(self, self.tree) + for name, cls in getPhases(debug).items()} def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): @@ -2417,7 +2414,7 @@ def processStartTag(self, token): currentNode = self.tree.openElements[-1] if (token["name"] in self.breakoutElements or (token["name"] == "font" and - set(token["data"].keys()) & set(["color", "face", "size"]))): + set(token["data"].keys()) & {"color", "face", "size"})): self.parser.parseError("unexpected-html-element-in-foreign-content", {"name": token["name"]}) while (self.tree.openElements[-1].namespace != @@ -2711,10 +2708,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - if PY3 or _utils.PY27: - needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) - else: - needs_adjustment = frozenset(token['data']) & frozenset(replacements) + needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) if needs_adjustment: token['data'] = OrderedDict((replacements.get(k, k), v) for k, v in token['data'].items()) diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index 9333286e..c23592af 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -80,7 +80,7 @@ def _convertAttrib(self, attribs): def serialize_html(input, options): - options = dict([(str(k), v) for k, v in options.items()]) + options = {str(k): v for k, v in options.items()} encoding = options.get("encoding", None) if "encoding" in options: del options["encoding"] diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index a4b2792a..920c755a 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -10,9 +10,9 @@ listElementsMap = { None: (frozenset(scopingElements), False), - "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), - "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), - (namespaces["html"], "ul")])), False), + "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False), + "list": (frozenset(scopingElements | {(namespaces["html"], "ol"), + (namespaces["html"], "ul")}), False), "table": (frozenset([(namespaces["html"], "html"), (namespaces["html"], "table")]), False), "select": (frozenset([(namespaces["html"], "optgroup"), diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 8f30f078..a45f731c 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,12 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -try: - from collections import OrderedDict -except ImportError: - try: - from ordereddict import OrderedDict - except ImportError: - OrderedDict = dict +from collections import OrderedDict import re diff --git a/requirements.txt b/requirements.txt index 3884556f..ae7ec3d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ six>=1.9 webencodings -ordereddict ; python_version < '2.7' diff --git a/setup.py b/setup.py index 6f0fc17e..7eebffe9 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,6 @@ def default_environment(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', @@ -106,10 +105,6 @@ def default_environment(): 'webencodings', ], extras_require={ - # A empty extra that only has a conditional marker will be - # unconditonally installed when the condition matches. - ":python_version == '2.6'": ["ordereddict"], - # A conditional extra will only install these items when the extra is # requested and the condition matches. "datrie:platform_python_implementation == 'CPython'": ["datrie"], diff --git a/tox.ini b/tox.ini index da64de71..42f7937a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py26,py27,py33,py34,py35,pypy}-{base,optional} +envlist = {py27,py33,py34,py35,pypy}-{base,optional} [testenv] deps = @@ -9,7 +9,6 @@ deps = mock base: six base: webencodings - py26-base: ordereddict optional: -r{toxinidir}/requirements-optional.txt commands = diff --git a/utils/spider.py b/utils/spider.py index 3a325888..374dc422 100644 --- a/utils/spider.py +++ b/utils/spider.py @@ -108,9 +108,9 @@ def updateURLs(self, tree): # Remove links not of content-type html or pages not found # XXX - need to deal with other status codes? - toVisit = set([url for url in urls if url in responseHeaders and - "html" in responseHeaders[url]['content-type'] and - responseHeaders[url]['status'] == "200"]) + toVisit = {url for url in urls if url in responseHeaders and + "html" in responseHeaders[url]['content-type'] and + responseHeaders[url]['status'] == "200"} # Now check we are allowed to spider the page for url in toVisit: