From 1c03669404bc40dc31c3e20277f038dd39751d0f Mon Sep 17 00:00:00 2001 From: John Vandenberg Date: Wed, 27 Jul 2016 02:57:57 +0700 Subject: [PATCH] Miscellaneous linting changes and enable pylint --- .pylintrc | 2 +- .travis.yml | 1 + debug-info.py | 2 +- flake8-run.sh | 1 + html5lib/_inputstream.py | 20 +++++++++++--------- html5lib/_tokenizer.py | 10 ++++------ html5lib/_utils.py | 4 ++-- html5lib/filters/sanitizer.py | 2 +- html5lib/html5parser.py | 25 ++++++++++--------------- html5lib/serializer.py | 5 ++--- html5lib/tests/conftest.py | 2 ++ html5lib/tests/support.py | 8 +++++--- html5lib/tests/test_encoding.py | 5 +++-- html5lib/tests/test_parser2.py | 6 +++++- html5lib/tests/test_serializer.py | 4 ++-- html5lib/tests/test_stream.py | 15 +++++++++------ html5lib/tests/test_treewalkers.py | 4 ++-- html5lib/tests/tokenizer.py | 2 +- html5lib/treeadapters/__init__.py | 8 +++----- html5lib/treeadapters/genshi.py | 3 +-- html5lib/treeadapters/sax.py | 2 +- html5lib/treebuilders/__init__.py | 2 +- html5lib/treebuilders/base.py | 20 +++++++++----------- html5lib/treebuilders/etree_lxml.py | 2 +- parse.py | 8 +++++--- requirements-test.txt | 14 ++++++++++++++ setup.py | 4 ++-- tox.ini | 14 ++++++++++++++ utils/entities.py | 2 ++ utils/spider.py | 10 ++++++---- 30 files changed, 122 insertions(+), 85 deletions(-) diff --git a/.pylintrc b/.pylintrc index ea74d5db..c60b8510 100644 --- a/.pylintrc +++ b/.pylintrc @@ -3,7 +3,7 @@ ignore=tests [MESSAGES CONTROL] # messages up to fixme should probably be fixed somehow -disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda +disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda,bad-option-value,star-args,abstract-class-little-used,abstract-class-not-used [FORMAT] max-line-length=139 diff --git a/.travis.yml b/.travis.yml index 94bb87e7..7f9aecd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ script: - if [[ $TRAVIS_PYTHON_VERSION == pypy* ]]; then py.test; fi - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage run -m pytest; fi - bash flake8-run.sh + - pylint --rcfile=.pylintrc html5lib after_script: - python debug-info.py diff --git a/debug-info.py b/debug-info.py index f93fbdbe..c213f7cc 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,4 +1,4 @@ -from __future__ import print_function, unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals import platform import sys diff --git a/flake8-run.sh b/flake8-run.sh index d9264946..e8652e9e 100755 --- a/flake8-run.sh +++ b/flake8-run.sh @@ -5,5 +5,6 @@ if [[ ! -x $(which flake8) ]]; then exit 1 fi +flake8 --version flake8 `dirname $0` exit $? diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 79f2331e..a6787ac4 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -238,8 +238,9 @@ def position(self): return (line + 1, col) def char(self): - """ Read one character from the stream or queue if available. Return - EOF when EOF is reached. + """Read one character from the stream or queue if available. + + Return EOF when EOF is reached. """ # Read a new chunk from the input stream if necessary if self.chunkOffset >= self.chunkSize: @@ -318,7 +319,7 @@ def characterErrorsUCS2(self, data): self.errors.append("invalid-codepoint") def charsUntil(self, characters, opposite=False): - """ Returns a string of characters from the stream up to but not + """Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. @@ -330,7 +331,7 @@ def charsUntil(self, characters, opposite=False): except KeyError: if __debug__: for c in characters: - assert(ord(c) < 128) + assert ord(c) < 128 regex = "".join(["\\x%02x" % ord(c) for c in characters]) if not opposite: regex = "^%s" % regex @@ -449,7 +450,7 @@ def openStream(self, source): try: stream.seek(stream.tell()) - except: # pylint:disable=bare-except + except Exception: # pylint: disable=broad-except stream = BufferedStream(stream) return stream @@ -567,8 +568,7 @@ def detectBOM(self): return None def detectEncodingMeta(self): - """Report the encoding declared by the meta element - """ + """Report the encoding declared by the meta element.""" buffer = self.rawStream.read(self.numBytesMeta) assert isinstance(buffer, bytes) parser = EncodingParser(buffer) @@ -686,10 +686,12 @@ def jumpTo(self, bytes): class EncodingParser(object): - """Mini parser for detecting character encoding from meta elements""" + """Mini parser for detecting character encoding from meta elements.""" def __init__(self, data): - """string - the data to work on for encoding detection""" + """Constructor. + + data - the data to work on for encoding detection""" self.data = EncodingBytes(data) self.encoding = None diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 6078f66a..4cf46c2a 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -19,7 +19,7 @@ class HTMLTokenizer(object): - """ This class takes care of tokenizing HTML. + """This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. @@ -47,7 +47,7 @@ def __init__(self, stream, parser=None, **kwargs): super(HTMLTokenizer, self).__init__() def __iter__(self): - """ This is where the magic happens. + """This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token @@ -215,8 +215,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False): self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) def processEntityInAttribute(self, allowedChar): - """This method replaces the need for "entityInAttributeValueState". - """ + """This method replaces the need for "entityInAttributeValueState".""" self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) def emitCurrentToken(self): @@ -1686,8 +1685,7 @@ def bogusDoctypeState(self): self.stream.unget(data) self.tokenQueue.append(self.currentToken) self.state = self.dataState - else: - pass + return True def cdataSectionState(self): diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 03f0dab7..8cfe5ee6 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -30,14 +30,14 @@ # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used assert isinstance(_x, text_type) -except: # pylint:disable=bare-except +except Exception: # pylint: disable=broad-except supports_lone_surrogates = False else: supports_lone_surrogates = True class MethodDispatcher(dict): - """Dict with 2 special properties: + """Dict with 2 special properties. On initiation, keys that are lists, sets or tuples are converted to multiple keys so accessing any one of the items in the original diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index b5ddcb93..292af95e 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -705,7 +705,7 @@ class Filter(base.Filter): - """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" + """Sanitization of XHTML+MathML+SVG and of inline style attributes.""" def __init__(self, source, allowed_elements=allowed_elements, diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 2abd63e4..bb500811 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -55,10 +55,11 @@ def __new__(meta, classname, bases, classDict): class HTMLParser(object): """HTML parser. Generates a tree structure from a stream of (possibly - malformed) HTML""" + malformed) HTML""" def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): - """ + """Constructor. + strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be @@ -108,10 +109,9 @@ def reset(self): self.tokenizer.state = self.tokenizer.rawtextState elif self.innerHTML == 'plaintext': self.tokenizer.state = self.tokenizer.plaintextState - else: - # state already is data state - # self.tokenizer.state = self.tokenizer.dataState - pass + # else state already is data state + # i.e. self.tokenizer.state = self.tokenizer.dataState + self.phase = self.phases["beforeHtml"] self.phase.insertHtmlElement() self.resetInsertionMode() @@ -262,7 +262,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None): raise ParseError(E[errorcode] % datavars) def normalizeToken(self, token): - """ HTML5 specific normalizations to the token stream """ + """HTML5 specific normalizations to the token stream.""" if token["type"] == tokenTypes["StartTag"]: raw = token["data"] @@ -358,10 +358,7 @@ def log(function): def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: token = args[0] - try: - info = {"type": type_names[token['type']]} - except: - raise + info = {"type": type_names[token['type']]} if token['type'] in tagTokenTypes: info["name"] = token['name'] @@ -383,8 +380,7 @@ def getMetaclass(use_metaclass, metaclass_func): # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): - """Base class for helper object that implements each phase of processing - """ + """Base class for helper object that implements each phase of processing.""" def __init__(self, parser, tree): self.parser = parser @@ -1285,7 +1281,7 @@ def startTagSvg(self, token): token["selfClosingAcknowledged"] = True def startTagMisplaced(self, token): - """ Elements that should be children of other elements that have a + """Elements that should be children of other elements that have a different insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", @@ -2730,4 +2726,3 @@ def impliedTagToken(name, type="EndTag", attributes=None, class ParseError(Exception): """Error in parsed document""" - pass diff --git a/html5lib/serializer.py b/html5lib/serializer.py index 103dd206..f3786ae9 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -166,14 +166,14 @@ def __init__(self, **kwargs): self.strict = False def encode(self, string): - assert(isinstance(string, text_type)) + assert isinstance(string, text_type) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): - assert(isinstance(string, text_type)) + assert isinstance(string, text_type) if self.encoding: return string.encode(self.encoding, "strict") else: @@ -331,4 +331,3 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): class SerializeError(Exception): """Error in serialized tree""" - pass diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index ce93eff6..bfacd7e7 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division, unicode_literals + import os.path import pkg_resources diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index dab65c1c..d091cdae 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -49,7 +49,8 @@ } try: - import lxml.etree as lxml # noqa + import lxml.etree as lxml + del lxml except ImportError: treeTypes['lxml'] = None else: @@ -60,7 +61,8 @@ # Genshi impls try: - import genshi # noqa + import genshi + del genshi except ImportError: treeTypes["genshi"] = None else: @@ -132,7 +134,7 @@ def normaliseOutput(self, data): def convert(stripChars): def convertData(data): - """convert the output of str(document) to the format used in the testcases""" + """Convert the output of str(document) to the format used in the testcases""" data = data.split("\n") rv = [] for line in data: diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 9a411c77..de59ef54 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals import os @@ -105,7 +105,8 @@ def test_encoding(): # pylint:disable=wrong-import-position try: - import chardet # noqa + import chardet + del chardet except ImportError: print("chardet not found, skipping chardet tests") else: diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index bcc0bf48..ad5349d7 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -4,11 +4,15 @@ import io -from . import support # noqa +from . import support + from html5lib.constants import namespaces, tokenTypes from html5lib import parse, parseFragment, HTMLParser +# above import has side-effects; mark it as used and del it +del support + # tests that aren't autogenerated from text files def test_assertDoctypeCloneable(): diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index 9333286e..f7cd0037 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -74,7 +74,7 @@ def _convertAttrib(self, attribs): attrs = {} for attrib in attribs: name = (attrib["namespace"], attrib["name"]) - assert(name not in attrs) + assert name not in attrs attrs[name] = attrib["value"] return attrs @@ -93,7 +93,7 @@ def runSerializerTest(input, expected, options): encoding = options.get("encoding", None) if encoding: - expected = list(map(lambda x: x.encode(encoding), expected)) + expected = list(x.encode(encoding) for x in expected) result = serialize_html(input, options) if len(expected) == 1: diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 27c39538..f7f6a153 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,7 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from . import support # noqa - import codecs import sys from io import BytesIO, StringIO @@ -11,10 +9,15 @@ import six from six.moves import http_client, urllib +from . import support + from html5lib._inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) from html5lib._utils import supports_lone_surrogates +# above import has side-effects; mark it as used and del it +del support + def test_basic(): s = b"abc" @@ -182,8 +185,8 @@ def test_position2(): def test_python_issue_20007(): - """ - Make sure we have a work-around for Python bug #20007 + """Ensure we have a work-around for Python bug #20007. + http://bugs.python.org/issue20007 """ class FakeSocket(object): @@ -198,8 +201,8 @@ def makefile(self, _mode, _bufsize=None): def test_python_issue_20007_b(): - """ - Make sure we have a work-around for Python bug #20007 + """Ensure we have a work-around for Python bug #20007 (part b). + http://bugs.python.org/issue20007 """ if six.PY2: diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 67fc89e5..061b6639 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -50,7 +50,7 @@ def test_all_tokens(): def set_attribute_on_first_child(docfrag, name, value, treeName): - """naively sets an attribute on the first child of the document + """Naively sets an attribute on the first child of the document fragment passed in""" setter = {'ElementTree': lambda d: d[0].set, 'DOM': lambda d: d.firstChild.setAttribute} @@ -62,7 +62,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName): def runTreewalkerEditTest(intext, expected, attrs_to_add, tree): - """tests what happens when we add attributes to the intext""" + """Test what happens when we add attributes to the intext""" treeName, treeClass = tree if treeClass is None: pytest.skip("Treebuilder not loaded") diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 1440a722..6649878e 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -132,7 +132,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, def unescape(test): def decode(inp): - """Decode \\uXXXX escapes + r"""Decode \\uXXXX escapes This decodes \\uXXXX escapes, possibly into non-BMP characters when two surrogate character escapes are adjacent to each other. diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index 4f978466..290736bb 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -2,11 +2,9 @@ from . import sax -__all__ = ["sax"] - try: - from . import genshi # noqa + from . import genshi except ImportError: - pass + __all__ = ("sax", ) else: - __all__.append("genshi") + __all__ = ("sax", "genshi") diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 04e316df..0e955b46 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -40,8 +40,7 @@ def to_genshi(walker): yield DOCTYPE, (token["name"], token["publicId"], token["systemId"]), (None, -1, -1) - else: - pass # FIXME: What to do? + # FIXME: What to do if type is not known? if text: yield TEXT, "".join(text), (None, -1, -1) diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index ad47df95..17ba0cf4 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -11,7 +11,7 @@ def to_sax(walker, handler): - """Call SAX-like content handler based on treewalker walker""" + """Call SAX-like content handler based on treewalker walker.""" handler.startDocument() for prefix, namespace in prefix_mapping.items(): handler.startPrefixMapping(prefix, namespace) diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index e2328847..4dd3852b 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -34,7 +34,7 @@ def getTreeBuilder(treeType, implementation=None, **kwargs): - """Get a TreeBuilder class for various types of tree with built-in support + """Get a TreeBuilder class for various types of tree with built-in support. treeType - the name of the tree type required (case-insensitive). Supported values are: diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index a4b2792a..c6169ab6 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -52,8 +52,7 @@ def __repr__(self): return "<%s>" % (self.name) def appendChild(self, node): - """Insert node as a child of the current node - """ + """Insert node as a child of the current node.""" raise NotImplementedError def insertText(self, data, insertBefore=None): @@ -69,8 +68,7 @@ def insertBefore(self, node, refNode): raise NotImplementedError def removeChild(self, node): - """Remove node from the children of the current node - """ + """Remove node from the children of the current node.""" raise NotImplementedError def reparentChildren(self, newParent): @@ -90,8 +88,7 @@ def cloneNode(self): raise NotImplementedError def hasContent(self): - """Return true if the node has children or text, false otherwise - """ + """Return true if the node has children or text, false otherwise.""" raise NotImplementedError @@ -367,17 +364,18 @@ def generateImpliedEndTags(self, exclude=None): self.generateImpliedEndTags(exclude) def getDocument(self): - "Return the final tree" + """Return the final tree.""" return self.document def getFragment(self): - "Return the final fragment" + """Return the final fragment.""" # assert self.innerHTML fragment = self.fragmentClass() self.openElements[0].reparentChildren(fragment) return fragment - def testSerializer(self, node): - """Serialize the subtree of node in the format required by unit tests - node - the node from which to start serializing""" + def testSerializer(self, element): + """Serialize the subtree of node in the format required by unit tests. + + element - the node from which to start serializing""" raise NotImplementedError diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 908820c0..e8b6bc56 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -77,7 +77,7 @@ def serializeElement(element, indent=0): while next_element is not None: serializeElement(next_element, indent + 2) next_element = next_element.getnext() - elif isinstance(element, str) or isinstance(element, bytes): + elif isinstance(element, (str, bytes)): # Text in a fragment assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) diff --git a/parse.py b/parse.py index 3e65c330..c849c1f0 100755 --- a/parse.py +++ b/parse.py @@ -3,6 +3,8 @@ Parse a document to a tree, with optional profiling """ +from __future__ import absolute_import, division, unicode_literals, print_function + import sys import traceback @@ -33,7 +35,7 @@ def parse(): if contentType: (mediaType, params) = cgi.parse_header(contentType) encoding = params.get('charset') - except: + except Exception: pass elif f == '-': f = sys.stdin @@ -94,7 +96,7 @@ def parse(): def run(parseMethod, f, encoding, scripting): try: document = parseMethod(f, override_encoding=encoding, scripting=scripting) - except: + except Exception: document = None traceback.print_exc() return document @@ -127,7 +129,7 @@ def printOutput(parser, document, opts): for opt in serializer.HTMLSerializer.options: try: kwargs[opt] = getattr(opts, opt) - except: + except AttributeError: pass if not kwargs['quote_char']: del kwargs['quote_char'] diff --git a/requirements-test.txt b/requirements-test.txt index 40df78d4..64e9cf75 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,20 @@ -r requirements.txt flake8<3.0 +flake8-docstrings +flake8-string-format +flake8-future-import +flake8-debugger +flake8-print +hacking +flake8-tuple +flake8-dodgy +ebb-lint ; python_version > '2.6' + +pylint ; python_version > '2.6' +pylint<1.4 ; python_version <= '2.6' +astroid<1.3.6 ; python_version <= '2.6' + pytest pytest-expect>=1.1,<2.0 mock diff --git a/setup.py b/setup.py index 00fee241..cd94afdc 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import absolute_import, division, print_function, unicode_literals import ast import codecs @@ -83,7 +83,7 @@ def default_environment(): with open(join(here, "html5lib", "__init__.py"), "rb") as init_file: t = ast.parse(init_file.read(), filename="__init__.py", mode="exec") assert isinstance(t, ast.Module) - assignments = filter(lambda x: isinstance(x, ast.Assign), t.body) + assignments = (x for x in t.body if isinstance(x, ast.Assign)) for a in assignments: if (len(a.targets) == 1 and isinstance(a.targets[0], ast.Name) and diff --git a/tox.ini b/tox.ini index da64de71..24fcaef9 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,15 @@ envlist = {py26,py27,py33,py34,py35,pypy}-{base,optional} [testenv] deps = flake8<3.0 + flake8-docstrings + flake8-string-format + flake8-future-import + flake8-debugger + flake8-print + hacking + flake8-tuple + flake8-dodgy + ebb-lint ; python_version > '2.6' pytest pytest-expect>=1.1,<2.0 mock @@ -15,3 +24,8 @@ deps = commands = {envbindir}/py.test {toxinidir}/flake8-run.sh + +[flake8] +ignore = D1,D202,D204,D205,D208,D209,D400,D401,FI13,FI50,FI51,FI53,FI54,H101,H301,H304,H306,H403,H405,L101,L102,L201,L202,L204,L205,T003 +min-version = 2.6 +require-code = True diff --git a/utils/entities.py b/utils/entities.py index 6dccf5f0..6d91587a 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division, unicode_literals, print_function + import json import html5lib diff --git a/utils/spider.py b/utils/spider.py index 3a325888..2cdc0aaf 100644 --- a/utils/spider.py +++ b/utils/spider.py @@ -1,17 +1,19 @@ #!/usr/bin/env python -"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree +"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree. usage: import spider s = spider.Spider() s.spider("http://www.google.com", maxURLs=100) """ +from __future__ import absolute_import, division, unicode_literals, print_function import urllib.request import urllib.error import urllib.parse import urllib.robotparser -import md5 + +from hashlib import md5 import httplib2 @@ -46,7 +48,7 @@ def parse(self, content): p = html5lib.HTMLParser(tree=etree.TreeBuilder) try: tree = p.parse(content) - except: + except Exception: self.buggyURLs.add(self.currentURL) failed = True print("BUGGY:", self.currentURL) @@ -57,7 +59,7 @@ def parse(self, content): def loadURL(self, url): resp, content = self.http.request(url, "GET") self.currentURL = url - digest = md5.md5(content).hexdigest() + digest = md5(content).hexdigest() if digest in self.contentDigest: content = None self.visitedURLs.add(url)