From 1c03669404bc40dc31c3e20277f038dd39751d0f Mon Sep 17 00:00:00 2001
From: John Vandenberg <jayvdb@flip.localdomain>
Date: Wed, 27 Jul 2016 02:57:57 +0700
Subject: [PATCH] Miscellaneous linting changes and enable pylint

---
 .pylintrc                           |  2 +-
 .travis.yml                         |  1 +
 debug-info.py                       |  2 +-
 flake8-run.sh                       |  1 +
 html5lib/_inputstream.py            | 20 +++++++++++---------
 html5lib/_tokenizer.py              | 10 ++++------
 html5lib/_utils.py                  |  4 ++--
 html5lib/filters/sanitizer.py       |  2 +-
 html5lib/html5parser.py             | 25 ++++++++++---------------
 html5lib/serializer.py              |  5 ++---
 html5lib/tests/conftest.py          |  2 ++
 html5lib/tests/support.py           |  8 +++++---
 html5lib/tests/test_encoding.py     |  5 +++--
 html5lib/tests/test_parser2.py      |  6 +++++-
 html5lib/tests/test_serializer.py   |  4 ++--
 html5lib/tests/test_stream.py       | 15 +++++++++------
 html5lib/tests/test_treewalkers.py  |  4 ++--
 html5lib/tests/tokenizer.py         |  2 +-
 html5lib/treeadapters/__init__.py   |  8 +++-----
 html5lib/treeadapters/genshi.py     |  3 +--
 html5lib/treeadapters/sax.py        |  2 +-
 html5lib/treebuilders/__init__.py   |  2 +-
 html5lib/treebuilders/base.py       | 20 +++++++++-----------
 html5lib/treebuilders/etree_lxml.py |  2 +-
 parse.py                            |  8 +++++---
 requirements-test.txt               | 14 ++++++++++++++
 setup.py                            |  4 ++--
 tox.ini                             | 14 ++++++++++++++
 utils/entities.py                   |  2 ++
 utils/spider.py                     | 10 ++++++----
 30 files changed, 122 insertions(+), 85 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index ea74d5db..c60b8510 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -3,7 +3,7 @@ ignore=tests
 
 [MESSAGES CONTROL]
 # messages up to fixme should probably be fixed somehow
-disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda
+disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda,bad-option-value,star-args,abstract-class-little-used,abstract-class-not-used
 
 [FORMAT]
 max-line-length=139
diff --git a/.travis.yml b/.travis.yml
index 94bb87e7..7f9aecd5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,6 +27,7 @@ script:
   - if [[ $TRAVIS_PYTHON_VERSION == pypy* ]]; then py.test; fi
   - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage run -m pytest; fi
   - bash flake8-run.sh
+  - pylint --rcfile=.pylintrc html5lib
 
 after_script:
   - python debug-info.py
diff --git a/debug-info.py b/debug-info.py
index f93fbdbe..c213f7cc 100644
--- a/debug-info.py
+++ b/debug-info.py
@@ -1,4 +1,4 @@
-from __future__ import print_function, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import platform
 import sys
diff --git a/flake8-run.sh b/flake8-run.sh
index d9264946..e8652e9e 100755
--- a/flake8-run.sh
+++ b/flake8-run.sh
@@ -5,5 +5,6 @@ if [[ ! -x $(which flake8) ]]; then
   exit 1
 fi
 
+flake8 --version
 flake8 `dirname $0`
 exit $?
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
index 79f2331e..a6787ac4 100644
--- a/html5lib/_inputstream.py
+++ b/html5lib/_inputstream.py
@@ -238,8 +238,9 @@ def position(self):
         return (line + 1, col)
 
     def char(self):
-        """ Read one character from the stream or queue if available. Return
-            EOF when EOF is reached.
+        """Read one character from the stream or queue if available.
+
+        Return EOF when EOF is reached.
         """
         # Read a new chunk from the input stream if necessary
         if self.chunkOffset >= self.chunkSize:
@@ -318,7 +319,7 @@ def characterErrorsUCS2(self, data):
                 self.errors.append("invalid-codepoint")
 
     def charsUntil(self, characters, opposite=False):
-        """ Returns a string of characters from the stream up to but not
+        """Returns a string of characters from the stream up to but not
         including any character in 'characters' or EOF. 'characters' must be
         a container that supports the 'in' method and iteration over its
         characters.
@@ -330,7 +331,7 @@ def charsUntil(self, characters, opposite=False):
         except KeyError:
             if __debug__:
                 for c in characters:
-                    assert(ord(c) < 128)
+                    assert ord(c) < 128
             regex = "".join(["\\x%02x" % ord(c) for c in characters])
             if not opposite:
                 regex = "^%s" % regex
@@ -449,7 +450,7 @@ def openStream(self, source):
 
         try:
             stream.seek(stream.tell())
-        except:  # pylint:disable=bare-except
+        except Exception:  # pylint: disable=broad-except
             stream = BufferedStream(stream)
 
         return stream
@@ -567,8 +568,7 @@ def detectBOM(self):
             return None
 
     def detectEncodingMeta(self):
-        """Report the encoding declared by the meta element
-        """
+        """Report the encoding declared by the meta element."""
         buffer = self.rawStream.read(self.numBytesMeta)
         assert isinstance(buffer, bytes)
         parser = EncodingParser(buffer)
@@ -686,10 +686,12 @@ def jumpTo(self, bytes):
 
 
 class EncodingParser(object):
-    """Mini parser for detecting character encoding from meta elements"""
+    """Mini parser for detecting character encoding from meta elements."""
 
     def __init__(self, data):
-        """string - the data to work on for encoding detection"""
+        """Constructor.
+
+        data - the data to work on for encoding detection"""
         self.data = EncodingBytes(data)
         self.encoding = None
 
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
index 6078f66a..4cf46c2a 100644
--- a/html5lib/_tokenizer.py
+++ b/html5lib/_tokenizer.py
@@ -19,7 +19,7 @@
 
 
 class HTMLTokenizer(object):
-    """ This class takes care of tokenizing HTML.
+    """This class takes care of tokenizing HTML.
 
     * self.currentToken
       Holds the token that is currently being processed.
@@ -47,7 +47,7 @@ def __init__(self, stream, parser=None, **kwargs):
         super(HTMLTokenizer, self).__init__()
 
     def __iter__(self):
-        """ This is where the magic happens.
+        """This is where the magic happens.
 
         We do our usually processing through the states and when we have a token
         to return we yield the token which pauses processing until the next token
@@ -215,8 +215,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
             self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
 
     def processEntityInAttribute(self, allowedChar):
-        """This method replaces the need for "entityInAttributeValueState".
-        """
+        """This method replaces the need for "entityInAttributeValueState"."""
         self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
 
     def emitCurrentToken(self):
@@ -1686,8 +1685,7 @@ def bogusDoctypeState(self):
             self.stream.unget(data)
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
-        else:
-            pass
+
         return True
 
     def cdataSectionState(self):
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 03f0dab7..8cfe5ee6 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -30,14 +30,14 @@
         # We need this with u"" because of http://bugs.jython.org/issue2039
         _x = eval('u"\\uD800"')  # pylint:disable=eval-used
         assert isinstance(_x, text_type)
-except:  # pylint:disable=bare-except
+except Exception:  # pylint: disable=broad-except
     supports_lone_surrogates = False
 else:
     supports_lone_surrogates = True
 
 
 class MethodDispatcher(dict):
-    """Dict with 2 special properties:
+    """Dict with 2 special properties.
 
     On initiation, keys that are lists, sets or tuples are converted to
     multiple keys so accessing any one of the items in the original
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index b5ddcb93..292af95e 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -705,7 +705,7 @@
 
 
 class Filter(base.Filter):
-    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+    """Sanitization of XHTML+MathML+SVG and of inline style attributes."""
     def __init__(self,
                  source,
                  allowed_elements=allowed_elements,
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 2abd63e4..bb500811 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -55,10 +55,11 @@ def __new__(meta, classname, bases, classDict):
 
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
-        malformed) HTML"""
+       malformed) HTML"""
 
     def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
-        """
+        """Constructor.
+
         strict - raise an exception when a parse error is encountered
 
         tree - a treebuilder class controlling the type of tree that will be
@@ -108,10 +109,9 @@ def reset(self):
                 self.tokenizer.state = self.tokenizer.rawtextState
             elif self.innerHTML == 'plaintext':
                 self.tokenizer.state = self.tokenizer.plaintextState
-            else:
-                # state already is data state
-                # self.tokenizer.state = self.tokenizer.dataState
-                pass
+            # otherwise the tokenizer is already in the data state,
+            # so there is no need to set self.tokenizer.state to dataState
+
             self.phase = self.phases["beforeHtml"]
             self.phase.insertHtmlElement()
             self.resetInsertionMode()
@@ -262,7 +262,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
             raise ParseError(E[errorcode] % datavars)
 
     def normalizeToken(self, token):
-        """ HTML5 specific normalizations to the token stream """
+        """HTML5 specific normalizations to the token stream."""
 
         if token["type"] == tokenTypes["StartTag"]:
             raw = token["data"]
@@ -358,10 +358,7 @@ def log(function):
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
                 token = args[0]
-                try:
-                    info = {"type": type_names[token['type']]}
-                except:
-                    raise
+                info = {"type": type_names[token['type']]}
                 if token['type'] in tagTokenTypes:
                     info["name"] = token['name']
 
@@ -383,8 +380,7 @@ def getMetaclass(use_metaclass, metaclass_func):
 
     # pylint:disable=unused-argument
     class Phase(with_metaclass(getMetaclass(debug, log))):
-        """Base class for helper object that implements each phase of processing
-        """
+        """Base class for helper object that implements each phase of processing."""
 
         def __init__(self, parser, tree):
             self.parser = parser
@@ -1285,7 +1281,7 @@ def startTagSvg(self, token):
                 token["selfClosingAcknowledged"] = True
 
         def startTagMisplaced(self, token):
-            """ Elements that should be children of other elements that have a
+            """Elements that should be children of other elements that have a
             different insertion mode; here they are ignored
             "caption", "col", "colgroup", "frame", "frameset", "head",
             "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
@@ -2730,4 +2726,3 @@ def impliedTagToken(name, type="EndTag", attributes=None,
 
 class ParseError(Exception):
     """Error in parsed document"""
-    pass
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
index 103dd206..f3786ae9 100644
--- a/html5lib/serializer.py
+++ b/html5lib/serializer.py
@@ -166,14 +166,14 @@ def __init__(self, **kwargs):
         self.strict = False
 
     def encode(self, string):
-        assert(isinstance(string, text_type))
+        assert isinstance(string, text_type)
         if self.encoding:
             return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
     def encodeStrict(self, string):
-        assert(isinstance(string, text_type))
+        assert isinstance(string, text_type)
         if self.encoding:
             return string.encode(self.encoding, "strict")
         else:
@@ -331,4 +331,3 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
 
 class SerializeError(Exception):
     """Error in serialized tree"""
-    pass
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
index ce93eff6..bfacd7e7 100644
--- a/html5lib/tests/conftest.py
+++ b/html5lib/tests/conftest.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, unicode_literals
+
 import os.path
 
 import pkg_resources
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index dab65c1c..d091cdae 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -49,7 +49,8 @@
         }
 
 try:
-    import lxml.etree as lxml  # noqa
+    import lxml.etree as lxml
+    del lxml
 except ImportError:
     treeTypes['lxml'] = None
 else:
@@ -60,7 +61,8 @@
 
 # Genshi impls
 try:
-    import genshi  # noqa
+    import genshi
+    del genshi
 except ImportError:
     treeTypes["genshi"] = None
 else:
@@ -132,7 +134,7 @@ def normaliseOutput(self, data):
 
 def convert(stripChars):
     def convertData(data):
-        """convert the output of str(document) to the format used in the testcases"""
+        """Convert the output of str(document) to the format used in the testcases"""
         data = data.split("\n")
         rv = []
         for line in data:
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 9a411c77..de59ef54 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import os
 
@@ -105,7 +105,8 @@ def test_encoding():
 
 # pylint:disable=wrong-import-position
 try:
-    import chardet  # noqa
+    import chardet
+    del chardet
 except ImportError:
     print("chardet not found, skipping chardet tests")
 else:
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index bcc0bf48..ad5349d7 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -4,11 +4,15 @@
 
 import io
 
-from . import support  # noqa
+from . import support
+
 
 from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser
 
+# `from . import support` is imported only for its side-effects; del marks it as used
+del support
+
 
 # tests that aren't autogenerated from text files
 def test_assertDoctypeCloneable():
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 9333286e..f7cd0037 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -74,7 +74,7 @@ def _convertAttrib(self, attribs):
         attrs = {}
         for attrib in attribs:
             name = (attrib["namespace"], attrib["name"])
-            assert(name not in attrs)
+            assert name not in attrs
             attrs[name] = attrib["value"]
         return attrs
 
@@ -93,7 +93,7 @@ def runSerializerTest(input, expected, options):
     encoding = options.get("encoding", None)
 
     if encoding:
-        expected = list(map(lambda x: x.encode(encoding), expected))
+        expected = list(x.encode(encoding) for x in expected)
 
     result = serialize_html(input, options)
     if len(expected) == 1:
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index 27c39538..f7f6a153 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,7 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import support  # noqa
-
 import codecs
 import sys
 from io import BytesIO, StringIO
@@ -11,10 +9,15 @@
 import six
 from six.moves import http_client, urllib
 
+from . import support
+
 from html5lib._inputstream import (BufferedStream, HTMLInputStream,
                                    HTMLUnicodeInputStream, HTMLBinaryInputStream)
 from html5lib._utils import supports_lone_surrogates
 
+# `from . import support` is imported only for its side-effects; del marks it as used
+del support
+
 
 def test_basic():
     s = b"abc"
@@ -182,8 +185,8 @@ def test_position2():
 
 
 def test_python_issue_20007():
-    """
-    Make sure we have a work-around for Python bug #20007
+    """Ensure we have a work-around for Python bug #20007.
+
     http://bugs.python.org/issue20007
     """
     class FakeSocket(object):
@@ -198,8 +201,8 @@ def makefile(self, _mode, _bufsize=None):
 
 
 def test_python_issue_20007_b():
-    """
-    Make sure we have a work-around for Python bug #20007
+    """Ensure we have a work-around for Python bug #20007 (part b).
+
     http://bugs.python.org/issue20007
     """
     if six.PY2:
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 67fc89e5..061b6639 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -50,7 +50,7 @@ def test_all_tokens():
 
 
 def set_attribute_on_first_child(docfrag, name, value, treeName):
-    """naively sets an attribute on the first child of the document
+    """Naively sets an attribute on the first child of the document
     fragment passed in"""
     setter = {'ElementTree': lambda d: d[0].set,
               'DOM': lambda d: d.firstChild.setAttribute}
@@ -62,7 +62,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
 
 
 def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
-    """tests what happens when we add attributes to the intext"""
+    """Test what happens when we add attributes to the intext"""
     treeName, treeClass = tree
     if treeClass is None:
         pytest.skip("Treebuilder not loaded")
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index 1440a722..6649878e 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -132,7 +132,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
 
 def unescape(test):
     def decode(inp):
-        """Decode \\uXXXX escapes
+        r"""Decode \\uXXXX escapes
 
         This decodes \\uXXXX escapes, possibly into non-BMP characters when
         two surrogate character escapes are adjacent to each other.
diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py
index 4f978466..290736bb 100644
--- a/html5lib/treeadapters/__init__.py
+++ b/html5lib/treeadapters/__init__.py
@@ -2,11 +2,9 @@
 
 from . import sax
 
-__all__ = ["sax"]
-
 try:
-    from . import genshi  # noqa
+    from . import genshi
 except ImportError:
-    pass
+    __all__ = ("sax", )
 else:
-    __all__.append("genshi")
+    __all__ = ("sax", "genshi")
diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py
index 04e316df..0e955b46 100644
--- a/html5lib/treeadapters/genshi.py
+++ b/html5lib/treeadapters/genshi.py
@@ -40,8 +40,7 @@ def to_genshi(walker):
             yield DOCTYPE, (token["name"], token["publicId"],
                             token["systemId"]), (None, -1, -1)
 
-        else:
-            pass  # FIXME: What to do?
+        # FIXME: What to do if type is not known?
 
     if text:
         yield TEXT, "".join(text), (None, -1, -1)
diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py
index ad47df95..17ba0cf4 100644
--- a/html5lib/treeadapters/sax.py
+++ b/html5lib/treeadapters/sax.py
@@ -11,7 +11,7 @@
 
 
 def to_sax(walker, handler):
-    """Call SAX-like content handler based on treewalker walker"""
+    """Call SAX-like content handler based on treewalker walker."""
     handler.startDocument()
     for prefix, namespace in prefix_mapping.items():
         handler.startPrefixMapping(prefix, namespace)
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index e2328847..4dd3852b 100644
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -34,7 +34,7 @@
 
 
 def getTreeBuilder(treeType, implementation=None, **kwargs):
-    """Get a TreeBuilder class for various types of tree with built-in support
+    """Get a TreeBuilder class for various types of tree with built-in support.
 
     treeType - the name of the tree type required (case-insensitive). Supported
                values are:
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index a4b2792a..c6169ab6 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -52,8 +52,7 @@ def __repr__(self):
         return "<%s>" % (self.name)
 
     def appendChild(self, node):
-        """Insert node as a child of the current node
-        """
+        """Insert node as a child of the current node."""
         raise NotImplementedError
 
     def insertText(self, data, insertBefore=None):
@@ -69,8 +68,7 @@ def insertBefore(self, node, refNode):
         raise NotImplementedError
 
     def removeChild(self, node):
-        """Remove node from the children of the current node
-        """
+        """Remove node from the children of the current node."""
         raise NotImplementedError
 
     def reparentChildren(self, newParent):
@@ -90,8 +88,7 @@ def cloneNode(self):
         raise NotImplementedError
 
     def hasContent(self):
-        """Return true if the node has children or text, false otherwise
-        """
+        """Return true if the node has children or text, false otherwise."""
         raise NotImplementedError
 
 
@@ -367,17 +364,18 @@ def generateImpliedEndTags(self, exclude=None):
             self.generateImpliedEndTags(exclude)
 
     def getDocument(self):
-        "Return the final tree"
+        """Return the final tree."""
         return self.document
 
     def getFragment(self):
-        "Return the final fragment"
+        """Return the final fragment."""
         # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
         return fragment
 
-    def testSerializer(self, node):
-        """Serialize the subtree of node in the format required by unit tests
-        node - the node from which to start serializing"""
+    def testSerializer(self, element):
+        """Serialize the subtree of element in the format required by unit tests.
+
+        element - the node from which to start serializing"""
         raise NotImplementedError
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 908820c0..e8b6bc56 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -77,7 +77,7 @@ def serializeElement(element, indent=0):
                 while next_element is not None:
                     serializeElement(next_element, indent + 2)
                     next_element = next_element.getnext()
-            elif isinstance(element, str) or isinstance(element, bytes):
+            elif isinstance(element, (str, bytes)):
                 # Text in a fragment
                 assert isinstance(element, str) or sys.version_info[0] == 2
                 rv.append("|%s\"%s\"" % (' ' * indent, element))
diff --git a/parse.py b/parse.py
index 3e65c330..c849c1f0 100755
--- a/parse.py
+++ b/parse.py
@@ -3,6 +3,8 @@
 
 Parse a document to a tree, with optional profiling
 """
+from __future__ import absolute_import, division, unicode_literals, print_function
+
 
 import sys
 import traceback
@@ -33,7 +35,7 @@ def parse():
                 if contentType:
                     (mediaType, params) = cgi.parse_header(contentType)
                     encoding = params.get('charset')
-            except:
+            except Exception:
                 pass
         elif f == '-':
             f = sys.stdin
@@ -94,7 +96,7 @@ def parse():
 def run(parseMethod, f, encoding, scripting):
     try:
         document = parseMethod(f, override_encoding=encoding, scripting=scripting)
-    except:
+    except Exception:
         document = None
         traceback.print_exc()
     return document
@@ -127,7 +129,7 @@ def printOutput(parser, document, opts):
             for opt in serializer.HTMLSerializer.options:
                 try:
                     kwargs[opt] = getattr(opts, opt)
-                except:
+                except AttributeError:
                     pass
             if not kwargs['quote_char']:
                 del kwargs['quote_char']
diff --git a/requirements-test.txt b/requirements-test.txt
index 40df78d4..64e9cf75 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,20 @@
 -r requirements.txt
 
 flake8<3.0
+flake8-docstrings
+flake8-string-format
+flake8-future-import
+flake8-debugger
+flake8-print
+hacking
+flake8-tuple
+flake8-dodgy
+ebb-lint ; python_version > '2.6'
+
+pylint ; python_version > '2.6'
+pylint<1.4 ; python_version <= '2.6'
+astroid<1.3.6 ; python_version <= '2.6'
+
 pytest
 pytest-expect>=1.1,<2.0
 mock
diff --git a/setup.py b/setup.py
index 00fee241..cd94afdc 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import ast
 import codecs
@@ -83,7 +83,7 @@ def default_environment():
 with open(join(here, "html5lib", "__init__.py"), "rb") as init_file:
     t = ast.parse(init_file.read(), filename="__init__.py", mode="exec")
     assert isinstance(t, ast.Module)
-    assignments = filter(lambda x: isinstance(x, ast.Assign), t.body)
+    assignments = (x for x in t.body if isinstance(x, ast.Assign))
     for a in assignments:
         if (len(a.targets) == 1 and
                 isinstance(a.targets[0], ast.Name) and
diff --git a/tox.ini b/tox.ini
index da64de71..24fcaef9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,6 +4,15 @@ envlist = {py26,py27,py33,py34,py35,pypy}-{base,optional}
 [testenv]
 deps =
   flake8<3.0
+  flake8-docstrings
+  flake8-string-format
+  flake8-future-import
+  flake8-debugger
+  flake8-print
+  hacking
+  flake8-tuple
+  flake8-dodgy
+  ebb-lint ; python_version > '2.6'
   pytest
   pytest-expect>=1.1,<2.0
   mock
@@ -15,3 +24,8 @@ deps =
 commands =
   {envbindir}/py.test
   {toxinidir}/flake8-run.sh
+
+[flake8]
+ignore = D1,D202,D204,D205,D208,D209,D400,D401,FI13,FI50,FI51,FI53,FI54,H101,H301,H304,H306,H403,H405,L101,L102,L201,L202,L204,L205,T003
+min-version = 2.6
+require-code = True
diff --git a/utils/entities.py b/utils/entities.py
index 6dccf5f0..6d91587a 100644
--- a/utils/entities.py
+++ b/utils/entities.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, unicode_literals, print_function
+
 import json
 
 import html5lib
diff --git a/utils/spider.py b/utils/spider.py
index 3a325888..2cdc0aaf 100644
--- a/utils/spider.py
+++ b/utils/spider.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python
-"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree
+"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree.
 
 usage:
 import spider
 s = spider.Spider()
 s.spider("http://www.google.com", maxURLs=100)
 """
+from __future__ import absolute_import, division, unicode_literals, print_function
 
 import urllib.request
 import urllib.error
 import urllib.parse
 import urllib.robotparser
-import md5
+
+from hashlib import md5
 
 import httplib2
 
@@ -46,7 +48,7 @@ def parse(self, content):
         p = html5lib.HTMLParser(tree=etree.TreeBuilder)
         try:
             tree = p.parse(content)
-        except:
+        except Exception:
             self.buggyURLs.add(self.currentURL)
             failed = True
             print("BUGGY:", self.currentURL)
@@ -57,7 +59,7 @@ def parse(self, content):
     def loadURL(self, url):
         resp, content = self.http.request(url, "GET")
         self.currentURL = url
-        digest = md5.md5(content).hexdigest()
+        digest = md5(content).hexdigest()
         if digest in self.contentDigest:
             content = None
             self.visitedURLs.add(url)