diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 9e03b931..96c21d2e 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type from six.moves import http_client +from six.moves.urllib.response import addinfourl import codecs import re @@ -119,12 +120,17 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if isinstance(source, http_client.HTTPResponse): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 - isUnicode = False - elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) + if hasattr(source, "read"): + if isinstance(source, addinfourl): + checked_source = source.fp + else: + checked_source = source + if isinstance(checked_source, http_client.HTTPResponse): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + isUnicode = False + else: + isUnicode = isinstance(source.read(0), text_type) else: isUnicode = isinstance(source, text_type) diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 2a876c1d..e7c8d267 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -6,6 +6,7 @@ from io import BytesIO from six.moves import http_client +from six.moves.urllib.response import addinfourl from html5lib.inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) @@ -170,6 +171,17 @@ def makefile(self, _mode, _bufsize=None): stream = HTMLInputStream(source) self.assertEqual(stream.charsUntil(" "), "Text") + source = http_client.HTTPResponse(FakeSocket()) + source.begin() + try: + source = addinfourl(source, None, None) + except AttributeError: + # Fails on Python 2.x. 
+ # Apparently, addinfourl is only used with HTTPResponse on 3.x + pass + else: + stream = HTMLInputStream(source) + self.assertEqual(stream.charsUntil(" "), "Text") def buildTestSuite(): return unittest.defaultTestLoader.loadTestsFromName(__name__)