From bf7da77839804b7ca18c0f3a23cd7d3ef642ca82 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 26 Apr 2015 05:56:49 +0100 Subject: [PATCH] Add a more general fix for #127 (CPy #20007) based on #136. --- html5lib/inputstream.py | 12 ++++++++---- html5lib/tests/test_stream.py | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index ec191ab0..63373db9 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, unicode_literals + from six import text_type -from six.moves import http_client +from six.moves import http_client, urllib import codecs import re @@ -130,9 +131,12 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if isinstance(source, http_client.HTTPResponse): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + if (isinstance(source, http_client.HTTPResponse) or + # Also check for addinfourl wrapping HTTPResponse + (isinstance(source, urllib.response.addbase) and + isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 2a876c1d..4436ef8a 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -4,8 +4,10 @@ import unittest import codecs from io import BytesIO +import socket -from six.moves import http_client +import six +from six.moves import http_client, urllib from html5lib.inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) @@ -170,6 +172,24 @@ def makefile(self, _mode, _bufsize=None): stream = HTMLInputStream(source) self.assertEqual(stream.charsUntil(" "), "Text") + def test_python_issue_20007_b(self): + """ + Make sure we have a work-around for Python bug #20007 + http://bugs.python.org/issue20007 + """ + if six.PY2: + return + + class FakeSocket(object): + def makefile(self, _mode, _bufsize=None): + return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") + + source = http_client.HTTPResponse(FakeSocket()) + source.begin() + wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") + stream = HTMLInputStream(wrapped) + self.assertEqual(stream.charsUntil(" "), "Text") + def buildTestSuite(): return unittest.defaultTestLoader.loadTestsFromName(__name__)