diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index a7cb98be..34f7ac5c 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -89,12 +89,11 @@ def _parse(self, stream, innerHTML=False, container="div", parser=self, **kwargs) self.reset() - while True: - try: - self.mainLoop() - break - except ReparseException: - self.reset() + try: + self.mainLoop() + except ReparseException: + self.reset() + self.mainLoop() def reset(self): self.tree.reset() diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index ad5ca7dc..15acba0d 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -509,8 +509,8 @@ def changeEncoding(self, newEncoding): self.charEncoding = (self.charEncoding[0], "certain") else: self.rawStream.seek(0) - self.reset() self.charEncoding = (newEncoding, "certain") + self.reset() raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) def detectBOM(self): diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 837e989f..3837fe09 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -12,6 +12,28 @@ from html5lib import HTMLParser, inputstream +def test_basic_prescan_length(): + data = "