def memoize(func):
    """Return a caching wrapper around ``func``.

    The first call with a given set of arguments invokes ``func`` and stores
    the result; later calls with equal arguments return the stored object
    without calling ``func`` again.  Note that the *same* object is handed to
    every caller, so callers must not mutate a cached result.

    The cache is a plain dict held in the wrapper's closure: it is unbounded
    and lives as long as the wrapper does.

    :arg func: the callable to wrap; all positional and keyword argument
        values passed to the wrapper must be hashable

    :returns: a wrapper with the same call signature as ``func``

    :raises TypeError: (from the wrapper) when an argument is unhashable

    """
    # Imported locally so this helper does not depend on the module's
    # top-level import block.
    from functools import wraps

    cache = {}

    @wraps(func)  # keep func's __name__/__doc__ visible on the wrapper
    def wrapped(*args, **kwargs):
        # A frozenset makes the key independent of keyword order, so
        # f(a=1, b=2) and f(b=2, a=1) share a single cache entry.
        # Positional and keyword spellings of the same call are still
        # cached separately, since the signature is not inspected.
        key = (args, frozenset(kwargs.items()))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapped