From 31b37837621d8c0d08499769c584da43161823c0 Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Wed, 29 Nov 2017 16:27:09 -0500 Subject: [PATCH] First pass at documenting html5lib.filters --- html5lib/filters/alphabeticalattributes.py | 1 + html5lib/filters/inject_meta_charset.py | 8 ++++ html5lib/filters/lint.py | 12 ++++++ html5lib/filters/optionaltags.py | 1 + html5lib/filters/sanitizer.py | 43 +++++++++++++++++++--- html5lib/filters/whitespace.py | 2 +- 6 files changed, 60 insertions(+), 7 deletions(-) diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5fea9f69..5ba926e3 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -17,6 +17,7 @@ def _attr_key(attr): class Filter(base.Filter): + """Alphabetizes attributes for elements""" def __iter__(self): for token in base.Filter.__iter__(self): if token["type"] in ("StartTag", "EmptyTag"): diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index 2059ec86..aefb5c84 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -4,7 +4,15 @@ class Filter(base.Filter): + """Injects ``<meta charset=ENCODING>`` tag into head of document""" def __init__(self, source, encoding): + """Creates a Filter + + :arg source: the source token stream + + :arg encoding: the encoding to set + + """ base.Filter.__init__(self, source) self.encoding = encoding diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index a9c0831a..acd4d7a2 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -10,7 +10,19 @@ class Filter(base.Filter): + """Lints the token stream for errors + + If it finds any errors, it'll raise an ``AssertionError``. 
+ + """ def __init__(self, source, require_matching_tags=True): + """Creates a Filter + + :arg source: the source token stream + + :arg require_matching_tags: whether or not to require matching tags + + """ super(Filter, self).__init__(source) self.require_matching_tags = require_matching_tags diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index f6edb734..4a865012 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -4,6 +4,7 @@ class Filter(base.Filter): + """Removes optional tags from the token stream""" def slider(self): previous1 = previous2 = None for token in self.source: diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index dc801668..6633168f 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -705,7 +705,7 @@ class Filter(base.Filter): - """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" + """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" def __init__(self, source, allowed_elements=allowed_elements, @@ -718,6 +718,37 @@ def __init__(self, attr_val_is_uri=attr_val_is_uri, svg_attr_val_allows_ref=svg_attr_val_allows_ref, svg_allow_local_href=svg_allow_local_href): + """Creates a Filter + + :arg allowed_elements: set of elements to allow--everything else will + be escaped + + :arg allowed_attributes: set of attributes to allow in + elements--everything else will be stripped + + :arg allowed_css_properties: set of CSS properties to allow--everything + else will be stripped + + :arg allowed_css_keywords: set of CSS keywords to allow--everything + else will be stripped + + :arg allowed_svg_properties: set of SVG properties to allow--everything + else will be removed + + :arg allowed_protocols: set of allowed protocols for URIs + + :arg allowed_content_types: set of allowed content types for ``data`` URIs. 
+ + :arg attr_val_is_uri: set of attributes that have URI values--values + that have a scheme not listed in ``allowed_protocols`` are removed + + :arg svg_attr_val_allows_ref: set of SVG attributes that can have + references + + :arg svg_allow_local_href: set of SVG elements that can have local + hrefs--these are removed + + """ super(Filter, self).__init__(source) self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes @@ -737,11 +768,11 @@ def __iter__(self): yield token # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and - # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style - # attributes are parsed, and a restricted set, # specified by - # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. - # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified - # in ALLOWED_PROTOCOLS are allowed. + # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes + # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and + # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI + # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are + # allowed. # # sanitize_html('<script> do_nasty_stuff() </script>') # => &lt;script> do_nasty_stuff() &lt;/script> diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 89210528..0d12584b 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -10,7 +10,7 @@ class Filter(base.Filter): - + """Collapses whitespace except in pre, textarea, and script elements""" spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) def __iter__(self):