Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 17499b9

Browse files
mobengsnedders
authored and committed
Avoid DeprecationWarnings on Python 3.6 (#318)
Python 3.6 produces warnings on invalid escape sequences in strings, such as "\s", and they will be syntax errors in a future version of Python. See <https://docs.python.org/3/reference/lexical_analysis.html#strings>.
1 parent ec674a9 commit 17499b9

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

html5lib/filters/sanitizer.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -782,7 +782,7 @@ def allowed_token(self, token):
782782
# characters, nor why we call unescape. I just know it's always been here.
783783
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
784784
# this will do is remove *more* than it otherwise would.
785-
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '',
785+
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
786786
unescape(attrs[attr])).lower()
787787
# remove replacement characters from unescaped characters
788788
val_unescaped = val_unescaped.replace("\ufffd", "")
@@ -807,7 +807,7 @@ def allowed_token(self, token):
807807
' ',
808808
unescape(attrs[attr]))
809809
if (token["name"] in self.svg_allow_local_href and
810-
(namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*',
810+
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
811811
attrs[(namespaces['xlink'], 'href')])):
812812
del attrs[(namespaces['xlink'], 'href')]
813813
if (None, 'style') in attrs:
@@ -837,16 +837,16 @@ def disallowed_token(self, token):
837837

838838
def sanitize_css(self, style):
839839
# disallow urls
840-
style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
840+
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
841841

842842
# gauntlet
843-
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
843+
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
844844
return ''
845-
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
845+
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
846846
return ''
847847

848848
clean = []
849-
for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
849+
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
850850
if not value:
851851
continue
852852
if prop.lower() in self.allowed_css_properties:
@@ -855,7 +855,7 @@ def sanitize_css(self, style):
855855
'padding']:
856856
for keyword in value.split():
857857
if keyword not in self.allowed_css_keywords and \
858-
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
858+
not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
859859
break
860860
else:
861861
clean.append(prop + ': ' + value + ';')

0 commit comments

Comments
 (0)