Attempted compromise for `is_html` RegEx

This commit is contained in:
Adam Voss 2017-04-18 09:45:38 -05:00
parent 4ab407007e
commit e10caaf5de
1 changed file with 2 additions and 2 deletions

View File

@@ -604,8 +604,8 @@ def is_html(text):
>>> is_html('a < b < c')
False
"""
e = re.compile('<[a-zA-Z][a-zA-Z0-9]*(\\s.*)?>')
return e.search(text) is not None
html_test = re.compile('<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
return bool(html_test.search(text))
def remove_html_tags(html):