diff --git a/morss/crawler.py b/morss/crawler.py index 4e44cf9..955cf02 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -368,7 +368,7 @@ class BrowserlyHeaderHandler(BaseHandler): def iter_html_tag(html_str, tag_name): " To avoid parsing whole pages when looking for a simple tag " - re_tag = r'<%s(\s*[^>])*>' % tag_name + re_tag = r'<%s\s+[^>]+>' % tag_name re_attr = r'(?P[^=\s]+)=[\'"](?P[^\'"]+)[\'"]' for tag_match in re.finditer(re_tag, html_str):