crawler: comment

master
pictuga 2021-08-29 00:18:50 +02:00
parent da5442a1dc
commit 5582fbef31
1 changed file with 2 additions and 0 deletions

View File

@@ -346,6 +346,8 @@ class BrowserlyHeaderHandler(BaseHandler):
def iter_html_tag(html_str, tag_name): def iter_html_tag(html_str, tag_name):
" To avoid parsing whole pages when looking for a simple tag "
re_tag = r'<%s(\s*[^>])*>' % tag_name re_tag = r'<%s(\s*[^>])*>' % tag_name
re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]' re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]'