From 5582fbef31b454930726d652dfa405520e51f29e Mon Sep 17 00:00:00 2001 From: pictuga Date: Sun, 29 Aug 2021 00:18:50 +0200 Subject: [PATCH] crawler: comment --- morss/crawler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/morss/crawler.py b/morss/crawler.py index 4868fc2..87f0149 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -346,6 +346,8 @@ class BrowserlyHeaderHandler(BaseHandler): def iter_html_tag(html_str, tag_name): + " To avoid parsing whole pages when looking for a simple tag " + re_tag = r'<%s(\s*[^>])*>' % tag_name re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]'