readabilite: limit HTML-comment-related issues
continuous-integration/drone/push Build is passing Details

master
pictuga 2022-01-01 13:58:42 +01:00
parent afc31eb6e9
commit 3f92787b38
1 changed file with 12 additions and 3 deletions

View File

@@ -148,15 +148,20 @@ def score_all(node):
for child in node: for child in node:
score = score_node(child) score = score_node(child)
child.attrib['morss_own_score'] = str(float(score)) set_score(child, score, 'morss_own_score')
if score > 0 or len(list(child.iterancestors())) <= 2: if score > 0 or len(list(child.iterancestors())) <= 2:
spread_score(child, score) spread_score(child, score)
score_all(child) score_all(child)
def set_score(node, value): def set_score(node, value, label='morss_score'):
node.attrib['morss_score'] = str(float(value)) try:
node.attrib[label] = str(float(value))
except KeyError:
# catch issues with e.g. html comments
pass
def get_score(node): def get_score(node):
@@ -196,6 +201,10 @@ def clean_root(root, keep_threshold=None):
def clean_node(node, keep_threshold=None): def clean_node(node, keep_threshold=None):
parent = node.getparent() parent = node.getparent()
if (isinstance(node, lxml.html.HtmlComment)
or isinstance(node, lxml.html.HtmlProcessingInstruction)):
return
if parent is None: if parent is None:
# this is <html/> (or a removed element waiting for GC) # this is <html/> (or a removed element waiting for GC)
return return