readabilite: some technical improvements for scoring
Linear score spreading; removed misplaced debugging code
This commit is contained in:
		@@ -130,6 +130,9 @@ def score_all(root):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def spread_score(node, score, grades):
 | 
					def spread_score(node, score, grades):
 | 
				
			||||||
 | 
					    " Spread the node's score to its parents, on a linear way "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    delta = score / 2
 | 
				
			||||||
    for ancestor in [node,] + list(node.iterancestors()):
 | 
					    for ancestor in [node,] + list(node.iterancestors()):
 | 
				
			||||||
        if score >= 1 or ancestor is node:
 | 
					        if score >= 1 or ancestor is node:
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
@@ -137,7 +140,7 @@ def spread_score(node, score, grades):
 | 
				
			|||||||
            except KeyError:
 | 
					            except KeyError:
 | 
				
			||||||
                grades[ancestor] = score
 | 
					                grades[ancestor] = score
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            score /= 2
 | 
					            score -= delta
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            break
 | 
					            break
 | 
				
			||||||
@@ -145,7 +148,7 @@ def spread_score(node, score, grades):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def write_score_all(root, grades):
 | 
					def write_score_all(root, grades):
 | 
				
			||||||
    for node in root.iter():
 | 
					    for node in root.iter():
 | 
				
			||||||
        node.attrib['score'] = str(int(grades[node]))
 | 
					        node.attrib['score'] = str(int(grades.get(node, 0)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def clean_node(node):
 | 
					def clean_node(node):
 | 
				
			||||||
@@ -243,15 +246,15 @@ def rank_nodes(grades):
 | 
				
			|||||||
    return sorted(grades.items(), key=lambda x: x[1], reverse=True)
 | 
					    return sorted(grades.items(), key=lambda x: x[1], reverse=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_best_node(grades, highlight=False):
 | 
					def get_best_node(grades):
 | 
				
			||||||
 | 
					    " To pick the best (raw) node. Another function will clean it "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if len(grades) == 1:
 | 
				
			||||||
 | 
					        return grades[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    top = rank_nodes(grades)
 | 
					    top = rank_nodes(grades)
 | 
				
			||||||
    lowest = lowest_common_ancestor(top[0][0], top[1][0], 3)
 | 
					    lowest = lowest_common_ancestor(top[0][0], top[1][0], 3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if highlight:
 | 
					 | 
				
			||||||
        top[0][0].attrib['style'] = 'border: 2px solid blue'
 | 
					 | 
				
			||||||
        top[1][0].attrib['style'] = 'border: 2px solid green'
 | 
					 | 
				
			||||||
        lowest.attrib['style'] = 'outline: 2px solid red'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return lowest
 | 
					    return lowest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -259,8 +262,11 @@ def get_article(data, url=None, encoding=None):
 | 
				
			|||||||
    html = parse(data, encoding)
 | 
					    html = parse(data, encoding)
 | 
				
			||||||
    br2p(html)
 | 
					    br2p(html)
 | 
				
			||||||
    scores = score_all(html)
 | 
					    scores = score_all(html)
 | 
				
			||||||
    best = get_best_node(scores)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if not len(scores):
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    best = get_best_node(scores)
 | 
				
			||||||
    wc = count_words(best.text_content())
 | 
					    wc = count_words(best.text_content())
 | 
				
			||||||
    wca = count_words(' '.join([x.text_content() for x in best.findall('.//a')]))
 | 
					    wca = count_words(' '.join([x.text_content() for x in best.findall('.//a')]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user