readabilite: forgot count_content

count_content was meant to be included in an earlier commit.
master
pictuga 2018-10-25 01:11:29 +02:00
parent 1d6d0b8ff1
commit 72d03f21fe
1 changed files with 5 additions and 0 deletions

View File

@@ -38,6 +38,11 @@ def count_words(string):
return count
def count_content(node):
    """Return the amount of content under *node*.

    The result is the word count of the node's text (as reported by
    ``count_words`` on ``node.text_content()``) plus the number of
    descendant ``img`` elements found under the node.
    """
    word_total = count_words(node.text_content())
    images = node.findall('.//img')
    return word_total + len(images)
regex_bad = re.compile('|'.join(['comment', 'community', 'extra', 'foot',
'sponsor', 'pagination', 'pager', 'tweet', 'twitter', 'com-', 'masthead',
'media', 'meta', 'related', 'shopping', 'tags', 'tool', 'author', 'about']),