parent
7fa183d713
commit
054f5c0846
27
morss.py
27
morss.py
|
@ -75,6 +75,12 @@ def lenHTML(txt):
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def countWord(txt):
|
||||||
|
if len(txt):
|
||||||
|
return len(lxml.html.fromstring(txt).text_content().split())
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
def parseOptions(available):
|
def parseOptions(available):
|
||||||
options = None
|
options = None
|
||||||
if 'REQUEST_URI' in os.environ:
|
if 'REQUEST_URI' in os.environ:
|
||||||
|
@ -237,13 +243,18 @@ def Fill(item, cache, feedurl="/", fast=False):
|
||||||
item.title = item.title.title()
|
item.title = item.title.title()
|
||||||
|
|
||||||
# content already provided?
|
# content already provided?
|
||||||
if item.content and item.desc:
|
count_content = countWord(item.content)
|
||||||
len_content = lenHTML(item.content)
|
count_desc = countWord(item.desc)
|
||||||
len_desc = lenHTML(item.desc)
|
|
||||||
log('content: %s vs %s' % (len_content, len_desc))
|
log('desc: %s words, content: %s words' % (count_content, count_desc))
|
||||||
if len_content > 5*len_desc:
|
|
||||||
log('provided')
|
if max(count_content, count_desc) > 500:
|
||||||
return True
|
log('long enough')
|
||||||
|
return True
|
||||||
|
|
||||||
|
if count_content > 5*count_desc > 0 and count_content > 50:
|
||||||
|
log('content bigger enough')
|
||||||
|
return True
|
||||||
|
|
||||||
# check cache and previous errors
|
# check cache and previous errors
|
||||||
if item.link in cache:
|
if item.link in cache:
|
||||||
|
@ -276,7 +287,7 @@ def Fill(item, cache, feedurl="/", fast=False):
|
||||||
data, url = ddl
|
data, url = ddl
|
||||||
|
|
||||||
out = readability.Document(data, url=url).summary(True)
|
out = readability.Document(data, url=url).summary(True)
|
||||||
if not item.desc or lenHTML(out) > lenHTML(item.desc):
|
if countWord(out) > max(count_content, count_desc) > 0:
|
||||||
item.content = out
|
item.content = out
|
||||||
cache.set(item.link, out)
|
cache.set(item.link, out)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue