Check url before looking for provided content
Also use the lenHTML() function defined lately
master
parent
85e40cde4e
commit
8e2aab55e7
18
morss.py
18
morss.py
|
@ -324,14 +324,7 @@ def Fill(rss, cache, mode='feed'):
|
||||||
log('no link')
|
log('no link')
|
||||||
return
|
return
|
||||||
|
|
||||||
# content already provided?
|
# feedburner and others
|
||||||
if 'content' in item and 'desc' in item:
|
|
||||||
content_len = len(lxml.html.fromstring(item.content).text_content())
|
|
||||||
log('content: %s vs %s' % (content_len, len(item.desc)))
|
|
||||||
if content_len > 5*len(item.desc):
|
|
||||||
log('provided')
|
|
||||||
return
|
|
||||||
|
|
||||||
if '{http://rssnamespace.org/feedburner/ext/1.0}origLink' in item:
|
if '{http://rssnamespace.org/feedburner/ext/1.0}origLink' in item:
|
||||||
item.link = item['{http://rssnamespace.org/feedburner/ext/1.0}origLink']
|
item.link = item['{http://rssnamespace.org/feedburner/ext/1.0}origLink']
|
||||||
log(item.link)
|
log(item.link)
|
||||||
|
@ -347,6 +340,15 @@ def Fill(rss, cache, mode='feed'):
|
||||||
if urlparse.urlparse(item.link).netloc is '':
|
if urlparse.urlparse(item.link).netloc is '':
|
||||||
item.link = urlparse.urljoin(feedurl, item.link)
|
item.link = urlparse.urljoin(feedurl, item.link)
|
||||||
|
|
||||||
|
# content already provided?
|
||||||
|
if 'content' in item and 'desc' in item:
|
||||||
|
len_content = lenHTML(item.content)
|
||||||
|
len_desc = lenHTML(item.desc)
|
||||||
|
log('content: %s vs %s' % (len_content, len_desc))
|
||||||
|
if len_content > 5*len_desc:
|
||||||
|
log('provided')
|
||||||
|
return
|
||||||
|
|
||||||
# check cache and previous errors
|
# check cache and previous errors
|
||||||
if item.link in cache:
|
if item.link in cache:
|
||||||
content = cache.get(item.link)
|
content = cache.get(item.link)
|
||||||
|
|
Loading…
Reference in New Issue