Use separate var in Fill for final url
That way, the URL used for the article-fetching part can be changed entirely without changing the item link itself. Useful for the upcoming Twitter feeds.
master
parent
fd1501a0c0
commit
208d70d3db
18
morss.py
18
morss.py
|
@ -314,9 +314,11 @@ def Fill(item, cache, feedurl='/', fast=False):
|
||||||
log('content bigger enough')
|
log('content bigger enough')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
link = item.link
|
||||||
|
|
||||||
# check cache and previous errors
|
# check cache and previous errors
|
||||||
if item.link in cache:
|
if link in cache:
|
||||||
content = cache.get(item.link)
|
content = cache.get(link)
|
||||||
match = re.search(r'^error-([a-z]{2,10})$', content)
|
match = re.search(r'^error-([a-z]{2,10})$', content)
|
||||||
if match:
|
if match:
|
||||||
if cache.isYoungerThan(DELAY):
|
if cache.isYoungerThan(DELAY):
|
||||||
|
@ -326,7 +328,7 @@ def Fill(item, cache, feedurl='/', fast=False):
|
||||||
log('old error')
|
log('old error')
|
||||||
else:
|
else:
|
||||||
log('cached')
|
log('cached')
|
||||||
setContent(item, cache.get(item.link))
|
setContent(item, cache.get(link))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# super-fast mode
|
# super-fast mode
|
||||||
|
@ -336,27 +338,27 @@ def Fill(item, cache, feedurl='/', fast=False):
|
||||||
|
|
||||||
# download
|
# download
|
||||||
try:
|
try:
|
||||||
url = item.link.encode('utf-8')
|
url = link.encode('utf-8')
|
||||||
con = urllib2.build_opener(HTMLDownloader()).open(url, timeout=TIMEOUT)
|
con = urllib2.build_opener(HTMLDownloader()).open(url, timeout=TIMEOUT)
|
||||||
data = con.read()
|
data = con.read()
|
||||||
except (urllib2.URLError, httplib.HTTPException, socket.timeout):
|
except (urllib2.URLError, httplib.HTTPException, socket.timeout):
|
||||||
log('http error')
|
log('http error')
|
||||||
cache.set(item.link, 'error-http')
|
cache.set(link, 'error-http')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if con.info().maintype != 'text':
|
if con.info().maintype != 'text':
|
||||||
log('non-text page')
|
log('non-text page')
|
||||||
cache.set(item.link, 'error-type')
|
cache.set(link, 'error-type')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
out = readability.Document(data, url=con.url).summary(True)
|
out = readability.Document(data, url=con.url).summary(True)
|
||||||
|
|
||||||
if countWord(out) > max(count_content, count_desc) > 0:
|
if countWord(out) > max(count_content, count_desc) > 0:
|
||||||
setContent(item, out)
|
setContent(item, out)
|
||||||
cache.set(item.link, out)
|
cache.set(link, out)
|
||||||
else:
|
else:
|
||||||
log('not bigger enough')
|
log('not bigger enough')
|
||||||
cache.set(item.link, 'error-length')
|
cache.set(link, 'error-length')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
Loading…
Reference in New Issue