Remove "clip" from Fill

The "clip" handling now lives in Gather instead of Fill. The "clip" argument was also removed from pushContent in feeds.py. An "alone" mode was also added (it removes the item's description).
master
pictuga 2013-10-01 19:45:54 +02:00
parent 1b7fe8fbee
commit 78706952fe
2 changed files with 15 additions and 10 deletions

View File

@ -343,14 +343,11 @@ class FeedItem(FeedBase):
description = desc = FeedDescriptor('desc')
content = FeedDescriptor('content')
def pushContent(self, value, clip=False):
def pushContent(self, value):
if not self.desc and self.content:
self.desc = self.content
if self.desc and clip:
self.content = self.desc + "<br/><br/>* * *<br/><br/>" + value
else:
self.content = value
self.content = value
def remove(self):
self.xml.getparent().remove(self.xml)

View File

@ -252,7 +252,7 @@ def decodeHTML(data, con=None):
log(enc)
return data.decode(enc, 'replace')
def Fill(item, cache, feedurl='/', fast=False, clip=False):
def Fill(item, cache, feedurl='/', fast=False):
""" Returns True when it has done its best """
if not item.link:
@ -309,7 +309,6 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
if len(match):
link = match[0]
clip = True
log(link)
else:
link = None
@ -330,7 +329,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
log('old error')
else:
log('cached')
item.pushContent(cache.get(link), clip)
item.pushContent(cache.get(link))
return True
# super-fast mode
@ -356,7 +355,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
out = readability.Document(data, url=con.url).summary(True)
if countWord(out) > max(count_content, count_desc) > 0:
item.pushContent(out, clip)
item.pushContent(out)
cache.set(link, out)
else:
log('not bigger enough')
@ -435,11 +434,20 @@ def Gather(url, cachePath, options):
if i+1 > LIM_ITEM > 0:
item.remove()
continue
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
if Fill(item, cache, url, True) is False:
item.remove()
continue
else:
Fill(item, cache, url, clip='clip' in options)
Fill(item, cache, url)
if item.desc and item.content:
if 'clip' in options:
item.content = item.desc + "<br/><br/>* * *<br/><br/>" + item.content
del item.desc
if 'alone' in options:
del item.desc
log(len(rss.items))
log(time.time() - startTime)