Remove "clip" from Fill
The "clip" handling now lives in Gather and was also removed from feeds.py. An "alone" mode was also added (it removes the description).
master
parent
1b7fe8fbee
commit
78706952fe
7
feeds.py
7
feeds.py
|
@ -343,14 +343,11 @@ class FeedItem(FeedBase):
|
||||||
description = desc = FeedDescriptor('desc')
|
description = desc = FeedDescriptor('desc')
|
||||||
content = FeedDescriptor('content')
|
content = FeedDescriptor('content')
|
||||||
|
|
||||||
def pushContent(self, value, clip=False):
|
def pushContent(self, value):
|
||||||
if not self.desc and self.content:
|
if not self.desc and self.content:
|
||||||
self.desc = self.content
|
self.desc = self.content
|
||||||
|
|
||||||
if self.desc and clip:
|
self.content = value
|
||||||
self.content = self.desc + "<br/><br/>* * *<br/><br/>" + value
|
|
||||||
else:
|
|
||||||
self.content = value
|
|
||||||
|
|
||||||
def remove(self):
|
def remove(self):
|
||||||
self.xml.getparent().remove(self.xml)
|
self.xml.getparent().remove(self.xml)
|
||||||
|
|
18
morss.py
18
morss.py
|
@ -252,7 +252,7 @@ def decodeHTML(data, con=None):
|
||||||
log(enc)
|
log(enc)
|
||||||
return data.decode(enc, 'replace')
|
return data.decode(enc, 'replace')
|
||||||
|
|
||||||
def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
def Fill(item, cache, feedurl='/', fast=False):
|
||||||
""" Returns True when it has done its best """
|
""" Returns True when it has done its best """
|
||||||
|
|
||||||
if not item.link:
|
if not item.link:
|
||||||
|
@ -309,7 +309,6 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
||||||
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
|
match = lxml.html.fromstring(item.content).xpath('//a/@data-expanded-url')
|
||||||
if len(match):
|
if len(match):
|
||||||
link = match[0]
|
link = match[0]
|
||||||
clip = True
|
|
||||||
log(link)
|
log(link)
|
||||||
else:
|
else:
|
||||||
link = None
|
link = None
|
||||||
|
@ -330,7 +329,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
||||||
log('old error')
|
log('old error')
|
||||||
else:
|
else:
|
||||||
log('cached')
|
log('cached')
|
||||||
item.pushContent(cache.get(link), clip)
|
item.pushContent(cache.get(link))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# super-fast mode
|
# super-fast mode
|
||||||
|
@ -356,7 +355,7 @@ def Fill(item, cache, feedurl='/', fast=False, clip=False):
|
||||||
out = readability.Document(data, url=con.url).summary(True)
|
out = readability.Document(data, url=con.url).summary(True)
|
||||||
|
|
||||||
if countWord(out) > max(count_content, count_desc) > 0:
|
if countWord(out) > max(count_content, count_desc) > 0:
|
||||||
item.pushContent(out, clip)
|
item.pushContent(out)
|
||||||
cache.set(link, out)
|
cache.set(link, out)
|
||||||
else:
|
else:
|
||||||
log('not bigger enough')
|
log('not bigger enough')
|
||||||
|
@ -435,11 +434,20 @@ def Gather(url, cachePath, options):
|
||||||
|
|
||||||
if i+1 > LIM_ITEM > 0:
|
if i+1 > LIM_ITEM > 0:
|
||||||
item.remove()
|
item.remove()
|
||||||
|
continue
|
||||||
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
|
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
|
||||||
if Fill(item, cache, url, True) is False:
|
if Fill(item, cache, url, True) is False:
|
||||||
item.remove()
|
item.remove()
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
Fill(item, cache, url, clip='clip' in options)
|
Fill(item, cache, url)
|
||||||
|
|
||||||
|
if item.desc and item.content:
|
||||||
|
if 'clip' in options:
|
||||||
|
item.content = item.desc + "<br/><br/>* * *<br/><br/>" + item.content
|
||||||
|
del item.desc
|
||||||
|
if 'alone' in options:
|
||||||
|
del item.desc
|
||||||
|
|
||||||
log(len(rss.items))
|
log(len(rss.items))
|
||||||
log(time.time() - startTime)
|
log(time.time() - startTime)
|
||||||
|
|
Loading…
Reference in New Issue