Compare commits

..

5 Commits

4 changed files with 25 additions and 19 deletions

View File

@@ -221,7 +221,7 @@ class GZIPHandler(BaseHandler):
def detect_encoding(data, resp=None): def detect_encoding(data, resp=None):
enc = detect_raw_encoding(data, resp) enc = detect_raw_encoding(data, resp)
if enc == 'gb2312': if enc.lower() == 'gb2312':
enc = 'gbk' enc = 'gbk'
return enc return enc
@@ -261,12 +261,8 @@ class EncodingFixHandler(BaseHandler):
if 200 <= resp.code < 300 and maintype == 'text': if 200 <= resp.code < 300 and maintype == 'text':
data = resp.read() data = resp.read()
if not self.encoding: enc = self.encoding or detect_encoding(data, resp)
enc = detect_encoding(data, resp)
else:
enc = self.encoding
if enc:
data = data.decode(enc, 'replace') data = data.decode(enc, 'replace')
data = data.encode(enc) data = data.encode(enc)
@@ -655,5 +651,8 @@ class MySQLCacheHandler(BaseCache):
if __name__ == '__main__': if __name__ == '__main__':
req = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it') req = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
if not sys.flags.interactive: if sys.flags.interactive:
print('>>> Interactive shell: try using `req`')
else:
print(req['data'].decode(req['encoding'])) print(req['data'].decode(req['encoding']))

View File

@@ -780,6 +780,9 @@ if __name__ == '__main__':
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss') req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
feed = parse(req['data'], url=req['url'], encoding=req['encoding']) feed = parse(req['data'], url=req['url'], encoding=req['encoding'])
if not sys.flags.interactive: if sys.flags.interactive:
print('>>> Interactive shell: try using `feed`')
else:
for item in feed.items: for item in feed.items:
print(item.title, item.link) print(item.title, item.link)

View File

@@ -351,5 +351,8 @@ if __name__ == '__main__':
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it') req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
article = get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='unicode') article = get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='unicode')
if not sys.flags.interactive: if sys.flags.interactive:
print('>>> Interactive shell: try using `article`')
else:
print(article) print(article)

View File

@@ -33,7 +33,8 @@
} }
header { header {
text-align: center; text-align: justify;
text-align-last: center;
border-bottom: 1px solid silver; border-bottom: 1px solid silver;
} }
@@ -112,7 +113,6 @@
} }
header > form { header > form {
text-align: center;
margin: 1%; margin: 1%;
} }
@@ -176,7 +176,7 @@
<select> <select>
<option value="">full-text</option> <option value="">full-text</option>
<option value=":proxy">original</option> <option value=":proxy">original</option>
<option value=":clip">original + full-text</option> <option value=":clip" title="original + full-text: keep the original description above the full article. Useful for reddit feeds for example, to keep the comment links">combined (?)</option>
</select> </select>
feed as feed as
<select> <select>
@@ -185,12 +185,12 @@
<option value=":html">HTML</option> <option value=":html">HTML</option>
<option value=":csv">CSV</option> <option value=":csv">CSV</option>
</select> </select>
using using the
<select> <select>
<option value="">the standard link</option> <option value="">standard</option>
<option value=":firstlink" title="Useful for Twitter feeds for example, to get the articles referred to in tweets rather than the tweet itself">the first link from the description (?)</option> <option value=":firstlink" title="Pull the article from the first available link in the description, instead of the standard link. Useful for Twitter feeds for example, to get the articles referred to in tweets rather than the tweet itself">first (?)</option>
</select> </select>
and link and
<select> <select>
<option value="">keep</option> <option value="">keep</option>
<option value=":nolink:noref">remove</option> <option value=":nolink:noref">remove</option>
@@ -199,7 +199,8 @@
<input type="hidden" value="" name="extra_options"/> <input type="hidden" value="" name="extra_options"/>
</form> </form>
<p>Click <a href="/">here</a> to go back to morss</p> <p>You can find a <em>preview</em> of the feed below. You need a <em>feed reader</em> for optimal use</p>
<p>Click <a href="/">here</a> to go back to morss and/or to use the tool on another feed</p>
</header> </header>
<div id="header" dir="auto"> <div id="header" dir="auto">