2to3: first attempt to fix strings

2015-02-26 00:50:23 +08:00
parent 071288015b
commit 7bd448789d
3 changed files with 19 additions and 5 deletions
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -14,9 +14,13 @@ except ImportError:
    from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
    from http.client import HTTPException, HTTPConnection, HTTPS_PORT
 import re
 try:
    basestring
 except NameError:
    basestring = str
 MIMETYPE = {
    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
--- a/morss/feedify.py
+++ b/morss/feedify.py
@@ -18,6 +18,11 @@ except ImportError:
    from urllib.parse import urlparse, urljoin
    from urllib.request import urlopen
 try:
    basestring
 except NameError:
    basestring = str
 def to_class(query):
    pattern = r'\[class=([^\]]+)\]'
@@ -108,7 +113,7 @@ class Builder(object):
        self.rule = get_rule(link)
        if self.rule['mode'] == 'xpath':
-            if not isinstance(self.data, unicode):
+            if isinstance(self.data, bytes):
                self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
            self.doc = lxml.html.fromstring(self.data)
        elif self.rule['mode'] == 'json':
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -28,6 +28,11 @@ except ImportError:
    from io import StringIO
    from urllib.request import urlopen
 try:
    basestring
 except NameError:
    basestring = unicode = str
 Element = etree.Element
@@ -79,7 +84,7 @@ def parse(data):
    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
    if match:
        enc = match.groups()[0].lower()
-        if not isinstance(data, unicode):
+        if isinstance(data, bytes):
            data = data.decode(enc, 'ignore')
        data = data.encode(enc)
@@ -373,8 +378,8 @@ class FeedParser(FeedBase):
        out = StringIO()
        c = csv.writer(out, dialect=csv.excel)
        for item in self.items:
-            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
+            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str
-                   isinstance(x[1], basestring)]
+                   #isinstance(x[1], basestring)] # bytes or str
            c.writerow(row)
        out.seek(0)
        return out.read()