2to3: first attempt to fix strings

2015-02-26 00:50:23 +08:00 · 2015-02-26 00:50:23 +08:00 · 7bd448789d
commit 7bd448789d
parent 071288015b
3 changed files with 19 additions and 5 deletions
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -14,9 +14,13 @@ except ImportError:
    from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
    from http.client import HTTPException, HTTPConnection, HTTPS_PORT

-
 import re

+try:
+    basestring
+except NameError:
+    basestring = str
+

 MIMETYPE = {
    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
--- a/morss/feedify.py
+++ b/morss/feedify.py
@@ -18,6 +18,11 @@ except ImportError:
    from urllib.parse import urlparse, urljoin
    from urllib.request import urlopen

+try:
+    basestring
+except NameError:
+    basestring = str
+

 def to_class(query):
    pattern = r'\[class=([^\]]+)\]'
@@ -108,7 +113,7 @@ class Builder(object):
        self.rule = get_rule(link)

        if self.rule['mode'] == 'xpath':
-            if not isinstance(self.data, unicode):
+            if isinstance(self.data, bytes):
                self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
            self.doc = lxml.html.fromstring(self.data)
        elif self.rule['mode'] == 'json':
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -28,6 +28,11 @@ except ImportError:
    from io import StringIO
    from urllib.request import urlopen

+try:
+    basestring
+except NameError:
+    basestring = unicode = str
+

 Element = etree.Element

@@ -79,7 +84,7 @@ def parse(data):
    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
    if match:
        enc = match.groups()[0].lower()
-        if not isinstance(data, unicode):
+        if isinstance(data, bytes):
            data = data.decode(enc, 'ignore')
        data = data.encode(enc)

@@ -373,8 +378,8 @@ class FeedParser(FeedBase):
        out = StringIO()
        c = csv.writer(out, dialect=csv.excel)
        for item in self.items:
-            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
-                   isinstance(x[1], basestring)]
+            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str
+                   #isinstance(x[1], basestring)] # bytes or str
            c.writerow(row)
        out.seek(0)
        return out.read()