2to3: first attempt to fix strings
This commit is contained in:
		@@ -14,9 +14,13 @@ except ImportError:
 | 
			
		||||
    from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
 | 
			
		||||
    from http.client import HTTPException, HTTPConnection, HTTPS_PORT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    basestring
 | 
			
		||||
except NameError:
 | 
			
		||||
    basestring = str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
MIMETYPE = {
 | 
			
		||||
    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
 | 
			
		||||
 
 | 
			
		||||
@@ -18,6 +18,11 @@ except ImportError:
 | 
			
		||||
    from urllib.parse import urlparse, urljoin
 | 
			
		||||
    from urllib.request import urlopen
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    basestring
 | 
			
		||||
except NameError:
 | 
			
		||||
    basestring = str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def to_class(query):
 | 
			
		||||
    pattern = r'\[class=([^\]]+)\]'
 | 
			
		||||
@@ -108,7 +113,7 @@ class Builder(object):
 | 
			
		||||
        self.rule = get_rule(link)
 | 
			
		||||
 | 
			
		||||
        if self.rule['mode'] == 'xpath':
 | 
			
		||||
            if not isinstance(self.data, unicode):
 | 
			
		||||
            if isinstance(self.data, bytes):
 | 
			
		||||
                self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
 | 
			
		||||
            self.doc = lxml.html.fromstring(self.data)
 | 
			
		||||
        elif self.rule['mode'] == 'json':
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,11 @@ except ImportError:
 | 
			
		||||
    from io import StringIO
 | 
			
		||||
    from urllib.request import urlopen
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    basestring
 | 
			
		||||
except NameError:
 | 
			
		||||
    basestring = unicode = str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Element = etree.Element
 | 
			
		||||
 | 
			
		||||
@@ -79,7 +84,7 @@ def parse(data):
 | 
			
		||||
    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
 | 
			
		||||
    if match:
 | 
			
		||||
        enc = match.groups()[0].lower()
 | 
			
		||||
        if not isinstance(data, unicode):
 | 
			
		||||
        if isinstance(data, bytes):
 | 
			
		||||
            data = data.decode(enc, 'ignore')
 | 
			
		||||
        data = data.encode(enc)
 | 
			
		||||
 | 
			
		||||
@@ -373,8 +378,8 @@ class FeedParser(FeedBase):
 | 
			
		||||
        out = StringIO()
 | 
			
		||||
        c = csv.writer(out, dialect=csv.excel)
 | 
			
		||||
        for item in self.items:
 | 
			
		||||
            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
 | 
			
		||||
                   isinstance(x[1], basestring)]
 | 
			
		||||
            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str
 | 
			
		||||
                   #isinstance(x[1], basestring)] # bytes or str
 | 
			
		||||
            c.writerow(row)
 | 
			
		||||
        out.seek(0)
 | 
			
		||||
        return out.read()
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user