2to3: first attempt to fix strings
parent
071288015b
commit
7bd448789d
|
@ -14,9 +14,13 @@ except ImportError:
|
|||
from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
|
||||
from http.client import HTTPException, HTTPConnection, HTTPS_PORT
|
||||
|
||||
|
||||
import re
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = str
|
||||
|
||||
|
||||
MIMETYPE = {
|
||||
'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
|
||||
|
|
|
@ -18,6 +18,11 @@ except ImportError:
|
|||
from urllib.parse import urlparse, urljoin
|
||||
from urllib.request import urlopen
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = str
|
||||
|
||||
|
||||
def to_class(query):
|
||||
pattern = r'\[class=([^\]]+)\]'
|
||||
|
@ -108,7 +113,7 @@ class Builder(object):
|
|||
self.rule = get_rule(link)
|
||||
|
||||
if self.rule['mode'] == 'xpath':
|
||||
if not isinstance(self.data, unicode):
|
||||
if isinstance(self.data, bytes):
|
||||
self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
|
||||
self.doc = lxml.html.fromstring(self.data)
|
||||
elif self.rule['mode'] == 'json':
|
||||
|
|
|
@ -28,6 +28,11 @@ except ImportError:
|
|||
from io import StringIO
|
||||
from urllib.request import urlopen
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = unicode = str
|
||||
|
||||
|
||||
Element = etree.Element
|
||||
|
||||
|
@ -79,7 +84,7 @@ def parse(data):
|
|||
match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
|
||||
if match:
|
||||
enc = match.groups()[0].lower()
|
||||
if not isinstance(data, unicode):
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode(enc, 'ignore')
|
||||
data = data.encode(enc)
|
||||
|
||||
|
@ -373,8 +378,8 @@ class FeedParser(FeedBase):
|
|||
out = StringIO()
|
||||
c = csv.writer(out, dialect=csv.excel)
|
||||
for item in self.items:
|
||||
row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
|
||||
isinstance(x[1], basestring)]
|
||||
row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str
|
||||
#isinstance(x[1], basestring)] # bytes or str
|
||||
c.writerow(row)
|
||||
out.seek(0)
|
||||
return out.read()
|
||||
|
|
Loading…
Reference in New Issue