2to3: first attempt to fix strings
parent
071288015b
commit
7bd448789d
|
@ -14,9 +14,13 @@ except ImportError:
|
||||||
from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
|
from urllib.request import HTTPSHandler, BaseHandler, AbstractHTTPHandler, Request, addinfourl
|
||||||
from http.client import HTTPException, HTTPConnection, HTTPS_PORT
|
from http.client import HTTPException, HTTPConnection, HTTPS_PORT
|
||||||
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
try:
|
||||||
|
basestring
|
||||||
|
except NameError:
|
||||||
|
basestring = str
|
||||||
|
|
||||||
|
|
||||||
MIMETYPE = {
|
MIMETYPE = {
|
||||||
'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
|
'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
|
||||||
|
|
|
@ -18,6 +18,11 @@ except ImportError:
|
||||||
from urllib.parse import urlparse, urljoin
|
from urllib.parse import urlparse, urljoin
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
try:
|
||||||
|
basestring
|
||||||
|
except NameError:
|
||||||
|
basestring = str
|
||||||
|
|
||||||
|
|
||||||
def to_class(query):
|
def to_class(query):
|
||||||
pattern = r'\[class=([^\]]+)\]'
|
pattern = r'\[class=([^\]]+)\]'
|
||||||
|
@ -108,7 +113,7 @@ class Builder(object):
|
||||||
self.rule = get_rule(link)
|
self.rule = get_rule(link)
|
||||||
|
|
||||||
if self.rule['mode'] == 'xpath':
|
if self.rule['mode'] == 'xpath':
|
||||||
if not isinstance(self.data, unicode):
|
if isinstance(self.data, bytes):
|
||||||
self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
|
self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
|
||||||
self.doc = lxml.html.fromstring(self.data)
|
self.doc = lxml.html.fromstring(self.data)
|
||||||
elif self.rule['mode'] == 'json':
|
elif self.rule['mode'] == 'json':
|
||||||
|
|
|
@ -28,6 +28,11 @@ except ImportError:
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
try:
|
||||||
|
basestring
|
||||||
|
except NameError:
|
||||||
|
basestring = unicode = str
|
||||||
|
|
||||||
|
|
||||||
Element = etree.Element
|
Element = etree.Element
|
||||||
|
|
||||||
|
@ -79,7 +84,7 @@ def parse(data):
|
||||||
match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
|
match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
|
||||||
if match:
|
if match:
|
||||||
enc = match.groups()[0].lower()
|
enc = match.groups()[0].lower()
|
||||||
if not isinstance(data, unicode):
|
if isinstance(data, bytes):
|
||||||
data = data.decode(enc, 'ignore')
|
data = data.decode(enc, 'ignore')
|
||||||
data = data.encode(enc)
|
data = data.encode(enc)
|
||||||
|
|
||||||
|
@ -373,8 +378,8 @@ class FeedParser(FeedBase):
|
||||||
out = StringIO()
|
out = StringIO()
|
||||||
c = csv.writer(out, dialect=csv.excel)
|
c = csv.writer(out, dialect=csv.excel)
|
||||||
for item in self.items:
|
for item in self.items:
|
||||||
row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
|
row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item] # str
|
||||||
isinstance(x[1], basestring)]
|
#isinstance(x[1], basestring)] # bytes or str
|
||||||
c.writerow(row)
|
c.writerow(row)
|
||||||
out.seek(0)
|
out.seek(0)
|
||||||
return out.read()
|
return out.read()
|
||||||
|
|
Loading…
Reference in New Issue