Move iTunes code to feedify.py

master
pictuga 2017-03-18 23:41:37 -10:00
parent d4937812a8
commit 0b6e553054
2 changed files with 15 additions and 11 deletions

View File

@@ -15,10 +15,12 @@ try:
from ConfigParser import ConfigParser
from urlparse import urlparse, urljoin
from urllib2 import urlopen
from httplib import HTTPException
except ImportError:
from configparser import ConfigParser
from urllib.parse import urlparse, urljoin
from urllib.request import urlopen
from http.client import HTTPException
try:
basestring
@@ -95,12 +97,20 @@ def format_string(string, getter, error=False):
def pre_worker(url):
if urlparse(url).netloc == 'itunes.apple.com':
if url.startswith('http://itunes.apple.com/') or url.startswith('https://itunes.apple.com/'):
match = re.search('/id([0-9]+)(\?.*)?$', url)
if match:
iid = match.groups()[0]
redirect = 'https://itunes.apple.com/lookup?id={id}'.format(id=iid)
return redirect
redirect = 'https://itunes.apple.com/lookup?id=%s' % iid
try:
con = crawler.custom_handler(basic=True).open(redirect, timeout=4)
data = con.read()
except (IOError, HTTPException):
raise
return json.loads(data.decode('utf-8', 'replace'))['results'][0]['feedUrl']
return None

View File

@@ -7,7 +7,6 @@ import threading
from fnmatch import fnmatch
import re
import json
import lxml.etree
import lxml.html
@@ -335,7 +334,7 @@ def FeedFetch(url, options):
if isinstance(url, bytes):
url = url.decode()
# do some useful facebook work
# allow for code execution for feedify
pre = feedify.pre_worker(url)
if pre:
url = pre
@@ -357,12 +356,7 @@ def FeedFetch(url, options):
contenttype = con.info().get('Content-Type', '').split(';')[0]
if url.startswith('https://itunes.apple.com/lookup?id='):
link = json.loads(xml.decode('utf-8', 'replace'))['results'][0]['feedUrl']
log('itunes redirect: %s' % link)
return FeedFetch(link, options)
elif re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
if re.match(b'\s*<?xml', xml) is not None or contenttype in crawler.MIMETYPE['xml']:
rss = feeds.parse(xml)
elif feedify.supported(url):