crawler: avoid too many .append()

master
pictuga 2022-01-08 22:24:56 +01:00
parent c8669002e4
commit 750850c162
1 changed file with 12 additions and 12 deletions

View File

@@ -111,8 +111,6 @@ def adv_get(url, post=None, timeout=None, *args, **kwargs):
 def custom_opener(follow=None, policy=None, force_min=None, force_max=None):
-    handlers = []
     # as per urllib2 source code, these Handlers are added first
     # *unless* one of the custom handlers inherits from one of them
     #
@@ -130,16 +128,18 @@ def custom_opener(follow=None, policy=None, force_min=None, force_max=None):
     # http_error_* are run until sth is returned (other than None). If they all
     # return nothing, a python error is raised
-    #handlers.append(DebugHandler())
-    handlers.append(SizeLimitHandler(500*1024)) # 500KiB
-    handlers.append(HTTPCookieProcessor())
-    handlers.append(GZIPHandler())
-    handlers.append(HTTPAllRedirectHandler())
-    handlers.append(HTTPEquivHandler())
-    handlers.append(HTTPRefreshHandler())
-    handlers.append(UAHandler(random.choice(DEFAULT_UAS)))
-    handlers.append(BrowserlyHeaderHandler())
-    handlers.append(EncodingFixHandler())
+    handlers = [
+        #DebugHandler(),
+        SizeLimitHandler(500*1024), # 500KiB  [NOTE(review): scrape showed an unbalanced ")" here; comma assumed, consistent with the other list entries — verify against the repository]
+        HTTPCookieProcessor(),
+        GZIPHandler(),
+        HTTPAllRedirectHandler(),
+        HTTPEquivHandler(),
+        HTTPRefreshHandler(),
+        UAHandler(random.choice(DEFAULT_UAS)),
+        BrowserlyHeaderHandler(),
+        EncodingFixHandler(),
+    ]
     if follow:
         handlers.append(AlternateHandler(MIMETYPE[follow]))