Move from gzip to zlib to decompress data

Faster on incomplete files
2017-11-25 19:57:41 +01:00 · 2017-11-25 19:57:41 +01:00 · 21480f90de
parent d091e74d56
commit 21480f90de
1 changed file with 4 additions and 17 deletions
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -3,7 +3,7 @@ import sys
 import ssl
 import socket
-from gzip import GzipFile
+import zlib
 from io import BytesIO, StringIO
 import re
 import chardet
@@ -100,22 +100,9 @@ class SizeLimitHandler(BaseHandler):
    https_response = http_response
-def UnGzip(cprss, CHUNKSIZE=64*1024): # the bigger the CHUNKSIZE, the faster
+def UnGzip(data):
    " Supports truncated files "
-    gz = GzipFile(fileobj=cprss, mode='rb')
+    return zlib.decompressobj(zlib.MAX_WBITS | 32).decompress(data)
-    data = b''
-    chunk = gz.read(CHUNKSIZE)
-    try:
-        while chunk:
-            data += chunk
-            chunk = gz.read(CHUNKSIZE)
-    except (IOError, EOFError):
-        pass
-    return data
 class GZIPHandler(BaseHandler):
@@ -128,7 +115,7 @@ class GZIPHandler(BaseHandler):
            if resp.headers.get('Content-Encoding') == 'gzip':
                data = resp.read()
-                data = UnGzip(BytesIO(data))
+                data = UnGzip(data)
                resp.headers['Content-Encoding'] = 'identity'