diff --git a/CHANGES.md b/CHANGES.md index 1802fd95..8ede31f9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,7 @@ * Change webserver startup to correctly use xheaders in reverse proxy or load balance set-ups * Update backports_abc 0.4 to 0.5 * Update Beautiful Soup 4.4.0 (r397) to 4.5.3 (r439) +* Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776) [develop changelog] diff --git a/HACKS.txt b/HACKS.txt index 6780f8cd..f78bcff1 100644 --- a/HACKS.txt +++ b/HACKS.txt @@ -1,6 +1,5 @@ Libs with customisations... -/lib/cachecontrol/caches/file_cache.py /lib/dateutil/zoneinfo/__init__.py /lib/hachoir_core/config.py /lib/hachoir_core/stream/input_helpers.py diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py index d6af9b93..ec9da2e3 100644 --- a/lib/cachecontrol/__init__.py +++ b/lib/cachecontrol/__init__.py @@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces. """ __author__ = 'Eric Larson' __email__ = 'eric@ionrock.org' -__version__ = '0.11.5' +__version__ = '0.11.7' from .wrapper import CacheControl from .adapter import CacheControlAdapter diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py index 54f1b512..270c8b21 100644 --- a/lib/cachecontrol/adapter.py +++ b/lib/cachecontrol/adapter.py @@ -1,3 +1,4 @@ +import types import functools from requests.adapters import HTTPAdapter @@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter): cached response """ if not from_cache and request.method == 'GET': + # Check for any heuristics that might update headers + # before trying to cache. + if self.heuristic: + response = self.heuristic.apply(response) # apply any expiration heuristics if response.status == 304: @@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter): elif response.status == 301: self.controller.cache_response(request, response) else: - # Check for any heuristics that might update headers - # before trying to cache. - if self.heuristic: - response = self.heuristic.apply(response) - # Wrap the response file with a wrapper that will cache the # response when the stream has been consumed. response._fp = CallbackFileWrapper( @@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter): response, ) ) + if response.chunked: + super_update_chunk_length = response._update_chunk_length + + def _update_chunk_length(self): + super_update_chunk_length() + if self.chunk_left == 0: + self._fp._close() + response._update_chunk_length = types.MethodType(_update_chunk_length, response) resp = super(CacheControlAdapter, self).build_response( request, response diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py index f9e66a1f..0e1658fa 100644 --- a/lib/cachecontrol/caches/__init__.py +++ b/lib/cachecontrol/caches/__init__.py @@ -1,18 +1,2 @@ -from textwrap import dedent - -try: - from .file_cache import FileCache -except ImportError: - notice = dedent(''' - NOTE: In order to use the FileCache you must have - lockfile installed. You can install it via pip: - pip install lockfile - ''') - print(notice) - - -try: - import redis - from .redis_cache import RedisCache -except ImportError: - pass +from .file_cache import FileCache # noqa +from .redis_cache import RedisCache # noqa diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py index b2152f8f..ae549043 100644 --- a/lib/cachecontrol/caches/file_cache.py +++ b/lib/cachecontrol/caches/file_cache.py @@ -1,8 +1,6 @@ import hashlib import os - -from lockfile import LockFile -from lockfile.mkdirlockfile import MkdirLockFile +from textwrap import dedent from ..cache import BaseCache from ..controller import CacheController @@ -55,11 +53,22 @@ class FileCache(BaseCache): if use_dir_lock is not None and lock_class is not None: raise ValueError("Cannot use use_dir_lock and lock_class together") - if use_dir_lock: - lock_class = MkdirLockFile + try: + from lockfile import LockFile + from lockfile.mkdirlockfile import MkdirLockFile + except ImportError: + notice = dedent(""" + NOTE: In order to use the FileCache you must have + lockfile installed. You can install it via pip: + pip install lockfile + """) + raise ImportError(notice) + else: + if use_dir_lock: + lock_class = MkdirLockFile - if lock_class is None: - lock_class = LockFile + elif lock_class is None: + lock_class = LockFile self.directory = directory self.forever = forever @@ -67,7 +76,6 @@ class FileCache(BaseCache): self.dirmode = dirmode self.lock_class = lock_class - @staticmethod def encode(x): return hashlib.sha224(x.encode()).hexdigest() @@ -103,8 +111,11 @@ class FileCache(BaseCache): def delete(self, key): name = self._fn(key) - if not self.forever and os.path.exists(name): - os.remove(name) + if not self.forever: + try: + os.remove(name) + except FileNotFoundError: + pass def url_to_file_path(url, filecache): diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py index 9f5d55fd..a0562d4b 100644 --- a/lib/cachecontrol/caches/redis_cache.py +++ b/lib/cachecontrol/caches/redis_cache.py @@ -6,11 +6,11 @@ from datetime import datetime def total_seconds(td): """Python 2.6 compatability""" if hasattr(td, 'total_seconds'): - return td.total_seconds() + return int(td.total_seconds()) ms = td.microseconds secs = (td.seconds + td.days * 24 * 3600) - return (ms + secs * 10**6) / 10**6 + return int((ms + secs * 10**6) / 10**6) class RedisCache(object): diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py index af7367eb..f93f0834 100644 --- a/lib/cachecontrol/controller.py +++ b/lib/cachecontrol/controller.py @@ -239,6 +239,16 @@ class CacheController(object): response_headers = CaseInsensitiveDict(response.headers) + # If we've been given a body, our response has a Content-Length, that + # Content-Length is valid then we can check to see if the body we've + # been given matches the expected size, and if it doesn't we'll just + # skip trying to cache it. + if (body is not None and + "content-length" in response_headers and + response_headers["content-length"].isdigit() and + int(response_headers["content-length"]) != len(body)): + return + cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) @@ -280,7 +290,7 @@ class CacheController(object): elif 'date' in response_headers: # cache when there is a max-age > 0 if cc and cc.get('max-age'): - if int(cc['max-age']) > 0: + if cc['max-age'].isdigit() and int(cc['max-age']) > 0: logger.debug('Caching b/c date exists and max-age > 0') self.cache.set( cache_url, diff --git a/lib/cachecontrol/filewrapper.py b/lib/cachecontrol/filewrapper.py index 4b91bce0..f1e1ce05 100644 --- a/lib/cachecontrol/filewrapper.py +++ b/lib/cachecontrol/filewrapper.py @@ -45,19 +45,34 @@ class CallbackFileWrapper(object): # TODO: Add some logging here... return False + def _close(self): + if self.__callback: + self.__callback(self.__buf.getvalue()) + + # We assign this to None here, because otherwise we can get into + # really tricky problems where the CPython interpreter dead locks + # because the callback is holding a reference to something which + # has a __del__ method. Setting this to None breaks the cycle + # and allows the garbage collector to do it's thing normally. + self.__callback = None + def read(self, amt=None): data = self.__fp.read(amt) self.__buf.write(data) - if self.__is_fp_closed(): - if self.__callback: - self.__callback(self.__buf.getvalue()) - - # We assign this to None here, because otherwise we can get into - # really tricky problems where the CPython interpreter dead locks - # because the callback is holding a reference to something which - # has a __del__ method. Setting this to None breaks the cycle - # and allows the garbage collector to do it's thing normally. - self.__callback = None + self._close() + + return data + + def _safe_read(self, amt): + data = self.__fp._safe_read(amt) + if amt == 2 and data == b'\r\n': + # urllib executes this read to toss the CRLF at the end + # of the chunk. + return data + + self.__buf.write(data) + if self.__is_fp_closed(): + self._close() return data diff --git a/lib/cachecontrol/heuristics.py b/lib/cachecontrol/heuristics.py index 01b63141..94715a4e 100644 --- a/lib/cachecontrol/heuristics.py +++ b/lib/cachecontrol/heuristics.py @@ -40,10 +40,14 @@ class BaseHeuristic(object): return {} def apply(self, response): - warning_header_value = self.warning(response) - response.headers.update(self.update_headers(response)) - if warning_header_value is not None: - response.headers.update({'Warning': warning_header_value}) + updated_headers = self.update_headers(response) + + if updated_headers: + response.headers.update(updated_headers) + warning_header_value = self.warning(response) + if warning_header_value is not None: + response.headers.update({'Warning': warning_header_value}) + return response diff --git a/lib/cachecontrol/serialize.py b/lib/cachecontrol/serialize.py index 13af04bd..c325339e 100644 --- a/lib/cachecontrol/serialize.py +++ b/lib/cachecontrol/serialize.py @@ -134,6 +134,12 @@ class Serializer(object): body_raw = cached["response"].pop("body") + headers = CaseInsensitiveDict(data=cached['response']['headers']) + if headers.get('transfer-encoding', '') == 'chunked': + headers.pop('transfer-encoding') + + cached['response']['headers'] = headers + try: body = io.BytesIO(body_raw) except TypeError: @@ -168,7 +174,7 @@ class Serializer(object): def _loads_v2(self, request, data): try: cached = json.loads(zlib.decompress(data).decode("utf8")) - except ValueError: + except (ValueError, zlib.error): return # We need to decode the items that we've base64 encoded