Merge pull request #869 from JackDandy/feature/UpdateCacheCtrl

Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776).
JackDandy committed 2017-02-01 03:33:08 +00:00 (committed by GitHub)
commit ab5682a2c8
11 changed files with 91 additions and 53 deletions

View file: CHANGES.md

@@ -10,6 +10,7 @@
 * Change webserver startup to correctly use xheaders in reverse proxy or load balance set-ups
 * Update backports_abc 0.4 to 0.5
 * Update Beautiful Soup 4.4.0 (r397) to 4.5.3 (r439)
+* Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776)

 [develop changelog]

View file

@ -1,6 +1,5 @@
Libs with customisations... Libs with customisations...
/lib/cachecontrol/caches/file_cache.py
/lib/dateutil/zoneinfo/__init__.py /lib/dateutil/zoneinfo/__init__.py
/lib/hachoir_core/config.py /lib/hachoir_core/config.py
/lib/hachoir_core/stream/input_helpers.py /lib/hachoir_core/stream/input_helpers.py

View file: lib/cachecontrol/__init__.py

@@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = 'Eric Larson'
 __email__ = 'eric@ionrock.org'
-__version__ = '0.11.5'
+__version__ = '0.11.7'

 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter

View file: lib/cachecontrol/adapter.py

@@ -1,3 +1,4 @@
+import types
 import functools

 from requests.adapters import HTTPAdapter
@@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter):
             cached response
         """
         if not from_cache and request.method == 'GET':
+            # Check for any heuristics that might update headers
+            # before trying to cache.
+            if self.heuristic:
+                response = self.heuristic.apply(response)

             # apply any expiration heuristics
             if response.status == 304:
@@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter):
             elif response.status == 301:
                 self.controller.cache_response(request, response)
             else:
-                # Check for any heuristics that might update headers
-                # before trying to cache.
-                if self.heuristic:
-                    response = self.heuristic.apply(response)
-
                 # Wrap the response file with a wrapper that will cache the
                 # response when the stream has been consumed.
                 response._fp = CallbackFileWrapper(
@@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter):
                         response,
                     )
                 )
+                if response.chunked:
+                    super_update_chunk_length = response._update_chunk_length
+
+                    def _update_chunk_length(self):
+                        super_update_chunk_length()
+                        if self.chunk_left == 0:
+                            self._fp._close()
+                    response._update_chunk_length = types.MethodType(_update_chunk_length, response)

         resp = super(CacheControlAdapter, self).build_response(
             request, response

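Two things change in this file: expiration heuristics now run before any status-specific handling, and chunked responses get a per-instance hook so the cache callback fires once the final chunk has been read. The hook relies on types.MethodType, which binds a plain function to one object, so only that single response instance is patched. A minimal standalone sketch of the pattern (Resp is a hypothetical stand-in for urllib3's HTTPResponse, not the library's class):

    import types

    class Resp(object):
        """Hypothetical stand-in for urllib3's HTTPResponse."""
        chunk_left = 0

        def _update_chunk_length(self):
            print('parsing next chunk header')

    r = Resp()
    super_update_chunk_length = r._update_chunk_length  # capture the bound original

    def _update_chunk_length(self):
        super_update_chunk_length()
        if self.chunk_left == 0:
            print('final chunk seen; fire the cache callback')

    # rebind on this one instance only; other responses keep the stock parser
    r._update_chunk_length = types.MethodType(_update_chunk_length, r)
    r._update_chunk_length()

Because only the instance is rebound, concurrent non-chunked responses are untouched; the wrapper simply gives the cache a reliable end-of-body signal that read() alone cannot provide for chunked transfers.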
View file: lib/cachecontrol/caches/__init__.py

@@ -1,18 +1,2 @@
-from textwrap import dedent
-
-try:
-    from .file_cache import FileCache
-except ImportError:
-    notice = dedent('''
-    NOTE: In order to use the FileCache you must have
-    lockfile installed. You can install it via pip:
-      pip install lockfile
-    ''')
-    print(notice)
-
-
-try:
-    import redis
-    from .redis_cache import RedisCache
-except ImportError:
-    pass
+from .file_cache import FileCache  # noqa
+from .redis_cache import RedisCache  # noqa

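The net effect is that importing the caches package no longer prints a notice or silently drops RedisCache; a missing lockfile now surfaces as an ImportError at FileCache construction time instead (see the file_cache.py diff below). A sketch of the new failure mode, assuming the vendored package is importable as cachecontrol on your path:

    from cachecontrol.caches import FileCache  # always succeeds in 0.11.7

    try:
        cache = FileCache('.web_cache')
    except ImportError as err:
        # raised from FileCache.__init__ when lockfile is absent
        print(err)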
View file: lib/cachecontrol/caches/file_cache.py

@@ -1,8 +1,6 @@
 import hashlib
 import os
-
-from lockfile import LockFile
-from lockfile.mkdirlockfile import MkdirLockFile
+from textwrap import dedent

 from ..cache import BaseCache
 from ..controller import CacheController
@@ -55,11 +53,22 @@ class FileCache(BaseCache):
         if use_dir_lock is not None and lock_class is not None:
             raise ValueError("Cannot use use_dir_lock and lock_class together")

-        if use_dir_lock:
-            lock_class = MkdirLockFile
-
-        if lock_class is None:
-            lock_class = LockFile
+        try:
+            from lockfile import LockFile
+            from lockfile.mkdirlockfile import MkdirLockFile
+        except ImportError:
+            notice = dedent("""
+            NOTE: In order to use the FileCache you must have
+            lockfile installed. You can install it via pip:
+              pip install lockfile
+            """)
+            raise ImportError(notice)
+        else:
+            if use_dir_lock:
+                lock_class = MkdirLockFile
+
+            elif lock_class is None:
+                lock_class = LockFile

         self.directory = directory
         self.forever = forever
@@ -67,7 +76,6 @@ class FileCache(BaseCache):
         self.dirmode = dirmode
         self.lock_class = lock_class

-
     @staticmethod
     def encode(x):
         return hashlib.sha224(x.encode()).hexdigest()
@@ -103,8 +111,11 @@ class FileCache(BaseCache):
     def delete(self, key):
         name = self._fn(key)
-        if not self.forever and os.path.exists(name):
-            os.remove(name)
+        if not self.forever:
+            try:
+                os.remove(name)
+            except FileNotFoundError:
+                pass

 def url_to_file_path(url, filecache):

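The delete() hunk replaces a look-before-you-leap guard (seemingly the os.path.exists() customisation that kept this file on the list above) with upstream's EAFP idiom: the exists-then-remove pair can still blow up if another thread or process deletes the entry in between, whereas catching the error closes that race. Note that FileNotFoundError is a Python 3 builtin; a Python 2 deployment needs an alias such as upstream's FileNotFoundError = OSError. A self-contained illustration:

    import os

    name = 'cache-entry.tmp'  # hypothetical cache file
    open(name, 'w').close()

    # racy: the file can vanish between the check and the remove
    if os.path.exists(name):
        os.remove(name)

    # EAFP: attempt it and treat "already gone" as success
    try:
        os.remove(name)
    except OSError:  # FileNotFoundError on py3, aliased to OSError on py2
        pass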
View file: lib/cachecontrol/caches/redis_cache.py

@@ -6,11 +6,11 @@ from datetime import datetime

 def total_seconds(td):
     """Python 2.6 compatability"""
     if hasattr(td, 'total_seconds'):
-        return td.total_seconds()
+        return int(td.total_seconds())

     ms = td.microseconds
     secs = (td.seconds + td.days * 24 * 3600)
-    return (ms + secs * 10**6) / 10**6
+    return int((ms + secs * 10**6) / 10**6)


 class RedisCache(object):

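The int() wrapping matters because this helper's result is fed to Redis's SETEX, which expects a whole number of seconds. A runnable copy of the updated helper (duplicated here so the snippet stands alone):

    from datetime import timedelta

    def total_seconds(td):
        """Python 2.6 compatability"""
        if hasattr(td, 'total_seconds'):
            return int(td.total_seconds())

        ms = td.microseconds
        secs = (td.seconds + td.days * 24 * 3600)
        return int((ms + secs * 10**6) / 10**6)

    print(total_seconds(timedelta(days=1, seconds=30)))  # 86430
    print(total_seconds(timedelta(milliseconds=2500)))   # 2 (fractions truncate)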
View file: lib/cachecontrol/controller.py

@@ -239,6 +239,16 @@ class CacheController(object):

         response_headers = CaseInsensitiveDict(response.headers)

+        # If we've been given a body, our response has a Content-Length, that
+        # Content-Length is valid then we can check to see if the body we've
+        # been given matches the expected size, and if it doesn't we'll just
+        # skip trying to cache it.
+        if (body is not None and
+                "content-length" in response_headers and
+                response_headers["content-length"].isdigit() and
+                int(response_headers["content-length"]) != len(body)):
+            return
+
         cc_req = self.parse_cache_control(request.headers)
         cc = self.parse_cache_control(response_headers)
@@ -280,7 +290,7 @@ class CacheController(object):
         elif 'date' in response_headers:
             # cache when there is a max-age > 0
             if cc and cc.get('max-age'):
-                if int(cc['max-age']) > 0:
+                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                     logger.debug('Caching b/c date exists and max-age > 0')
                     self.cache.set(
                         cache_url,

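Both hunks harden the controller against untrustworthy headers: a response whose body length disagrees with its declared Content-Length is skipped, and a malformed max-age no longer raises a ValueError inside int(). The guard's behaviour on a few hypothetical header values:

    for max_age in ('3600', '0', '-1', 'abc'):
        if max_age.isdigit() and int(max_age) > 0:
            print('cache (max-age=%s)' % max_age)
        else:
            print('skip  (max-age=%s)' % max_age)
    # only '3600' is cached; '-1' fails isdigit(), so int() is never reached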
View file: lib/cachecontrol/filewrapper.py

@@ -45,19 +45,34 @@ class CallbackFileWrapper(object):
             # TODO: Add some logging here...
             return False

+    def _close(self):
+        if self.__callback:
+            self.__callback(self.__buf.getvalue())
+
+        # We assign this to None here, because otherwise we can get into
+        # really tricky problems where the CPython interpreter dead locks
+        # because the callback is holding a reference to something which
+        # has a __del__ method. Setting this to None breaks the cycle
+        # and allows the garbage collector to do it's thing normally.
+        self.__callback = None
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
         self.__buf.write(data)
-
         if self.__is_fp_closed():
-            if self.__callback:
-                self.__callback(self.__buf.getvalue())
-
-            # We assign this to None here, because otherwise we can get into
-            # really tricky problems where the CPython interpreter dead locks
-            # because the callback is holding a reference to something which
-            # has a __del__ method. Setting this to None breaks the cycle
-            # and allows the garbage collector to do it's thing normally.
-            self.__callback = None
+            self._close()
+
+        return data
+
+    def _safe_read(self, amt):
+        data = self.__fp._safe_read(amt)
+        if amt == 2 and data == b'\r\n':
+            # urllib executes this read to toss the CRLF at the end
+            # of the chunk.
+            return data
+
+        self.__buf.write(data)
+        if self.__is_fp_closed():
+            self._close()

         return data

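For orientation, the class being refactored tees everything read from the wrapped stream into a buffer and hands the complete body to a callback exactly once. A simplified sketch of that idea (TeeWrapper and its names are illustrative, not the vendored class):

    import io

    class TeeWrapper(object):
        def __init__(self, fp, callback):
            self.fp = fp
            self.buf = io.BytesIO()
            self.callback = callback

        def _close(self):
            # fire at most once; dropping the reference also breaks GC cycles
            if self.callback:
                self.callback(self.buf.getvalue())
            self.callback = None

        def read(self, amt=None):
            data = self.fp.read(amt)
            self.buf.write(data)
            if not data:  # underlying stream exhausted
                self._close()
            return data

    def on_complete(body):
        print('caching %d bytes' % len(body))

    w = TeeWrapper(io.BytesIO(b'hello world'), on_complete)
    while w.read(4):
        pass

Factoring the firing logic into _close() is what lets the new _safe_read() path (used by urllib's chunked reader) trigger caching as well, while skipping the two-byte CRLF reads that terminate each chunk.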
View file: lib/cachecontrol/heuristics.py

@@ -40,10 +40,14 @@ class BaseHeuristic(object):
         return {}

     def apply(self, response):
-        warning_header_value = self.warning(response)
-        response.headers.update(self.update_headers(response))
-        if warning_header_value is not None:
-            response.headers.update({'Warning': warning_header_value})
+        updated_headers = self.update_headers(response)
+
+        if updated_headers:
+            response.headers.update(updated_headers)
+            warning_header_value = self.warning(response)
+            if warning_header_value is not None:
+                response.headers.update({'Warning': warning_header_value})
+
         return response

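Under the old code every response passed through apply() was stamped with a Warning header even when the heuristic changed nothing; now warning() is consulted only if update_headers() actually returned something. A toy heuristic written against the new contract (CacheOneHour is hypothetical, not part of the library):

    import calendar
    import time
    from email.utils import formatdate

    from cachecontrol.heuristics import BaseHeuristic

    class CacheOneHour(BaseHeuristic):
        def update_headers(self, response):
            if 'expires' in response.headers:
                return {}  # nothing changed, so apply() adds no Warning either
            expires = formatdate(calendar.timegm(time.gmtime()) + 3600)
            return {'expires': expires, 'cache-control': 'public'}

        def warning(self, response):
            return '110 - "Response is Stale"'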
View file: lib/cachecontrol/serialize.py

@@ -134,6 +134,12 @@ class Serializer(object):

         body_raw = cached["response"].pop("body")

+        headers = CaseInsensitiveDict(data=cached['response']['headers'])
+        if headers.get('transfer-encoding', '') == 'chunked':
+            headers.pop('transfer-encoding')
+
+        cached['response']['headers'] = headers
+
         try:
             body = io.BytesIO(body_raw)
         except TypeError:
@@ -168,7 +174,7 @@ class Serializer(object):
     def _loads_v2(self, request, data):
         try:
             cached = json.loads(zlib.decompress(data).decode("utf8"))
-        except ValueError:
+        except (ValueError, zlib.error):
             return

         # We need to decode the items that we've base64 encoded
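The header tweak keeps a replayed cached response from advertising Transfer-Encoding: chunked for a body that is stored un-chunked. The exception tweak matters because a truncated or corrupted cache entry raises zlib.error from zlib.decompress() before json.loads() ever runs; previously that exception escaped _loads_v2 and broke the cache lookup instead of being treated as a miss. A quick demonstration:

    import zlib

    blob = zlib.compress(b'{"response": {}}')
    print(zlib.decompress(blob))  # intact entry round-trips

    try:
        zlib.decompress(blob[:5])  # simulate a truncated cache file
    except zlib.error as err:
        print('treated as a cache miss:', err)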