Merge pull request #869 from JackDandy/feature/UpdateCacheCtrl

Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776).
commit ab5682a2c8 by JackDandy, 2017-02-01 03:33:08 +00:00 (committed via GitHub)
11 changed files with 91 additions and 53 deletions

CHANGES.md

@@ -10,6 +10,7 @@
 * Change webserver startup to correctly use xheaders in reverse proxy or load balance set-ups
 * Update backports_abc 0.4 to 0.5
 * Update Beautiful Soup 4.4.0 (r397) to 4.5.3 (r439)
+* Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776)
 
 [develop changelog]


@@ -1,6 +1,5 @@
 Libs with customisations...
 
-/lib/cachecontrol/caches/file_cache.py
 /lib/dateutil/zoneinfo/__init__.py
 /lib/hachoir_core/config.py
 /lib/hachoir_core/stream/input_helpers.py

lib/cachecontrol/__init__.py

@@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = 'Eric Larson'
 __email__ = 'eric@ionrock.org'
 
-__version__ = '0.11.5'
+__version__ = '0.11.7'
 
 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
lib/cachecontrol/adapter.py

@@ -1,3 +1,4 @@
+import types
 import functools
 
 from requests.adapters import HTTPAdapter

@@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter):
         cached response
         """
         if not from_cache and request.method == 'GET':
+            # Check for any heuristics that might update headers
+            # before trying to cache.
+            if self.heuristic:
+                response = self.heuristic.apply(response)
 
             # apply any expiration heuristics
             if response.status == 304:

@@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter):
             elif response.status == 301:
                 self.controller.cache_response(request, response)
             else:
-                # Check for any heuristics that might update headers
-                # before trying to cache.
-                if self.heuristic:
-                    response = self.heuristic.apply(response)
-
                 # Wrap the response file with a wrapper that will cache the
                 # response when the stream has been consumed.
                 response._fp = CallbackFileWrapper(

@@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter):
                         response,
                     )
                 )
+                if response.chunked:
+                    super_update_chunk_length = response._update_chunk_length
+
+                    def _update_chunk_length(self):
+                        super_update_chunk_length()
+                        if self.chunk_left == 0:
+                            self._fp._close()
+                    response._update_chunk_length = types.MethodType(_update_chunk_length, response)
 
         resp = super(CacheControlAdapter, self).build_response(
             request, response
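
Note: the chunked-transfer hook above patches a method on one response instance so the cache callback fires when the last chunk is read. A self-contained sketch of the same types.MethodType technique (toy Response class; all names here are illustrative):

import types

class Response(object):
    chunk_left = 0

    def _update_chunk_length(self):
        print('original bookkeeping runs first')

r = Response()
original = r._update_chunk_length  # keep the old bound method

def _update_chunk_length(self):
    original()
    if self.chunk_left == 0:
        print('last chunk consumed; flush the cache callback here')

# Rebind on this one instance only; other Response objects are untouched.
r._update_chunk_length = types.MethodType(_update_chunk_length, r)
r._update_chunk_length()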

lib/cachecontrol/caches/__init__.py

@@ -1,18 +1,2 @@
-from textwrap import dedent
-
-try:
-    from .file_cache import FileCache
-except ImportError:
-    notice = dedent('''
-    NOTE: In order to use the FileCache you must have
-    lockfile installed. You can install it via pip:
-      pip install lockfile
-    ''')
-    print(notice)
-
-
-try:
-    import redis
-    from .redis_cache import RedisCache
-except ImportError:
-    pass
+from .file_cache import FileCache  # noqa
+from .redis_cache import RedisCache  # noqa
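
Note: with the guarded imports gone, a missing optional dependency now surfaces when the cache is constructed rather than at import time. Typical use of the re-exported FileCache (a sketch assuming requests and lockfile are installed; '.web_cache' is an arbitrary directory):

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

# FileCache raises a helpful ImportError here if lockfile is absent.
sess = CacheControl(requests.Session(), cache=FileCache('.web_cache'))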

lib/cachecontrol/caches/file_cache.py

@@ -1,8 +1,6 @@
 import hashlib
 import os
-from lockfile import LockFile
-from lockfile.mkdirlockfile import MkdirLockFile
 from textwrap import dedent
 
 from ..cache import BaseCache
 from ..controller import CacheController

@@ -55,11 +53,22 @@ class FileCache(BaseCache):
         if use_dir_lock is not None and lock_class is not None:
             raise ValueError("Cannot use use_dir_lock and lock_class together")
 
-        if use_dir_lock:
-            lock_class = MkdirLockFile
+        try:
+            from lockfile import LockFile
+            from lockfile.mkdirlockfile import MkdirLockFile
+        except ImportError:
+            notice = dedent("""
+            NOTE: In order to use the FileCache you must have
+            lockfile installed. You can install it via pip:
+              pip install lockfile
+            """)
+            raise ImportError(notice)
+        else:
+            if use_dir_lock:
+                lock_class = MkdirLockFile
 
-        if lock_class is None:
-            lock_class = LockFile
+            elif lock_class is None:
+                lock_class = LockFile
 
         self.directory = directory
         self.forever = forever

@@ -67,7 +76,6 @@ class FileCache(BaseCache):
         self.dirmode = dirmode
-
         self.lock_class = lock_class
 
     @staticmethod
     def encode(x):
         return hashlib.sha224(x.encode()).hexdigest()

@@ -103,8 +111,11 @@ class FileCache(BaseCache):
 
     def delete(self, key):
         name = self._fn(key)
-        if not self.forever and os.path.exists(name):
-            os.remove(name)
+        if not self.forever:
+            try:
+                os.remove(name)
+            except FileNotFoundError:
+                pass
 
 
 def url_to_file_path(url, filecache):
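
Note: the delete() change swaps check-then-act for EAFP, closing the race where another process removes the file between the exists() check and remove(). A sketch of the difference (hypothetical helper names):

import os

def delete_racy(name):
    if os.path.exists(name):  # file can vanish right after this check...
        os.remove(name)       # ...and then this raises anyway

def delete_safe(name):
    try:
        os.remove(name)
    except OSError:  # FileNotFoundError exists on Python 3 only; OSError covers both
        pass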

lib/cachecontrol/caches/redis_cache.py

@@ -6,11 +6,11 @@ from datetime import datetime
 
 def total_seconds(td):
     """Python 2.6 compatability"""
     if hasattr(td, 'total_seconds'):
-        return td.total_seconds()
+        return int(td.total_seconds())
 
     ms = td.microseconds
     secs = (td.seconds + td.days * 24 * 3600)
-    return (ms + secs * 10**6) / 10**6
+    return int((ms + secs * 10**6) / 10**6)
 
 
 class RedisCache(object):
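
Note: the int() casts matter because timedelta.total_seconds() returns a float, and RedisCache forwards the value as a TTL; Redis's SETEX takes a whole number of seconds. A quick illustration:

from datetime import datetime, timedelta

expires = datetime.utcnow() + timedelta(hours=1)
delta = expires - datetime.utcnow()

print(delta.total_seconds())       # e.g. 3599.999987 -- a float
print(int(delta.total_seconds()))  # 3599 -- a usable whole-second TTL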

lib/cachecontrol/controller.py

@@ -239,6 +239,16 @@ class CacheController(object):
         response_headers = CaseInsensitiveDict(response.headers)
 
+        # If we've been given a body, our response has a Content-Length, that
+        # Content-Length is valid then we can check to see if the body we've
+        # been given matches the expected size, and if it doesn't we'll just
+        # skip trying to cache it.
+        if (body is not None and
+                "content-length" in response_headers and
+                response_headers["content-length"].isdigit() and
+                int(response_headers["content-length"]) != len(body)):
+            return
+
         cc_req = self.parse_cache_control(request.headers)
         cc = self.parse_cache_control(response_headers)

@@ -280,7 +290,7 @@ class CacheController(object):
         elif 'date' in response_headers:
             # cache when there is a max-age > 0
             if cc and cc.get('max-age'):
-                if int(cc['max-age']) > 0:
+                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                     logger.debug('Caching b/c date exists and max-age > 0')
                     self.cache.set(
                         cache_url,
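
Note: the first hunk skips caching when a declared Content-Length disagrees with the body actually received (e.g. a truncated download); the isdigit() guard in the second hunk likewise protects int() from a malformed max-age. The new guard in isolation (hypothetical helper):

def size_mismatch(body, headers):
    cl = headers.get('content-length', '')
    return body is not None and cl.isdigit() and int(cl) != len(body)

print(size_mismatch(b'12345', {'content-length': '5'}))  # False: sizes agree, cacheable
print(size_mismatch(b'123', {'content-length': '5'}))    # True: truncated, skip caching
print(size_mismatch(b'123', {'content-length': ''}))     # False: nothing to compare against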

lib/cachecontrol/filewrapper.py

@@ -45,19 +45,34 @@ class CallbackFileWrapper(object):
         # TODO: Add some logging here...
         return False
 
+    def _close(self):
+        if self.__callback:
+            self.__callback(self.__buf.getvalue())
+
+        # We assign this to None here, because otherwise we can get into
+        # really tricky problems where the CPython interpreter dead locks
+        # because the callback is holding a reference to something which
+        # has a __del__ method. Setting this to None breaks the cycle
+        # and allows the garbage collector to do it's thing normally.
+        self.__callback = None
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
         self.__buf.write(data)
-
         if self.__is_fp_closed():
-            if self.__callback:
-                self.__callback(self.__buf.getvalue())
-
-            # We assign this to None here, because otherwise we can get into
-            # really tricky problems where the CPython interpreter dead locks
-            # because the callback is holding a reference to something which
-            # has a __del__ method. Setting this to None breaks the cycle
-            # and allows the garbage collector to do it's thing normally.
-            self.__callback = None
+            self._close()
 
         return data
+
+    def _safe_read(self, amt):
+        data = self.__fp._safe_read(amt)
+        if amt == 2 and data == b'\r\n':
+            # urllib executes this read to toss the CRLF at the end
+            # of the chunk.
+            return data
+
+        self.__buf.write(data)
+        if self.__is_fp_closed():
+            self._close()
+
+        return data
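
Note: extracting _close() lets read() and the new _safe_read() (which urllib's chunked reading goes through, per the comment above) finish the cache exactly once. A miniature of the wrapper's idea (toy class, not the library's API):

import io

class TeeReader(object):
    def __init__(self, fp, callback):
        self._fp = fp
        self._buf = io.BytesIO()
        self._callback = callback

    def read(self, amt=None):
        data = self._fp.read(amt)
        self._buf.write(data)
        if not data:  # underlying stream exhausted
            if self._callback:
                self._callback(self._buf.getvalue())
            self._callback = None  # fire once; also breaks reference cycles
        return data

def store(body):
    print('cache: %r' % body)

r = TeeReader(io.BytesIO(b'payload'), store)
while r.read(4):
    pass  # prints: cache: b'payload'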

lib/cachecontrol/heuristics.py

@@ -40,10 +40,14 @@ class BaseHeuristic(object):
         return {}
 
     def apply(self, response):
-        warning_header_value = self.warning(response)
-        response.headers.update(self.update_headers(response))
-        if warning_header_value is not None:
-            response.headers.update({'Warning': warning_header_value})
+        updated_headers = self.update_headers(response)
+
+        if updated_headers:
+            response.headers.update(updated_headers)
+            warning_header_value = self.warning(response)
+            if warning_header_value is not None:
+                response.headers.update({'Warning': warning_header_value})
+
         return response
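
Note: the reordered apply() only stamps a Warning header when a heuristic actually changed something. For reference, a heuristic plugs into these hooks like this (a sketch modelled on the cachecontrol docs' example; OneHourFresh is an illustrative name):

import calendar
from datetime import datetime, timedelta
from email.utils import formatdate

from cachecontrol.heuristics import BaseHeuristic

class OneHourFresh(BaseHeuristic):
    def update_headers(self, response):
        # Claim every response may be reused for an hour.
        expires = datetime.utcnow() + timedelta(hours=1)
        return {
            'expires': formatdate(calendar.timegm(expires.timetuple())),
            'cache-control': 'public',
        }

    def warning(self, response):
        return '110 - "Response is Stale"'

It attaches via CacheControl(sess, heuristic=OneHourFresh()).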

lib/cachecontrol/serialize.py

@@ -134,6 +134,12 @@ class Serializer(object):
         body_raw = cached["response"].pop("body")
 
+        headers = CaseInsensitiveDict(data=cached['response']['headers'])
+        if headers.get('transfer-encoding', '') == 'chunked':
+            headers.pop('transfer-encoding')
+
+        cached['response']['headers'] = headers
+
         try:
             body = io.BytesIO(body_raw)
         except TypeError:

@@ -168,7 +174,7 @@ class Serializer(object):
     def _loads_v2(self, request, data):
         try:
             cached = json.loads(zlib.decompress(data).decode("utf8"))
-        except ValueError:
+        except (ValueError, zlib.error):
             return
 
         # We need to decode the items that we've base64 encoded
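
Note: zlib.error is not a subclass of ValueError, so before this change a corrupt cache entry crashed _loads_v2 instead of being treated as a miss. A demonstration:

import zlib

for blob in (zlib.compress(b'ok'), b'garbage from a damaged cache file'):
    try:
        print(zlib.decompress(blob))
    except (ValueError, zlib.error) as e:
        print('treat as cache miss:', e)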