Update cachecontrol library 0.9.3 to 0.11.2.

JackDandy 2015-04-28 18:32:10 +01:00
parent f9568212da
commit 3ab45e19d5
14 changed files with 496 additions and 192 deletions

View file

@@ -9,6 +9,7 @@
 * Update change to suppress HTTPS verification InsecureRequestWarning to updated package as listed in hacks.txt
 * Remove listed hacks.txt record for check that SSLv3 is available because issue was addressed by vendor
 * Update chardet packages 2.2.1 to 2.3.0 (ff40135)
+* Update cachecontrol library 0.9.3 to 0.11.2
 * Add ToTV provider
 * Fix Backlog scheduler initialization and change backlog frequency from minutes to days
 * Change to consolidate and tidy some provider code

View file

@@ -2,6 +2,10 @@
 Make it easy to import from cachecontrol without long namespaces.
 """
+__author__ = 'Eric Larson'
+__email__ = 'eric@ionrock.org'
+__version__ = '0.11.2'
+
 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
 from .controller import CacheController

View file

@@ -1,16 +1,24 @@
-from lib.requests.adapters import HTTPAdapter
+import functools
+
+from requests.adapters import HTTPAdapter

 from .controller import CacheController
 from .cache import DictCache
+from .filewrapper import CallbackFileWrapper


 class CacheControlAdapter(HTTPAdapter):
     invalidating_methods = set(['PUT', 'DELETE'])

-    def __init__(self, cache=None, cache_etags=True, controller_class=None,
-                 serializer=None, *args, **kw):
+    def __init__(self, cache=None,
+                 cache_etags=True,
+                 controller_class=None,
+                 serializer=None,
+                 heuristic=None,
+                 *args, **kw):
         super(CacheControlAdapter, self).__init__(*args, **kw)
         self.cache = cache or DictCache()
+        self.heuristic = heuristic

         controller_factory = controller_class or CacheController
         self.controller = controller_factory(
@@ -27,10 +35,13 @@ class CacheControlAdapter(HTTPAdapter):
         if request.method == 'GET':
             cached_response = self.controller.cached_request(request)
             if cached_response:
-                return self.build_response(request, cached_response, from_cache=True)
+                return self.build_response(request, cached_response,
+                                           from_cache=True)

             # check for etags and add headers if appropriate
-            request.headers.update(self.controller.conditional_headers(request))
+            request.headers.update(
+                self.controller.conditional_headers(request)
+            )

         resp = super(CacheControlAdapter, self).send(request, **kw)
@@ -44,6 +55,8 @@ class CacheControlAdapter(HTTPAdapter):
         cached response
         """
         if not from_cache and request.method == 'GET':
+
+            # apply any expiration heuristics
             if response.status == 304:
                 # We must have sent an ETag request. This could mean
                 # that we've been expired already or that we simply
@@ -56,14 +69,34 @@ class CacheControlAdapter(HTTPAdapter):
                 if cached_response is not response:
                     from_cache = True

+                # We are done with the server response, read a
+                # possible response body (compliant servers will
+                # not return one, but we cannot be 100% sure) and
+                # release the connection back to the pool.
+                response.read(decode_content=False)
+                response.release_conn()
+
                 response = cached_response
+
+            # We always cache the 301 responses
+            elif response.status == 301:
+                self.controller.cache_response(request, response)
             else:
-                # try to cache the response
-                try:
-                    self.controller.cache_response(request, response)
-                except Exception as e:
-                    # Failed to cache the results
-                    pass
+                # Check for any heuristics that might update headers
+                # before trying to cache.
+                if self.heuristic:
+                    response = self.heuristic.apply(response)
+
+                # Wrap the response file with a wrapper that will cache the
+                # response when the stream has been consumed.
+                response._fp = CallbackFileWrapper(
+                    response._fp,
+                    functools.partial(
+                        self.controller.cache_response,
+                        request,
+                        response,
+                    )
+                )

         resp = super(CacheControlAdapter, self).build_response(
             request, response
@@ -78,3 +111,7 @@ class CacheControlAdapter(HTTPAdapter):
         resp.from_cache = from_cache

         return resp
+
+    def close(self):
+        self.cache.close()
+        super(CacheControlAdapter, self).close()
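
For reference, a minimal sketch of using the new heuristic hook on the adapter directly; it assumes the package is importable as cachecontrol (in this repository it is vendored, so the real import prefix may differ):

    import requests

    from cachecontrol.adapter import CacheControlAdapter
    from cachecontrol.heuristics import ExpiresAfter

    sess = requests.Session()
    # Treat every response as fresh for one hour, regardless of headers.
    adapter = CacheControlAdapter(heuristic=ExpiresAfter(hours=1))
    sess.mount('http://', adapter)
    sess.mount('https://', adapter)

    resp = sess.get('http://example.com/')
    print(resp.from_cache)  # False on the first fetch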

View file

@@ -1,9 +1,10 @@
 """
-The cache object API for implementing caches. The default is just a
-dictionary, which in turns means it is not threadsafe for writing.
+The cache object API for implementing caches. The default is a thread
+safe in-memory dictionary.
 """
 from threading import Lock


 class BaseCache(object):

     def get(self, key):
@@ -15,6 +16,10 @@ class BaseCache(object):
     def delete(self, key):
         raise NotImplemented()

+    def close(self):
+        pass
+

 class DictCache(BaseCache):

     def __init__(self, init_dict=None):
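
Since FileCache and RedisCache now derive from BaseCache (see the next two files), a third-party backend only needs to fill in this small interface. A hypothetical minimal implementation, for illustration only:

    from cachecontrol.cache import BaseCache

    class NullCache(BaseCache):
        """A do-nothing cache: every lookup is a miss."""

        def get(self, key):
            return None

        def set(self, key, value):
            pass

        def delete(self, key):
            pass

        # close() is inherited from BaseCache as a no-op.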

View file

@@ -3,6 +3,9 @@ import os

 from lockfile import FileLock

+from ..cache import BaseCache
+from ..controller import CacheController
+

 def _secure_open_write(filename, fmode):
     # We only want to write to this file, so open it in write only mode
@@ -44,22 +47,24 @@ def _secure_open_write(filename, fmode):
         raise


-class FileCache(object):
+class FileCache(BaseCache):
     def __init__(self, directory, forever=False, filemode=0o0600,
                  dirmode=0o0700):
         self.directory = directory
         self.forever = forever
         self.filemode = filemode
-
-        if not os.path.isdir(self.directory):
-            os.makedirs(self.directory, dirmode)
+        self.dirmode = dirmode

     @staticmethod
     def encode(x):
         return hashlib.sha224(x.encode()).hexdigest()

     def _fn(self, name):
-        return os.path.join(self.directory, self.encode(name))
+        # NOTE: This method should not change as some may depend on it.
+        #       See: https://github.com/ionrock/cachecontrol/issues/63
+        hashed = self.encode(name)
+        parts = list(hashed[:5]) + [hashed]
+        return os.path.join(self.directory, *parts)

     def get(self, key):
         name = self._fn(key)
@@ -71,7 +76,15 @@ class FileCache(object):
     def set(self, key, value):
         name = self._fn(key)

+        # Make sure the directory exists
+        try:
+            os.makedirs(os.path.dirname(name), self.dirmode)
+        except (IOError, OSError):
+            pass
+
         with FileLock(name) as lock:
+            # Write our actual file
             with _secure_open_write(lock.path, self.filemode) as fh:
                 fh.write(value)
@@ -79,3 +92,12 @@ class FileCache(object):
         name = self._fn(key)
         if not self.forever:
             os.remove(name)
+
+
+def url_to_file_path(url, filecache):
+    """Return the file cache path based on the URL.
+
+    This does not ensure the file exists!
+    """
+    key = CacheController.cache_url(url)
+    return filecache._fn(key)
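
The sharded _fn() layout and the new url_to_file_path() helper can be exercised like this (a sketch; the .web_cache directory name is arbitrary):

    from cachecontrol.caches.file_cache import FileCache, url_to_file_path

    fc = FileCache('.web_cache')
    path = url_to_file_path('http://example.com/feed', fc)
    # The key is the sha224 hex digest of the normalized URL, stored under
    # five single-character shard directories, e.g.
    #   .web_cache/0/1/2/3/4/01234...digest
    print(path)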

View file

@@ -36,3 +36,6 @@ class RedisCache(object):
         caution!"""
         for key in self.conn.keys():
             self.conn.delete(key)
+
+    def close(self):
+        self.conn.disconnect()

View file

@@ -4,23 +4,20 @@ except ImportError:
     from urlparse import urljoin

-try:
-    import email.utils
-    parsedate_tz = email.utils.parsedate_tz
-except ImportError:
-    import email.Utils
-    parsedate_tz = email.Utils.parsedate_tz

 try:
     import cPickle as pickle
 except ImportError:
     import pickle

-# Handle the case where the requests has been patched to not have urllib3
-# bundled as part of it's source.
+# Handle the case where the requests module has been patched to not have
+# urllib3 bundled as part of its source.
 try:
-    from lib.requests.packages.urllib3.response import HTTPResponse
+    from requests.packages.urllib3.response import HTTPResponse
 except ImportError:
     from urllib3.response import HTTPResponse
+
+try:
+    from requests.packages.urllib3.util import is_fp_closed
+except ImportError:
+    from urllib3.util import is_fp_closed

View file

@@ -4,14 +4,14 @@ The httplib2 algorithms ported for use with requests.
 import re
 import calendar
 import time
-import datetime
+from email.utils import parsedate_tz

-from lib.requests.structures import CaseInsensitiveDict
+from requests.structures import CaseInsensitiveDict

 from .cache import DictCache
-from .compat import parsedate_tz
 from .serialize import Serializer


 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
@@ -21,7 +21,7 @@ def parse_uri(uri):
         (scheme, authority, path, query, fragment) = parse_uri(uri)
     """
     groups = URI.match(uri).groups()
-    return groups[1], groups[3], groups[4], groups[6], groups[8]
+    return (groups[1], groups[3], groups[4], groups[6], groups[8])


 class CacheController(object):
@@ -32,26 +32,29 @@ class CacheController(object):
         self.cache_etags = cache_etags
         self.serializer = serializer or Serializer()

-    def _urlnorm(self, uri):
+    @classmethod
+    def _urlnorm(cls, uri):
         """Normalize the URL to create a safe key for the cache"""
         (scheme, authority, path, query, fragment) = parse_uri(uri)
         if not scheme or not authority:
             raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
-        authority = authority.lower()
         scheme = scheme.lower()
+        authority = authority.lower()

         if not path:
             path = "/"

         # Could do syntax based normalization of the URI before
         # computing the digest. See Section 6.2.2 of Std 66.
         request_uri = query and "?".join([path, query]) or path
-        scheme = scheme.lower()
         defrag_uri = scheme + "://" + authority + request_uri

         return defrag_uri

-    def cache_url(self, uri):
-        return self._urlnorm(uri)
+    @classmethod
+    def cache_url(cls, uri):
+        return cls._urlnorm(uri)

     def parse_cache_control(self, headers):
         """
@@ -68,13 +71,20 @@ class CacheController(object):
         parts = headers[cc_header].split(',')
         parts_with_args = [
             tuple([x.strip().lower() for x in part.split("=", 1)])
-            for part in parts if -1 != part.find("=")]
-        parts_wo_args = [(name.strip().lower(), 1)
-                         for name in parts if -1 == name.find("=")]
+            for part in parts if -1 != part.find("=")
+        ]
+        parts_wo_args = [
+            (name.strip().lower(), 1)
+            for name in parts if -1 == name.find("=")
+        ]
         retval = dict(parts_with_args + parts_wo_args)
         return retval

     def cached_request(self, request):
+        """
+        Return a cached response if it exists in the cache, otherwise
+        return False.
+        """
         cache_url = self.cache_url(request.url)
         cc = self.parse_cache_control(request.headers)
@@ -95,7 +105,24 @@ class CacheController(object):
         if not resp:
             return False

+        # If we have a cached 301, return it immediately. We don't
+        # need to test our response for other headers b/c it is
+        # intrinsically "cacheable" as it is Permanent.
+        # See:
+        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
+        #
+        # Client can try to refresh the value by repeating the request
+        # with cache busting headers as usual (ie no-cache).
+        if resp.status == 301:
+            return resp
+
         headers = CaseInsensitiveDict(resp.headers)
+        if not headers or 'date' not in headers:
+            # With date or etag, the cached response can never be used
+            # and should be deleted.
+            if 'etag' not in headers:
+                self.cache.delete(cache_url)
+            return False

         now = time.time()
         date = calendar.timegm(
@@ -104,15 +131,19 @@ class CacheController(object):
         current_age = max(0, now - date)

         # TODO: There is an assumption that the result will be a
         #       urllib3 response object. This may not be best since we
         #       could probably avoid instantiating or constructing the
         #       response until we know we need it.
         resp_cc = self.parse_cache_control(headers)

         # determine freshness
         freshness_lifetime = 0
+
+        # Check the max-age pragma in the cache control header
         if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
             freshness_lifetime = int(resp_cc['max-age'])
+
+        # If there isn't a max-age, check for an expires header
         elif 'expires' in headers:
             expires = parsedate_tz(headers['expires'])
             if expires is not None:
@@ -163,32 +194,24 @@ class CacheController(object):
         return new_headers

-    def cache_response(self, request, response):
+    def cache_response(self, request, response, body=None):
         """
         Algorithm for caching requests.

         This assumes a requests Response object.
         """
         # From httplib2: Don't cache 206's since we aren't going to
         # handle byte range requests
-        if response.status not in [200, 203]:
+        if response.status not in [200, 203, 300, 301]:
             return

-        # Cache Session Params
-        cache_auto = getattr(request, 'cache_auto', False)
-        cache_urls = getattr(request, 'cache_urls', [])
-        cache_max_age = getattr(request, 'cache_max_age', None)
-
         response_headers = CaseInsensitiveDict(response.headers)

-        # Check if we are wanting to cache responses from specific urls only
-        cache_url = self.cache_url(request.url)
-        if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
-            return
-
         cc_req = self.parse_cache_control(request.headers)
         cc = self.parse_cache_control(response_headers)

+        cache_url = self.cache_url(request.url)
+
         # Delete it from the cache if we happen to have it stored there
         no_store = cc.get('no-store') or cc_req.get('no-store')
         if no_store and self.cache.get(cache_url):
@@ -196,21 +219,18 @@ class CacheController(object):
         # If we've been given an etag, then keep the response
         if self.cache_etags and 'etag' in response_headers:
-            self.cache.set(cache_url, self.serializer.dumps(request, response))
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response, body=body),
+            )

-        # If we want to cache sites not setup with cache headers then add the proper headers and keep the response
-        elif cache_auto and not cc and response_headers:
-            headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
-            response.headers.update(headers)
-
-            if 'expires' not in response_headers:
-                if getattr(response_headers, 'expires', None) is None:
-                    expires = datetime.datetime.utcnow() + datetime.timedelta(days=1)
-                    expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
-                    headers = {'Expires': expires}
-                    response.headers.update(headers)
-
-            self.cache.set(cache_url, self.serializer.dumps(request, response))
+        # Add to the cache any 301s. We do this before looking that
+        # the Date headers.
+        elif response.status == 301:
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response)
+            )

         # Add to the cache if the response headers demand it. If there
         # is no date header then we can't do anything about expiring
@@ -219,10 +239,10 @@ class CacheController(object):
         # cache when there is a max-age > 0
         if cc and cc.get('max-age'):
             if int(cc['max-age']) > 0:
-                if isinstance(cache_max_age, int):
-                    cc['max-age'] = int(cache_max_age)
-                    response.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
-                self.cache.set(cache_url, self.serializer.dumps(request, response))
+                self.cache.set(
+                    cache_url,
+                    self.serializer.dumps(request, response, body=body),
+                )

         # If the request can expire, it means we should cache it
         # in the meantime.
@@ -230,7 +250,7 @@ class CacheController(object):
             if response_headers['expires']:
                 self.cache.set(
                     cache_url,
-                    self.serializer.dumps(request, response),
+                    self.serializer.dumps(request, response, body=body),
                 )

     def update_cached_response(self, request, response):
@@ -242,14 +262,30 @@ class CacheController(object):
         """
         cache_url = self.cache_url(request.url)

-        cached_response = self.serializer.loads(request, self.cache.get(cache_url))
+        cached_response = self.serializer.loads(
+            request,
+            self.cache.get(cache_url)
+        )

         if not cached_response:
             # we didn't have a cached response
             return response

-        # did so lets update our headers
-        cached_response.headers.update(response.headers)
+        # Lets update our headers with the headers from the new request:
+        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
+        #
+        # The server isn't supposed to send headers that would make
+        # the cached body invalid. But... just in case, we'll be sure
+        # to strip out ones we know that might be problmatic due to
+        # typical assumptions.
+        excluded_headers = [
+            "content-length",
+        ]
+
+        cached_response.headers.update(
+            dict((k, v) for k, v in response.headers.items()
+                 if k.lower() not in excluded_headers)
+        )

         # we want a 200 b/c we have content via the cache
         cached_response.status = 200
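
Because cache_url()/_urlnorm() are now classmethods, a cache key can be computed without constructing a controller, which is what the new url_to_file_path() helper above relies on. A quick sketch:

    from cachecontrol.controller import CacheController

    # Scheme and authority are lowercased; path and query are kept.
    key = CacheController.cache_url('HTTP://Example.COM/feed?page=1')
    print(key)  # http://example.com/feed?page=1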

View file

@@ -0,0 +1,63 @@
+from io import BytesIO
+
+
+class CallbackFileWrapper(object):
+    """
+    Small wrapper around a fp object which will tee everything read into a
+    buffer, and when that file is closed it will execute a callback with the
+    contents of that buffer.
+
+    All attributes are proxied to the underlying file object.
+
+    This class uses members with a double underscore (__) leading prefix so as
+    not to accidentally shadow an attribute.
+    """
+
+    def __init__(self, fp, callback):
+        self.__buf = BytesIO()
+        self.__fp = fp
+        self.__callback = callback
+
+    def __getattr__(self, name):
+        # The vaguaries of garbage collection means that self.__fp is
+        # not always set. By using __getattribute__ and the private
+        # name[0] allows looking up the attribute value and raising an
+        # AttributeError when it doesn't exist. This stop thigns from
+        # infinitely recursing calls to getattr in the case where
+        # self.__fp hasn't been set.
+        #
+        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
+        fp = self.__getattribute__('_CallbackFileWrapper__fp')
+        return getattr(fp, name)
+
+    def __is_fp_closed(self):
+        try:
+            return self.__fp.fp is None
+        except AttributeError:
+            pass
+
+        try:
+            return self.__fp.closed
+        except AttributeError:
+            pass
+
+        # We just don't cache it then.
+        # TODO: Add some logging here...
+        return False
+
+    def read(self, amt=None):
+        data = self.__fp.read(amt)
+        self.__buf.write(data)
+
+        if self.__is_fp_closed():
+            if self.__callback:
+                self.__callback(self.__buf.getvalue())
+
+            # We assign this to None here, because otherwise we can get into
+            # really tricky problems where the CPython interpreter dead locks
+            # because the callback is holding a reference to something which
+            # has a __del__ method. Setting this to None breaks the cycle
+            # and allows the garbage collector to do it's thing normally.
+            self.__callback = None
+
+        return data
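
A sketch of the wrapper's behaviour. FakeSock below is a stand-in for the httplib socket file that urllib3 hands the adapter (httplib sets fp to None at EOF, which is the first check in __is_fp_closed):

    import io

    from cachecontrol.filewrapper import CallbackFileWrapper

    class FakeSock(object):
        def __init__(self, data):
            self.fp = io.BytesIO(data)

        def read(self, amt=None):
            data = self.fp.read(amt)
            if not data:
                self.fp = None  # mimic httplib at EOF
            return data

    def on_complete(body):
        print('cached %d bytes' % len(body))

    w = CallbackFileWrapper(FakeSock(b'hello'), on_complete)
    w.read()  # returns b'hello', teed into the internal buffer
    w.read()  # returns b''; fp is now closed, so on_complete fires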

View file

@@ -0,0 +1,134 @@
+import calendar
+import time
+
+from email.utils import formatdate, parsedate, parsedate_tz
+
+from datetime import datetime, timedelta
+
+TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
+
+
+def expire_after(delta, date=None):
+    date = date or datetime.now()
+    return date + delta
+
+
+def datetime_to_header(dt):
+    return formatdate(calendar.timegm(dt.timetuple()))
+
+
+class BaseHeuristic(object):
+
+    def warning(self, response):
+        """
+        Return a valid 1xx warning header value describing the cache
+        adjustments.
+
+        The response is provided too allow warnings like 113
+        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
+        to explicitly say response is over 24 hours old.
+        """
+        return '110 - "Response is Stale"'
+
+    def update_headers(self, response):
+        """Update the response headers with any new headers.
+
+        NOTE: This SHOULD always include some Warning header to
+              signify that the response was cached by the client, not
+              by way of the provided headers.
+        """
+        return {}
+
+    def apply(self, response):
+        warning_header_value = self.warning(response)
+        response.headers.update(self.update_headers(response))
+        if warning_header_value is not None:
+            response.headers.update({'Warning': warning_header_value})
+        return response
+
+
+class OneDayCache(BaseHeuristic):
+    """
+    Cache the response by providing an expires 1 day in the
+    future.
+    """
+    def update_headers(self, response):
+        headers = {}
+
+        if 'expires' not in response.headers:
+            date = parsedate(response.headers['date'])
+            expires = expire_after(timedelta(days=1),
+                                   date=datetime(*date[:6]))
+            headers['expires'] = datetime_to_header(expires)
+            headers['cache-control'] = 'public'
+        return headers
+
+
+class ExpiresAfter(BaseHeuristic):
+    """
+    Cache **all** requests for a defined time period.
+    """
+    def __init__(self, **kw):
+        self.delta = timedelta(**kw)
+
+    def update_headers(self, response):
+        expires = expire_after(self.delta)
+        return {
+            'expires': datetime_to_header(expires),
+            'cache-control': 'public',
+        }
+
+    def warning(self, response):
+        tmpl = '110 - Automatically cached for %s. Response might be stale'
+        return tmpl % self.delta
+
+
+class LastModified(BaseHeuristic):
+    """
+    If there is no Expires header already, fall back on Last-Modified
+    using the heuristic from
+    http://tools.ietf.org/html/rfc7234#section-4.2.2
+    to calculate a reasonable value.
+
+    Firefox also does something like this per
+    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
+    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
+    Unlike mozilla we limit this to 24-hr.
+    """
+    cacheable_by_default_statuses = set([
+        200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
+    ])
+
+    def update_headers(self, resp):
+        headers = resp.headers
+
+        if 'expires' in headers:
+            return {}
+
+        if 'cache-control' in headers and headers['cache-control'] != 'public':
+            return {}
+
+        if resp.status not in self.cacheable_by_default_statuses:
+            return {}
+
+        if 'date' not in headers or 'last-modified' not in headers:
+            return {}
+
+        date = calendar.timegm(parsedate_tz(headers['date']))
+        last_modified = parsedate(headers['last-modified'])
+        if date is None or last_modified is None:
+            return {}
+
+        now = time.time()
+        current_age = max(0, now - date)
+        delta = date - calendar.timegm(last_modified)
+        freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
+        if freshness_lifetime <= current_age:
+            return {}
+
+        expires = date + freshness_lifetime
+        return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}
+
+    def warning(self, resp):
+        return None
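
These heuristics plug into the heuristic argument added to CacheControlAdapter and CacheControl in this commit. For example (a sketch; the vendored import prefix may differ):

    import requests

    from cachecontrol import CacheControl
    from cachecontrol.heuristics import LastModified

    # Derive freshness from Last-Modified when the server sends no
    # explicit cache headers, capped at 24 hours.
    sess = CacheControl(requests.Session(), heuristic=LastModified())
    resp = sess.get('http://example.com/')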

View file

@@ -1,56 +0,0 @@
-import requests
-
-from requests import models
-from requests.packages.urllib3.response import HTTPResponse
-
-__attrs__ = [
-    '_content',
-    'status_code',
-    'headers',
-    'url',
-    'history',
-    'encoding',
-    'reason',
-    'cookies',
-    'elapsed',
-]
-
-
-def response_getstate(self):
-    # consume everything
-    if not self._content_consumed:
-        self.content
-
-    state = dict(
-        (attr, getattr(self, attr, None))
-        for attr in __attrs__
-    )
-
-    # deal with our raw content b/c we need it for our cookie jar
-    state['raw_original_response'] = self.raw._original_response
-    return state
-
-
-def response_setstate(self, state):
-    for name, value in state.items():
-        if name != 'raw_original_response':
-            setattr(self, name, value)
-
-    setattr(self, 'raw', HTTPResponse())
-    self.raw._original_response = state['raw_original_response']
-
-
-def make_responses_pickleable():
-    try:
-        version_parts = [int(part) for part in requests.__version__.split('.')]
-
-        # must be >= 2.2.x
-        if not version_parts[0] >= 2 or not version_parts[1] >= 2:
-            models.Response.__getstate__ = response_getstate
-            models.Response.__setstate__ = response_setstate
-    except:
-        raise
-        pass
-
-
-make_responses_pickleable()

View file

@@ -1,27 +1,59 @@
+import base64
 import io
+import json
+import zlib

 from requests.structures import CaseInsensitiveDict

 from .compat import HTTPResponse, pickle


+def _b64_encode_bytes(b):
+    return base64.b64encode(b).decode("ascii")
+
+
+def _b64_encode_str(s):
+    return _b64_encode_bytes(s.encode("utf8"))
+
+
+def _b64_decode_bytes(b):
+    return base64.b64decode(b.encode("ascii"))
+
+
+def _b64_decode_str(s):
+    return _b64_decode_bytes(s).decode("utf8")
+
+
 class Serializer(object):

     def dumps(self, request, response, body=None):
         response_headers = CaseInsensitiveDict(response.headers)

         if body is None:
+            # TODO: Figure out a way to handle this which doesn't break
+            #       streaming
             body = response.read(decode_content=False)

+            # NOTE: 99% sure this is dead code. I'm only leaving it
+            #       here b/c I don't have a test yet to prove
+            #       it. Basically, before using
+            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
+            #       this made an effort to reset the file handle. The
+            #       `CallbackFileWrapper` short circuits this code by
+            #       setting the body as the content is consumed, the
+            #       result being a `body` argument is *always* passed
+            #       into cache_response, and in turn,
+            #       `Serializer.dump`.
             response._fp = io.BytesIO(body)

         data = {
             "response": {
-                "body": body,
-                "headers": response.headers,
+                "body": _b64_encode_bytes(body),
+                "headers": dict(
+                    (_b64_encode_str(k), _b64_encode_str(v))
+                    for k, v in response.headers.items()
+                ),
                 "status": response.status,
                 "version": response.version,
-                "reason": response.reason,
+                "reason": _b64_encode_str(response.reason),
                 "strict": response.strict,
                 "decode_content": response.decode_content,
             },
@@ -35,7 +67,20 @@ class Serializer(object):
                 header = header.strip()
                 data["vary"][header] = request.headers.get(header, None)

-        return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
+        # Encode our Vary headers to ensure they can be serialized as JSON
+        data["vary"] = dict(
+            (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
+            for k, v in data["vary"].items()
+        )
+
+        return b",".join([
+            b"cc=2",
+            zlib.compress(
+                json.dumps(
+                    data, separators=(",", ":"), sort_keys=True,
+                ).encode("utf8"),
+            ),
+        ])

     def loads(self, request, data):
         # Short circuit if we've been given an empty set of data
@@ -66,6 +111,40 @@ class Serializer(object):
             # just treat it as a miss and return None
             return

+    def prepare_response(self, request, cached):
+        """Verify our vary headers match and construct a real urllib3
+        HTTPResponse object.
+        """
+        # Special case the '*' Vary value as it means we cannot actually
+        # determine if the cached response is suitable for this request.
+        if "*" in cached.get("vary", {}):
+            return
+
+        # Ensure that the Vary headers for the cached response match our
+        # request
+        for header, value in cached.get("vary", {}).items():
+            if request.headers.get(header, None) != value:
+                return
+
+        body_raw = cached["response"].pop("body")
+
+        try:
+            body = io.BytesIO(body_raw)
+        except TypeError:
+            # This can happen if cachecontrol serialized to v1 format (pickle)
+            # using Python 2. A Python 2 str(byte string) will be unpickled as
+            # a Python 3 str (unicode string), which will cause the above to
+            # fail with:
+            #
+            #     TypeError: 'str' does not support the buffer interface
+            body = io.BytesIO(body_raw.encode('utf8'))
+
+        return HTTPResponse(
+            body=body,
+            preload_content=False,
+            **cached["response"]
+        )
+
     def _loads_v0(self, request, data):
         # The original legacy cache data. This doesn't contain enough
         # information to construct everything we need, so we'll treat this as
@@ -78,20 +157,28 @@ class Serializer(object):
         except ValueError:
             return

-        # Special case the '*' Vary value as it means we cannot actually
-        # determine if the cached response is suitable for this request.
-        if "*" in cached.get("vary", {}):
-            return
+        return self.prepare_response(request, cached)

-        # Ensure that the Vary headers for the cached response match our
-        # request
-        for header, value in cached.get("vary", {}).items():
-            if request.headers.get(header, None) != value:
-                return
+    def _loads_v2(self, request, data):
+        try:
+            cached = json.loads(zlib.decompress(data).decode("utf8"))
+        except ValueError:
+            return

-        body = io.BytesIO(cached["response"].pop("body"))
-        return HTTPResponse(
-            body=body,
-            preload_content=False,
-            **cached["response"]
-        )
+        # We need to decode the items that we've base64 encoded
+        cached["response"]["body"] = _b64_decode_bytes(
+            cached["response"]["body"]
+        )
+        cached["response"]["headers"] = dict(
+            (_b64_decode_str(k), _b64_decode_str(v))
+            for k, v in cached["response"]["headers"].items()
+        )
+        cached["response"]["reason"] = _b64_decode_str(
+            cached["response"]["reason"],
+        )
+        cached["vary"] = dict(
+            (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
+            for k, v in cached["vary"].items()
+        )
+
+        return self.prepare_response(request, cached)
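
The new on-disk entries are therefore b"cc=2," followed by zlib-compressed JSON, replacing the old cc=1 pickle. A small sketch for inspecting such a blob (the cc=N prefix dispatch in loads is not shown in this hunk, so that part is an assumption based on dumps above):

    import json
    import zlib

    def peek_v2(blob):
        """Decode a raw "cc=2" cache entry produced by Serializer.dumps."""
        prefix, _, payload = blob.partition(b",")
        assert prefix == b"cc=2"
        # body, headers and reason inside are still base64 encoded.
        return json.loads(zlib.decompress(payload).decode("utf8"))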

View file

@@ -1,34 +0,0 @@
-from requests.sessions import Session
-
-
-class CacheControlSession(Session):
-    def __init__(self):
-        super(CacheControlSession, self).__init__()
-
-    def get(self, *args, **kw):
-        # auto-cache response
-        self.cache_auto = False
-        if kw.get('cache_auto'):
-            self.cache_auto = kw.pop('cache_auto')
-
-        # urls allowed to cache
-        self.cache_urls = []
-        if kw.get('cache_urls'):
-            self.cache_urls = [str(args[0])] + kw.pop('cache_urls')
-
-        # timeout for cached responses
-        self.cache_max_age = None
-        if kw.get('cache_max_age'):
-            self.cache_max_age = int(kw.pop('cache_max_age'))
-
-        return super(CacheControlSession, self).get(*args, **kw)
-
-    def prepare_request(self, *args, **kw):
-        # get response
-        req = super(CacheControlSession, self).prepare_request(*args, **kw)
-
-        # attach params to request
-        req.cache_auto = self.cache_auto
-        req.cache_urls = self.cache_urls
-        req.cache_max_age = self.cache_max_age
-
-        return req

View file

@@ -1,14 +1,19 @@
 from .adapter import CacheControlAdapter
 from .cache import DictCache
-from .session import CacheControlSession


-def CacheControl(sess=None, cache=None, cache_etags=True, serializer=None):
-    sess = sess or CacheControlSession()
+def CacheControl(sess,
+                 cache=None,
+                 cache_etags=True,
+                 serializer=None,
+                 heuristic=None):
     cache = cache or DictCache()
     adapter = CacheControlAdapter(
         cache,
         cache_etags=cache_etags,
         serializer=serializer,
+        heuristic=heuristic,
     )
     sess.mount('http://', adapter)
     sess.mount('https://', adapter)
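
With CacheControlSession removed, the session argument is now mandatory and any requests.Session works. Typical use after this commit (a sketch):

    import requests

    from cachecontrol import CacheControl

    sess = CacheControl(requests.Session())
    resp = sess.get('http://example.com/')  # cached per its headers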