Mirror of https://github.com/SickGear/SickGear.git (synced 2025-01-05 17:43:37 +00:00)

Update cachecontrol library 0.9.3 to 0.11.2.

parent f9568212da, commit 3ab45e19d5
14 changed files with 496 additions and 192 deletions

@@ -9,6 +9,7 @@

* Update change to suppress HTTPS verification InsecureRequestWarning to updated package as listed in hacks.txt
* Remove listed hacks.txt record for check that SSLv3 is available because issue was addressed by vendor
* Update chardet packages 2.2.1 to 2.3.0 (ff40135)
* Update cachecontrol library 0.9.3 to 0.11.2
* Add ToTV provider
* Fix Backlog scheduler initialization and change backlog frequency from minutes to days
* Change to consolidate and tidy some provider code

@ -2,6 +2,10 @@
|
|||
|
||||
Make it easy to import from cachecontrol without long namespaces.
|
||||
"""
|
||||
__author__ = 'Eric Larson'
|
||||
__email__ = 'eric@ionrock.org'
|
||||
__version__ = '0.11.2'
|
||||
|
||||
from .wrapper import CacheControl
|
||||
from .adapter import CacheControlAdapter
|
||||
from .controller import CacheController
|
||||
|
|
|
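
For reference, the new __init__.py flattens the namespace so the main entry points can be imported directly. A minimal usage sketch, assuming the upstream package name cachecontrol (the vendored copy in this repo would be imported as lib.cachecontrol) and an installed requests:

import requests

from cachecontrol import CacheControl

sess = CacheControl(requests.Session())
# repeat GETs to the same URL may now be answered from the in-memory cache
resp = sess.get('http://example.com/')
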
@ -1,16 +1,24 @@
|
|||
from lib.requests.adapters import HTTPAdapter
|
||||
import functools
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
from .controller import CacheController
|
||||
from .cache import DictCache
|
||||
from .filewrapper import CallbackFileWrapper
|
||||
|
||||
|
||||
class CacheControlAdapter(HTTPAdapter):
|
||||
invalidating_methods = set(['PUT', 'DELETE'])
|
||||
|
||||
def __init__(self, cache=None, cache_etags=True, controller_class=None,
|
||||
serializer=None, *args, **kw):
|
||||
def __init__(self, cache=None,
|
||||
cache_etags=True,
|
||||
controller_class=None,
|
||||
serializer=None,
|
||||
heuristic=None,
|
||||
*args, **kw):
|
||||
super(CacheControlAdapter, self).__init__(*args, **kw)
|
||||
self.cache = cache or DictCache()
|
||||
self.heuristic = heuristic
|
||||
|
||||
controller_factory = controller_class or CacheController
|
||||
self.controller = controller_factory(
|
||||
|
@ -27,10 +35,13 @@ class CacheControlAdapter(HTTPAdapter):
|
|||
if request.method == 'GET':
|
||||
cached_response = self.controller.cached_request(request)
|
||||
if cached_response:
|
||||
return self.build_response(request, cached_response, from_cache=True)
|
||||
return self.build_response(request, cached_response,
|
||||
from_cache=True)
|
||||
|
||||
# check for etags and add headers if appropriate
|
||||
request.headers.update(self.controller.conditional_headers(request))
|
||||
request.headers.update(
|
||||
self.controller.conditional_headers(request)
|
||||
)
|
||||
|
||||
resp = super(CacheControlAdapter, self).send(request, **kw)
|
||||
|
||||
|
@ -44,6 +55,8 @@ class CacheControlAdapter(HTTPAdapter):
|
|||
cached response
|
||||
"""
|
||||
if not from_cache and request.method == 'GET':
|
||||
|
||||
# apply any expiration heuristics
|
||||
if response.status == 304:
|
||||
# We must have sent an ETag request. This could mean
|
||||
# that we've been expired already or that we simply
|
||||
|
@ -56,14 +69,34 @@ class CacheControlAdapter(HTTPAdapter):
|
|||
if cached_response is not response:
|
||||
from_cache = True
|
||||
|
||||
# We are done with the server response, read a
|
||||
# possible response body (compliant servers will
|
||||
# not return one, but we cannot be 100% sure) and
|
||||
# release the connection back to the pool.
|
||||
response.read(decode_content=False)
|
||||
response.release_conn()
|
||||
|
||||
response = cached_response
|
||||
|
||||
# We always cache the 301 responses
|
||||
elif response.status == 301:
|
||||
self.controller.cache_response(request, response)
|
||||
else:
|
||||
# try to cache the response
|
||||
try:
|
||||
self.controller.cache_response(request, response)
|
||||
except Exception as e:
|
||||
# Failed to cache the results
|
||||
pass
|
||||
# Check for any heuristics that might update headers
|
||||
# before trying to cache.
|
||||
if self.heuristic:
|
||||
response = self.heuristic.apply(response)
|
||||
|
||||
# Wrap the response file with a wrapper that will cache the
|
||||
# response when the stream has been consumed.
|
||||
response._fp = CallbackFileWrapper(
|
||||
response._fp,
|
||||
functools.partial(
|
||||
self.controller.cache_response,
|
||||
request,
|
||||
response,
|
||||
)
|
||||
)
|
||||
|
||||
resp = super(CacheControlAdapter, self).build_response(
|
||||
request, response
|
||||
|
@ -78,3 +111,7 @@ class CacheControlAdapter(HTTPAdapter):
|
|||
resp.from_cache = from_cache
|
||||
|
||||
return resp
|
||||
|
||||
def close(self):
|
||||
self.cache.close()
|
||||
super(CacheControlAdapter, self).close()
|
||||
|
|
|
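
The adapter now takes an optional heuristic and defers caching to a CallbackFileWrapper as the body is consumed. A rough sketch of mounting it by hand, again assuming the upstream cachecontrol package name (OneDayCache is one of the heuristics added below in heuristics.py):

import requests

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.heuristics import OneDayCache

sess = requests.Session()
adapter = CacheControlAdapter(cache=DictCache(), cache_etags=True,
                              heuristic=OneDayCache())
sess.mount('http://', adapter)
sess.mount('https://', adapter)
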
@ -1,9 +1,10 @@
|
|||
"""
|
||||
The cache object API for implementing caches. The default is just a
|
||||
dictionary, which in turns means it is not threadsafe for writing.
|
||||
The cache object API for implementing caches. The default is a thread
|
||||
safe in-memory dictionary.
|
||||
"""
|
||||
from threading import Lock
|
||||
|
||||
|
||||
class BaseCache(object):
|
||||
|
||||
def get(self, key):
|
||||
|
@ -15,6 +16,10 @@ class BaseCache(object):
|
|||
def delete(self, key):
|
||||
raise NotImplemented()
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class DictCache(BaseCache):
|
||||
|
||||
def __init__(self, init_dict=None):
|
||||
|
|
|
@ -3,6 +3,9 @@ import os
|
|||
|
||||
from lockfile import FileLock
|
||||
|
||||
from ..cache import BaseCache
|
||||
from ..controller import CacheController
|
||||
|
||||
|
||||
def _secure_open_write(filename, fmode):
|
||||
# We only want to write to this file, so open it in write only mode
|
||||
|
@ -44,22 +47,24 @@ def _secure_open_write(filename, fmode):
|
|||
raise
|
||||
|
||||
|
||||
class FileCache(object):
|
||||
class FileCache(BaseCache):
|
||||
def __init__(self, directory, forever=False, filemode=0o0600,
|
||||
dirmode=0o0700):
|
||||
self.directory = directory
|
||||
self.forever = forever
|
||||
self.filemode = filemode
|
||||
|
||||
if not os.path.isdir(self.directory):
|
||||
os.makedirs(self.directory, dirmode)
|
||||
self.dirmode = dirmode
|
||||
|
||||
@staticmethod
|
||||
def encode(x):
|
||||
return hashlib.sha224(x.encode()).hexdigest()
|
||||
|
||||
def _fn(self, name):
|
||||
return os.path.join(self.directory, self.encode(name))
|
||||
# NOTE: This method should not change as some may depend on it.
|
||||
# See: https://github.com/ionrock/cachecontrol/issues/63
|
||||
hashed = self.encode(name)
|
||||
parts = list(hashed[:5]) + [hashed]
|
||||
return os.path.join(self.directory, *parts)
|
||||
|
||||
def get(self, key):
|
||||
name = self._fn(key)
|
||||
|
@ -71,7 +76,15 @@ class FileCache(object):
|
|||
|
||||
def set(self, key, value):
|
||||
name = self._fn(key)
|
||||
|
||||
# Make sure the directory exists
|
||||
try:
|
||||
os.makedirs(os.path.dirname(name), self.dirmode)
|
||||
except (IOError, OSError):
|
||||
pass
|
||||
|
||||
with FileLock(name) as lock:
|
||||
# Write our actual file
|
||||
with _secure_open_write(lock.path, self.filemode) as fh:
|
||||
fh.write(value)
|
||||
|
||||
|
@ -79,3 +92,12 @@ class FileCache(object):
|
|||
name = self._fn(key)
|
||||
if not self.forever:
|
||||
os.remove(name)
|
||||
|
||||
|
||||
def url_to_file_path(url, filecache):
|
||||
"""Return the file cache path based on the URL.
|
||||
|
||||
This does not ensure the file exists!
|
||||
"""
|
||||
key = CacheController.cache_url(url)
|
||||
return filecache._fn(key)
|
||||
|
|
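
The reworked FileCache._fn() shards entries into nested single-character directories taken from the SHA-224 digest, so no one directory accumulates every cached file (see the cachecontrol issue linked in the comment above). A standalone sketch of the resulting layout using only the standard library:

import hashlib
import os

def sharded_path(directory, name):
    # mirrors FileCache.encode() plus the new FileCache._fn()
    hashed = hashlib.sha224(name.encode()).hexdigest()
    parts = list(hashed[:5]) + [hashed]
    return os.path.join(directory, *parts)

# e.g. '.webcache/<h1>/<h2>/<h3>/<h4>/<h5>/<full sha224 hex digest>'
print(sharded_path('.webcache', 'http://example.com/'))
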
|
@ -36,3 +36,6 @@ class RedisCache(object):
|
|||
caution!"""
|
||||
for key in self.conn.keys():
|
||||
self.conn.delete(key)
|
||||
|
||||
def close(self):
|
||||
self.conn.disconnect()
|
||||
|
|
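
For completeness, a sketch of pairing the Redis backend (which now gains close()) with the wrapper. This assumes the redis package is installed and that RedisCache is constructed with an already-configured redis connection object:

import redis
import requests

from cachecontrol import CacheControl
from cachecontrol.caches.redis_cache import RedisCache

conn = redis.Redis(host='localhost', port=6379, db=0)
sess = CacheControl(requests.Session(), cache=RedisCache(conn))
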
|
@ -4,23 +4,20 @@ except ImportError:
|
|||
from urlparse import urljoin
|
||||
|
||||
|
||||
try:
|
||||
import email.utils
|
||||
parsedate_tz = email.utils.parsedate_tz
|
||||
except ImportError:
|
||||
import email.Utils
|
||||
parsedate_tz = email.Utils.parsedate_tz
|
||||
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
import pickle
|
||||
|
||||
|
||||
# Handle the case where the requests has been patched to not have urllib3
|
||||
# bundled as part of it's source.
|
||||
# Handle the case where the requests module has been patched to not have
|
||||
# urllib3 bundled as part of its source.
|
||||
try:
|
||||
from lib.requests.packages.urllib3.response import HTTPResponse
|
||||
from requests.packages.urllib3.response import HTTPResponse
|
||||
except ImportError:
|
||||
from urllib3.response import HTTPResponse
|
||||
|
||||
try:
|
||||
from requests.packages.urllib3.util import is_fp_closed
|
||||
except ImportError:
|
||||
from urllib3.util import is_fp_closed
|
||||
|
|
|
@ -4,14 +4,14 @@ The httplib2 algorithms ported for use with requests.
|
|||
import re
|
||||
import calendar
|
||||
import time
|
||||
import datetime
|
||||
from email.utils import parsedate_tz
|
||||
|
||||
from lib.requests.structures import CaseInsensitiveDict
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .cache import DictCache
|
||||
from .compat import parsedate_tz
|
||||
from .serialize import Serializer
|
||||
|
||||
|
||||
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
|
||||
|
||||
|
||||
|
@ -21,7 +21,7 @@ def parse_uri(uri):
|
|||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
"""
|
||||
groups = URI.match(uri).groups()
|
||||
return groups[1], groups[3], groups[4], groups[6], groups[8]
|
||||
return (groups[1], groups[3], groups[4], groups[6], groups[8])
|
||||
|
||||
|
||||
class CacheController(object):
|
||||
|
@ -32,26 +32,29 @@ class CacheController(object):
|
|||
self.cache_etags = cache_etags
|
||||
self.serializer = serializer or Serializer()
|
||||
|
||||
def _urlnorm(self, uri):
|
||||
@classmethod
|
||||
def _urlnorm(cls, uri):
|
||||
"""Normalize the URL to create a safe key for the cache"""
|
||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
if not scheme or not authority:
|
||||
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
|
||||
authority = authority.lower()
|
||||
|
||||
scheme = scheme.lower()
|
||||
authority = authority.lower()
|
||||
|
||||
if not path:
|
||||
path = "/"
|
||||
|
||||
# Could do syntax based normalization of the URI before
|
||||
# computing the digest. See Section 6.2.2 of Std 66.
|
||||
request_uri = query and "?".join([path, query]) or path
|
||||
scheme = scheme.lower()
|
||||
defrag_uri = scheme + "://" + authority + request_uri
|
||||
|
||||
return defrag_uri
|
||||
|
||||
def cache_url(self, uri):
|
||||
return self._urlnorm(uri)
|
||||
@classmethod
|
||||
def cache_url(cls, uri):
|
||||
return cls._urlnorm(uri)
|
||||
|
||||
def parse_cache_control(self, headers):
|
||||
"""
|
||||
|
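
Because _urlnorm() and cache_url() are now classmethods, a cache key can be computed without instantiating a controller. A quick illustration, assuming the upstream package name:

from cachecontrol.controller import CacheController

# scheme and host are lowercased, an empty path becomes "/", fragments are dropped
print(CacheController.cache_url('HTTP://Example.COM/Path?q=1#frag'))
# -> 'http://example.com/Path?q=1'
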
@ -68,13 +71,20 @@ class CacheController(object):
|
|||
parts = headers[cc_header].split(',')
|
||||
parts_with_args = [
|
||||
tuple([x.strip().lower() for x in part.split("=", 1)])
|
||||
for part in parts if -1 != part.find("=")]
|
||||
parts_wo_args = [(name.strip().lower(), 1)
|
||||
for name in parts if -1 == name.find("=")]
|
||||
for part in parts if -1 != part.find("=")
|
||||
]
|
||||
parts_wo_args = [
|
||||
(name.strip().lower(), 1)
|
||||
for name in parts if -1 == name.find("=")
|
||||
]
|
||||
retval = dict(parts_with_args + parts_wo_args)
|
||||
return retval
|
||||
|
||||
def cached_request(self, request):
|
||||
"""
|
||||
Return a cached response if it exists in the cache, otherwise
|
||||
return False.
|
||||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
cc = self.parse_cache_control(request.headers)
|
||||
|
||||
|
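
The reflowed comprehensions above turn a Cache-Control header into a dict of directives: entries containing "=" keep their value, bare directives map to 1. A standalone re-derivation of that logic:

parts = 'max-age=3600, no-transform, public'.split(',')
parts_with_args = [
    tuple(x.strip().lower() for x in part.split('=', 1))
    for part in parts if part.find('=') != -1
]
parts_wo_args = [(name.strip().lower(), 1)
                 for name in parts if name.find('=') == -1]

print(dict(parts_with_args + parts_wo_args))
# {'max-age': '3600', 'no-transform': 1, 'public': 1}
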
@ -95,7 +105,24 @@ class CacheController(object):
|
|||
if not resp:
|
||||
return False
|
||||
|
||||
# If we have a cached 301, return it immediately. We don't
|
||||
# need to test our response for other headers b/c it is
|
||||
# intrinsically "cacheable" as it is Permanent.
|
||||
# See:
|
||||
# https://tools.ietf.org/html/rfc7231#section-6.4.2
|
||||
#
|
||||
# Client can try to refresh the value by repeating the request
|
||||
# with cache busting headers as usual (ie no-cache).
|
||||
if resp.status == 301:
|
||||
return resp
|
||||
|
||||
headers = CaseInsensitiveDict(resp.headers)
|
||||
if not headers or 'date' not in headers:
|
||||
# Without a date or etag, the cached response can never be used
|
||||
# and should be deleted.
|
||||
if 'etag' not in headers:
|
||||
self.cache.delete(cache_url)
|
||||
return False
|
||||
|
||||
now = time.time()
|
||||
date = calendar.timegm(
|
||||
|
@ -104,15 +131,19 @@ class CacheController(object):
|
|||
current_age = max(0, now - date)
|
||||
|
||||
# TODO: There is an assumption that the result will be a
|
||||
# urllib3 response object. This may not be best since we
|
||||
# could probably avoid instantiating or constructing the
|
||||
# response until we know we need it.
|
||||
# urllib3 response object. This may not be best since we
|
||||
# could probably avoid instantiating or constructing the
|
||||
# response until we know we need it.
|
||||
resp_cc = self.parse_cache_control(headers)
|
||||
|
||||
# determine freshness
|
||||
freshness_lifetime = 0
|
||||
|
||||
# Check the max-age pragma in the cache control header
|
||||
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
|
||||
freshness_lifetime = int(resp_cc['max-age'])
|
||||
|
||||
# If there isn't a max-age, check for an expires header
|
||||
elif 'expires' in headers:
|
||||
expires = parsedate_tz(headers['expires'])
|
||||
if expires is not None:
|
||||
|
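
The freshness check above measures the response's current age (now minus its Date header) against max-age or, failing that, an Expires header. A compact standalone sketch of that arithmetic:

import calendar
import time
from email.utils import parsedate_tz

headers = {'date': 'Mon, 18 May 2015 00:00:00 GMT',
           'cache-control': 'max-age=3600'}

date = calendar.timegm(parsedate_tz(headers['date']))
current_age = max(0, time.time() - date)
freshness_lifetime = 3600    # parsed from the max-age directive above
# an old Date header, so this prints False
print(freshness_lifetime > current_age)
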
@ -163,32 +194,24 @@ class CacheController(object):
|
|||
|
||||
return new_headers
|
||||
|
||||
def cache_response(self, request, response):
|
||||
def cache_response(self, request, response, body=None):
|
||||
"""
|
||||
Algorithm for caching requests.
|
||||
|
||||
This assumes a requests Response object.
|
||||
"""
|
||||
# From httplib2: Don't cache 206's since we aren't going to
|
||||
# handle byte range requests
|
||||
if response.status not in [200, 203]:
|
||||
# handle byte range requests
|
||||
if response.status not in [200, 203, 300, 301]:
|
||||
return
|
||||
|
||||
# Cache Session Params
|
||||
cache_auto = getattr(request, 'cache_auto', False)
|
||||
cache_urls = getattr(request, 'cache_urls', [])
|
||||
cache_max_age = getattr(request, 'cache_max_age', None)
|
||||
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
# Check if we are wanting to cache responses from specific urls only
|
||||
cache_url = self.cache_url(request.url)
|
||||
if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
|
||||
return
|
||||
|
||||
cc_req = self.parse_cache_control(request.headers)
|
||||
cc = self.parse_cache_control(response_headers)
|
||||
|
||||
cache_url = self.cache_url(request.url)
|
||||
|
||||
# Delete it from the cache if we happen to have it stored there
|
||||
no_store = cc.get('no-store') or cc_req.get('no-store')
|
||||
if no_store and self.cache.get(cache_url):
|
||||
|
@ -196,21 +219,18 @@ class CacheController(object):
|
|||
|
||||
# If we've been given an etag, then keep the response
|
||||
if self.cache_etags and 'etag' in response_headers:
|
||||
self.cache.set(cache_url, self.serializer.dumps(request, response))
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response, body=body),
|
||||
)
|
||||
|
||||
# If we want to cache sites not setup with cache headers then add the proper headers and keep the response
|
||||
elif cache_auto and not cc and response_headers:
|
||||
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
|
||||
response.headers.update(headers)
|
||||
|
||||
if 'expires' not in response_headers:
|
||||
if getattr(response_headers, 'expires', None) is None:
|
||||
expires = datetime.datetime.utcnow() + datetime.timedelta(days=1)
|
||||
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
headers = {'Expires': expires}
|
||||
response.headers.update(headers)
|
||||
|
||||
self.cache.set(cache_url, self.serializer.dumps(request, response))
|
||||
# Add to the cache any 301s. We do this before looking at
|
||||
# the Date headers.
|
||||
elif response.status == 301:
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response)
|
||||
)
|
||||
|
||||
# Add to the cache if the response headers demand it. If there
|
||||
# is no date header then we can't do anything about expiring
|
||||
|
@ -219,10 +239,10 @@ class CacheController(object):
|
|||
# cache when there is a max-age > 0
|
||||
if cc and cc.get('max-age'):
|
||||
if int(cc['max-age']) > 0:
|
||||
if isinstance(cache_max_age, int):
|
||||
cc['max-age'] = int(cache_max_age)
|
||||
response.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
|
||||
self.cache.set(cache_url, self.serializer.dumps(request, response))
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response, body=body),
|
||||
)
|
||||
|
||||
# If the request can expire, it means we should cache it
|
||||
# in the meantime.
|
||||
|
@ -230,7 +250,7 @@ class CacheController(object):
|
|||
if response_headers['expires']:
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response),
|
||||
self.serializer.dumps(request, response, body=body),
|
||||
)
|
||||
|
||||
def update_cached_response(self, request, response):
|
||||
|
@ -242,14 +262,30 @@ class CacheController(object):
|
|||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
|
||||
cached_response = self.serializer.loads(request, self.cache.get(cache_url))
|
||||
cached_response = self.serializer.loads(
|
||||
request,
|
||||
self.cache.get(cache_url)
|
||||
)
|
||||
|
||||
if not cached_response:
|
||||
# we didn't have a cached response
|
||||
return response
|
||||
|
||||
# did so lets update our headers
|
||||
cached_response.headers.update(response.headers)
|
||||
# Lets update our headers with the headers from the new request:
|
||||
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
|
||||
#
|
||||
# The server isn't supposed to send headers that would make
|
||||
# the cached body invalid. But... just in case, we'll be sure
|
||||
# to strip out ones we know might be problematic due to
|
||||
# typical assumptions.
|
||||
excluded_headers = [
|
||||
"content-length",
|
||||
]
|
||||
|
||||
cached_response.headers.update(
|
||||
dict((k, v) for k, v in response.headers.items()
|
||||
if k.lower() not in excluded_headers)
|
||||
)
|
||||
|
||||
# we want a 200 b/c we have content via the cache
|
||||
cached_response.status = 200
|
||||
|
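
On a 304 revalidation the cached entry's headers are refreshed from the conditional response, minus headers (currently just Content-Length) that could contradict the stored body. A quick standalone illustration of that filter:

cached_headers = {'Content-Type': 'text/html', 'Content-Length': '1024'}
fresh_headers = {'Content-Length': '0', 'Date': 'Mon, 18 May 2015 00:00:00 GMT'}

excluded = ['content-length']
cached_headers.update(
    dict((k, v) for k, v in fresh_headers.items() if k.lower() not in excluded)
)
print(cached_headers)
# {'Content-Type': 'text/html', 'Content-Length': '1024',
#  'Date': 'Mon, 18 May 2015 00:00:00 GMT'}
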
|
lib/cachecontrol/filewrapper.py (new file, 63 lines)
|
@ -0,0 +1,63 @@
|
|||
from io import BytesIO
|
||||
|
||||
|
||||
class CallbackFileWrapper(object):
|
||||
"""
|
||||
Small wrapper around a fp object which will tee everything read into a
|
||||
buffer, and when that file is closed it will execute a callback with the
|
||||
contents of that buffer.
|
||||
|
||||
All attributes are proxied to the underlying file object.
|
||||
|
||||
This class uses members with a double underscore (__) leading prefix so as
|
||||
not to accidentally shadow an attribute.
|
||||
"""
|
||||
|
||||
def __init__(self, fp, callback):
|
||||
self.__buf = BytesIO()
|
||||
self.__fp = fp
|
||||
self.__callback = callback
|
||||
|
||||
def __getattr__(self, name):
|
||||
# The vagaries of garbage collection mean that self.__fp is
|
||||
# not always set. By using __getattribute__ and the private
|
||||
# name[0] allows looking up the attribute value and raising an
|
||||
# AttributeError when it doesn't exist. This stops things from
|
||||
# infinitely recursing calls to getattr in the case where
|
||||
# self.__fp hasn't been set.
|
||||
#
|
||||
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
|
||||
fp = self.__getattribute__('_CallbackFileWrapper__fp')
|
||||
return getattr(fp, name)
|
||||
|
||||
def __is_fp_closed(self):
|
||||
try:
|
||||
return self.__fp.fp is None
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return self.__fp.closed
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# We just don't cache it then.
|
||||
# TODO: Add some logging here...
|
||||
return False
|
||||
|
||||
def read(self, amt=None):
|
||||
data = self.__fp.read(amt)
|
||||
self.__buf.write(data)
|
||||
|
||||
if self.__is_fp_closed():
|
||||
if self.__callback:
|
||||
self.__callback(self.__buf.getvalue())
|
||||
|
||||
# We assign this to None here, because otherwise we can get into
|
||||
# really tricky problems where the CPython interpreter deadlocks
|
||||
# because the callback is holding a reference to something which
|
||||
# has a __del__ method. Setting this to None breaks the cycle
|
||||
# and allows the garbage collector to do its thing normally.
|
||||
self.__callback = None
|
||||
|
||||
return data
|
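
CallbackFileWrapper tees every read() into a buffer and hands the complete body to a callback once the underlying stream is done, which is how responses are cached only after they are actually consumed. A tiny standalone illustration of the pattern (it signals completion on an empty read rather than on a closed fp, so it is not the class above):

from io import BytesIO

class TeeReader(object):
    def __init__(self, fp, callback):
        self._fp, self._buf, self._callback = fp, BytesIO(), callback

    def read(self, amt=None):
        data = self._fp.read(amt)
        self._buf.write(data)
        if not data and self._callback:
            # stream exhausted: hand the buffered body to the callback once
            self._callback(self._buf.getvalue())
            self._callback = None
        return data

r = TeeReader(BytesIO(b'response body'), lambda body: print('cache:', body))
r.read()   # b'response body'
r.read()   # b'' and the callback fires with the complete body
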
lib/cachecontrol/heuristics.py (new file, 134 lines)
|
@ -0,0 +1,134 @@
|
|||
import calendar
|
||||
import time
|
||||
|
||||
from email.utils import formatdate, parsedate, parsedate_tz
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
|
||||
|
||||
|
||||
def expire_after(delta, date=None):
|
||||
date = date or datetime.now()
|
||||
return date + delta
|
||||
|
||||
|
||||
def datetime_to_header(dt):
|
||||
return formatdate(calendar.timegm(dt.timetuple()))
|
||||
|
||||
|
||||
class BaseHeuristic(object):
|
||||
|
||||
def warning(self, response):
|
||||
"""
|
||||
Return a valid 1xx warning header value describing the cache
|
||||
adjustments.
|
||||
|
||||
The response is provided to allow warnings like 113
|
||||
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
|
||||
to explicitly say response is over 24 hours old.
|
||||
"""
|
||||
return '110 - "Response is Stale"'
|
||||
|
||||
def update_headers(self, response):
|
||||
"""Update the response headers with any new headers.
|
||||
|
||||
NOTE: This SHOULD always include some Warning header to
|
||||
signify that the response was cached by the client, not
|
||||
by way of the provided headers.
|
||||
"""
|
||||
return {}
|
||||
|
||||
def apply(self, response):
|
||||
warning_header_value = self.warning(response)
|
||||
response.headers.update(self.update_headers(response))
|
||||
if warning_header_value is not None:
|
||||
response.headers.update({'Warning': warning_header_value})
|
||||
return response
|
||||
|
||||
|
||||
class OneDayCache(BaseHeuristic):
|
||||
"""
|
||||
Cache the response by providing an expires 1 day in the
|
||||
future.
|
||||
"""
|
||||
def update_headers(self, response):
|
||||
headers = {}
|
||||
|
||||
if 'expires' not in response.headers:
|
||||
date = parsedate(response.headers['date'])
|
||||
expires = expire_after(timedelta(days=1),
|
||||
date=datetime(*date[:6]))
|
||||
headers['expires'] = datetime_to_header(expires)
|
||||
headers['cache-control'] = 'public'
|
||||
return headers
|
||||
|
||||
|
||||
class ExpiresAfter(BaseHeuristic):
|
||||
"""
|
||||
Cache **all** requests for a defined time period.
|
||||
"""
|
||||
|
||||
def __init__(self, **kw):
|
||||
self.delta = timedelta(**kw)
|
||||
|
||||
def update_headers(self, response):
|
||||
expires = expire_after(self.delta)
|
||||
return {
|
||||
'expires': datetime_to_header(expires),
|
||||
'cache-control': 'public',
|
||||
}
|
||||
|
||||
def warning(self, response):
|
||||
tmpl = '110 - Automatically cached for %s. Response might be stale'
|
||||
return tmpl % self.delta
|
||||
|
||||
|
||||
class LastModified(BaseHeuristic):
|
||||
"""
|
||||
If there is no Expires header already, fall back on Last-Modified
|
||||
using the heuristic from
|
||||
http://tools.ietf.org/html/rfc7234#section-4.2.2
|
||||
to calculate a reasonable value.
|
||||
|
||||
Firefox also does something like this per
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
|
||||
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
|
||||
Unlike Mozilla, we limit this to 24 hours.
|
||||
"""
|
||||
cacheable_by_default_statuses = set([
|
||||
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
|
||||
])
|
||||
|
||||
def update_headers(self, resp):
|
||||
headers = resp.headers
|
||||
|
||||
if 'expires' in headers:
|
||||
return {}
|
||||
|
||||
if 'cache-control' in headers and headers['cache-control'] != 'public':
|
||||
return {}
|
||||
|
||||
if resp.status not in self.cacheable_by_default_statuses:
|
||||
return {}
|
||||
|
||||
if 'date' not in headers or 'last-modified' not in headers:
|
||||
return {}
|
||||
|
||||
date = calendar.timegm(parsedate_tz(headers['date']))
|
||||
last_modified = parsedate(headers['last-modified'])
|
||||
if date is None or last_modified is None:
|
||||
return {}
|
||||
|
||||
now = time.time()
|
||||
current_age = max(0, now - date)
|
||||
delta = date - calendar.timegm(last_modified)
|
||||
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
|
||||
if freshness_lifetime <= current_age:
|
||||
return {}
|
||||
|
||||
expires = date + freshness_lifetime
|
||||
return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}
|
||||
|
||||
def warning(self, resp):
|
||||
return None
|
|
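
heuristics.py is added here as a new file and provides the objects the adapter's heuristic argument expects. A minimal sketch of opting a session into one of them, under the same cachecontrol package-name assumption as the earlier examples:

import requests

from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter

# every otherwise-cacheable response is treated as fresh for one day
sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(days=1))
resp = sess.get('http://example.com/')
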
@ -1,56 +0,0 @@
|
|||
import requests
|
||||
|
||||
from requests import models
|
||||
from requests.packages.urllib3.response import HTTPResponse
|
||||
|
||||
__attrs__ = [
|
||||
'_content',
|
||||
'status_code',
|
||||
'headers',
|
||||
'url',
|
||||
'history',
|
||||
'encoding',
|
||||
'reason',
|
||||
'cookies',
|
||||
'elapsed',
|
||||
]
|
||||
|
||||
|
||||
def response_getstate(self):
|
||||
# consume everything
|
||||
if not self._content_consumed:
|
||||
self.content
|
||||
|
||||
state = dict(
|
||||
(attr, getattr(self, attr, None))
|
||||
for attr in __attrs__
|
||||
)
|
||||
|
||||
# deal with our raw content b/c we need it for our cookie jar
|
||||
state['raw_original_response'] = self.raw._original_response
|
||||
return state
|
||||
|
||||
|
||||
def response_setstate(self, state):
|
||||
for name, value in state.items():
|
||||
if name != 'raw_original_response':
|
||||
setattr(self, name, value)
|
||||
|
||||
setattr(self, 'raw', HTTPResponse())
|
||||
self.raw._original_response = state['raw_original_response']
|
||||
|
||||
|
||||
def make_responses_pickleable():
|
||||
try:
|
||||
version_parts = [int(part) for part in requests.__version__.split('.')]
|
||||
|
||||
# must be >= 2.2.x
|
||||
if not version_parts[0] >= 2 or not version_parts[1] >= 2:
|
||||
models.Response.__getstate__ = response_getstate
|
||||
models.Response.__setstate__ = response_setstate
|
||||
except:
|
||||
raise
|
||||
pass
|
||||
|
||||
|
||||
make_responses_pickleable()
|
|
@ -1,27 +1,59 @@
|
|||
import base64
|
||||
import io
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .compat import HTTPResponse, pickle
|
||||
|
||||
|
||||
def _b64_encode_bytes(b):
|
||||
return base64.b64encode(b).decode("ascii")
|
||||
|
||||
|
||||
def _b64_encode_str(s):
|
||||
return _b64_encode_bytes(s.encode("utf8"))
|
||||
|
||||
|
||||
def _b64_decode_bytes(b):
|
||||
return base64.b64decode(b.encode("ascii"))
|
||||
|
||||
|
||||
def _b64_decode_str(s):
|
||||
return _b64_decode_bytes(s).decode("utf8")
|
||||
|
||||
|
||||
class Serializer(object):
|
||||
|
||||
def dumps(self, request, response, body=None):
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
if body is None:
|
||||
# TODO: Figure out a way to handle this which doesn't break
|
||||
# streaming
|
||||
body = response.read(decode_content=False)
|
||||
|
||||
# NOTE: 99% sure this is dead code. I'm only leaving it
|
||||
# here b/c I don't have a test yet to prove
|
||||
# it. Basically, before using
|
||||
# `cachecontrol.filewrapper.CallbackFileWrapper`,
|
||||
# this made an effort to reset the file handle. The
|
||||
# `CallbackFileWrapper` short circuits this code by
|
||||
# setting the body as the content is consumed, the
|
||||
# result being a `body` argument is *always* passed
|
||||
# into cache_response, and in turn,
|
||||
# `Serializer.dump`.
|
||||
response._fp = io.BytesIO(body)
|
||||
|
||||
data = {
|
||||
"response": {
|
||||
"body": body,
|
||||
"headers": response.headers,
|
||||
"body": _b64_encode_bytes(body),
|
||||
"headers": dict(
|
||||
(_b64_encode_str(k), _b64_encode_str(v))
|
||||
for k, v in response.headers.items()
|
||||
),
|
||||
"status": response.status,
|
||||
"version": response.version,
|
||||
"reason": response.reason,
|
||||
"reason": _b64_encode_str(response.reason),
|
||||
"strict": response.strict,
|
||||
"decode_content": response.decode_content,
|
||||
},
|
||||
|
@ -35,7 +67,20 @@ class Serializer(object):
|
|||
header = header.strip()
|
||||
data["vary"][header] = request.headers.get(header, None)
|
||||
|
||||
return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
|
||||
# Encode our Vary headers to ensure they can be serialized as JSON
|
||||
data["vary"] = dict(
|
||||
(_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
|
||||
for k, v in data["vary"].items()
|
||||
)
|
||||
|
||||
return b",".join([
|
||||
b"cc=2",
|
||||
zlib.compress(
|
||||
json.dumps(
|
||||
data, separators=(",", ":"), sort_keys=True,
|
||||
).encode("utf8"),
|
||||
),
|
||||
])
|
||||
|
||||
def loads(self, request, data):
|
||||
# Short circuit if we've been given an empty set of data
|
||||
|
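
dumps() now emits a versioned blob: the literal prefix cc=2, followed by zlib-compressed JSON in which the body, headers and reason are base64-encoded. A rough sketch of what such a blob looks like and how it can be unpacked (the field values are illustrative only):

import base64
import json
import zlib

payload = {
    'response': {
        'body': base64.b64encode(b'hello').decode('ascii'),
        'status': 200,
    },
    'vary': {},
}
blob = b'cc=2,' + zlib.compress(
    json.dumps(payload, separators=(',', ':'), sort_keys=True).encode('utf8')
)

ver, _, rest = blob.partition(b',')
assert ver == b'cc=2'
print(json.loads(zlib.decompress(rest).decode('utf8')))
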
@ -66,6 +111,40 @@ class Serializer(object):
|
|||
# just treat it as a miss and return None
|
||||
return
|
||||
|
||||
def prepare_response(self, request, cached):
|
||||
"""Verify our vary headers match and construct a real urllib3
|
||||
HTTPResponse object.
|
||||
"""
|
||||
# Special case the '*' Vary value as it means we cannot actually
|
||||
# determine if the cached response is suitable for this request.
|
||||
if "*" in cached.get("vary", {}):
|
||||
return
|
||||
|
||||
# Ensure that the Vary headers for the cached response match our
|
||||
# request
|
||||
for header, value in cached.get("vary", {}).items():
|
||||
if request.headers.get(header, None) != value:
|
||||
return
|
||||
|
||||
body_raw = cached["response"].pop("body")
|
||||
|
||||
try:
|
||||
body = io.BytesIO(body_raw)
|
||||
except TypeError:
|
||||
# This can happen if cachecontrol serialized to v1 format (pickle)
|
||||
# using Python 2. A Python 2 str(byte string) will be unpickled as
|
||||
# a Python 3 str (unicode string), which will cause the above to
|
||||
# fail with:
|
||||
#
|
||||
# TypeError: 'str' does not support the buffer interface
|
||||
body = io.BytesIO(body_raw.encode('utf8'))
|
||||
|
||||
return HTTPResponse(
|
||||
body=body,
|
||||
preload_content=False,
|
||||
**cached["response"]
|
||||
)
|
||||
|
||||
def _loads_v0(self, request, data):
|
||||
# The original legacy cache data. This doesn't contain enough
|
||||
# information to construct everything we need, so we'll treat this as
|
||||
|
@ -78,20 +157,28 @@ class Serializer(object):
|
|||
except ValueError:
|
||||
return
|
||||
|
||||
# Special case the '*' Vary value as it means we cannot actually
|
||||
# determine if the cached response is suitable for this request.
|
||||
if "*" in cached.get("vary", {}):
|
||||
return self.prepare_response(request, cached)
|
||||
|
||||
def _loads_v2(self, request, data):
|
||||
try:
|
||||
cached = json.loads(zlib.decompress(data).decode("utf8"))
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
# Ensure that the Vary headers for the cached response match our
|
||||
# request
|
||||
for header, value in cached.get("vary", {}).items():
|
||||
if request.headers.get(header, None) != value:
|
||||
return
|
||||
|
||||
body = io.BytesIO(cached["response"].pop("body"))
|
||||
return HTTPResponse(
|
||||
body=body,
|
||||
preload_content=False,
|
||||
**cached["response"]
|
||||
# We need to decode the items that we've base64 encoded
|
||||
cached["response"]["body"] = _b64_decode_bytes(
|
||||
cached["response"]["body"]
|
||||
)
|
||||
cached["response"]["headers"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v))
|
||||
for k, v in cached["response"]["headers"].items()
|
||||
)
|
||||
cached["response"]["reason"] = _b64_decode_str(
|
||||
cached["response"]["reason"],
|
||||
)
|
||||
cached["vary"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
|
||||
for k, v in cached["vary"].items()
|
||||
)
|
||||
|
||||
return self.prepare_response(request, cached)
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
from requests.sessions import Session
|
||||
|
||||
class CacheControlSession(Session):
|
||||
def __init__(self):
|
||||
super(CacheControlSession, self).__init__()
|
||||
|
||||
def get(self, *args, **kw):
|
||||
# auto-cache response
|
||||
self.cache_auto = False
|
||||
if kw.get('cache_auto'):
|
||||
self.cache_auto = kw.pop('cache_auto')
|
||||
|
||||
# urls allowed to cache
|
||||
self.cache_urls = []
|
||||
if kw.get('cache_urls'):
|
||||
self.cache_urls = [str(args[0])] + kw.pop('cache_urls')
|
||||
|
||||
# timeout for cached responses
|
||||
self.cache_max_age = None
|
||||
if kw.get('cache_max_age'):
|
||||
self.cache_max_age = int(kw.pop('cache_max_age'))
|
||||
|
||||
return super(CacheControlSession, self).get(*args, **kw)
|
||||
|
||||
def prepare_request(self, *args, **kw):
|
||||
# get response
|
||||
req = super(CacheControlSession, self).prepare_request(*args, **kw)
|
||||
|
||||
# attach params to request
|
||||
req.cache_auto = self.cache_auto
|
||||
req.cache_urls = self.cache_urls
|
||||
req.cache_max_age = self.cache_max_age
|
||||
|
||||
return req
|
|
@ -1,14 +1,19 @@
|
|||
from .adapter import CacheControlAdapter
|
||||
from .cache import DictCache
|
||||
from .session import CacheControlSession
|
||||
|
||||
def CacheControl(sess=None, cache=None, cache_etags=True, serializer=None):
|
||||
sess = sess or CacheControlSession()
|
||||
|
||||
def CacheControl(sess,
|
||||
cache=None,
|
||||
cache_etags=True,
|
||||
serializer=None,
|
||||
heuristic=None):
|
||||
|
||||
cache = cache or DictCache()
|
||||
adapter = CacheControlAdapter(
|
||||
cache,
|
||||
cache_etags=cache_etags,
|
||||
serializer=serializer,
|
||||
heuristic=heuristic,
|
||||
)
|
||||
sess.mount('http://', adapter)
|
||||
sess.mount('https://', adapter)
|
||||
|
|
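
The wrapper no longer builds a CacheControlSession internally; a session must now be passed in, and cache, serializer and heuristic are forwarded to the adapter. A short sketch under the same package-name assumption (the '.webcache' directory name is only an illustration):

import requests

from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

sess = CacheControl(requests.Session(), cache=FileCache('.webcache'))
resp = sess.get('http://example.com/')
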