Update CacheControl 0.12.4 (bd94f7e) → 0.12.5 (cd91309).

This commit is contained in:
JackDandy 2018-09-09 13:07:57 +01:00
parent 2f86aa3797
commit 779efa3f28
11 changed files with 209 additions and 222 deletions

View file

@ -1,6 +1,7 @@
### 0.18.0 (2018-xx-xx xx:xx:xx UTC) ### 0.18.0 (2018-xx-xx xx:xx:xx UTC)
* Update Beautiful Soup 4.6.0 (r449) to 4.6.3 (r475) * Update Beautiful Soup 4.6.0 (r449) to 4.6.3 (r475)
* Update CacheControl library 0.12.4 (bd94f7e) to 0.12.5 (cd91309)
[develop changelog] [develop changelog]

View file

@ -2,9 +2,9 @@
Make it easy to import from cachecontrol without long namespaces. Make it easy to import from cachecontrol without long namespaces.
""" """
__author__ = 'Eric Larson' __author__ = "Eric Larson"
__email__ = 'eric@ionrock.org' __email__ = "eric@ionrock.org"
__version__ = '0.12.4' __version__ = "0.12.5"
from .wrapper import CacheControl from .wrapper import CacheControl
from .adapter import CacheControlAdapter from .adapter import CacheControlAdapter

View file

@ -17,14 +17,11 @@ def setup_logging():
def get_session(): def get_session():
adapter = CacheControlAdapter( adapter = CacheControlAdapter(
DictCache(), DictCache(), cache_etags=True, serializer=None, heuristic=None
cache_etags=True,
serializer=None,
heuristic=None,
) )
sess = requests.Session() sess = requests.Session()
sess.mount('http://', adapter) sess.mount("http://", adapter)
sess.mount('https://', adapter) sess.mount("https://", adapter)
sess.cache_controller = adapter.controller sess.cache_controller = adapter.controller
return sess return sess
@ -32,7 +29,7 @@ def get_session():
def get_args(): def get_args():
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument('url', help='The URL to try and cache') parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args() return parser.parse_args()
@ -51,10 +48,10 @@ def main(args=None):
# Now try to get it # Now try to get it
if sess.cache_controller.cached_request(resp.request): if sess.cache_controller.cached_request(resp.request):
print('Cached!') print("Cached!")
else: else:
print('Not cached :(') print("Not cached :(")
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View file

@ -10,25 +10,27 @@ from .filewrapper import CallbackFileWrapper
class CacheControlAdapter(HTTPAdapter): class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE']) invalidating_methods = {"PUT", "DELETE"}
def __init__(self, cache=None, def __init__(
cache_etags=True, self,
controller_class=None, cache=None,
serializer=None, cache_etags=True,
heuristic=None, controller_class=None,
cacheable_methods=None, serializer=None,
*args, **kw): heuristic=None,
cacheable_methods=None,
*args,
**kw
):
super(CacheControlAdapter, self).__init__(*args, **kw) super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.heuristic = heuristic self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ('GET',) self.cacheable_methods = cacheable_methods or ("GET",)
controller_factory = controller_class or CacheController controller_factory = controller_class or CacheController
self.controller = controller_factory( self.controller = controller_factory(
self.cache, self.cache, cache_etags=cache_etags, serializer=serializer
cache_etags=cache_etags,
serializer=serializer,
) )
def send(self, request, cacheable_methods=None, **kw): def send(self, request, cacheable_methods=None, **kw):
@ -43,20 +45,18 @@ class CacheControlAdapter(HTTPAdapter):
except zlib.error: except zlib.error:
cached_response = None cached_response = None
if cached_response: if cached_response:
return self.build_response(request, cached_response, return self.build_response(request, cached_response, from_cache=True)
from_cache=True)
# check for etags and add headers if appropriate # check for etags and add headers if appropriate
request.headers.update( request.headers.update(self.controller.conditional_headers(request))
self.controller.conditional_headers(request)
)
resp = super(CacheControlAdapter, self).send(request, **kw) resp = super(CacheControlAdapter, self).send(request, **kw)
return resp return resp
def build_response(self, request, response, from_cache=False, def build_response(
cacheable_methods=None): self, request, response, from_cache=False, cacheable_methods=None
):
""" """
Build a response by making a request or using the cache. Build a response by making a request or using the cache.
@ -101,10 +101,8 @@ class CacheControlAdapter(HTTPAdapter):
response._fp = CallbackFileWrapper( response._fp = CallbackFileWrapper(
response._fp, response._fp,
functools.partial( functools.partial(
self.controller.cache_response, self.controller.cache_response, request, response
request, ),
response,
)
) )
if response.chunked: if response.chunked:
super_update_chunk_length = response._update_chunk_length super_update_chunk_length = response._update_chunk_length
@ -113,11 +111,12 @@ class CacheControlAdapter(HTTPAdapter):
super_update_chunk_length() super_update_chunk_length()
if self.chunk_left == 0: if self.chunk_left == 0:
self._fp._close() self._fp._close()
response._update_chunk_length = types.MethodType(_update_chunk_length, response)
resp = super(CacheControlAdapter, self).build_response( response._update_chunk_length = types.MethodType(
request, response _update_chunk_length, response
) )
resp = super(CacheControlAdapter, self).build_response(request, response)
# See if we should invalidate the cache. # See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok: if request.method in self.invalidating_methods and resp.ok:

View file

@ -9,7 +9,7 @@ try:
FileNotFoundError FileNotFoundError
except NameError: except NameError:
# py2.X # py2.X
FileNotFoundError = OSError FileNotFoundError = (IOError, OSError)
def _secure_open_write(filename, fmode): def _secure_open_write(filename, fmode):
@ -46,6 +46,7 @@ def _secure_open_write(filename, fmode):
fd = os.open(filename, flags, fmode) fd = os.open(filename, flags, fmode)
try: try:
return os.fdopen(fd, "wb") return os.fdopen(fd, "wb")
except: except:
# An error occurred wrapping our FD in a file object # An error occurred wrapping our FD in a file object
os.close(fd) os.close(fd)
@ -53,8 +54,16 @@ def _secure_open_write(filename, fmode):
class FileCache(BaseCache): class FileCache(BaseCache):
def __init__(self, directory, forever=False, filemode=0o0600,
dirmode=0o0700, use_dir_lock=None, lock_class=None): def __init__(
self,
directory,
forever=False,
filemode=0o0600,
dirmode=0o0700,
use_dir_lock=None,
lock_class=None,
):
if use_dir_lock is not None and lock_class is not None: if use_dir_lock is not None and lock_class is not None:
raise ValueError("Cannot use use_dir_lock and lock_class together") raise ValueError("Cannot use use_dir_lock and lock_class together")
@ -63,12 +72,15 @@ class FileCache(BaseCache):
from lockfile import LockFile from lockfile import LockFile
from lockfile.mkdirlockfile import MkdirLockFile from lockfile.mkdirlockfile import MkdirLockFile
except ImportError: except ImportError:
notice = dedent(""" notice = dedent(
"""
NOTE: In order to use the FileCache you must have NOTE: In order to use the FileCache you must have
lockfile installed. You can install it via pip: lockfile installed. You can install it via pip:
pip install lockfile pip install lockfile
""") """
)
raise ImportError(notice) raise ImportError(notice)
else: else:
if use_dir_lock: if use_dir_lock:
lock_class = MkdirLockFile lock_class = MkdirLockFile
@ -95,11 +107,12 @@ class FileCache(BaseCache):
def get(self, key): def get(self, key):
name = self._fn(key) name = self._fn(key)
if not os.path.exists(name): try:
return None with open(name, "rb") as fh:
return fh.read()
with open(name, 'rb') as fh: except FileNotFoundError:
return fh.read() return None
def set(self, key, value): def set(self, key, value):
name = self._fn(key) name = self._fn(key)

View file

@ -4,16 +4,6 @@ from datetime import datetime
from cachecontrol.cache import BaseCache from cachecontrol.cache import BaseCache
def total_seconds(td):
"""Python 2.6 compatability"""
if hasattr(td, 'total_seconds'):
return int(td.total_seconds())
ms = td.microseconds
secs = (td.seconds + td.days * 24 * 3600)
return int((ms + secs * 10**6) / 10**6)
class RedisCache(BaseCache): class RedisCache(BaseCache):
def __init__(self, conn): def __init__(self, conn):
@ -27,7 +17,7 @@ class RedisCache(BaseCache):
self.conn.set(key, value) self.conn.set(key, value)
else: else:
expires = expires - datetime.utcnow() expires = expires - datetime.utcnow()
self.conn.setex(key, total_seconds(expires), value) self.conn.setex(key, int(expires.total_seconds()), value)
def delete(self, key): def delete(self, key):
self.conn.delete(key) self.conn.delete(key)

View file

@ -30,8 +30,10 @@ def parse_uri(uri):
class CacheController(object): class CacheController(object):
"""An interface to see if request should cached or not. """An interface to see if request should cached or not.
""" """
def __init__(self, cache=None, cache_etags=True, serializer=None,
status_codes=None): def __init__(
self, cache=None, cache_etags=True, serializer=None, status_codes=None
):
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.cache_etags = cache_etags self.cache_etags = cache_etags
self.serializer = serializer or Serializer() self.serializer = serializer or Serializer()
@ -64,34 +66,35 @@ class CacheController(object):
def parse_cache_control(self, headers): def parse_cache_control(self, headers):
known_directives = { known_directives = {
# https://tools.ietf.org/html/rfc7234#section-5.2 # https://tools.ietf.org/html/rfc7234#section-5.2
'max-age': (int, True,), "max-age": (int, True),
'max-stale': (int, False,), "max-stale": (int, False),
'min-fresh': (int, True,), "min-fresh": (int, True),
'no-cache': (None, False,), "no-cache": (None, False),
'no-store': (None, False,), "no-store": (None, False),
'no-transform': (None, False,), "no-transform": (None, False),
'only-if-cached' : (None, False,), "only-if-cached": (None, False),
'must-revalidate': (None, False,), "must-revalidate": (None, False),
'public': (None, False,), "public": (None, False),
'private': (None, False,), "private": (None, False),
'proxy-revalidate': (None, False,), "proxy-revalidate": (None, False),
's-maxage': (int, True,) "s-maxage": (int, True),
} }
cc_headers = headers.get('cache-control', cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
headers.get('Cache-Control', ''))
retval = {} retval = {}
for cc_directive in cc_headers.split(','): for cc_directive in cc_headers.split(","):
parts = cc_directive.split('=', 1) if not cc_directive.strip():
continue
parts = cc_directive.split("=", 1)
directive = parts[0].strip() directive = parts[0].strip()
try: try:
typ, required = known_directives[directive] typ, required = known_directives[directive]
except KeyError: except KeyError:
logger.debug('Ignoring unknown cache-control directive: %s', logger.debug("Ignoring unknown cache-control directive: %s", directive)
directive)
continue continue
if not typ or not required: if not typ or not required:
@ -101,11 +104,16 @@ class CacheController(object):
retval[directive] = typ(parts[1].strip()) retval[directive] = typ(parts[1].strip())
except IndexError: except IndexError:
if required: if required:
logger.debug('Missing value for cache-control ' logger.debug(
'directive: %s', directive) "Missing value for cache-control " "directive: %s",
directive,
)
except ValueError: except ValueError:
logger.debug('Invalid value for cache-control directive ' logger.debug(
'%s, must be %s', directive, typ.__name__) "Invalid value for cache-control directive " "%s, must be %s",
directive,
typ.__name__,
)
return retval return retval
@ -119,24 +127,24 @@ class CacheController(object):
cc = self.parse_cache_control(request.headers) cc = self.parse_cache_control(request.headers)
# Bail out if the request insists on fresh data # Bail out if the request insists on fresh data
if 'no-cache' in cc: if "no-cache" in cc:
logger.debug('Request header has "no-cache", cache bypassed') logger.debug('Request header has "no-cache", cache bypassed')
return False return False
if 'max-age' in cc and cc['max-age'] == 0: if "max-age" in cc and cc["max-age"] == 0:
logger.debug('Request header has "max_age" as 0, cache bypassed') logger.debug('Request header has "max_age" as 0, cache bypassed')
return False return False
# Request allows serving from the cache, let's see if we find something # Request allows serving from the cache, let's see if we find something
cache_data = self.cache.get(cache_url) cache_data = self.cache.get(cache_url)
if cache_data is None: if cache_data is None:
logger.debug('No cache entry available') logger.debug("No cache entry available")
return False return False
# Check whether it can be deserialized # Check whether it can be deserialized
resp = self.serializer.loads(request, cache_data) resp = self.serializer.loads(request, cache_data)
if not resp: if not resp:
logger.warning('Cache entry deserialization failed, entry ignored') logger.warning("Cache entry deserialization failed, entry ignored")
return False return False
# If we have a cached 301, return it immediately. We don't # If we have a cached 301, return it immediately. We don't
@ -148,27 +156,27 @@ class CacheController(object):
# Client can try to refresh the value by repeating the request # Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache). # with cache busting headers as usual (ie no-cache).
if resp.status == 301: if resp.status == 301:
msg = ('Returning cached "301 Moved Permanently" response ' msg = (
'(ignoring date and etag information)') 'Returning cached "301 Moved Permanently" response '
"(ignoring date and etag information)"
)
logger.debug(msg) logger.debug(msg)
return resp return resp
headers = CaseInsensitiveDict(resp.headers) headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers: if not headers or "date" not in headers:
if 'etag' not in headers: if "etag" not in headers:
# Without date or etag, the cached response can never be used # Without date or etag, the cached response can never be used
# and should be deleted. # and should be deleted.
logger.debug('Purging cached response: no date or etag') logger.debug("Purging cached response: no date or etag")
self.cache.delete(cache_url) self.cache.delete(cache_url)
logger.debug('Ignoring cached response: no date') logger.debug("Ignoring cached response: no date")
return False return False
now = time.time() now = time.time()
date = calendar.timegm( date = calendar.timegm(parsedate_tz(headers["date"]))
parsedate_tz(headers['date'])
)
current_age = max(0, now - date) current_age = max(0, now - date)
logger.debug('Current age based on date: %i', current_age) logger.debug("Current age based on date: %i", current_age)
# TODO: There is an assumption that the result will be a # TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we # urllib3 response object. This may not be best since we
@ -180,45 +188,41 @@ class CacheController(object):
freshness_lifetime = 0 freshness_lifetime = 0
# Check the max-age pragma in the cache control header # Check the max-age pragma in the cache control header
if 'max-age' in resp_cc: if "max-age" in resp_cc:
freshness_lifetime = resp_cc['max-age'] freshness_lifetime = resp_cc["max-age"]
logger.debug('Freshness lifetime from max-age: %i', logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
freshness_lifetime)
# If there isn't a max-age, check for an expires header # If there isn't a max-age, check for an expires header
elif 'expires' in headers: elif "expires" in headers:
expires = parsedate_tz(headers['expires']) expires = parsedate_tz(headers["expires"])
if expires is not None: if expires is not None:
expire_time = calendar.timegm(expires) - date expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time) freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i", logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
freshness_lifetime)
# Determine if we are setting freshness limit in the # Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response. # request. Note, this overrides what was in the response.
if 'max-age' in cc: if "max-age" in cc:
freshness_lifetime = cc['max-age'] freshness_lifetime = cc["max-age"]
logger.debug('Freshness lifetime from request max-age: %i', logger.debug(
freshness_lifetime) "Freshness lifetime from request max-age: %i", freshness_lifetime
)
if 'min-fresh' in cc: if "min-fresh" in cc:
min_fresh = cc['min-fresh'] min_fresh = cc["min-fresh"]
# adjust our current age by our min fresh # adjust our current age by our min fresh
current_age += min_fresh current_age += min_fresh
logger.debug('Adjusted current age from min-fresh: %i', logger.debug("Adjusted current age from min-fresh: %i", current_age)
current_age)
# Return entry if it is fresh enough # Return entry if it is fresh enough
if freshness_lifetime > current_age: if freshness_lifetime > current_age:
logger.debug('The response is "fresh", returning cached response') logger.debug('The response is "fresh", returning cached response')
logger.debug('%i > %i', freshness_lifetime, current_age) logger.debug("%i > %i", freshness_lifetime, current_age)
return resp return resp
# we're not fresh. If we don't have an Etag, clear it out # we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in headers: if "etag" not in headers:
logger.debug( logger.debug('The cached response is "stale" with no etag, purging')
'The cached response is "stale" with no etag, purging'
)
self.cache.delete(cache_url) self.cache.delete(cache_url)
# return the original handler # return the original handler
@ -232,16 +236,15 @@ class CacheController(object):
if resp: if resp:
headers = CaseInsensitiveDict(resp.headers) headers = CaseInsensitiveDict(resp.headers)
if 'etag' in headers: if "etag" in headers:
new_headers['If-None-Match'] = headers['ETag'] new_headers["If-None-Match"] = headers["ETag"]
if 'last-modified' in headers: if "last-modified" in headers:
new_headers['If-Modified-Since'] = headers['Last-Modified'] new_headers["If-Modified-Since"] = headers["Last-Modified"]
return new_headers return new_headers
def cache_response(self, request, response, body=None, def cache_response(self, request, response, body=None, status_codes=None):
status_codes=None):
""" """
Algorithm for caching requests. Algorithm for caching requests.
@ -252,9 +255,7 @@ class CacheController(object):
cacheable_status_codes = status_codes or self.cacheable_status_codes cacheable_status_codes = status_codes or self.cacheable_status_codes
if response.status not in cacheable_status_codes: if response.status not in cacheable_status_codes:
logger.debug( logger.debug(
'Status code %s not in %s', "Status code %s not in %s", response.status, cacheable_status_codes
response.status,
cacheable_status_codes
) )
return return
@ -264,10 +265,12 @@ class CacheController(object):
# Content-Length is valid then we can check to see if the body we've # Content-Length is valid then we can check to see if the body we've
# been given matches the expected size, and if it doesn't we'll just # been given matches the expected size, and if it doesn't we'll just
# skip trying to cache it. # skip trying to cache it.
if (body is not None and if (
"content-length" in response_headers and body is not None
response_headers["content-length"].isdigit() and and "content-length" in response_headers
int(response_headers["content-length"]) != len(body)): and response_headers["content-length"].isdigit()
and int(response_headers["content-length"]) != len(body)
):
return return
cc_req = self.parse_cache_control(request.headers) cc_req = self.parse_cache_control(request.headers)
@ -278,53 +281,49 @@ class CacheController(object):
# Delete it from the cache if we happen to have it stored there # Delete it from the cache if we happen to have it stored there
no_store = False no_store = False
if 'no-store' in cc: if "no-store" in cc:
no_store = True no_store = True
logger.debug('Response header has "no-store"') logger.debug('Response header has "no-store"')
if 'no-store' in cc_req: if "no-store" in cc_req:
no_store = True no_store = True
logger.debug('Request header has "no-store"') logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url): if no_store and self.cache.get(cache_url):
logger.debug('Purging existing cache entry to honor "no-store"') logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url) self.cache.delete(cache_url)
if no_store:
return
# If we've been given an etag, then keep the response # If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers: if self.cache_etags and "etag" in response_headers:
logger.debug('Caching due to etag') logger.debug("Caching due to etag")
self.cache.set( self.cache.set(
cache_url, cache_url, self.serializer.dumps(request, response, body=body)
self.serializer.dumps(request, response, body=body),
) )
# Add to the cache any 301s. We do this before looking that # Add to the cache any 301s. We do this before looking that
# the Date headers. # the Date headers.
elif response.status == 301: elif response.status == 301:
logger.debug('Caching permanant redirect') logger.debug("Caching permanant redirect")
self.cache.set( self.cache.set(cache_url, self.serializer.dumps(request, response))
cache_url,
self.serializer.dumps(request, response)
)
# Add to the cache if the response headers demand it. If there # Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring # is no date header then we can't do anything about expiring
# the cache. # the cache.
elif 'date' in response_headers: elif "date" in response_headers:
# cache when there is a max-age > 0 # cache when there is a max-age > 0
if 'max-age' in cc and cc['max-age'] > 0: if "max-age" in cc and cc["max-age"] > 0:
logger.debug('Caching b/c date exists and max-age > 0') logger.debug("Caching b/c date exists and max-age > 0")
self.cache.set( self.cache.set(
cache_url, cache_url, self.serializer.dumps(request, response, body=body)
self.serializer.dumps(request, response, body=body),
) )
# If the request can expire, it means we should cache it # If the request can expire, it means we should cache it
# in the meantime. # in the meantime.
elif 'expires' in response_headers: elif "expires" in response_headers:
if response_headers['expires']: if response_headers["expires"]:
logger.debug('Caching b/c of expires header') logger.debug("Caching b/c of expires header")
self.cache.set( self.cache.set(
cache_url, cache_url, self.serializer.dumps(request, response, body=body)
self.serializer.dumps(request, response, body=body),
) )
def update_cached_response(self, request, response): def update_cached_response(self, request, response):
@ -336,10 +335,7 @@ class CacheController(object):
""" """
cache_url = self.cache_url(request.url) cache_url = self.cache_url(request.url)
cached_response = self.serializer.loads( cached_response = self.serializer.loads(request, self.cache.get(cache_url))
request,
self.cache.get(cache_url)
)
if not cached_response: if not cached_response:
# we didn't have a cached response # we didn't have a cached response
@ -352,22 +348,20 @@ class CacheController(object):
# the cached body invalid. But... just in case, we'll be sure # the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know that might be problmatic due to # to strip out ones we know that might be problmatic due to
# typical assumptions. # typical assumptions.
excluded_headers = [ excluded_headers = ["content-length"]
"content-length",
]
cached_response.headers.update( cached_response.headers.update(
dict((k, v) for k, v in response.headers.items() dict(
if k.lower() not in excluded_headers) (k, v)
for k, v in response.headers.items()
if k.lower() not in excluded_headers
)
) )
# we want a 200 b/c we have content via the cache # we want a 200 b/c we have content via the cache
cached_response.status = 200 cached_response.status = 200
# update our cache # update our cache
self.cache.set( self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
cache_url,
self.serializer.dumps(request, cached_response),
)
return cached_response return cached_response

View file

@ -27,17 +27,19 @@ class CallbackFileWrapper(object):
# self.__fp hasn't been set. # self.__fp hasn't been set.
# #
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
fp = self.__getattribute__('_CallbackFileWrapper__fp') fp = self.__getattribute__("_CallbackFileWrapper__fp")
return getattr(fp, name) return getattr(fp, name)
def __is_fp_closed(self): def __is_fp_closed(self):
try: try:
return self.__fp.fp is None return self.__fp.fp is None
except AttributeError: except AttributeError:
pass pass
try: try:
return self.__fp.closed return self.__fp.closed
except AttributeError: except AttributeError:
pass pass
@ -66,7 +68,7 @@ class CallbackFileWrapper(object):
def _safe_read(self, amt): def _safe_read(self, amt):
data = self.__fp._safe_read(amt) data = self.__fp._safe_read(amt)
if amt == 2 and data == b'\r\n': if amt == 2 and data == b"\r\n":
# urllib executes this read to toss the CRLF at the end # urllib executes this read to toss the CRLF at the end
# of the chunk. # of the chunk.
return data return data

View file

@ -46,7 +46,7 @@ class BaseHeuristic(object):
response.headers.update(updated_headers) response.headers.update(updated_headers)
warning_header_value = self.warning(response) warning_header_value = self.warning(response)
if warning_header_value is not None: if warning_header_value is not None:
response.headers.update({'Warning': warning_header_value}) response.headers.update({"Warning": warning_header_value})
return response return response
@ -56,15 +56,15 @@ class OneDayCache(BaseHeuristic):
Cache the response by providing an expires 1 day in the Cache the response by providing an expires 1 day in the
future. future.
""" """
def update_headers(self, response): def update_headers(self, response):
headers = {} headers = {}
if 'expires' not in response.headers: if "expires" not in response.headers:
date = parsedate(response.headers['date']) date = parsedate(response.headers["date"])
expires = expire_after(timedelta(days=1), expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
date=datetime(*date[:6])) headers["expires"] = datetime_to_header(expires)
headers['expires'] = datetime_to_header(expires) headers["cache-control"] = "public"
headers['cache-control'] = 'public'
return headers return headers
@ -78,13 +78,10 @@ class ExpiresAfter(BaseHeuristic):
def update_headers(self, response): def update_headers(self, response):
expires = expire_after(self.delta) expires = expire_after(self.delta)
return { return {"expires": datetime_to_header(expires), "cache-control": "public"}
'expires': datetime_to_header(expires),
'cache-control': 'public',
}
def warning(self, response): def warning(self, response):
tmpl = '110 - Automatically cached for %s. Response might be stale' tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta return tmpl % self.delta
@ -100,27 +97,27 @@ class LastModified(BaseHeuristic):
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr. Unlike mozilla we limit this to 24-hr.
""" """
cacheable_by_default_statuses = set([ cacheable_by_default_statuses = {
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
]) }
def update_headers(self, resp): def update_headers(self, resp):
headers = resp.headers headers = resp.headers
if 'expires' in headers: if "expires" in headers:
return {} return {}
if 'cache-control' in headers and headers['cache-control'] != 'public': if "cache-control" in headers and headers["cache-control"] != "public":
return {} return {}
if resp.status not in self.cacheable_by_default_statuses: if resp.status not in self.cacheable_by_default_statuses:
return {} return {}
if 'date' not in headers or 'last-modified' not in headers: if "date" not in headers or "last-modified" not in headers:
return {} return {}
date = calendar.timegm(parsedate_tz(headers['date'])) date = calendar.timegm(parsedate_tz(headers["date"]))
last_modified = parsedate(headers['last-modified']) last_modified = parsedate(headers["last-modified"])
if date is None or last_modified is None: if date is None or last_modified is None:
return {} return {}
@ -132,7 +129,7 @@ class LastModified(BaseHeuristic):
return {} return {}
expires = date + freshness_lifetime expires = date + freshness_lifetime
return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))} return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp): def warning(self, resp):
return None return None

View file

@ -48,23 +48,22 @@ class Serializer(object):
u"response": { u"response": {
u"body": body, u"body": body,
u"headers": dict( u"headers": dict(
(text_type(k), text_type(v)) (text_type(k), text_type(v)) for k, v in response.headers.items()
for k, v in response.headers.items()
), ),
u"status": response.status, u"status": response.status,
u"version": response.version, u"version": response.version,
u"reason": text_type(response.reason), u"reason": text_type(response.reason),
u"strict": response.strict, u"strict": response.strict,
u"decode_content": response.decode_content, u"decode_content": response.decode_content,
}, }
} }
# Construct our vary headers # Construct our vary headers
data[u"vary"] = {} data[u"vary"] = {}
if u"vary" in response_headers: if u"vary" in response_headers:
varied_headers = response_headers[u'vary'].split(',') varied_headers = response_headers[u"vary"].split(",")
for header in varied_headers: for header in varied_headers:
header = header.strip() header = text_type(header).strip()
header_value = request.headers.get(header, None) header_value = request.headers.get(header, None)
if header_value is not None: if header_value is not None:
header_value = text_type(header_value) header_value = text_type(header_value)
@ -95,7 +94,8 @@ class Serializer(object):
# Dispatch to the actual load method for the given version # Dispatch to the actual load method for the given version
try: try:
return getattr(self, "_loads_v{0}".format(ver))(request, data) return getattr(self, "_loads_v{}".format(ver))(request, data)
except AttributeError: except AttributeError:
# This is a version we don't have a loads function for, so we'll # This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None # just treat it as a miss and return None
@ -118,11 +118,11 @@ class Serializer(object):
body_raw = cached["response"].pop("body") body_raw = cached["response"].pop("body")
headers = CaseInsensitiveDict(data=cached['response']['headers']) headers = CaseInsensitiveDict(data=cached["response"]["headers"])
if headers.get('transfer-encoding', '') == 'chunked': if headers.get("transfer-encoding", "") == "chunked":
headers.pop('transfer-encoding') headers.pop("transfer-encoding")
cached['response']['headers'] = headers cached["response"]["headers"] = headers
try: try:
body = io.BytesIO(body_raw) body = io.BytesIO(body_raw)
@ -133,13 +133,9 @@ class Serializer(object):
# fail with: # fail with:
# #
# TypeError: 'str' does not support the buffer interface # TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode('utf8')) body = io.BytesIO(body_raw.encode("utf8"))
return HTTPResponse( return HTTPResponse(body=body, preload_content=False, **cached["response"])
body=body,
preload_content=False,
**cached["response"]
)
def _loads_v0(self, request, data): def _loads_v0(self, request, data):
# The original legacy cache data. This doesn't contain enough # The original legacy cache data. This doesn't contain enough
@ -162,16 +158,12 @@ class Serializer(object):
return return
# We need to decode the items that we've base64 encoded # We need to decode the items that we've base64 encoded
cached["response"]["body"] = _b64_decode_bytes( cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
cached["response"]["body"]
)
cached["response"]["headers"] = dict( cached["response"]["headers"] = dict(
(_b64_decode_str(k), _b64_decode_str(v)) (_b64_decode_str(k), _b64_decode_str(v))
for k, v in cached["response"]["headers"].items() for k, v in cached["response"]["headers"].items()
) )
cached["response"]["reason"] = _b64_decode_str( cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
cached["response"]["reason"],
)
cached["vary"] = dict( cached["vary"] = dict(
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
for k, v in cached["vary"].items() for k, v in cached["vary"].items()
@ -187,7 +179,7 @@ class Serializer(object):
def _loads_v4(self, request, data): def _loads_v4(self, request, data):
try: try:
cached = msgpack.loads(data, encoding='utf-8') cached = msgpack.loads(data, encoding="utf-8")
except ValueError: except ValueError:
return return

View file

@ -2,14 +2,16 @@ from .adapter import CacheControlAdapter
from .cache import DictCache from .cache import DictCache
def CacheControl(sess, def CacheControl(
cache=None, sess,
cache_etags=True, cache=None,
serializer=None, cache_etags=True,
heuristic=None, serializer=None,
controller_class=None, heuristic=None,
adapter_class=None, controller_class=None,
cacheable_methods=None): adapter_class=None,
cacheable_methods=None,
):
cache = cache or DictCache() cache = cache or DictCache()
adapter_class = adapter_class or CacheControlAdapter adapter_class = adapter_class or CacheControlAdapter
@ -19,9 +21,9 @@ def CacheControl(sess,
serializer=serializer, serializer=serializer,
heuristic=heuristic, heuristic=heuristic,
controller_class=controller_class, controller_class=controller_class,
cacheable_methods=cacheable_methods cacheable_methods=cacheable_methods,
) )
sess.mount('http://', adapter) sess.mount("http://", adapter)
sess.mount('https://', adapter) sess.mount("https://", adapter)
return sess return sess