Merge pull request #614 from JackDandy/feature/UpdateCachecontrol

Update cachecontrol library 0.11.2 to 0.11.5.
This commit is contained in:
JackDandy 2016-01-12 03:10:02 +00:00
commit cd9fc4cf36
7 changed files with 157 additions and 27 deletions

View file

@ -8,6 +8,7 @@
* Remove redundant MultipartPostHandler * Remove redundant MultipartPostHandler
* Update Beautiful Soup 4.4.0 (r390) to 4.4.0 (r397) * Update Beautiful Soup 4.4.0 (r390) to 4.4.0 (r397)
* Update backports/ssl_match_hostname 3.4.0.2 to 3.5.0.1 (r18) * Update backports/ssl_match_hostname 3.4.0.2 to 3.5.0.1 (r18)
* Update cachecontrol library 0.11.2 to 0.11.5
### 0.11.0 (2016-01-10 22:30:00 UTC) ### 0.11.0 (2016-01-10 22:30:00 UTC)

View file

@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
""" """
__author__ = 'Eric Larson' __author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org' __email__ = 'eric@ionrock.org'
__version__ = '0.11.2' __version__ = '0.11.5'
from .wrapper import CacheControl from .wrapper import CacheControl
from .adapter import CacheControlAdapter from .adapter import CacheControlAdapter

60
lib/cachecontrol/_cmd.py Normal file
View file

@ -0,0 +1,60 @@
import logging
import requests
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.controller import logger
from argparse import ArgumentParser
def setup_logging():
    """Make the cache controller's debug messages visible.

    Lowers the imported ``logger`` to DEBUG level and attaches a new
    ``logging.StreamHandler`` to it.
    """
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)
def get_session():
    """Create a ``requests`` session routed through a caching adapter.

    A single ``CacheControlAdapter`` backed by a fresh ``DictCache`` is
    mounted for both the ``http://`` and ``https://`` prefixes.

    :returns: The session, with the adapter's controller additionally
        exposed as ``cache_controller``.
    """
    cache_adapter = CacheControlAdapter(
        DictCache(),
        cache_etags=True,
        serializer=None,
        heuristic=None,
    )

    session = requests.Session()
    for prefix in ('http://', 'https://'):
        session.mount(prefix, cache_adapter)

    # Keep a handle on the controller so callers can drive the cache directly.
    session.cache_controller = cache_adapter.controller
    return session
def get_args(argv=None):
    """Parse the command-line arguments for the cache check.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) ``argparse`` falls back to ``sys.argv[1:]``, which is
        the behaviour this function always had.
    :returns: The parsed namespace; its ``url`` attribute holds the single
        positional argument.
    :raises SystemExit: Raised by ``argparse`` when the arguments are
        invalid (for example when the URL is missing).
    """
    parser = ArgumentParser()
    parser.add_argument('url', help='The URL to try and cache')
    return parser.parse_args(argv)
def main(args=None):
    """Fetch a URL and print whether its response ends up in the cache.

    :param args: Optional pre-parsed arguments: any object exposing a
        ``url`` attribute. When ``None`` the command line is parsed with
        ``get_args()``.
    """
    # Honour caller-supplied arguments. Previously the parameter was
    # overwritten unconditionally, so passing it had no effect.
    if args is None:
        args = get_args()
    sess = get_session()

    # Make a request to get a response
    resp = sess.get(args.url)

    # Turn on logging
    setup_logging()

    # try setting the cache
    sess.cache_controller.cache_response(resp.request, resp.raw)

    # Now try to get it
    if sess.cache_controller.cached_request(resp.request):
        print('Cached!')
    else:
        print('Not cached :(')
# Run the cache check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View file

@ -1,7 +1,8 @@
import hashlib import hashlib
import os import os
from lockfile import FileLock from lockfile import LockFile
from lockfile.mkdirlockfile import MkdirLockFile
from ..cache import BaseCache from ..cache import BaseCache
from ..controller import CacheController from ..controller import CacheController
@ -49,11 +50,23 @@ def _secure_open_write(filename, fmode):
class FileCache(BaseCache): class FileCache(BaseCache):
def __init__(self, directory, forever=False, filemode=0o0600, def __init__(self, directory, forever=False, filemode=0o0600,
dirmode=0o0700): dirmode=0o0700, use_dir_lock=None, lock_class=None):
if use_dir_lock is not None and lock_class is not None:
raise ValueError("Cannot use use_dir_lock and lock_class together")
if use_dir_lock:
lock_class = MkdirLockFile
if lock_class is None:
lock_class = LockFile
self.directory = directory self.directory = directory
self.forever = forever self.forever = forever
self.filemode = filemode self.filemode = filemode
self.dirmode = dirmode self.dirmode = dirmode
self.lock_class = lock_class
@staticmethod @staticmethod
def encode(x): def encode(x):
@ -83,7 +96,7 @@ class FileCache(BaseCache):
except (IOError, OSError): except (IOError, OSError):
pass pass
with FileLock(name) as lock: with self.lock_class(name) as lock:
# Write our actual file # Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh: with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value) fh.write(value)

View file

@ -21,3 +21,9 @@ try:
from requests.packages.urllib3.util import is_fp_closed from requests.packages.urllib3.util import is_fp_closed
except ImportError: except ImportError:
from urllib3.util import is_fp_closed from urllib3.util import is_fp_closed
# Replicate some six behaviour: use ``unicode`` as the text type where that
# name exists; where it does not, the lookup raises NameError and ``str`` is
# used instead. The value is a one-element tuple, which isinstance() accepts
# directly.
try:
    text_type = (unicode,)
except NameError:
    text_type = (str,)

View file

@ -1,6 +1,7 @@
""" """
The httplib2 algorithms ported for use with requests. The httplib2 algorithms ported for use with requests.
""" """
import logging
import re import re
import calendar import calendar
import time import time
@ -12,6 +13,8 @@ from .cache import DictCache
from .serialize import Serializer from .serialize import Serializer
logger = logging.getLogger(__name__)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
@ -86,23 +89,28 @@ class CacheController(object):
return False. return False.
""" """
cache_url = self.cache_url(request.url) cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers) cc = self.parse_cache_control(request.headers)
# non-caching states # Bail out if the request insists on fresh data
no_cache = True if 'no-cache' in cc else False if 'no-cache' in cc:
if 'max-age' in cc and cc['max-age'] == 0: logger.debug('Request header has "no-cache", cache bypassed')
no_cache = True
# Bail out if no-cache was set
if no_cache:
return False return False
# It is in the cache, so lets see if it is going to be if 'max-age' in cc and cc['max-age'] == 0:
# fresh enough logger.debug('Request header has "max_age" as 0, cache bypassed')
resp = self.serializer.loads(request, self.cache.get(cache_url)) return False
# Check to see if we have a cached object # Request allows serving from the cache, let's see if we find something
cache_data = self.cache.get(cache_url)
if cache_data is None:
logger.debug('No cache entry available')
return False
# Check whether it can be deserialized
resp = self.serializer.loads(request, cache_data)
if not resp: if not resp:
logger.warning('Cache entry deserialization failed, entry ignored')
return False return False
# If we have a cached 301, return it immediately. We don't # If we have a cached 301, return it immediately. We don't
@ -114,14 +122,19 @@ class CacheController(object):
# Client can try to refresh the value by repeating the request # Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache). # with cache busting headers as usual (ie no-cache).
if resp.status == 301: if resp.status == 301:
msg = ('Returning cached "301 Moved Permanently" response '
'(ignoring date and etag information)')
logger.debug(msg)
return resp return resp
headers = CaseInsensitiveDict(resp.headers) headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers: if not headers or 'date' not in headers:
# With date or etag, the cached response can never be used
# and should be deleted.
if 'etag' not in headers: if 'etag' not in headers:
# Without date or etag, the cached response can never be used
# and should be deleted.
logger.debug('Purging cached response: no date or etag')
self.cache.delete(cache_url) self.cache.delete(cache_url)
logger.debug('Ignoring cached response: no date')
return False return False
now = time.time() now = time.time()
@ -129,6 +142,7 @@ class CacheController(object):
parsedate_tz(headers['date']) parsedate_tz(headers['date'])
) )
current_age = max(0, now - date) current_age = max(0, now - date)
logger.debug('Current age based on date: %i', current_age)
# TODO: There is an assumption that the result will be a # TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we # urllib3 response object. This may not be best since we
@ -142,6 +156,8 @@ class CacheController(object):
# Check the max-age pragma in the cache control header # Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age']) freshness_lifetime = int(resp_cc['max-age'])
logger.debug('Freshness lifetime from max-age: %i',
freshness_lifetime)
# If there isn't a max-age, check for an expires header # If there isn't a max-age, check for an expires header
elif 'expires' in headers: elif 'expires' in headers:
@ -149,11 +165,16 @@ class CacheController(object):
if expires is not None: if expires is not None:
expire_time = calendar.timegm(expires) - date expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time) freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i",
freshness_lifetime)
# determine if we are setting freshness limit in the req # Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
if 'max-age' in cc: if 'max-age' in cc:
try: try:
freshness_lifetime = int(cc['max-age']) freshness_lifetime = int(cc['max-age'])
logger.debug('Freshness lifetime from request max-age: %i',
freshness_lifetime)
except ValueError: except ValueError:
freshness_lifetime = 0 freshness_lifetime = 0
@ -164,15 +185,20 @@ class CacheController(object):
min_fresh = 0 min_fresh = 0
# adjust our current age by our min fresh # adjust our current age by our min fresh
current_age += min_fresh current_age += min_fresh
logger.debug('Adjusted current age from min-fresh: %i',
current_age)
# see how fresh we actually are # Return entry if it is fresh enough
fresh = (freshness_lifetime > current_age) if freshness_lifetime > current_age:
logger.debug('The response is "fresh", returning cached response')
if fresh: logger.debug('%i > %i', freshness_lifetime, current_age)
return resp return resp
# we're not fresh. If we don't have an Etag, clear it out # we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in headers: if 'etag' not in headers:
logger.debug(
'The cached response is "stale" with no etag, purging'
)
self.cache.delete(cache_url) self.cache.delete(cache_url)
# return the original handler # return the original handler
@ -202,7 +228,13 @@ class CacheController(object):
""" """
# From httplib2: Don't cache 206's since we aren't going to # From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests # handle byte range requests
if response.status not in [200, 203, 300, 301]: cacheable_status_codes = [200, 203, 300, 301]
if response.status not in cacheable_status_codes:
logger.debug(
'Status code %s not in %s',
response.status,
cacheable_status_codes
)
return return
response_headers = CaseInsensitiveDict(response.headers) response_headers = CaseInsensitiveDict(response.headers)
@ -211,14 +243,23 @@ class CacheController(object):
cc = self.parse_cache_control(response_headers) cc = self.parse_cache_control(response_headers)
cache_url = self.cache_url(request.url) cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
# Delete it from the cache if we happen to have it stored there # Delete it from the cache if we happen to have it stored there
no_store = cc.get('no-store') or cc_req.get('no-store') no_store = False
if cc.get('no-store'):
no_store = True
logger.debug('Response header has "no-store"')
if cc_req.get('no-store'):
no_store = True
logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url): if no_store and self.cache.get(cache_url):
logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url) self.cache.delete(cache_url)
# If we've been given an etag, then keep the response # If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers: if self.cache_etags and 'etag' in response_headers:
logger.debug('Caching due to etag')
self.cache.set( self.cache.set(
cache_url, cache_url,
self.serializer.dumps(request, response, body=body), self.serializer.dumps(request, response, body=body),
@ -227,6 +268,7 @@ class CacheController(object):
# Add to the cache any 301s. We do this before looking that # Add to the cache any 301s. We do this before looking that
# the Date headers. # the Date headers.
elif response.status == 301: elif response.status == 301:
logger.debug('Caching permanant redirect')
self.cache.set( self.cache.set(
cache_url, cache_url,
self.serializer.dumps(request, response) self.serializer.dumps(request, response)
@ -239,6 +281,7 @@ class CacheController(object):
# cache when there is a max-age > 0 # cache when there is a max-age > 0
if cc and cc.get('max-age'): if cc and cc.get('max-age'):
if int(cc['max-age']) > 0: if int(cc['max-age']) > 0:
logger.debug('Caching b/c date exists and max-age > 0')
self.cache.set( self.cache.set(
cache_url, cache_url,
self.serializer.dumps(request, response, body=body), self.serializer.dumps(request, response, body=body),
@ -248,6 +291,7 @@ class CacheController(object):
# in the meantime. # in the meantime.
elif 'expires' in response_headers: elif 'expires' in response_headers:
if response_headers['expires']: if response_headers['expires']:
logger.debug('Caching b/c of expires header')
self.cache.set( self.cache.set(
cache_url, cache_url,
self.serializer.dumps(request, response, body=body), self.serializer.dumps(request, response, body=body),

View file

@ -5,7 +5,7 @@ import zlib
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from .compat import HTTPResponse, pickle from .compat import HTTPResponse, pickle, text_type
def _b64_encode_bytes(b): def _b64_encode_bytes(b):
@ -16,6 +16,12 @@ def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8")) return _b64_encode_bytes(s.encode("utf8"))
def _b64_encode(s):
    """Base64-encode ``s`` using the text or the bytes encoder.

    Values that are instances of ``text_type`` go through
    ``_b64_encode_str``; everything else is handed to
    ``_b64_encode_bytes``.
    """
    encoder = _b64_encode_str if isinstance(s, text_type) else _b64_encode_bytes
    return encoder(s)
def _b64_decode_bytes(b): def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii")) return base64.b64decode(b.encode("ascii"))
@ -48,7 +54,7 @@ class Serializer(object):
"response": { "response": {
"body": _b64_encode_bytes(body), "body": _b64_encode_bytes(body),
"headers": dict( "headers": dict(
(_b64_encode_str(k), _b64_encode_str(v)) (_b64_encode(k), _b64_encode(v))
for k, v in response.headers.items() for k, v in response.headers.items()
), ),
"status": response.status, "status": response.status,
@ -69,7 +75,7 @@ class Serializer(object):
# Encode our Vary headers to ensure they can be serialized as JSON # Encode our Vary headers to ensure they can be serialized as JSON
data["vary"] = dict( data["vary"] = dict(
(_b64_encode_str(k), _b64_encode_str(v) if v is not None else v) (_b64_encode(k), _b64_encode(v) if v is not None else v)
for k, v in data["vary"].items() for k, v in data["vary"].items()
) )