Re-coded our cache session handler and made vast improvements to it

This commit is contained in:
echel0n 2014-03-29 21:54:23 -07:00
parent 77834f4599
commit 381049c373
7 changed files with 45 additions and 39 deletions

View file

@ -11,4 +11,3 @@ import cachecontrol.patch_requests
from cachecontrol.wrapper import CacheControl from cachecontrol.wrapper import CacheControl
from cachecontrol.adapter import CacheControlAdapter from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController from cachecontrol.controller import CacheController
from cachecontrol.session import CacheControlSession

View file

@ -2,16 +2,14 @@ from requests.adapters import HTTPAdapter
from cachecontrol.controller import CacheController from cachecontrol.controller import CacheController
from cachecontrol.cache import DictCache from cachecontrol.cache import DictCache
from cachecontrol.session import CacheControlSession
class CacheControlAdapter(HTTPAdapter): class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE']) invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, sess=None, cache=None, cache_etags=True, *args, **kw): def __init__(self, cache=None, cache_etags=True, *args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw) super(CacheControlAdapter, self).__init__(*args, **kw)
self.sess = sess or CacheControlSession()
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.controller = CacheController(self.sess, self.cache, cache_etags=cache_etags) self.controller = CacheController(self.cache, cache_etags=cache_etags)
def send(self, request, **kw): def send(self, request, **kw):
"""Send a request. Use the request information to see if it """Send a request. Use the request information to see if it

View file

@ -4,7 +4,6 @@ dictionary, which in turns means it is not threadsafe for writing.
""" """
from threading import Lock from threading import Lock
class BaseCache(object): class BaseCache(object):
def get(self, key): def get(self, key):
@ -16,7 +15,6 @@ class BaseCache(object):
def delete(self, key): def delete(self, key):
raise NotImplemented() raise NotImplemented()
class DictCache(BaseCache): class DictCache(BaseCache):
def __init__(self, init_dict=None): def __init__(self, init_dict=None):

View file

@ -25,8 +25,7 @@ def parse_uri(uri):
class CacheController(object): class CacheController(object):
"""An interface to see if request should be cached or not. """An interface to see if request should be cached or not.
""" """
def __init__(self, sess=None, cache=None, cache_etags=True): def __init__(self, cache=None, sess=None, cache_etags=True):
self.sess = sess or CacheControlSession()
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.cache_etags = cache_etags self.cache_etags = cache_etags
@ -185,27 +184,16 @@ class CacheController(object):
if resp.status_code not in [200, 203]: if resp.status_code not in [200, 203]:
return return
# Cache Session Params
cache_auto = getattr(request, 'cache_auto', False)
cache_urls = getattr(request, 'cache_urls', [])
cache_max_age = getattr(request, 'cache_max_age', None)
# Check if we are wanting to cache responses from specific urls only
cache_url = self.cache_url(request.url) cache_url = self.cache_url(request.url)
cache_auto = getattr(self.sess, 'cache_auto', None) if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
cache_urls = getattr(self.sess, 'cache_urls', None)
cache_max_age = getattr(self.sess, 'cache_max_age', None)
if cache_urls:
if not any(s in cache_url for s in cache_urls):
return return
# If we want to cache sites not setup with cache headers then add the proper headers and keep the response
if cache_auto and getattr(resp.headers, 'cache-control', None) is None:
cache_max_age = int(cache_max_age or 3600)
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age)}
resp.headers.update(headers)
if getattr(resp.headers, 'expires', None) is None:
expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1))
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
headers = {'Expires': expires}
resp.headers.update(headers)
cc_req = self.parse_cache_control(request.headers) cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(resp.headers) cc = self.parse_cache_control(resp.headers)
@ -218,6 +206,18 @@ class CacheController(object):
if self.cache_etags and 'etag' in resp.headers: if self.cache_etags and 'etag' in resp.headers:
self.cache.set(cache_url, resp) self.cache.set(cache_url, resp)
# If we want to cache sites not setup with cache headers then add the proper headers and keep the response
elif cache_auto and not cc and resp.headers:
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
resp.headers.update(headers)
if 'expires' not in resp.headers:
if getattr(resp.headers, 'expires', None) is None:
expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1))
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
headers = {'Expires': expires}
resp.headers.update(headers)
# Add to the cache if the response headers demand it. If there # Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring # is no date header then we can't do anything about expiring
# the cache. # the cache.
@ -225,14 +225,15 @@ class CacheController(object):
# cache when there is a max-age > 0 # cache when there is a max-age > 0
if cc and cc.get('max-age'): if cc and cc.get('max-age'):
if int(cc['max-age']) > 0: if int(cc['max-age']) > 0:
if cache_max_age: if isinstance(cache_max_age, (int, long)):
cc['max-age'] = int(cache_max_age) cc['max-age'] = int(cache_max_age)
resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()]) resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
self.cache.set(cache_url, resp) self.cache.set(cache_url, resp)
# If the request can expire, it means we should cache it # If the request can expire, it means we should cache it
# in the meantime. # in the meantime.
elif getattr(resp.headers, 'expires', None) is None: elif 'expires' in resp.headers:
if getattr(resp.headers, 'expires', None) is not None:
self.cache.set(cache_url, resp) self.cache.set(cache_url, resp)
def update_cached_response(self, request, response): def update_cached_response(self, request, response):

View file

@ -52,5 +52,4 @@ def make_responses_pickleable():
raise raise
pass pass
make_responses_pickleable()
make_responses_pickleable()

View file

@ -1,23 +1,34 @@
from requests.sessions import Session from requests.sessions import Session
class CacheControlSession(Session): class CacheControlSession(Session):
def __init__(self, *args, **kw): def __init__(self):
super(CacheControlSession, self).__init__(*args, **kw) super(CacheControlSession, self).__init__()
def request(self, *args, **kw): def get(self, *args, **kw):
# auto-cache response # auto-cache response
self.cache_auto = False self.cache_auto = False
if kw.has_key('cache_auto'): if kw.has_key('cache_auto'):
self.cache_auto = kw.pop('cache_auto') self.cache_auto = kw.pop('cache_auto')
# urls allowed to cache # urls allowed to cache
self.cache_urls = None self.cache_urls = []
if kw.has_key('cache_urls'): if kw.has_key('cache_urls'):
self.cache_urls = [str(args[1])] + kw.pop('cache_urls') self.cache_urls = [str(args[0])] + kw.pop('cache_urls')
# timeout for cached responses # timeout for cached responses
self.cache_max_age = None self.cache_max_age = None
if kw.has_key('cache_max_age'): if kw.has_key('cache_max_age'):
self.cache_max_age = int(kw.pop('cache_max_age')) self.cache_max_age = int(kw.pop('cache_max_age'))
return super(CacheControlSession, self).request(*args, **kw) return super(CacheControlSession, self).get(*args, **kw)
def prepare_request(self, *args, **kw):
# build the prepared request
req = super(CacheControlSession, self).prepare_request(*args, **kw)
# attach params to request
req.cache_auto = self.cache_auto
req.cache_urls = self.cache_urls
req.cache_max_age = self.cache_max_age
return req

View file

@ -1,11 +1,11 @@
from cachecontrol.session import CacheControlSession
from cachecontrol.adapter import CacheControlAdapter from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache from cachecontrol.cache import DictCache
from cachecontrol.session import CacheControlSession
def CacheControl(sess=None, cache=None, cache_etags=True): def CacheControl(sess=None, cache=None, cache_etags=True):
sess = sess or CacheControlSession() sess = sess or CacheControlSession()
cache = cache or DictCache() cache = cache or DictCache()
adapter = CacheControlAdapter(sess, cache, cache_etags=cache_etags) adapter = CacheControlAdapter(cache, cache_etags=cache_etags)
sess.mount('http://', adapter) sess.mount('http://', adapter)
return sess return sess