Re-coded our cache session handler and made vast improvements to it

This commit is contained in:
echel0n 2014-03-29 21:54:23 -07:00
parent 77834f4599
commit 381049c373
7 changed files with 45 additions and 39 deletions

View file

@ -11,4 +11,3 @@ import cachecontrol.patch_requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController
from cachecontrol.session import CacheControlSession

View file

@ -2,16 +2,14 @@ from requests.adapters import HTTPAdapter
from cachecontrol.controller import CacheController
from cachecontrol.cache import DictCache
from cachecontrol.session import CacheControlSession
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, sess=None, cache=None, cache_etags=True, *args, **kw):
def __init__(self, cache=None, cache_etags=True, *args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.sess = sess or CacheControlSession()
self.cache = cache or DictCache()
self.controller = CacheController(self.sess, self.cache, cache_etags=cache_etags)
self.controller = CacheController(self.cache, cache_etags=cache_etags)
def send(self, request, **kw):
"""Send a request. Use the request information to see if it

View file

@ -4,7 +4,6 @@ dictionary, which in turn means it is not threadsafe for writing.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
@ -16,7 +15,6 @@ class BaseCache(object):
def delete(self, key):
    """Abstract hook for deleting ``key``; this base implementation only raises.

    Raises:
        NotImplementedError: always, so subclasses must provide their own
            implementation.
    """
    # ``NotImplemented`` is the rich-comparison sentinel, not an exception
    # class; calling it fails with TypeError instead of signalling an
    # unimplemented method.
    raise NotImplementedError()
class DictCache(BaseCache):
def __init__(self, init_dict=None):

View file

@ -25,8 +25,7 @@ def parse_uri(uri):
class CacheController(object):
"""An interface to see if a request should be cached or not.
"""
def __init__(self, sess=None, cache=None, cache_etags=True):
self.sess = sess or CacheControlSession()
def __init__(self, cache=None, sess=None, cache_etags=True):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
@ -185,27 +184,16 @@ class CacheController(object):
if resp.status_code not in [200, 203]:
return
# Cache Session Params
cache_auto = getattr(request, 'cache_auto', False)
cache_urls = getattr(request, 'cache_urls', [])
cache_max_age = getattr(request, 'cache_max_age', None)
# Check if we are wanting to cache responses from specific urls only
cache_url = self.cache_url(request.url)
cache_auto = getattr(self.sess, 'cache_auto', None)
cache_urls = getattr(self.sess, 'cache_urls', None)
cache_max_age = getattr(self.sess, 'cache_max_age', None)
if cache_urls:
if not any(s in cache_url for s in cache_urls):
if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
return
# If we want to cache sites that are not set up with cache headers, add the proper headers and keep the response
if cache_auto and getattr(resp.headers, 'cache-control', None) is None:
cache_max_age = int(cache_max_age or 3600)
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age)}
resp.headers.update(headers)
if getattr(resp.headers, 'expires', None) is None:
expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1))
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
headers = {'Expires': expires}
resp.headers.update(headers)
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(resp.headers)
@ -218,6 +206,18 @@ class CacheController(object):
if self.cache_etags and 'etag' in resp.headers:
self.cache.set(cache_url, resp)
# If we want to cache sites that are not set up with cache headers, add the proper headers and keep the response
elif cache_auto and not cc and resp.headers:
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
resp.headers.update(headers)
if 'expires' not in resp.headers:
if getattr(resp.headers, 'expires', None) is None:
expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1))
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
headers = {'Expires': expires}
resp.headers.update(headers)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
@ -225,14 +225,15 @@ class CacheController(object):
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if int(cc['max-age']) > 0:
if cache_max_age:
if isinstance(cache_max_age, (int, long)):
cc['max-age'] = int(cache_max_age)
resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
self.cache.set(cache_url, resp)
# If the request can expire, it means we should cache it
# in the meantime.
elif getattr(resp.headers, 'expires', None) is None:
elif 'expires' in resp.headers:
if getattr(resp.headers, 'expires', None) is not None:
self.cache.set(cache_url, resp)
def update_cached_response(self, request, response):

View file

@ -52,5 +52,4 @@ def make_responses_pickleable():
raise
pass
make_responses_pickleable()
make_responses_pickleable()

View file

@ -1,23 +1,34 @@
from requests.sessions import Session
class CacheControlSession(Session):
def __init__(self, *args, **kw):
super(CacheControlSession, self).__init__(*args, **kw)
def __init__(self):
super(CacheControlSession, self).__init__()
def request(self, *args, **kw):
def get(self, *args, **kw):
# auto-cache response
self.cache_auto = False
if kw.has_key('cache_auto'):
self.cache_auto = kw.pop('cache_auto')
# urls allowed to cache
self.cache_urls = None
self.cache_urls = []
if kw.has_key('cache_urls'):
self.cache_urls = [str(args[1])] + kw.pop('cache_urls')
self.cache_urls = [str(args[0])] + kw.pop('cache_urls')
# timeout for cached responses
self.cache_max_age = None
if kw.has_key('cache_max_age'):
self.cache_max_age = int(kw.pop('cache_max_age'))
return super(CacheControlSession, self).request(*args, **kw)
return super(CacheControlSession, self).get(*args, **kw)
def prepare_request(self, *args, **kw):
# get response
req = super(CacheControlSession, self).prepare_request(*args, **kw)
# attach params to request
req.cache_auto = self.cache_auto
req.cache_urls = self.cache_urls
req.cache_max_age = self.cache_max_age
return req

View file

@ -1,11 +1,11 @@
from cachecontrol.session import CacheControlSession
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.session import CacheControlSession
def CacheControl(sess=None, cache=None, cache_etags=True):
sess = sess or CacheControlSession()
cache = cache or DictCache()
adapter = CacheControlAdapter(sess, cache, cache_etags=cache_etags)
adapter = CacheControlAdapter(cache, cache_etags=cache_etags)
sess.mount('http://', adapter)
return sess