From 2acafcb603af74e4c9a6fc62523136d74fd6f7d5 Mon Sep 17 00:00:00 2001 From: echel0n Date: Sat, 29 Mar 2014 02:49:51 -0700 Subject: [PATCH] Fixed bugs in cache control --- lib/cachecontrol/adapter.py | 2 +- lib/cachecontrol/cache.py | 1 - lib/cachecontrol/controller.py | 24 ++++++++++++++---------- lib/cachecontrol/session.py | 5 ++--- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py index 264fb1be..e7e1829a 100644 --- a/lib/cachecontrol/adapter.py +++ b/lib/cachecontrol/adapter.py @@ -11,7 +11,7 @@ class CacheControlAdapter(HTTPAdapter): super(CacheControlAdapter, self).__init__(*args, **kw) self.sess = sess or CacheControlSession() self.cache = cache or DictCache() - self.controller = CacheController(sess=sess, cache=cache, cache_etags=cache_etags) + self.controller = CacheController(self.sess, self.cache, cache_etags=cache_etags) def send(self, request, **kw): """Send a request. Use the request information to see if it diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py index abcd6fda..feb7d3ed 100644 --- a/lib/cachecontrol/cache.py +++ b/lib/cachecontrol/cache.py @@ -34,4 +34,3 @@ class DictCache(BaseCache): with self.lock: if key in self.data: self.data.pop(key) -0 \ No newline at end of file diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py index 9dfea3cc..04361227 100644 --- a/lib/cachecontrol/controller.py +++ b/lib/cachecontrol/controller.py @@ -26,9 +26,9 @@ class CacheController(object): """An interface to see if request should cached or not. """ def __init__(self, sess=None, cache=None, cache_etags=True): + self.sess = sess or CacheControlSession() self.cache = cache or DictCache() self.cache_etags = cache_etags - self.sess = sess or CacheControlSession() def _urlnorm(self, uri): """Normalize the URL to create a safe key for the cache""" @@ -185,13 +185,18 @@ class CacheController(object): if resp.status_code not in [200, 203]: return - cache_url = self.cache_url(request.url) - if self.sess.cache_urls and not any(s in cache_url for s in self.sess.cache_urls): - return + cache_auto = getattr(self.sess, 'cache_auto', None) + cache_urls = getattr(self.sess, 'cache_urls', None) + cache_max_age = getattr(self.sess, 'cache_max_age', None) - if self.sess.cache_auto and ('cache-control' not in resp.headers or 'Cache-Control' not in resp.headers): - cache_max_age = int(self.sess.cache_max_age or 900) + if cache_urls: + if not any(s in cache_url for s in cache_urls): + return + + # If we want to cache sites not setup with cache headers then add the proper headers and keep the response + if cache_auto and getattr(resp.headers, 'cache-control', None) is None: + cache_max_age = int(cache_max_age or 3600) headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age)} resp.headers.update(headers) @@ -220,15 +225,14 @@ class CacheController(object): # cache when there is a max-age > 0 if cc and cc.get('max-age'): if int(cc['max-age']) > 0: - if self.sess.cache_max_age: - cc['max-age'] = int(self.sess.cache_max_age) + if cache_max_age: + cc['max-age'] = int(cache_max_age) resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()]) self.cache.set(cache_url, resp) # If the request can expire, it means we should cache it # in the meantime. - elif 'expires' in resp.headers: - if resp.headers['expires']: + elif getattr(resp.headers, 'expires', None) is None: self.cache.set(cache_url, resp) def update_cached_response(self, request, response): diff --git a/lib/cachecontrol/session.py b/lib/cachecontrol/session.py index 258405af..a4b0759a 100644 --- a/lib/cachecontrol/session.py +++ b/lib/cachecontrol/session.py @@ -1,4 +1,3 @@ -import datetime from requests.sessions import Session class CacheControlSession(Session): @@ -12,11 +11,11 @@ class CacheControlSession(Session): self.cache_auto = kw.pop('cache_auto') # urls allowed to cache - self.cache_urls = [] + self.cache_urls = None if kw.has_key('cache_urls'): self.cache_urls = [str(args[1])] + kw.pop('cache_urls') - # timeout for cacheed responses + # timeout for cached responses self.cache_max_age = None if kw.has_key('cache_max_age'): self.cache_max_age = int(kw.pop('cache_max_age'))