diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py index d5b3d9f3..693e11f1 100644 --- a/lib/cachecontrol/__init__.py +++ b/lib/cachecontrol/__init__.py @@ -11,4 +11,3 @@ import cachecontrol.patch_requests from cachecontrol.wrapper import CacheControl from cachecontrol.adapter import CacheControlAdapter from cachecontrol.controller import CacheController -from cachecontrol.session import CacheControlSession \ No newline at end of file diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py index e7e1829a..2e818c7b 100644 --- a/lib/cachecontrol/adapter.py +++ b/lib/cachecontrol/adapter.py @@ -2,16 +2,14 @@ from requests.adapters import HTTPAdapter from cachecontrol.controller import CacheController from cachecontrol.cache import DictCache -from cachecontrol.session import CacheControlSession class CacheControlAdapter(HTTPAdapter): invalidating_methods = set(['PUT', 'DELETE']) - def __init__(self, sess=None, cache=None, cache_etags=True, *args, **kw): + def __init__(self, cache=None, cache_etags=True, *args, **kw): super(CacheControlAdapter, self).__init__(*args, **kw) - self.sess = sess or CacheControlSession() self.cache = cache or DictCache() - self.controller = CacheController(self.sess, self.cache, cache_etags=cache_etags) + self.controller = CacheController(self.cache, cache_etags=cache_etags) def send(self, request, **kw): """Send a request. Use the request information to see if it diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py index feb7d3ed..b8a0098c 100644 --- a/lib/cachecontrol/cache.py +++ b/lib/cachecontrol/cache.py @@ -4,7 +4,6 @@ dictionary, which in turns means it is not threadsafe for writing. """ from threading import Lock - class BaseCache(object): def get(self, key): @@ -16,7 +15,6 @@ class BaseCache(object): def delete(self, key): raise NotImplemented() - class DictCache(BaseCache): def __init__(self, init_dict=None): diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py index 04361227..fbda9482 100644 --- a/lib/cachecontrol/controller.py +++ b/lib/cachecontrol/controller.py @@ -25,8 +25,7 @@ def parse_uri(uri): class CacheController(object): """An interface to see if request should cached or not. """ - def __init__(self, sess=None, cache=None, cache_etags=True): - self.sess = sess or CacheControlSession() + def __init__(self, cache=None, sess=None, cache_etags=True): self.cache = cache or DictCache() self.cache_etags = cache_etags @@ -185,27 +184,16 @@ class CacheController(object): if resp.status_code not in [200, 203]: return + # Cache Session Params + cache_auto = getattr(request, 'cache_auto', False) + cache_urls = getattr(request, 'cache_urls', []) + cache_max_age = getattr(request, 'cache_max_age', None) + + # Check if we are wanting to cache responses from specific urls only cache_url = self.cache_url(request.url) - cache_auto = getattr(self.sess, 'cache_auto', None) - cache_urls = getattr(self.sess, 'cache_urls', None) - cache_max_age = getattr(self.sess, 'cache_max_age', None) - - if cache_urls: - if not any(s in cache_url for s in cache_urls): + if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls): return - # If we want to cache sites not setup with cache headers then add the proper headers and keep the response - if cache_auto and getattr(resp.headers, 'cache-control', None) is None: - cache_max_age = int(cache_max_age or 3600) - headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age)} - resp.headers.update(headers) - - if getattr(resp.headers, 'expires', None) is None: - expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1)) - expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") - headers = {'Expires': expires} - resp.headers.update(headers) - cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(resp.headers) @@ -218,6 +206,18 @@ class CacheController(object): if self.cache_etags and 'etag' in resp.headers: self.cache.set(cache_url, resp) + # If we want to cache sites not setup with cache headers then add the proper headers and keep the response + elif cache_auto and not cc and resp.headers: + headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)} + resp.headers.update(headers) + + if 'expires' not in resp.headers: + if getattr(resp.headers, 'expires', None) is None: + expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1)) + expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") + headers = {'Expires': expires} + resp.headers.update(headers) + # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. @@ -225,14 +225,15 @@ class CacheController(object): # cache when there is a max-age > 0 if cc and cc.get('max-age'): if int(cc['max-age']) > 0: - if cache_max_age: + if isinstance(cache_max_age, (int, long)): cc['max-age'] = int(cache_max_age) resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()]) self.cache.set(cache_url, resp) # If the request can expire, it means we should cache it # in the meantime. - elif getattr(resp.headers, 'expires', None) is None: + elif 'expires' in resp.headers: + if getattr(resp.headers, 'expires', None) is not None: self.cache.set(cache_url, resp) def update_cached_response(self, request, response): diff --git a/lib/cachecontrol/patch_requests.py b/lib/cachecontrol/patch_requests.py index cad60e17..a5563531 100644 --- a/lib/cachecontrol/patch_requests.py +++ b/lib/cachecontrol/patch_requests.py @@ -52,5 +52,4 @@ def make_responses_pickleable(): raise pass - -make_responses_pickleable() +make_responses_pickleable() \ No newline at end of file diff --git a/lib/cachecontrol/session.py b/lib/cachecontrol/session.py index a4b0759a..99337991 100644 --- a/lib/cachecontrol/session.py +++ b/lib/cachecontrol/session.py @@ -1,23 +1,34 @@ from requests.sessions import Session class CacheControlSession(Session): - def __init__(self, *args, **kw): - super(CacheControlSession, self).__init__(*args, **kw) + def __init__(self): + super(CacheControlSession, self).__init__() - def request(self, *args, **kw): + def get(self, *args, **kw): # auto-cache response self.cache_auto = False if kw.has_key('cache_auto'): self.cache_auto = kw.pop('cache_auto') # urls allowed to cache - self.cache_urls = None + self.cache_urls = [] if kw.has_key('cache_urls'): - self.cache_urls = [str(args[1])] + kw.pop('cache_urls') + self.cache_urls = [str(args[0])] + kw.pop('cache_urls') # timeout for cached responses self.cache_max_age = None if kw.has_key('cache_max_age'): self.cache_max_age = int(kw.pop('cache_max_age')) - return super(CacheControlSession, self).request(*args, **kw) \ No newline at end of file + return super(CacheControlSession, self).get(*args, **kw) + + def prepare_request(self, *args, **kw): + # get response + req = super(CacheControlSession, self).prepare_request(*args, **kw) + + # attach params to request + req.cache_auto = self.cache_auto + req.cache_urls = self.cache_urls + req.cache_max_age = self.cache_max_age + + return req \ No newline at end of file diff --git a/lib/cachecontrol/wrapper.py b/lib/cachecontrol/wrapper.py index 5e0d4ce3..88dc2c97 100644 --- a/lib/cachecontrol/wrapper.py +++ b/lib/cachecontrol/wrapper.py @@ -1,11 +1,11 @@ -from cachecontrol.session import CacheControlSession from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache +from cachecontrol.session import CacheControlSession def CacheControl(sess=None, cache=None, cache_etags=True): sess = sess or CacheControlSession() cache = cache or DictCache() - adapter = CacheControlAdapter(sess, cache, cache_etags=cache_etags) + adapter = CacheControlAdapter(cache, cache_etags=cache_etags) sess.mount('http://', adapter) return sess