Replaced the cache handler with a modified version of our own, which gives quicker Indexer API responses to our requests

This commit is contained in:
echel0n 2014-03-27 14:06:03 -07:00
parent ff1e6e6dbc
commit 6a1ccef8d9
30 changed files with 591 additions and 1434 deletions

View file

@ -0,0 +1,13 @@
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
# patch our requests.models.Response to make them pickleable in older
# versions of requests.
import cachecontrol.patch_requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController

View file

@ -0,0 +1,75 @@
from requests.adapters import HTTPAdapter
from cachecontrol.controller import CacheController
from cachecontrol.cache import DictCache
class CacheControlAdapter(HTTPAdapter):
    """Transport adapter that serves GET requests from, and stores GET
    responses into, a cache managed by a CacheController.

    :param cache: cache backend (defaults to an in-memory DictCache)
    :param cache_etags: keep responses carrying an ETag so they can later
        be revalidated with a conditional request
    :param cache_all: cache responses even when the server sent no cache
        headers (passed through to CacheController)
    """

    # Methods whose successful response must evict the cached entry for
    # that URL.  NOTE(review): POST/PATCH are not listed and therefore do
    # not invalidate — confirm that is intended.
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None, cache_etags=True, cache_all=False, *args, **kw):
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.controller = CacheController(self.cache, cache_etags=cache_etags, cache_all=cache_all)

    def send(self, request, **kw):
        """Send a request. Use the request information to see if it
        exists in the cache.
        """
        # Only GETs are ever served from the cache.
        if request.method == 'GET':
            cached_response = self.controller.cached_request(
                request.url, request.headers
            )
            if cached_response:
                # Cached responses should not have a raw field since
                # they *cannot* be created from some stream.
                cached_response.raw = None
                return cached_response

            # check for etags and add headers if appropriate
            headers = self.controller.add_headers(request.url)
            request.headers.update(headers)

        # Not servable from the cache: hit the network.
        resp = super(CacheControlAdapter, self).send(request, **kw)
        return resp

    def build_response(self, request, response):
        """Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Try to store the response if it is a GET
        elif request.method == 'GET':
            # `response` here is the raw urllib3 response, hence `.status`
            # rather than requests' `.status_code`.
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                resp = self.controller.update_cached_response(
                    request, response
                )
                # Fix possible exception when using missing `raw` field in
                # requests
                # TODO: remove when requests will be bump to 2.2.2 or 2.3
                # version
                resp.raw = None
            else:
                # try to cache the response
                self.controller.cache_response(request, resp)

        # Give the request a from_cache attr to let people use it
        # rather than testing for hasattr.
        if not hasattr(resp, 'from_cache'):
            resp.from_cache = False

        return resp

36
lib/cachecontrol/cache.py Normal file
View file

@ -0,0 +1,36 @@
"""
The cache object API for implementing caches. The default is just a
dictionary, which in turns means it is not threadsafe for writing.
"""
from threading import Lock
class BaseCache(object):
    """Abstract interface for cache backends keyed by URL.

    Subclasses must implement get/set/delete.
    """

    def get(self, key):
        """Return the cached value for *key*, or None when absent."""
        # Bug fix: the original `raise NotImplemented()` tried to *call*
        # the NotImplemented singleton, which raises TypeError instead of
        # the intended NotImplementedError.
        raise NotImplementedError()

    def set(self, key, value):
        """Store *value* under *key*."""
        raise NotImplementedError()

    def delete(self, key):
        """Remove *key* from the cache if present."""
        raise NotImplementedError()
class DictCache(BaseCache):
    """In-memory cache backed by a plain dict.

    Writes and deletes are serialized with a Lock; reads are lock-free,
    so the structure is only threadsafe for writing (see module docs).
    """

    def __init__(self, init_dict=None):
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        # Missing keys yield None rather than raising.
        return self.data.get(key, None)

    def set(self, key, value):
        with self.lock:
            self.data[key] = value

    def delete(self, key):
        with self.lock:
            # No-op when the key is absent.
            self.data.pop(key, None)

View file

@ -0,0 +1,18 @@
from textwrap import dedent

# FileCache requires the third-party `lockfile` package.  If it is not
# installed we print a hint instead of failing, so the rest of
# cachecontrol keeps working without the file-based backend.
try:
    from cachecontrol.caches.file_cache import FileCache
except ImportError:
    notice = dedent('''
    NOTE: In order to use the FileCache you must have
    lockfile installed. You can install it via pip:
    pip install lockfile
    ''')
    print(notice)

# The Redis backend is optional as well; silently skip it when the
# redis-py driver is absent.
try:
    import redis
    from cachecontrol.caches.redis_cache import RedisCache
except ImportError:
    pass

View file

@ -0,0 +1,51 @@
import os
import sys
from hashlib import md5
try:
from pickle import load, dump, HIGHEST_PROTOCOL
except ImportError:
from cPickle import load, dump, HIGHEST_PROTOCOL
from lockfile import FileLock
class FileCache(object):
    """Cache backend that stores each pickled value as a file on disk.

    :param directory: directory to hold cache files (created when missing)
    :param forever: when True, delete() keeps entries on disk
    """

    def __init__(self, directory, forever=False):
        self.directory = directory
        self.forever = forever

        if not os.path.isdir(self.directory):
            os.mkdir(self.directory)

    @staticmethod
    def encode(x):
        # Hash the key so the filename is always filesystem-safe.
        return md5(x.encode()).hexdigest()

    def _fn(self, name):
        """Full path of the cache file for *name*."""
        return os.path.join(self.directory, self.encode(name))

    def get(self, key):
        """Return the unpickled value for *key*, or None when missing
        or unreadable."""
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            try:
                if sys.version < '3':
                    return load(fh)
                else:
                    # latin1 keeps byte-oriented py2 pickles loadable on py3
                    return load(fh, encoding='latin1')
            except ValueError:
                return None

    def set(self, key, value):
        """Pickle *value* to disk under *key*, guarded by a file lock."""
        name = self._fn(key)
        with FileLock(name) as lock:
            with open(lock.path, 'wb') as fh:
                dump(value, fh, HIGHEST_PROTOCOL)

    def delete(self, key):
        """Remove the cache file for *key* (unless `forever` is set)."""
        name = self._fn(key)
        if not self.forever:
            # Bug fix: deleting a key that was never stored (or was
            # already removed) used to raise OSError; treat it as a no-op.
            try:
                os.remove(name)
            except OSError:
                pass

View file

@ -0,0 +1,46 @@
from __future__ import division
from datetime import datetime
try:
from cPickle import loads, dumps
except ImportError: # Python 3.x
from pickle import loads, dumps
def total_seconds(td):
    """Return timedelta *td* as seconds (float).

    Python 2.6 compatibility shim: uses timedelta.total_seconds() when
    available, otherwise computes it from days/seconds/microseconds.
    """
    if hasattr(td, 'total_seconds'):
        return td.total_seconds()

    micro = td.microseconds
    whole = td.seconds + td.days * 24 * 3600
    return (micro + whole * 10**6) / 10**6
class RedisCache(object):
    """Cache backend that stores pickled values in Redis.

    :param conn: a redis client exposing get/set/setex/delete/keys
    """

    def __init__(self, conn):
        self.conn = conn

    def get(self, key):
        """Return the unpickled value for *key*, or None when absent."""
        val = self.conn.get(key)
        if val:
            return loads(val)
        return None

    def set(self, key, value, expires=None):
        """Store *value* under *key*.

        :param expires: optional absolute datetime after which Redis
            should drop the entry.
        """
        if not expires:
            self.conn.set(key, dumps(value))
        else:
            expires = expires - datetime.now()
            # Python 2.6-compatible total_seconds, inlined so the class
            # is self-contained.
            if hasattr(expires, 'total_seconds'):
                seconds = expires.total_seconds()
            else:
                seconds = (expires.microseconds +
                           (expires.seconds + expires.days * 24 * 3600) * 10**6) / 10**6
            # Bug fix: the expiring branch stored the raw object instead
            # of the pickled payload, so a later get() -> loads() failed.
            self.conn.setex(key, seconds, dumps(value))

    def delete(self, key):
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

View file

@ -0,0 +1,12 @@
# Python 3 moved urljoin into urllib.parse; fall back to the Python 2
# location when the new module path is unavailable.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

# parsedate_tz lives in email.utils on modern Pythons; very old
# releases exposed it as email.Utils.
try:
    import email.utils
    parsedate_tz = email.utils.parsedate_tz
except ImportError:
    import email.Utils
    parsedate_tz = email.Utils.parsedate_tz

View file

@ -0,0 +1,258 @@
"""
The httplib2 algorithms ported for use with requests.
"""
import re
import calendar
import time
import datetime
from cachecontrol.cache import DictCache
from cachecontrol.compat import parsedate_tz
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


class CacheController(object):
    """An interface to see if request should cached or not.
    """

    def __init__(self, cache=None, cache_etags=True, cache_all=False):
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        # When True, responses without caching headers are stamped with
        # synthetic Cache-Control/Expires headers and cached anyway.
        self.cache_all = cache_all

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
        authority = authority.lower()
        scheme = scheme.lower()
        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    def cache_url(self, uri):
        return self._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Valueless directives (e.g. 'no-store') map to the int 1; valued
        directives (e.g. 'max-age=300') map to their *string* value.
        """
        retval = {}

        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")]
            parts_wo_args = [(name.strip().lower(), 1)
                             for name in parts if -1 == name.find("=")]
            retval = dict(parts_with_args + parts_wo_args)

        return retval

    def cached_request(self, url, headers):
        """Return the cached response for *url* when it is fresh enough,
        otherwise False.

        For a stale entry carrying validators, If-None-Match /
        If-Modified-Since are added to *headers* before returning False
        so the caller can revalidate.
        """
        cache_url = self.cache_url(url)
        cc = self.parse_cache_control(headers)

        # non-caching states
        no_cache = True if 'no-cache' in cc else False
        # Bug fix: directive values are *strings*, so the original test
        # `cc['max-age'] == 0` could never be true and `max-age=0`
        # requests were incorrectly served from the cache.
        if 'max-age' in cc:
            try:
                if int(cc['max-age']) == 0:
                    no_cache = True
            except (TypeError, ValueError):
                pass

        # see if it is in the cache anyways (single lookup; the original
        # fetched the entry from the cache twice)
        resp = self.cache.get(cache_url)
        if no_cache or not resp:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough

        # Check our Vary header to make sure our request headers match
        # up. We don't delete it from the though, we just don't return
        # our cached value.
        #
        # NOTE: Because httplib2 stores raw content, it denotes
        #       headers that were sent in the original response by
        #       adding -varied-$name. We don't have to do that b/c we
        #       are storing the object which has a reference to the
        #       original request. If that changes, then I'd propose
        #       using the varied headers in the cache key to avoid the
        #       situation all together.
        if 'vary' in resp.headers:
            varied_headers = resp.headers['vary'].replace(' ', '').split(',')
            original_headers = resp.request.headers
            for header in varied_headers:
                # If our headers don't match for the headers listed in
                # the vary header, then don't use the cached response
                if headers.get(header, None) != original_headers.get(header):
                    return False

        now = time.time()
        date = calendar.timegm(
            parsedate_tz(resp.headers['date'])
        )
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        #       requests response object. This may not be best since we
        #       could probably avoid instantiating or constructing the
        #       response until we know we need it.
        resp_cc = self.parse_cache_control(resp.headers)

        # determine freshness
        freshness_lifetime = 0
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])
        elif 'expires' in resp.headers:
            expires = parsedate_tz(resp.headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            # make sure we set the from_cache to true
            resp.from_cache = True
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in resp.headers:
            self.cache.delete(cache_url)

        if 'etag' in resp.headers:
            headers['If-None-Match'] = resp.headers['ETag']

        if 'last-modified' in resp.headers:
            headers['If-Modified-Since'] = resp.headers['Last-Modified']

        # return the original handler
        return False

    def add_headers(self, url):
        """Return validator headers (If-None-Match) for *url* when a
        cached response with an ETag exists."""
        resp = self.cache.get(url)
        if resp and 'etag' in resp.headers:
            return {'If-None-Match': resp.headers['etag']}
        return {}

    def cache_response(self, request, resp):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if resp.status_code not in [200, 203]:
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(resp.headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in resp.headers:
            self.cache.set(cache_url, resp)

        # If we want to cache sites not setup with cache headers then add
        # the proper headers and keep the response
        if self.cache_all:
            expires = datetime.datetime.utcnow() + datetime.timedelta(days=(25 * 365))
            expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
            headers = {'Cache-Control': 'public,max-age=%d' % int(3600),
                       'Expires': expires}
            resp.headers.update(headers)
            self.cache.set(cache_url, resp)

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in resp.headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                if int(cc['max-age']) > 0:
                    self.cache.set(cache_url, resp)

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in resp.headers:
                if resp.headers['expires']:
                    self.cache.set(cache_url, resp)

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        resp = self.cache.get(cache_url)

        if not resp:
            # we didn't have a cached response
            return response

        # Bug fix: merge the *new* headers from the 304 into the cached
        # response -- the original merged resp.headers into itself,
        # which was a no-op.
        resp.headers.update(response.headers)

        # we want a 200 b/c we have content via the cache
        # Bug fix: the status belongs on the response being returned,
        # not on the request object.
        resp.status_code = 200

        # update the request as it has the if-none-match header + any
        # other headers that the server might have updated (ie Date,
        # Cache-Control, Expires, etc.)
        resp.request = request

        # update our cache
        self.cache.set(cache_url, resp)

        # Let everyone know this was from the cache.
        resp.from_cache = True

        return resp

View file

@ -0,0 +1,56 @@
import requests
from requests import models
from requests.packages.urllib3.response import HTTPResponse
# Attributes of requests.models.Response that are safe to pickle; used
# by response_getstate/response_setstate to snapshot a Response.
__attrs__ = [
    '_content',
    'status_code',
    'headers',
    'url',
    'history',
    'encoding',
    'reason',
    'cookies',
    'elapsed',
]
def response_getstate(self):
    """__getstate__ replacement for requests.models.Response.

    Returns a picklable dict of the whitelisted attributes in __attrs__
    plus the raw urllib3 original response, which is kept because it is
    needed for the cookie jar.
    """
    # consume everything so _content is populated before the snapshot
    if not self._content_consumed:
        self.content

    state = dict(
        (attr, getattr(self, attr, None))
        for attr in __attrs__
    )

    # deal with our raw content b/c we need it for our cookie jar
    state['raw_original_response'] = self.raw._original_response
    return state
def response_setstate(self, state):
    """__setstate__ replacement for requests.models.Response.

    Restores the whitelisted attributes, then rebuilds `raw` as a fresh
    urllib3 HTTPResponse carrying the pickled original response.
    """
    for name, value in state.items():
        if name != 'raw_original_response':
            setattr(self, name, value)

    setattr(self, 'raw', HTTPResponse())
    self.raw._original_response = state['raw_original_response']
def make_responses_pickleable():
    """Monkey-patch requests.models.Response with pickle support.

    Only requests older than 2.2 needs the patch; newer releases can
    pickle Response objects natively.
    """
    try:
        version_parts = [int(part) for part in requests.__version__.split('.')[:2]]
        # Bug fix: the original check (`not major >= 2 or not minor >= 2`)
        # would also have patched hypothetical 3.x releases; compare the
        # (major, minor) tuple against (2, 2) instead.
        if tuple(version_parts) < (2, 2):
            models.Response.__getstate__ = response_getstate
            models.Response.__setstate__ = response_setstate
    except Exception:
        # Unparseable version string (e.g. dev builds): leave requests
        # untouched rather than crash at import time.  (The original had
        # a dead `raise` followed by `pass`; `pass` matches the clear
        # intent of a best-effort patch.)
        pass


make_responses_pickleable()

View file

@ -0,0 +1,10 @@
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
def CacheControl(sess, cache=None, cache_etags=True, cache_all=False):
    """Wrap *sess* so its http:// traffic goes through a caching adapter.

    Returns the same session, now mounted with a CacheControlAdapter
    backed by *cache* (a fresh DictCache when none is given).
    """
    adapter = CacheControlAdapter(
        cache or DictCache(),
        cache_etags=cache_etags,
        cache_all=cache_all,
    )
    sess.mount('http://', adapter)
    return sess

View file

@ -1,14 +0,0 @@
# -*- coding: utf-8 -*-
"""
__init__.py
~~~~~~~~~~~

Defines the public API to the httpcache module.
"""
__version__ = '0.1.3'

from .cache import HTTPCache
from .adapter import CachingHTTPAdapter

# Bug fix: __all__ must contain *names* (strings), not the objects
# themselves -- `from httpcache import *` ignores a list of classes.
__all__ = ['HTTPCache', 'CachingHTTPAdapter']

View file

@ -1,55 +0,0 @@
"""
adapter.py
~~~~~~~~~~
Contains an implementation of an HTTP adapter for Requests that is aware of the
cache contained in this module.
"""
from requests.adapters import HTTPAdapter
from .cache import HTTPCache
class CachingHTTPAdapter(HTTPAdapter):
    """
    A transport adapter for Python Requests that consults an RFC 2616
    HTTP cache before hitting the network. The central portion of the API.

    :param capacity: The maximum capacity of the backing cache.
    """

    def __init__(self, capacity=50, **kwargs):
        super(CachingHTTPAdapter, self).__init__(**kwargs)

        #: The HTTP Cache backing the adapter.
        self.cache = HTTPCache(capacity=capacity)

    def send(self, request, **kwargs):
        """
        Sends a PreparedRequest object, respecting RFC 2616's rules about
        HTTP caching. Returns a Response object that may have been cached.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object to send.
        """
        # Serve from the cache when possible, otherwise fall through to
        # the real network send.
        reply = self.cache.retrieve(request)
        if reply is None:
            reply = super(CachingHTTPAdapter, self).send(request, **kwargs)
        return reply

    def build_response(self, request, response):
        """
        Builds a Response object from a urllib3 response. May involve
        returning a cached Response.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object sent.
        :param response: The urllib3 response.
        """
        built = super(CachingHTTPAdapter, self).build_response(request,
                                                               response)

        if built.status_code != 304:
            # Fresh response: offer it to the cache.
            self.cache.store(built)
        else:
            # 304 Not Modified: swap in the cached entry.
            built = self.cache.handle_304(built)

        return built

View file

@ -1,207 +0,0 @@
# -*- coding: utf-8 -*-
"""
cache.py
~~~~~~~~
Contains the primary cache structure used in http-cache.
"""
from .structures import RecentOrderedDict
from .utils import (parse_date_header, build_date_header,
expires_from_cache_control, url_contains_query)
from datetime import datetime
# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative,
# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and
# 410 Gone responses. We don't cache 206s at the moment because we
# don't handle Range and Content-Range headers.
CACHEABLE_RCS = (200, 203, 300, 301, 410)

# Cacheable verbs.
CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS')

# Some verbs MUST invalidate the resource in the cache, according to RFC 2616.
# If we send one of these, or any verb we don't recognise, invalidate the
# cache entry for that URL. As it happens, these are also the cacheable
# verbs. That works out well for us.
NON_INVALIDATING_VERBS = CACHEABLE_VERBS


class HTTPCache(object):
    """
    The HTTP Cache object. Manages caching of responses according to RFC 2616,
    adding necessary headers to HTTP request objects, and returning cached
    responses based on server responses.

    This object is not expected to be used by most users. It is exposed as part
    of the public API for users who feel the need for more control. This API
    may change in a minor version increase. Be warned.

    :param capacity: (Optional) The maximum capacity of the HTTP cache.
    """

    def __init__(self, capacity=50):
        #: The maximum capacity of the HTTP cache. When this many cache entries
        #: end up in the cache, the oldest entries are removed.
        self.capacity = capacity

        #: The cache backing store. Cache entries are stored here as key-value
        #: pairs. The key is the URL used to retrieve the cached response. The
        #: value is a python dict, which stores three objects: the response
        #: (keyed off of 'response'), the retrieval or creation date (keyed off
        #: of 'creation') and the cache expiry date (keyed off of 'expiry').
        #: This last value may be None.
        self._cache = RecentOrderedDict()

    def store(self, response):
        """
        Takes an HTTP response object and stores it in the cache according to
        RFC 2616. Returns a boolean value indicating whether the response was
        cached or not.

        :param response: Requests :class:`Response <Response>` object to cache.
        """
        # Define an internal utility function.
        def date_header_or_default(header_name, default, response):
            # Parse the named date header, falling back to *default* when
            # the header is absent. NOTE(review): a malformed header makes
            # parse_date_header return None, which flows through as
            # "no expiry" -- confirm that is intended.
            try:
                date_header = response.headers[header_name]
            except KeyError:
                value = default
            else:
                value = parse_date_header(date_header)
            return value

        if response.status_code not in CACHEABLE_RCS:
            return False

        if response.request.method not in CACHEABLE_VERBS:
            return False

        url = response.url
        now = datetime.utcnow()

        # Get the value of the 'Date' header, if it exists. If it doesn't, just
        # use now.
        creation = date_header_or_default('Date', now, response)

        # Get the value of the 'Cache-Control' header, if it exists.
        cc = response.headers.get('Cache-Control', None)
        if cc is not None:
            expiry = expires_from_cache_control(cc, now)

            # If the above returns None, we are explicitly instructed not to
            # cache this.
            if expiry is None:
                return False

        # Get the value of the 'Expires' header, if it exists, and if we don't
        # have anything from the 'Cache-Control' header.
        if cc is None:
            expiry = date_header_or_default('Expires', None, response)

        # If the expiry date is earlier or the same as the Date header, don't
        # cache the response at all.
        if expiry is not None and expiry <= creation:
            return False

        # If there's a query portion of the url and it's a GET, don't cache
        # this unless explicitly instructed to.
        if expiry is None and response.request.method == 'GET':
            if url_contains_query(url):
                return False

        self._cache[url] = {'response': response,
                            'creation': creation,
                            'expiry': expiry}

        self.__reduce_cache_count()

        return True

    def handle_304(self, response):
        """
        Given a 304 response, retrieves the cached entry. This unconditionally
        returns the cached entry, so it can be used when the 'intelligent'
        behaviour of retrieve() is not desired.

        Returns None if there is no entry in the cache.

        :param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response <Response>`.
        """
        try:
            cached_response = self._cache[response.url]['response']
        except KeyError:
            # Nothing cached for this URL: the caller gets None and must
            # handle the 304 itself.
            cached_response = None

        return cached_response

    def retrieve(self, request):
        """
        Retrieves a cached response if possible.

        If there is a response that can be unconditionally returned (e.g. one
        that had a Cache-Control header set), that response is returned. If
        there is one that can be conditionally returned (if a 304 is returned),
        applies an If-Modified-Since header to the request and returns None.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object.
        """
        return_response = None
        url = request.url

        try:
            cached_response = self._cache[url]
        except KeyError:
            return None

        if request.method not in NON_INVALIDATING_VERBS:
            # Unsafe/unknown verb: drop the entry per RFC 2616.
            del self._cache[url]
            return None

        if cached_response['expiry'] is None:
            # We have no explicit expiry time, so we weren't instructed to
            # cache. Add an 'If-Modified-Since' header.
            creation = cached_response['creation']
            header = build_date_header(creation)
            request.headers['If-Modified-Since'] = header
        else:
            # We have an explicit expiry time. If we're earlier than the expiry
            # time, return the response.
            now = datetime.utcnow()

            if now <= cached_response['expiry']:
                return_response = cached_response['response']
            else:
                # Expired: evict the stale entry.
                del self._cache[url]

        return return_response

    def __reduce_cache_count(self):
        """
        Drops the number of entries in the cache to the capacity of the cache.

        Walks the backing RecentOrderedDict in order from oldest to youngest.
        Deletes cache entries that are either invalid or being speculatively
        cached until the number of cache entries drops to the capacity. If this
        leaves the cache above capacity, begins deleting the least-used cache
        entries that are still valid until the cache has space.
        """
        if len(self._cache) <= self.capacity:
            return

        to_delete = len(self._cache) - self.capacity

        # First pass: drop speculative entries (no explicit expiry).
        keys = list(self._cache.keys())

        for key in keys:
            if self._cache[key]['expiry'] is None:
                del self._cache[key]
                to_delete -= 1

            if to_delete == 0:
                return

        # Second pass: drop the least-recently-used valid entries.
        keys = list(self._cache.keys())

        for i in range(to_delete):
            del self._cache[keys[i]]

        return

View file

@ -1,10 +0,0 @@
# -*- coding: utf-8 -*-
"""
compat.py
~~~~~~~~~
Defines cross-platform functions and classes needed to achieve proper
functionality.
"""
pass

View file

@ -1,59 +0,0 @@
"""
structures.py
~~~~~~~~~~~~~
Defines structures used by the httpcache module.
"""
class RecentOrderedDict(dict):
    """
    A custom variant of the dictionary that ensures that the object most
    recently inserted _or_ retrieved from the dictionary is enumerated first.
    """
    def __init__(self):
        self._data = {}
        self._order = []

    def __setitem__(self, key, value):
        # Re-inserting an existing key moves it to the most-recent end.
        if key in self._data:
            self._order.remove(key)

        self._order.append(key)
        self._data[key] = value

    def __getitem__(self, key):
        # Reads also refresh recency.
        value = self._data[key]
        self._order.remove(key)
        self._order.append(key)
        return value

    def __delitem__(self, key):
        del self._data[key]
        self._order.remove(key)

    def __iter__(self):
        # Bug fix: __iter__ must return an *iterator*; returning the list
        # itself made `for key in d` raise TypeError.
        return iter(self._order)

    def __len__(self):
        return len(self._order)

    def __contains__(self, value):
        return self._data.__contains__(value)

    def items(self):
        return [(key, self._data[key]) for key in self._order]

    def keys(self):
        return self._order

    def values(self):
        return [self._data[key] for key in self._order]

    def clear(self):
        self._data = {}
        self._order = []

    def copy(self):
        """Return a shallow copy preserving the recency order."""
        c = RecentOrderedDict()
        c._data = self._data.copy()
        c._order = self._order[:]
        # Bug fix: the copy was built but never returned (implicit None).
        return c

View file

@ -1,97 +0,0 @@
# -*- coding: utf-8 -*-
"""
utils.py
~~~~~~~~
Utility functions for use with httpcache.
"""
from datetime import datetime, timedelta
try: # Python 2
from urlparse import urlparse
except ImportError: # Python 3
from urllib.parse import urlparse
# Date formats RFC 2616 allows for HTTP date headers (RFC 1123 is the
# preferred form; RFC 850 is accepted for compatibility).
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT"


def parse_date_header(header):
    """
    Given a date header in the form specified by RFC 2616, return a Python
    datetime object.

    Tries the RFC 1123 form first, then RFC 850. The C asctime() form is
    not parsed. This function does _not_ follow Postel's Law: if a value
    does not strictly match one of the formats (or is not a string at
    all), None is returned, which is considered 'safe' behaviour. All
    dates are assumed to be UTC/GMT, as RFC 2616 requires.
    """
    for fmt in (RFC_1123_DT_STR, RFC_850_DT_STR):
        try:
            return datetime.strptime(header, fmt)
        except (ValueError, TypeError):
            continue
    return None
def build_date_header(dt):
    """
    Given a Python datetime object, build a Date header value according to
    RFC 2616.

    RFC 2616 specifies that the RFC 1123 form is to be preferred, so that
    is what is produced here.
    """
    return dt.strftime("%a, %d %b %Y %H:%M:%S GMT")
def expires_from_cache_control(header, current_time):
    """
    Given a Cache-Control header, builds a Python datetime object corresponding
    to the expiry time (in UTC). This function should respect all relevant
    Cache-Control directives.

    Takes current_time as an argument to ensure that 'max-age=0' generates the
    correct behaviour without being special-cased.

    Returns None to indicate that a request must not be cached.
    """
    # Robustness fix: directives may be separated by a bare comma with no
    # following space ('public,max-age=300'); the original split on the
    # exact string ', ' and silently missed such directives. Split on ','
    # and strip whitespace instead.
    fields = [field.strip() for field in header.split(',')]
    duration = None

    for field in fields:
        # Right now we don't handle no-cache applied to specific fields. To be
        # as 'nice' as possible, treat any no-cache as applying to the whole
        # request. Bail early, because there's no reason to stick around.
        if field.startswith('no-cache') or field == 'no-store':
            return None

        if field.startswith('max-age'):
            _, duration = field.split('=')
            duration = int(duration)

    # max-age=0 leaves duration falsy, so None is returned: do not cache.
    if duration:
        interval = timedelta(seconds=int(duration))
        return current_time + interval
def url_contains_query(url):
    """
    A very stupid function for determining if a URL contains a query string
    or not.
    """
    return True if urlparse(url).query else False

View file

@ -1,31 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
    requests_cache
    ~~~~~~~~~~~~~~

    Transparent cache for ``requests`` library with persistence and async support

    Just write::

        import requests_cache
        requests_cache.install_cache()

    And requests to resources will be cached for faster repeated access::

        import requests
        for i in range(10):
            r = requests.get('http://httpbin.org/delay/5')
        # will take approximately 5 seconds instead of 50

    :copyright: (c) 2012 by Roman Haritonov.
    :license: BSD, see LICENSE for more details.
"""
__docformat__ = 'restructuredtext'
__version__ = '0.4.4'

# Re-export the public API from the core module.
from .core import (
    CachedSession, install_cache, uninstall_cache,
    disabled, enabled, get_cache, clear, configure
)

View file

@ -1,50 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends
~~~~~~~~~~~~~~~~~~~~~~~
Classes and functions for cache persistence
"""
from .base import BaseCache
# Maps backend-name strings to backend classes.  Optional backends are
# registered below only when their drivers import cleanly; the module
# attribute stays None otherwise so callers can test for availability.
registry = {
    'memory': BaseCache,
}

try:
    # Heroku doesn't allow the SQLite3 module to be installed
    from .sqlite import DbCache
    registry['sqlite'] = DbCache
except ImportError:
    DbCache = None

try:
    from .mongo import MongoCache
    # Both spellings resolve to the same backend.
    registry['mongo'] = registry['mongodb'] = MongoCache
except ImportError:
    MongoCache = None

try:
    from .redis import RedisCache
    registry['redis'] = RedisCache
except ImportError:
    RedisCache = None
def create_backend(backend_name, cache_name, options):
    """Instantiate the cache backend registered as *backend_name*.

    :param backend_name: registry key, or None for the platform default
    :param cache_name: name passed to the backend constructor
    :param options: extra keyword arguments for the backend constructor
    :raises ValueError: when *backend_name* is not in the registry
    """
    if backend_name is None:
        backend_name = _get_default_backend_name()
    try:
        backend_cls = registry[backend_name]
    except KeyError:
        # Bug fix: the original try-block also wrapped the constructor
        # call, so a KeyError raised *inside* a backend's __init__ was
        # misreported as an unsupported backend name.
        raise ValueError('Unsupported backend "%s" try one of: %s' %
                         (backend_name, ', '.join(registry.keys())))
    return backend_cls(cache_name, **options)
def _get_default_backend_name():
    """Prefer the sqlite backend when it imported successfully,
    falling back to the always-available in-memory backend."""
    return 'sqlite' if 'sqlite' in registry else 'memory'

View file

@ -1,171 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.base
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Contains BaseCache class which can be used as in-memory cache backend or
extended to support persistence.
"""
from datetime import datetime
import hashlib
from copy import copy
import requests
from ..compat import is_py2
class BaseCache(object):
""" Base class for cache implementations, can be used as in-memory cache.
To extend it you can provide dictionary-like objects for
:attr:`keys_map` and :attr:`responses` or override public methods.
"""
def __init__(self, *args, **kwargs):
#: `key` -> `key_in_responses` mapping
self.keys_map = {}
#: `key_in_cache` -> `response` mapping
self.responses = {}
def save_response(self, key, response):
""" Save response to cache
:param key: key for this response
:param response: response to save
.. note:: Response is reduced before saving (with :meth:`reduce_response`)
to make it picklable
"""
self.responses[key] = self.reduce_response(response), datetime.utcnow()
def add_key_mapping(self, new_key, key_to_response):
"""
Adds mapping of `new_key` to `key_to_response` to make it possible to
associate many keys with single response
:param new_key: new key (e.g. url from redirect)
:param key_to_response: key which can be found in :attr:`responses`
:return:
"""
self.keys_map[new_key] = key_to_response
def get_response_and_time(self, key, default=(None, None)):
""" Retrieves response and timestamp for `key` if it's stored in cache,
otherwise returns `default`
:param key: key of resource
:param default: return this if `key` not found in cache
:returns: tuple (response, datetime)
.. note:: Response is restored after unpickling with :meth:`restore_response`
"""
try:
if key not in self.responses:
key = self.keys_map[key]
response, timestamp = self.responses[key]
except KeyError:
return default
return self.restore_response(response), timestamp
def delete(self, key):
""" Delete `key` from cache. Also deletes all responses from response history
"""
try:
if key in self.responses:
response, _ = self.responses[key]
del self.responses[key]
else:
response, _ = self.responses[self.keys_map[key]]
del self.keys_map[key]
for r in response.history:
del self.keys_map[self.create_key(r.request)]
except KeyError:
pass
def delete_url(self, url):
""" Delete response associated with `url` from cache.
Also deletes all responses from response history. Works only for GET requests
"""
self.delete(self._url_to_key(url))
def clear(self):
""" Clear cache
"""
self.responses.clear()
self.keys_map.clear()
def has_key(self, key):
""" Returns `True` if cache has `key`, `False` otherwise
"""
return key in self.responses or key in self.keys_map
def has_url(self, url):
""" Returns `True` if cache has `url`, `False` otherwise.
Works only for GET request urls
"""
return self.has_key(self._url_to_key(url))
def _url_to_key(self, url):
from requests import Request
return self.create_key(Request('GET', url).prepare())
_response_attrs = ['_content', 'url', 'status_code', 'cookies',
'headers', 'encoding', 'request', 'reason', 'raw']
_raw_response_attrs = ['_original_response', 'decode_content', 'headers',
'reason', 'status', 'strict', 'version']
def reduce_response(self, response):
    """ Reduce response object to make it compatible with ``pickle``
    """
    reduced = _Store()
    # touch .content so ``_content`` is populated before the attrs are copied
    response.content
    for attr in self._response_attrs:
        setattr(reduced, attr, self._picklable_field(response, attr))
    reduced.history = tuple(self.reduce_response(redirect)
                            for redirect in response.history)
    return reduced
def _picklable_field(self, response, name):
    value = getattr(response, name)
    if name == 'request':
        # hooks may hold arbitrary callables -- strip them from a shallow copy
        value = copy(value)
        value.hooks = []
    elif name == 'raw':
        # the raw response object is not picklable; keep plain attributes only
        store = _Store()
        for attr in self._raw_response_attrs:
            setattr(store, attr, getattr(value, attr, None))
        value = store
    return value
def restore_response(self, response):
    """ Restore response object after unpickling
    """
    restored = requests.Response()
    for attr in self._response_attrs:
        setattr(restored, attr, getattr(response, attr, None))
    restored.history = tuple(self.restore_response(redirect)
                             for redirect in response.history)
    return restored
def create_key(self, request):
    # cache key = sha256 over method, url and (optional) body
    digest = hashlib.sha256()
    digest.update(_to_bytes(request.method.upper()))
    digest.update(_to_bytes(request.url))
    body = request.body
    if body:
        digest.update(_to_bytes(body))
    return digest.hexdigest()
def __str__(self):
    # debugging aid: dumps the full alias map and stored responses
    return 'keys: %s\nresponses: %s' % (self.keys_map, self.responses)
# used for saving response attributes
class _Store(object):
    """Empty attribute container standing in for non-picklable objects."""
    pass
def _to_bytes(s, encoding='utf-8'):
    # Python 2 strings are already byte strings; only encode real text
    if is_py2 or isinstance(s, bytes):
        return s
    return s.encode(encoding)

View file

@ -1,25 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.mongo
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``mongo`` cache backend
"""
from .base import BaseCache
from .storage.mongodict import MongoDict, MongoPickleDict
class MongoCache(BaseCache):
    """ ``mongo`` cache backend.
    """
    def __init__(self, db_name='requests-cache', **options):
        """
        :param db_name: database name (default: ``'requests-cache'``)
        :param connection: (optional) ``pymongo.Connection``
        """
        super(MongoCache, self).__init__()
        connection = options.get('connection')
        # responses owns the (possibly new) connection; keys_map reuses it
        self.responses = MongoPickleDict(db_name, 'responses', connection)
        self.keys_map = MongoDict(db_name, 'urls', self.responses.connection)

View file

@ -1,24 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.redis
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``redis`` cache backend
"""
from .base import BaseCache
from .storage.redisdict import RedisDict
class RedisCache(BaseCache):
    """ ``redis`` cache backend.
    """
    def __init__(self, namespace='requests-cache', **options):
        """
        :param namespace: redis namespace (default: ``'requests-cache'``)
        :param connection: (optional) ``redis.StrictRedis``
        """
        super(RedisCache, self).__init__()
        connection = options.get('connection')
        # responses owns the (possibly new) connection; keys_map reuses it
        self.responses = RedisDict(namespace, 'responses', connection)
        self.keys_map = RedisDict(namespace, 'urls', self.responses.connection)

View file

@ -1,30 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.sqlite
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``sqlite3`` cache backend
"""
from .base import BaseCache
from .storage.dbdict import DbDict, DbPickleDict
class DbCache(BaseCache):
    """ sqlite cache backend.

    Reading is fast, saving is a bit slower. It can store big amount of data
    with low memory usage.
    """
    def __init__(self, location='cache',
                 fast_save=False, extension='.sqlite', **options):
        """
        :param location: database filename prefix (default: ``'cache'``)
        :param fast_save: Speedup cache saving up to 50 times but with possibility of data loss.
                          See :ref:`backends.DbDict <backends_dbdict>` for more info
        :param extension: extension for filename (default: ``'.sqlite'``)
        """
        super(DbCache, self).__init__()
        db_path = location + extension
        # both tables live in the same sqlite file
        self.responses = DbPickleDict(db_path, 'responses', fast_save=fast_save)
        self.keys_map = DbDict(db_path, 'urls')

View file

@ -1,171 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.dbdict
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Dictionary-like objects for saving large data sets to `sqlite` database
"""
from collections import MutableMapping
import sqlite3 as sqlite
from contextlib import contextmanager
try:
import threading
except ImportError:
import dummy_threading as threading
try:
import cPickle as pickle
except ImportError:
import pickle
from requests_cache.compat import bytes
class DbDict(MutableMapping):
    """ DbDict - a dictionary-like object for saving large datasets to `sqlite` database

    It's possible to create multiply DbDict instances, which will be stored as separate
    tables in one database::

        d1 = DbDict('test', 'table1')
        d2 = DbDict('test', 'table2')
        d3 = DbDict('test', 'table3')

    all data will be stored in ``test.sqlite`` database into
    correspondent tables: ``table1``, ``table2`` and ``table3``
    """

    def __init__(self, filename, table_name='data', fast_save=False, **options):
        """
        :param filename: filename for database
        :param table_name: table name
        :param fast_save: If it's True, then sqlite will be configured with
                          `"PRAGMA synchronous = 0;" <http://www.sqlite.org/pragma.html#pragma_synchronous>`_
                          to speedup cache saving, but be careful, it's dangerous.
                          Tests showed that insertion order of records can be wrong with this option.
        """
        self.filename = filename
        self.table_name = table_name
        self.fast_save = fast_save

        #: Transactions can be commited if this property is set to `True`
        self.can_commit = True

        # bulk-commit state: while active, one shared connection is reused
        self._bulk_commit = False
        self._pending_connection = None
        self._lock = threading.RLock()
        with self.connection() as con:
            con.execute("create table if not exists `%s` (key PRIMARY KEY, value)" % self.table_name)

    @contextmanager
    def connection(self, commit_on_success=False):
        """Yield a sqlite connection; in bulk-commit mode the single pending
        connection is reused and kept open, otherwise a fresh one is opened
        and closed per operation.
        """
        with self._lock:
            if self._bulk_commit:
                if self._pending_connection is None:
                    self._pending_connection = sqlite.connect(self.filename)
                con = self._pending_connection
            else:
                con = sqlite.connect(self.filename)
            try:
                if self.fast_save:
                    con.execute("PRAGMA synchronous = 0;")
                yield con
                if commit_on_success and self.can_commit:
                    con.commit()
            finally:
                if not self._bulk_commit:
                    con.close()

    def commit(self, force=False):
        """
        Commits pending transaction if :attr:`can_commit` or `force` is `True`

        :param force: force commit, ignore :attr:`can_commit`
        """
        if force or self.can_commit:
            if self._pending_connection is not None:
                self._pending_connection.commit()

    @contextmanager
    def bulk_commit(self):
        """
        Context manager used to speedup insertion of big number of records
        ::

            >>> d1 = DbDict('test')
            >>> with d1.bulk_commit():
            ...     for i in range(1000):
            ...         d1[i] = i * 2
        """
        self._bulk_commit = True
        self.can_commit = False
        try:
            yield
            self.commit(True)
        finally:
            self._bulk_commit = False
            self.can_commit = True
            # FIX: no connection is opened until the first operation inside the
            # ``with`` block, so guard against closing a still-None connection
            if self._pending_connection is not None:
                self._pending_connection.close()
                self._pending_connection = None

    def __getitem__(self, key):
        with self.connection() as con:
            row = con.execute("select value from `%s` where key=?" %
                              self.table_name, (key,)).fetchone()
            if not row:
                raise KeyError
            return row[0]

    def __setitem__(self, key, item):
        # sqlite has no portable upsert here; probe first, then update/insert
        with self.connection(True) as con:
            if con.execute("select key from `%s` where key=?" %
                           self.table_name, (key,)).fetchone():
                con.execute("update `%s` set value=? where key=?" %
                            self.table_name, (item, key))
            else:
                con.execute("insert into `%s` (key,value) values (?,?)" %
                            self.table_name, (key, item))

    def __delitem__(self, key):
        with self.connection(True) as con:
            if con.execute("select key from `%s` where key=?" %
                           self.table_name, (key,)).fetchone():
                con.execute("delete from `%s` where key=?" %
                            self.table_name, (key,))
            else:
                raise KeyError

    def __iter__(self):
        with self.connection() as con:
            for row in con.execute("select key from `%s`" %
                                   self.table_name):
                yield row[0]

    def __len__(self):
        with self.connection() as con:
            return con.execute("select count(key) from `%s`" %
                               self.table_name).fetchone()[0]

    def clear(self):
        # recreate the table instead of deleting row by row
        with self.connection(True) as con:
            con.execute("drop table `%s`" % self.table_name)
            con.execute("create table `%s` (key PRIMARY KEY, value)" %
                        self.table_name)

    def __str__(self):
        return str(dict(self.items()))
class DbPickleDict(DbDict):
    """ Same as :class:`DbDict`, but pickles values before saving
    """
    def __setitem__(self, key, item):
        # store as a BLOB so arbitrary objects survive the round trip
        serialized = sqlite.Binary(pickle.dumps(item))
        super(DbPickleDict, self).__setitem__(key, serialized)

    def __getitem__(self, key):
        raw = super(DbPickleDict, self).__getitem__(key)
        return pickle.loads(bytes(raw))

View file

@ -1,74 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.mongodict
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Dictionary-like objects for saving large data sets to ``mongodb`` database
"""
from collections import MutableMapping
try:
import cPickle as pickle
except ImportError:
import pickle
from pymongo import Connection
class MongoDict(MutableMapping):
    """ MongoDict - a dictionary-like interface for ``mongo`` database
    """
    def __init__(self, db_name,
                 collection_name='mongo_dict_data', connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param collection_name: collection name (default: mongo_dict_data)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        self.connection = connection if connection is not None else Connection()
        self.db = self.connection[db_name]
        self.collection = self.db[collection_name]

    def __getitem__(self, key):
        document = self.collection.find_one({'_id': key})
        if document is None:
            raise KeyError
        return document['data']

    def __setitem__(self, key, item):
        # save() upserts on _id, so this covers both insert and update
        self.collection.save({'_id': key, 'data': item})

    def __delitem__(self, key):
        spec = {'_id': key}
        if not self.collection.find_one(spec, fields=['_id']):
            raise KeyError
        self.collection.remove(spec)

    def __len__(self):
        return self.collection.count()

    def __iter__(self):
        # only fetch ids -- values may be large
        for document in self.collection.find(fields=['_id']):
            yield document['_id']

    def clear(self):
        self.collection.drop()

    def __str__(self):
        return str(dict(self.items()))
class MongoPickleDict(MongoDict):
    """ Same as :class:`MongoDict`, but pickles values before saving
    """
    def __setitem__(self, key, item):
        serialized = pickle.dumps(item)
        super(MongoPickleDict, self).__setitem__(key, serialized)

    def __getitem__(self, key):
        raw = super(MongoPickleDict, self).__getitem__(key)
        return pickle.loads(bytes(raw))

View file

@ -1,68 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.backends.redisdict
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Dictionary-like objects for saving large data sets to ``redis`` key-store
"""
from collections import MutableMapping
try:
import cPickle as pickle
except ImportError:
import pickle
from redis import StrictRedis as Redis
class RedisDict(MutableMapping):
    """ RedisDict - a dictionary-like interface for ``redis`` key-stores
    """
    def __init__(self, namespace, collection_name='redis_dict_data',
                 connection=None):
        """
        The actual key name on the redis server will be
        ``namespace``:``collection_name``

        In order to deal with how redis stores data/keys,
        everything, i.e. keys and data, must be pickled.

        :param namespace: namespace to use
        :param collection_name: name of the hash map stored in redis
                                (default: redis_dict_data)
        :param connection: ``redis.StrictRedis`` instance.
                           If it's ``None`` (default), a new connection with
                           default options will be created
        """
        self.connection = connection if connection is not None else Redis()
        self._self_key = ':'.join([namespace, collection_name])

    def __getitem__(self, key):
        raw = self.connection.hget(self._self_key, pickle.dumps(key))
        if raw is None:
            raise KeyError
        return pickle.loads(bytes(raw))

    def __setitem__(self, key, item):
        # both field name and value are pickled -- redis only stores bytes
        self.connection.hset(self._self_key, pickle.dumps(key),
                             pickle.dumps(item))

    def __delitem__(self, key):
        removed = self.connection.hdel(self._self_key, pickle.dumps(key))
        if not removed:
            raise KeyError

    def __len__(self):
        return self.connection.hlen(self._self_key)

    def __iter__(self):
        for raw_key in self.connection.hkeys(self._self_key):
            yield pickle.loads(bytes(raw_key))

    def clear(self):
        self.connection.delete(self._self_key)

    def __str__(self):
        return str(dict(self.items()))

View file

@ -1,103 +0,0 @@
# -*- coding: utf-8 -*-
# taken from requests library: https://github.com/kennethreitz/requests
"""
pythoncompat
"""
import sys
# -------
# Pythons
# -------

# Syntax sugar.
_ver = sys.version_info

#: Python 2.x?
is_py2 = (_ver[0] == 2)

#: Python 3.x?
is_py3 = (_ver[0] == 3)

#: Python 3.0.x
is_py30 = (is_py3 and _ver[1] == 0)

#: Python 3.1.x
is_py31 = (is_py3 and _ver[1] == 1)

#: Python 3.2.x
is_py32 = (is_py3 and _ver[1] == 2)

#: Python 3.3.x
is_py33 = (is_py3 and _ver[1] == 3)

#: Python 3.4.x
is_py34 = (is_py3 and _ver[1] == 4)

#: Python 2.7.x
is_py27 = (is_py2 and _ver[1] == 7)

#: Python 2.6.x
is_py26 = (is_py2 and _ver[1] == 6)

#: Python 2.5.x
is_py25 = (is_py2 and _ver[1] == 5)

#: Python 2.4.x
is_py24 = (is_py2 and _ver[1] == 4)  # I'm assuming this is not by choice.


# ---------
# Platforms
# ---------

# Syntax sugar.
_ver = sys.version.lower()

is_pypy = ('pypy' in _ver)
is_jython = ('jython' in _ver)
is_ironpython = ('iron' in _ver)

# Assume CPython, if nothing else.
is_cpython = not any((is_pypy, is_jython, is_ironpython))

# Windows-based system.
is_windows = 'win32' in str(sys.platform).lower()

# Standard Linux 2+ system.
is_linux = ('linux' in str(sys.platform).lower())
is_osx = ('darwin' in str(sys.platform).lower())
is_hpux = ('hpux' in str(sys.platform).lower())  # Complete guess.
# NOTE(review): 'solar==' looks like a typo for 'sunos'/'solaris', but it is
# inherited verbatim from the upstream requests compat module -- kept as-is.
is_solaris = ('solar==' in str(sys.platform).lower())  # Complete guess.


# ---------
# Specifics
# ---------

if is_py2:
    from urllib import quote, unquote, urlencode
    from urlparse import urlparse, urlunparse, urljoin, urlsplit
    from urllib2 import parse_http_list
    import cookielib
    from StringIO import StringIO

    # normalize text/binary names: on Python 2 the native str IS bytes,
    # and ``str`` is rebound to mean unicode text
    bytes = str
    str = unicode
    basestring = basestring
elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote
    from urllib.request import parse_http_list
    from http import cookiejar as cookielib
    from http.cookies import SimpleCookie
    from io import StringIO

    # on Python 3 the builtins already have the desired meaning
    str = str
    bytes = bytes
    basestring = (str,bytes)

View file

@ -1,227 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
requests_cache.core
~~~~~~~~~~~~~~~~~~~
Core functions for configuring cache and monkey patching ``requests``
"""
from contextlib import contextmanager
from datetime import datetime, timedelta
import requests
from requests import Session as OriginalSession
from requests.hooks import dispatch_hook
from requests_cache import backends
from requests_cache.compat import str, basestring
# On requests versions before 1.2.0 hooks need no re-dispatch for cached
# responses; replace dispatch_hook with a pass-through there.
try:
    ver = tuple(map(int, requests.__version__.split(".")))
except ValueError:
    # non-numeric version component (e.g. a dev build) -- leave hooks as-is
    pass
else:
    # We don't need to dispatch hook in Requests <= 1.1.0
    if ver < (1, 2, 0):
        dispatch_hook = lambda key, hooks, hook_data, *a, **kw: hook_data
    del ver
class CachedSession(OriginalSession):
    """ Requests ``Sessions`` with caching support.
    """

    def __init__(self, cache_name='cache', backend=None, expire_after=None,
                 allowable_codes=(200,), allowable_methods=('GET',),
                 **backend_options):
        """
        :param cache_name: for ``sqlite`` backend: cache file will start with this prefix,
                           e.g ``cache.sqlite``

                           for ``mongodb``: it's used as database name
                           for ``redis``: it's used as the namespace. This means all keys
                           are prefixed with ``'cache_name:'``
        :param backend: cache backend name e.g ``'sqlite'``, ``'mongodb'``, ``'redis'``, ``'memory'``.
                        (see :ref:`persistence`). Or instance of backend implementation.
                        Default value is ``None``, which means use ``'sqlite'`` if available,
                        otherwise fallback to ``'memory'``.
        :param expire_after: number of seconds after cache will be expired
                             or `None` (default) to ignore expiration
        :type expire_after: float
        :param allowable_codes: limit caching only for response with this codes (default: 200)
        :type allowable_codes: tuple
        :param allowable_methods: cache only requests of this methods (default: 'GET')
        :type allowable_methods: tuple
        :kwarg backend_options: options for chosen backend. See corresponding
                                :ref:`sqlite <backends_sqlite>`, :ref:`mongo <backends_mongo>`
                                and :ref:`redis <backends_redis>` backends API documentation
        """
        # a backend may be given by name (or None for the default) or as a
        # ready-made backend instance
        if backend is None or isinstance(backend, basestring):
            self.cache = backends.create_backend(backend, cache_name,
                                                 backend_options)
        else:
            self.cache = backend
        self._cache_expire_after = expire_after
        self._cache_allowable_codes = allowable_codes
        self._cache_allowable_methods = allowable_methods
        self._is_cache_disabled = False
        super(CachedSession, self).__init__()

    def send(self, request, **kwargs):
        # bypass the cache for disabled sessions and non-cacheable methods
        if (self._is_cache_disabled
            or request.method not in self._cache_allowable_methods):
            response = super(CachedSession, self).send(request, **kwargs)
            response.from_cache = False
            return response

        cache_key = self.cache.create_key(request)

        def send_request_and_cache_response():
            # cache miss (or expired entry): hit the network and store the
            # response if its status code is allowed
            response = super(CachedSession, self).send(request, **kwargs)
            if response.status_code in self._cache_allowable_codes:
                self.cache.save_response(cache_key, response)
            response.from_cache = False
            return response

        response, timestamp = self.cache.get_response_and_time(cache_key)
        if response is None:
            return send_request_and_cache_response()

        if self._cache_expire_after is not None:
            difference = datetime.utcnow() - timestamp
            if difference > timedelta(seconds=self._cache_expire_after):
                # entry is stale: drop it and re-fetch
                self.cache.delete(cache_key)
                return send_request_and_cache_response()

        # dispatch hook here, because we've removed it before pickling
        response.from_cache = True
        response = dispatch_hook('response', request.hooks, response, **kwargs)
        return response

    def request(self, method, url, params=None, data=None, headers=None,
                cookies=None, files=None, auth=None, timeout=None,
                allow_redirects=True, proxies=None, hooks=None, stream=None,
                verify=None, cert=None):
        response = super(CachedSession, self).request(method, url, params, data,
                                                      headers, cookies, files,
                                                      auth, timeout,
                                                      allow_redirects, proxies,
                                                      hooks, stream, verify, cert)
        if self._is_cache_disabled:
            return response

        # map every redirect in the chain to the final response's cache key
        main_key = self.cache.create_key(response.request)
        for r in response.history:
            self.cache.add_key_mapping(
                self.cache.create_key(r.request), main_key
            )
        return response

    @contextmanager
    def cache_disabled(self):
        """
        Context manager for temporary disabling cache
        ::

            >>> s = CachedSession()
            >>> with s.cache_disabled():
            ...     s.get('http://httpbin.org/ip')
        """
        self._is_cache_disabled = True
        try:
            yield
        finally:
            self._is_cache_disabled = False
def install_cache(cache_name='cache', backend=None, expire_after=None,
                  allowable_codes=(200,), allowable_methods=('GET',),
                  session_factory=CachedSession, **backend_options):
    """
    Installs cache for all ``Requests`` requests by monkey-patching ``Session``

    Parameters are the same as in :class:`CachedSession`. Additional parameters:

    :param session_factory: Session factory. It should inherit :class:`CachedSession` (default)
    """
    # resolve a backend given by name into an instance up front
    if backend:
        backend = backends.create_backend(backend, cache_name, backend_options)

    def _make_session():
        return session_factory(cache_name=cache_name,
                               backend=backend,
                               expire_after=expire_after,
                               allowable_codes=allowable_codes,
                               allowable_methods=allowable_methods,
                               **backend_options)

    _patch_session_factory(_make_session)


# backward compatibility
configure = install_cache
def uninstall_cache():
    """ Restores ``requests.Session`` and disables cache
    """
    # re-install the unpatched Session class captured at import time
    _patch_session_factory(OriginalSession)
@contextmanager
def disabled():
    """
    Context manager for temporary disabling globally installed cache

    .. warning:: not thread-safe

    ::

        >>> with requests_cache.disabled():
        ...     requests.get('http://httpbin.org/ip')
        ...     requests.get('http://httpbin.org/get')
    """
    previous = requests.Session
    uninstall_cache()
    try:
        yield
    finally:
        # restore whatever factory was installed before (may itself be cached)
        _patch_session_factory(previous)
@contextmanager
def enabled(*args, **kwargs):
    """
    Context manager for temporary installing global cache.

    Accepts same arguments as :func:`install_cache`

    .. warning:: not thread-safe

    ::

        >>> with requests_cache.enabled('cache_db'):
        ...     requests.get('http://httpbin.org/get')
    """
    install_cache(*args, **kwargs)
    try:
        yield
    finally:
        # always remove the patch, even if the body raised
        uninstall_cache()
def get_cache():
    """ Returns internal cache object from globally installed ``CachedSession``
    """
    # after install_cache(), constructing a Session yields a CachedSession
    return requests.Session().cache
def clear():
    """ Clears globally installed cache
    """
    get_cache().clear()
def _patch_session_factory(session_factory=CachedSession):
    # patch both aliases so ``requests.Session()`` and
    # ``requests.sessions.Session()`` produce the same factory's sessions
    requests.Session = requests.sessions.Session = session_factory

View file

@ -37,15 +37,13 @@ except ImportError:
gzip = None
from lib import requests
from lib import requests_cache
from lib import cachecontrol
from lib.cachecontrol import caches
from tvdb_ui import BaseUI, ConsoleUI
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound)
# Cached Session Handler
from lib.httpcache import CachingHTTPAdapter
def log():
    """Return the logger used by this module."""
    return logging.getLogger("tvdb_api")
@ -429,12 +427,14 @@ class Tvdb:
if cache is True:
self.config['cache_enabled'] = True
requests_cache.install_cache(self._getTempDir())
self.sess = cachecontrol.CacheControl(requests.Session(),
cache=caches.FileCache(self._getTempDir()), cache_all=True)
elif cache is False:
self.config['cache_enabled'] = False
elif isinstance(cache, basestring):
self.config['cache_enabled'] = True
requests_cache.install_cache(cache)
self.sess = cachecontrol.CacheControl(requests.Session(),
cache=caches.FileCache(cache), cache_all=True)
else:
raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
@ -537,10 +537,9 @@ class Tvdb:
# get response from TVDB
if self.config['cache_enabled']:
resp = requests.get(url, params=params)
resp = self.sess.get(url, params=sorted(params))
else:
with requests_cache.disabled():
resp = requests.get(url, params=params)
resp = requests.get(url, params=params)
except requests.HTTPError, e:
raise tvdb_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))

View file

@ -32,15 +32,13 @@ except ImportError:
from lib.dateutil.parser import parse
from lib import requests
from lib import requests_cache
from lib import cachecontrol
from lib.cachecontrol import caches
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
# Cached Session Handler
from lib.httpcache import CachingHTTPAdapter
def log():
    """Return the logger used by this module."""
    return logging.getLogger("tvrage_api")
@ -272,12 +270,14 @@ class TVRage:
if cache is True:
self.config['cache_enabled'] = True
requests_cache.install_cache(self._getTempDir())
self.sess = cachecontrol.CacheControl(requests.Session(),
cache=caches.FileCache(self._getTempDir()), cache_all=True)
elif cache is False:
self.config['cache_enabled'] = False
elif isinstance(cache, basestring):
self.config['cache_enabled'] = True
requests_cache.install_cache(cache)
self.sess = cachecontrol.CacheControl(requests.Session(),
cache=caches.FileCache(cache), cache_all=True)
else:
raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
@ -370,10 +370,9 @@ class TVRage:
# get response from TVRage
if self.config['cache_enabled']:
resp = requests.get(url, params=params)
resp = self.sess.get(url, params=params)
else:
with requests_cache.disabled():
resp = requests.get(url, params=params)
resp = requests.get(url, params=params)
except requests.HTTPError, e:
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))