From 7e3cb26d4dd8f4f484a68f5c1998c79bdb0ae29f Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 28 Apr 2015 18:28:47 +0100 Subject: [PATCH] Update Requests library 2.4.3 to 2.6.2 (ff71b25). --- CHANGES.md | 1 + lib/requests/__init__.py | 8 +- lib/requests/adapters.py | 19 +- lib/requests/api.py | 31 ++- lib/requests/auth.py | 14 +- lib/requests/compat.py | 55 +--- lib/requests/cookies.py | 107 +++++--- lib/requests/exceptions.py | 5 + lib/requests/models.py | 54 ++-- lib/requests/packages/chardet/__init__.py | 2 +- lib/requests/packages/chardet/chardetect.py | 66 +++-- lib/requests/packages/chardet/jpcntx.py | 8 + lib/requests/packages/chardet/latin1prober.py | 6 +- lib/requests/packages/chardet/mbcssm.py | 9 +- lib/requests/packages/chardet/sjisprober.py | 2 +- .../packages/chardet/universaldetector.py | 4 +- lib/requests/packages/urllib3/__init__.py | 8 +- lib/requests/packages/urllib3/_collections.py | 234 ++++++++++++++---- lib/requests/packages/urllib3/connection.py | 26 +- .../packages/urllib3/connectionpool.py | 144 +++++++---- .../packages/urllib3/contrib/pyopenssl.py | 62 ++--- lib/requests/packages/urllib3/exceptions.py | 23 +- lib/requests/packages/urllib3/poolmanager.py | 19 +- lib/requests/packages/urllib3/request.py | 28 ++- lib/requests/packages/urllib3/response.py | 183 +++++++++++--- .../packages/urllib3/util/connection.py | 1 + lib/requests/packages/urllib3/util/retry.py | 26 +- lib/requests/packages/urllib3/util/ssl_.py | 232 +++++++++++++---- lib/requests/packages/urllib3/util/url.py | 45 +++- lib/requests/sessions.py | 36 +-- lib/requests/utils.py | 22 +- 31 files changed, 1042 insertions(+), 438 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9f452a12..a90ab258 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ * Update fix for API response header for JSON content type and the return of JSONP data to updated package as listed in hacks.txt * Change network names to only display on top line of Day by Day layout on Episode View * Reposition country part of network name into the hover over in Day by Day layout +* Update Requests library 2.4.3 to 2.6.2 (ff71b25) * Add ToTV provider * Fix Backlog scheduler initialization and change backlog frequency from minutes to days * Change to consolidate and tidy some provider code diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index d5e1956e..46116bea 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -36,17 +36,17 @@ usage: The other HTTP methods are supported - see `requests.api`. Full documentation is at . -:copyright: (c) 2014 by Kenneth Reitz. +:copyright: (c) 2015 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. """ __title__ = 'requests' -__version__ = '2.4.3' -__build__ = 0x020403 +__version__ = '2.6.2' +__build__ = 0x020602 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2014 Kenneth Reitz' +__copyright__ = 'Copyright 2015 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index 40088900..02e0dd1f 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -11,10 +11,10 @@ and maintain connections. import socket from .models import Response -from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce +from .packages.urllib3.util.retry import Retry from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, prepend_scheme_if_needed, get_auth_from_url, urldefragauth) @@ -26,9 +26,10 @@ from .packages.urllib3.exceptions import ProxyError as _ProxyError from .packages.urllib3.exceptions import ProtocolError from .packages.urllib3.exceptions import ReadTimeoutError from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError) + ProxyError, RetryError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False @@ -63,7 +64,9 @@ class HTTPAdapter(BaseAdapter): should attempt. Note, this applies only to failed DNS lookups, socket connections and connection timeouts, never to requests where data has made it to the server. By default, Requests does not retry failed - connections. + connections. If you need granular control over the conditions under + which we retry a request, import urllib3's ``Retry`` class and pass + that instead. :param pool_block: Whether the connection pool should block for connections. Usage:: @@ -79,7 +82,10 @@ class HTTPAdapter(BaseAdapter): def __init__(self, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK): - self.max_retries = max_retries + if max_retries == DEFAULT_RETRIES: + self.max_retries = Retry(0, read=False) + else: + self.max_retries = Retry.from_int(max_retries) self.config = {} self.proxy_manager = {} @@ -360,7 +366,7 @@ class HTTPAdapter(BaseAdapter): assert_same_host=False, preload_content=False, decode_content=False, - retries=Retry(self.max_retries, read=False), + retries=self.max_retries, timeout=timeout ) @@ -412,6 +418,9 @@ class HTTPAdapter(BaseAdapter): if isinstance(e.reason, ConnectTimeoutError): raise ConnectTimeout(e, request=request) + if isinstance(e.reason, ResponseError): + raise RetryError(e, request=request) + raise ConnectionError(e, request=request) except _ProxyError as e: diff --git a/lib/requests/api.py b/lib/requests/api.py index 1469b05c..98c92298 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -16,7 +16,6 @@ from . import sessions def request(method, url, **kwargs): """Constructs and sends a :class:`Request `. - Returns :class:`Response ` object. :param method: method for the new :class:`Request` object. :param url: URL for the new :class:`Request` object. @@ -37,6 +36,8 @@ def request(method, url, **kwargs): :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. + :return: :class:`Response ` object + :rtype: requests.Response Usage:: @@ -55,10 +56,12 @@ def request(method, url, **kwargs): def get(url, **kwargs): - """Sends a GET request. Returns :class:`Response` object. + """Sends a GET request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) @@ -66,10 +69,12 @@ def get(url, **kwargs): def options(url, **kwargs): - """Sends a OPTIONS request. Returns :class:`Response` object. + """Sends a OPTIONS request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) @@ -77,10 +82,12 @@ def options(url, **kwargs): def head(url, **kwargs): - """Sends a HEAD request. Returns :class:`Response` object. + """Sends a HEAD request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', False) @@ -88,44 +95,52 @@ def head(url, **kwargs): def post(url, data=None, json=None, **kwargs): - """Sends a POST request. Returns :class:`Response` object. + """Sends a POST request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): - """Sends a PUT request. Returns :class:`Response` object. + """Sends a PUT request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('put', url, data=data, **kwargs) def patch(url, data=None, **kwargs): - """Sends a PATCH request. Returns :class:`Response` object. + """Sends a PATCH request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('patch', url, data=data, **kwargs) def delete(url, **kwargs): - """Sends a DELETE request. Returns :class:`Response` object. + """Sends a DELETE request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('delete', url, **kwargs) diff --git a/lib/requests/auth.py b/lib/requests/auth.py index 618a902a..0ff9c298 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -67,6 +67,7 @@ class HTTPDigestAuth(AuthBase): self.nonce_count = 0 self.chal = {} self.pos = None + self.num_401_calls = 1 def build_digest_header(self, method, url): @@ -102,7 +103,8 @@ class HTTPDigestAuth(AuthBase): # XXX not implemented yet entdig = None p_parsed = urlparse(url) - path = p_parsed.path + #: path is request-uri defined in RFC 2616 which should not be empty + path = p_parsed.path or "/" if p_parsed.query: path += '?' + p_parsed.query @@ -123,13 +125,15 @@ class HTTPDigestAuth(AuthBase): s += os.urandom(8) cnonce = (hashlib.sha1(s).hexdigest()[:16]) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2) if _algorithm == 'MD5-SESS': HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) if qop is None: respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): + noncebit = "%s:%s:%s:%s:%s" % ( + nonce, ncvalue, cnonce, 'auth', HA2 + ) respdig = KD(HA1, noncebit) else: # XXX handle auth-int. @@ -154,7 +158,7 @@ class HTTPDigestAuth(AuthBase): def handle_redirect(self, r, **kwargs): """Reset num_401_calls counter on redirects.""" if r.is_redirect: - setattr(self, 'num_401_calls', 1) + self.num_401_calls = 1 def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" @@ -168,7 +172,7 @@ class HTTPDigestAuth(AuthBase): if 'digest' in s_auth.lower() and num_401_calls < 2: - setattr(self, 'num_401_calls', num_401_calls + 1) + self.num_401_calls += 1 pat = re.compile(r'digest ', flags=re.IGNORECASE) self.chal = parse_dict_header(pat.sub('', s_auth, count=1)) @@ -188,7 +192,7 @@ class HTTPDigestAuth(AuthBase): return _r - setattr(self, 'num_401_calls', num_401_calls + 1) + self.num_401_calls = 1 return r def __call__(self, r): diff --git a/lib/requests/compat.py b/lib/requests/compat.py index be5a1ed6..70edff78 100644 --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -21,62 +21,10 @@ is_py2 = (_ver[0] == 2) #: Python 3.x? is_py3 = (_ver[0] == 3) -#: Python 3.0.x -is_py30 = (is_py3 and _ver[1] == 0) - -#: Python 3.1.x -is_py31 = (is_py3 and _ver[1] == 1) - -#: Python 3.2.x -is_py32 = (is_py3 and _ver[1] == 2) - -#: Python 3.3.x -is_py33 = (is_py3 and _ver[1] == 3) - -#: Python 3.4.x -is_py34 = (is_py3 and _ver[1] == 4) - -#: Python 2.7.x -is_py27 = (is_py2 and _ver[1] == 7) - -#: Python 2.6.x -is_py26 = (is_py2 and _ver[1] == 6) - -#: Python 2.5.x -is_py25 = (is_py2 and _ver[1] == 5) - -#: Python 2.4.x -is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. - - -# --------- -# Platforms -# --------- - - -# Syntax sugar. -_ver = sys.version.lower() - -is_pypy = ('pypy' in _ver) -is_jython = ('jython' in _ver) -is_ironpython = ('iron' in _ver) - -# Assume CPython, if nothing else. -is_cpython = not any((is_pypy, is_jython, is_ironpython)) - -# Windows-based system. -is_windows = 'win32' in str(sys.platform).lower() - -# Standard Linux 2+ system. -is_linux = ('linux' in str(sys.platform).lower()) -is_osx = ('darwin' in str(sys.platform).lower()) -is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. -is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. - try: import simplejson as json except (ImportError, SyntaxError): - # simplejson does not support Python 3.2, it thows a SyntaxError + # simplejson does not support Python 3.2, it throws a SyntaxError # because of u'...' Unicode literals. import json @@ -99,7 +47,6 @@ if is_py2: basestring = basestring numeric_types = (int, long, float) - elif is_py3: from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag from urllib.request import parse_http_list, getproxies, proxy_bypass diff --git a/lib/requests/cookies.py b/lib/requests/cookies.py index 831c49c6..1fbc934c 100644 --- a/lib/requests/cookies.py +++ b/lib/requests/cookies.py @@ -6,6 +6,7 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. """ +import copy import time import collections from .compat import cookielib, urlparse, urlunparse, Morsel @@ -157,26 +158,28 @@ class CookieConflictError(RuntimeError): class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): - """Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. + """Compatibility class; is a cookielib.CookieJar, but exposes a dict + interface. This is the CookieJar we create by default for requests and sessions that don't specify one, since some clients may expect response.cookies and session.cookies to support dict operations. - Don't use the dict interface internally; it's just for compatibility with - with external client code. All `requests` code should work out of the box - with externally provided instances of CookieJar, e.g., LWPCookieJar and - FileCookieJar. - - Caution: dictionary operations that are normally O(1) may be O(n). + Requests does not use the dict interface internally; it's just for + compatibility with external client code. All requests code should work + out of the box with externally provided instances of ``CookieJar``, e.g. + ``LWPCookieJar`` and ``FileCookieJar``. Unlike a regular CookieJar, this class is pickleable. - """ + .. warning:: dictionary operations that are normally O(1) may be O(n). + """ def get(self, name, default=None, domain=None, path=None): """Dict-like get() that also supports optional domain and path args in order to resolve naming collisions from using one cookie jar over - multiple domains. Caution: operation is O(n), not O(1).""" + multiple domains. + + .. warning:: operation is O(n), not O(1).""" try: return self._find_no_duplicates(name, domain, path) except KeyError: @@ -199,37 +202,38 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return c def iterkeys(self): - """Dict-like iterkeys() that returns an iterator of names of cookies from the jar. - See itervalues() and iteritems().""" + """Dict-like iterkeys() that returns an iterator of names of cookies + from the jar. See itervalues() and iteritems().""" for cookie in iter(self): yield cookie.name def keys(self): - """Dict-like keys() that returns a list of names of cookies from the jar. - See values() and items().""" + """Dict-like keys() that returns a list of names of cookies from the + jar. See values() and items().""" return list(self.iterkeys()) def itervalues(self): - """Dict-like itervalues() that returns an iterator of values of cookies from the jar. - See iterkeys() and iteritems().""" + """Dict-like itervalues() that returns an iterator of values of cookies + from the jar. See iterkeys() and iteritems().""" for cookie in iter(self): yield cookie.value def values(self): - """Dict-like values() that returns a list of values of cookies from the jar. - See keys() and items().""" + """Dict-like values() that returns a list of values of cookies from the + jar. See keys() and items().""" return list(self.itervalues()) def iteritems(self): - """Dict-like iteritems() that returns an iterator of name-value tuples from the jar. - See iterkeys() and itervalues().""" + """Dict-like iteritems() that returns an iterator of name-value tuples + from the jar. See iterkeys() and itervalues().""" for cookie in iter(self): yield cookie.name, cookie.value def items(self): - """Dict-like items() that returns a list of name-value tuples from the jar. - See keys() and values(). Allows client-code to call "dict(RequestsCookieJar) - and get a vanilla python dict of key value pairs.""" + """Dict-like items() that returns a list of name-value tuples from the + jar. See keys() and values(). Allows client-code to call + ``dict(RequestsCookieJar)`` and get a vanilla python dict of key value + pairs.""" return list(self.iteritems()) def list_domains(self): @@ -259,8 +263,9 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return False # there is only one domain in jar def get_dict(self, domain=None, path=None): - """Takes as an argument an optional domain and path and returns a plain old - Python dict of name-value pairs of cookies that meet the requirements.""" + """Takes as an argument an optional domain and path and returns a plain + old Python dict of name-value pairs of cookies that meet the + requirements.""" dictionary = {} for cookie in iter(self): if (domain is None or cookie.domain == domain) and (path is None @@ -269,21 +274,24 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return dictionary def __getitem__(self, name): - """Dict-like __getitem__() for compatibility with client code. Throws exception - if there are more than one cookie with name. In that case, use the more - explicit get() method instead. Caution: operation is O(n), not O(1).""" + """Dict-like __getitem__() for compatibility with client code. Throws + exception if there are more than one cookie with name. In that case, + use the more explicit get() method instead. + + .. warning:: operation is O(n), not O(1).""" return self._find_no_duplicates(name) def __setitem__(self, name, value): - """Dict-like __setitem__ for compatibility with client code. Throws exception - if there is already a cookie of that name in the jar. In that case, use the more - explicit set() method instead.""" + """Dict-like __setitem__ for compatibility with client code. Throws + exception if there is already a cookie of that name in the jar. In that + case, use the more explicit set() method instead.""" self.set(name, value) def __delitem__(self, name): - """Deletes a cookie given a name. Wraps cookielib.CookieJar's remove_cookie_by_name().""" + """Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s + ``remove_cookie_by_name()``.""" remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): @@ -295,15 +303,16 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): """Updates this jar with cookies from another CookieJar or dict-like""" if isinstance(other, cookielib.CookieJar): for cookie in other: - self.set_cookie(cookie) + self.set_cookie(copy.copy(cookie)) else: super(RequestsCookieJar, self).update(other) def _find(self, name, domain=None, path=None): - """Requests uses this method internally to get cookie values. Takes as args name - and optional domain and path. Returns a cookie.value. If there are conflicting cookies, - _find arbitrarily chooses one. See _find_no_duplicates if you want an exception thrown - if there are conflicting cookies.""" + """Requests uses this method internally to get cookie values. Takes as + args name and optional domain and path. Returns a cookie.value. If + there are conflicting cookies, _find arbitrarily chooses one. See + _find_no_duplicates if you want an exception thrown if there are + conflicting cookies.""" for cookie in iter(self): if cookie.name == name: if domain is None or cookie.domain == domain: @@ -313,10 +322,11 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) def _find_no_duplicates(self, name, domain=None, path=None): - """__get_item__ and get call _find_no_duplicates -- never used in Requests internally. - Takes as args name and optional domain and path. Returns a cookie.value. - Throws KeyError if cookie is not found and CookieConflictError if there are - multiple cookies that match name and optionally domain and path.""" + """Both ``__get_item__`` and ``get`` call this function: it's never + used elsewhere in Requests. Takes as args name and optional domain and + path. Returns a cookie.value. Throws KeyError if cookie is not found + and CookieConflictError if there are multiple cookies that match name + and optionally domain and path.""" toReturn = None for cookie in iter(self): if cookie.name == name: @@ -350,6 +360,21 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return new_cj +def _copy_cookie_jar(jar): + if jar is None: + return None + + if hasattr(jar, 'copy'): + # We're dealing with an instane of RequestsCookieJar + return jar.copy() + # We're dealing with a generic CookieJar instance + new_jar = copy.copy(jar) + new_jar.clear() + for cookie in jar: + new_jar.set_cookie(copy.copy(cookie)) + return new_jar + + def create_cookie(name, value, **kwargs): """Make a cookie from underspecified parameters. @@ -440,7 +465,7 @@ def merge_cookies(cookiejar, cookies): """ if not isinstance(cookiejar, cookielib.CookieJar): raise ValueError('You can only merge into CookieJar') - + if isinstance(cookies, dict): cookiejar = cookiejar_from_dict( cookies, cookiejar=cookiejar, overwrite=False) diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py index 34c7a0db..89135a80 100644 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -90,5 +90,10 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): """Failed to decode response content""" + class StreamConsumedError(RequestException, TypeError): """The content for this response was already consumed""" + + +class RetryError(RequestException): + """Custom retries logic failed""" diff --git a/lib/requests/models.py b/lib/requests/models.py index 2370b67f..45b3ea96 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -15,16 +15,15 @@ from .hooks import default_hooks from .structures import CaseInsensitiveDict from .auth import HTTPBasicAuth -from .cookies import cookiejar_from_dict, get_cookie_header +from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar from .packages.urllib3.fields import RequestField from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url from .packages.urllib3.exceptions import ( - DecodeError, ReadTimeoutError, ProtocolError) + DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) from .exceptions import ( - HTTPError, RequestException, MissingSchema, InvalidURL, - ChunkedEncodingError, ContentDecodingError, ConnectionError, - StreamConsumedError) + HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, + ContentDecodingError, ConnectionError, StreamConsumedError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, @@ -144,12 +143,13 @@ class RequestEncodingMixin(object): else: fn = guess_filename(v) or k fp = v - if isinstance(fp, str): - fp = StringIO(fp) - if isinstance(fp, bytes): - fp = BytesIO(fp) - rf = RequestField(name=k, data=fp.read(), + if isinstance(fp, (str, bytes, bytearray)): + fdata = fp + else: + fdata = fp.read() + + rf = RequestField(name=k, data=fdata, filename=fn, headers=fh) rf.make_multipart(content_type=ft) new_fields.append(rf) @@ -320,7 +320,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): p.method = self.method p.url = self.url p.headers = self.headers.copy() if self.headers is not None else None - p._cookies = self._cookies.copy() if self._cookies is not None else None + p._cookies = _copy_cookie_jar(self._cookies) p.body = self.body p.hooks = self.hooks return p @@ -351,11 +351,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): return # Support for unicode domain names and paths. - scheme, auth, host, port, path, query, fragment = parse_url(url) + try: + scheme, auth, host, port, path, query, fragment = parse_url(url) + except LocationParseError as e: + raise InvalidURL(*e.args) if not scheme: raise MissingSchema("Invalid URL {0!r}: No schema supplied. " - "Perhaps you meant http://{0}?".format(url)) + "Perhaps you meant http://{0}?".format( + to_native_string(url, 'utf8'))) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) @@ -498,7 +502,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_content_length(self.body) def prepare_cookies(self, cookies): - """Prepares the given HTTP cookie data.""" + """Prepares the given HTTP cookie data. + + This function eventually generates a ``Cookie`` header from the + given cookies using cookielib. Due to cookielib's design, the header + will not be regenerated if it already exists, meaning this function + can only be called once for the life of the + :class:`PreparedRequest ` object. Any subsequent calls + to ``prepare_cookies`` will have no actual effect, unless the "Cookie" + header is removed beforehand.""" if isinstance(cookies, cookielib.CookieJar): self._cookies = cookies @@ -511,6 +523,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_hooks(self, hooks): """Prepares the given hooks.""" + # hooks can be passed as None to the prepare method and to this + # method. To prevent iterating over None, simply use an empty list + # if hooks is False-y + hooks = hooks or [] for event in hooks: self.register_hook(event, hooks[event]) @@ -570,7 +586,11 @@ class Response(object): self.cookies = cookiejar_from_dict({}) #: The amount of time elapsed between sending the request - #: and the arrival of the response (as a timedelta) + #: and the arrival of the response (as a timedelta). + #: This property specifically measures the time taken between sending + #: the first byte of the request and finishing parsing the headers. It + #: is therefore unaffected by consuming the response content or the + #: value of the ``stream`` keyword argument. self.elapsed = datetime.timedelta(0) #: The :class:`PreparedRequest ` object to which this @@ -615,7 +635,7 @@ class Response(object): def ok(self): try: self.raise_for_status() - except RequestException: + except HTTPError: return False return True @@ -686,6 +706,8 @@ class Response(object): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. + + .. note:: This method is not reentrant safe. """ pending = None diff --git a/lib/requests/packages/chardet/__init__.py b/lib/requests/packages/chardet/__init__.py index e4f0799d..82c2a48d 100644 --- a/lib/requests/packages/chardet/__init__.py +++ b/lib/requests/packages/chardet/__init__.py @@ -15,7 +15,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -__version__ = "2.2.1" +__version__ = "2.3.0" from sys import version_info diff --git a/lib/requests/packages/chardet/chardetect.py b/lib/requests/packages/chardet/chardetect.py index ecd0163b..ffe892f2 100644 --- a/lib/requests/packages/chardet/chardetect.py +++ b/lib/requests/packages/chardet/chardetect.py @@ -12,34 +12,68 @@ Example:: If no paths are provided, it takes its input from stdin. """ -from io import open -from sys import argv, stdin +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys +from io import open + +from chardet import __version__ from chardet.universaldetector import UniversalDetector -def description_of(file, name='stdin'): - """Return a string describing the probable encoding of a file.""" +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ u = UniversalDetector() - for line in file: + for line in lines: u.feed(line) u.close() result = u.result if result['encoding']: - return '%s: %s with confidence %s' % (name, - result['encoding'], - result['confidence']) + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) else: - return '%s: no result' % name + return '{0}: no result'.format(name) -def main(): - if len(argv) <= 1: - print(description_of(stdin)) - else: - for path in argv[1:]: - with open(path, 'rb') as f: - print(description_of(f, path)) +def main(argv=None): + ''' + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. + :type argv: list of str + ''' + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler='resolve') + parser.add_argument('input', + help='File whose encoding we would like to determine.', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) if __name__ == '__main__': diff --git a/lib/requests/packages/chardet/jpcntx.py b/lib/requests/packages/chardet/jpcntx.py index f7f69ba4..59aeb6a8 100644 --- a/lib/requests/packages/chardet/jpcntx.py +++ b/lib/requests/packages/chardet/jpcntx.py @@ -177,6 +177,12 @@ class JapaneseContextAnalysis: return -1, 1 class SJISContextAnalysis(JapaneseContextAnalysis): + def __init__(self): + self.charset_name = "SHIFT_JIS" + + def get_charset_name(self): + return self.charset_name + def get_order(self, aBuf): if not aBuf: return -1, 1 @@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis): first_char = wrap_ord(aBuf[0]) if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): charLen = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self.charset_name = "CP932" else: charLen = 1 diff --git a/lib/requests/packages/chardet/latin1prober.py b/lib/requests/packages/chardet/latin1prober.py index ad695f57..eef35735 100644 --- a/lib/requests/packages/chardet/latin1prober.py +++ b/lib/requests/packages/chardet/latin1prober.py @@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber): if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] / total) - - (self._mFreqCounter[1] * 20.0 / total)) + confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) + / total) if confidence < 0.0: confidence = 0.0 # lower the confidence of latin1 so that other more accurate # detector can take priority. - confidence = confidence * 0.5 + confidence = confidence * 0.73 return confidence diff --git a/lib/requests/packages/chardet/mbcssm.py b/lib/requests/packages/chardet/mbcssm.py index 3f93cfb0..efe678ca 100644 --- a/lib/requests/packages/chardet/mbcssm.py +++ b/lib/requests/packages/chardet/mbcssm.py @@ -353,7 +353,7 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # 68 - 6f 2,2,2,2,2,2,2,2, # 70 - 77 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,2,2,3, # 80 - 87 3,3,3,3,3,3,3,3, # 88 - 8f 3,3,3,3,3,3,3,3, # 90 - 97 3,3,3,3,3,3,3,3, # 98 - 9f @@ -369,9 +369,8 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # d8 - df 3,3,3,3,3,3,3,3, # e0 - e7 3,3,3,3,3,4,4,4, # e8 - ef - 4,4,4,4,4,4,4,4, # f0 - f7 - 4,4,4,4,4,0,0,0 # f8 - ff -) + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff SJIS_st = ( @@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls, 'stateTable': UTF8_st, 'charLenTable': UTF8CharLenTable, 'name': 'UTF-8'} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/sjisprober.py b/lib/requests/packages/chardet/sjisprober.py index b173614e..cd0e9e70 100644 --- a/lib/requests/packages/chardet/sjisprober.py +++ b/lib/requests/packages/chardet/sjisprober.py @@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber): self._mContextAnalyzer.reset() def get_charset_name(self): - return "SHIFT_JIS" + return self._mContextAnalyzer.get_charset_name() def feed(self, aBuf): aLen = len(aBuf) diff --git a/lib/requests/packages/chardet/universaldetector.py b/lib/requests/packages/chardet/universaldetector.py index 9a03ad3d..476522b9 100644 --- a/lib/requests/packages/chardet/universaldetector.py +++ b/lib/requests/packages/chardet/universaldetector.py @@ -71,9 +71,9 @@ class UniversalDetector: if not self._mGotData: # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM: + if aBuf[:3] == codecs.BOM_UTF8: # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8", 'confidence': 1.0} + self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} elif aBuf[:4] == codecs.BOM_UTF32_LE: # FF FE 00 00 UTF-32, little-endian BOM self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} diff --git a/lib/requests/packages/urllib3/__init__.py b/lib/requests/packages/urllib3/__init__.py index 4b36b5ae..333060c2 100644 --- a/lib/requests/packages/urllib3/__init__.py +++ b/lib/requests/packages/urllib3/__init__.py @@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = 'dev' +__version__ = '1.10.3' from .connectionpool import ( @@ -55,9 +55,11 @@ def add_stderr_logger(level=logging.DEBUG): del NullHandler -# Set security warning to only go off once by default. import warnings -warnings.simplefilter('module', exceptions.SecurityWarning) +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/lib/requests/packages/urllib3/_collections.py b/lib/requests/packages/urllib3/_collections.py index d77ebb8d..279416ce 100644 --- a/lib/requests/packages/urllib3/_collections.py +++ b/lib/requests/packages/urllib3/_collections.py @@ -1,7 +1,7 @@ from collections import Mapping, MutableMapping try: from threading import RLock -except ImportError: # Platform-specific: No threads available +except ImportError: # Platform-specific: No threads available class RLock: def __enter__(self): pass @@ -10,11 +10,11 @@ except ImportError: # Platform-specific: No threads available pass -try: # Python 2.7+ +try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import itervalues +from .packages.six import iterkeys, itervalues, PY3 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -85,8 +85,7 @@ class RecentlyUsedContainer(MutableMapping): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -95,10 +94,17 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: - return self._container.keys() + return list(iterkeys(self._container)) -class HTTPHeaderDict(MutableMapping): +_dict_setitem = dict.__setitem__ +_dict_getitem = dict.__getitem__ +_dict_delitem = dict.__delitem__ +_dict_contains = dict.__contains__ +_dict_setdefault = dict.setdefault + + +class HTTPHeaderDict(dict): """ :param headers: An iterable of field-value pairs. Must not contain multiple field names @@ -130,25 +136,75 @@ class HTTPHeaderDict(MutableMapping): 'foo=bar, baz=quxx' >>> headers['Content-Length'] '7' - - If you want to access the raw headers with their original casing - for debugging purposes you can access the private ``._data`` attribute - which is a normal python ``dict`` that maps the case-insensitive key to a - list of tuples stored as (case-sensitive-original-name, value). Using the - structure from above as our example: - - >>> headers._data - {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], - 'content-length': [('content-length', '7')]} """ def __init__(self, headers=None, **kwargs): - self._data = {} - if headers is None: - headers = {} - self.update(headers, **kwargs) + dict.__init__(self) + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) - def add(self, key, value): + def __setitem__(self, key, val): + return _dict_setitem(self, key.lower(), (key, val)) + + def __getitem__(self, key): + val = _dict_getitem(self, key.lower()) + return ', '.join(val[1:]) + + def __delitem__(self, key): + return _dict_delitem(self, key.lower()) + + def __contains__(self, key): + return _dict_contains(self, key.lower()) + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other) + + def __ne__(self, other): + return not self.__eq__(other) + + values = MutableMapping.values + get = MutableMapping.get + update = MutableMapping.update + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): """Adds a (name, value) pair, doesn't overwrite the value if it already exists. @@ -157,43 +213,111 @@ class HTTPHeaderDict(MutableMapping): >>> headers['foo'] 'bar, baz' """ - self._data.setdefault(key.lower(), []).append((key, value)) + key_lower = key.lower() + new_vals = key, val + # Keep the common case aka no item present as fast as possible + vals = _dict_setdefault(self, key_lower, new_vals) + if new_vals is not vals: + # new_vals was not inserted, as there was a previous one + if isinstance(vals, list): + # If already several items got inserted, we have a list + vals.append(val) + else: + # vals should be a tuple then, i.e. only one item so far + # Need to convert the tuple to list for further extension + _dict_setitem(self, key_lower, [vals[0], vals[1], val]) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({} given)".format(len(args))) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) def getlist(self, key): """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" - return self[key].split(', ') if key in self else [] + try: + vals = _dict_getitem(self, key.lower()) + except KeyError: + return [] + else: + if isinstance(vals, tuple): + return [vals[1]] + else: + return vals[1:] - def copy(self): - h = HTTPHeaderDict() - for key in self._data: - for rawkey, value in self._data[key]: - h.add(rawkey, value) - return h - - def __eq__(self, other): - if not isinstance(other, Mapping): - return False - other = HTTPHeaderDict(other) - return dict((k1, self[k1]) for k1 in self._data) == \ - dict((k2, other[k2]) for k2 in other._data) - - def __getitem__(self, key): - values = self._data[key.lower()] - return ', '.join(value[1] for value in values) - - def __setitem__(self, key, value): - self._data[key.lower()] = [(key, value)] - - def __delitem__(self, key): - del self._data[key.lower()] - - def __len__(self): - return len(self._data) - - def __iter__(self): - for headers in itervalues(self._data): - yield headers[0][0] + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = _dict_getitem(other, key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + _dict_setitem(self, key, val) + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = _dict_getitem(self, key) + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = _dict_getitem(self, key) + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + headers = [] + + for line in message.headers: + if line.startswith((' ', '\t')): + key, value = headers[-1] + headers[-1] = (key, value + '\r\n' + line.rstrip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/lib/requests/packages/urllib3/connection.py b/lib/requests/packages/urllib3/connection.py index c6e1959a..2a8c3596 100644 --- a/lib/requests/packages/urllib3/connection.py +++ b/lib/requests/packages/urllib3/connection.py @@ -3,6 +3,7 @@ import sys import socket from socket import timeout as SocketTimeout import warnings +from .packages import six try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException @@ -26,12 +27,20 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + from .exceptions import ( ConnectTimeoutError, SystemTimeWarning, + SecurityWarning, ) from .packages.ssl_match_hostname import match_hostname -from .packages import six from .util.ssl_ import ( resolve_cert_reqs, @@ -40,8 +49,8 @@ from .util.ssl_ import ( assert_fingerprint, ) -from .util import connection +from .util import connection port_by_scheme = { 'http': 80, @@ -233,8 +242,15 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif resolved_cert_reqs != ssl.CERT_NONE \ and self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or hostname) + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' + 'This feature is being removed by major browsers and deprecated by RFC 2818. ' + '(See https://github.com/shazow/urllib3/issues/497 for details.)'), + SecurityWarning + ) + match_hostname(cert, self.assert_hostname or hostname) self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED or self.assert_fingerprint is not None) @@ -244,3 +260,5 @@ if ssl: # Make a copy for testing. UnverifiedHTTPSConnection = HTTPSConnection HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/lib/requests/packages/urllib3/connectionpool.py b/lib/requests/packages/urllib3/connectionpool.py index e693bbd8..117269ac 100644 --- a/lib/requests/packages/urllib3/connectionpool.py +++ b/lib/requests/packages/urllib3/connectionpool.py @@ -32,7 +32,7 @@ from .connection import ( port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, - HTTPException, BaseSSLError, + HTTPException, BaseSSLError, ConnectionError ) from .request import RequestMethods from .response import HTTPResponse @@ -72,6 +72,21 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(): + """ + Close all pooled connections and disable the pool. + """ + pass + + # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) @@ -266,6 +281,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ pass + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -278,6 +297,23 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # can be removed later return Timeout.from_float(timeout) + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -301,7 +337,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. - self._validate_conn(conn) + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. @@ -327,32 +368,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() - except SocketTimeout: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - except BaseSSLError as e: - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. We need to do this because of: - # http://bugs.python.org/issue10272 - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - raise - - except SocketError as e: # Platform-specific: Python 2 - # See the above comment about EAGAIN in Python 3. In Python 2 we - # have to specifically catch it and throw the timeout error - if e.errno in _blocking_errnos: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. @@ -508,11 +529,18 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, - timeout=timeout, + timeout=timeout_obj, body=body, headers=headers) # If we're going to release the connection in ``finally:``, then @@ -537,26 +565,36 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise EmptyPoolError(self, "No pool connections are available.") except (BaseSSLError, CertificateError) as e: - # Release connection unconditionally because there is no way to - # close it externally in case of exception. - release_conn = True + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + if conn: + conn.close() + conn = None raise SSLError(e) - except (TimeoutError, HTTPException, SocketError) as e: + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + if conn: + conn.close() + conn = None + raise + + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: if conn: # Discard the connection for these exceptions. It will be # be replaced during the next _get_conn() call. conn.close() conn = None - stacktrace = sys.exc_info()[2] if isinstance(e, SocketError) and self.proxy: e = ProxyError('Cannot connect to proxy.', e) elif isinstance(e, (SocketError, HTTPException)): e = ProtocolError('Connection aborted.', e) - retries = retries.increment(method, url, error=e, - _pool=self, _stacktrace=stacktrace) + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) retries.sleep() # Keep track of the error for the retry warning. @@ -668,24 +706,26 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if self.proxy is not None: - # Python 2.7+ - try: - set_tunnel = conn.set_tunnel - except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = conn._set_tunnel - - if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older - set_tunnel(self.host, self.port) - else: - set_tunnel(self.host, self.port, self.proxy_headers) - - # Establish tunnel connection early, because otherwise httplib - # would improperly set Host: header to proxy's IP:port. - conn.connect() - return conn + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + + conn.connect() + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -695,7 +735,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): % (self.num_connections, self.host)) if not self.ConnectionCls or self.ConnectionCls is DummyConnection: - # Platform-specific: Python without ssl raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") @@ -721,15 +760,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` conn.connect() - """ if not conn.is_verified: warnings.warn(( 'Unverified HTTPS request is being made. ' 'Adding certificate verification is strongly advised. See: ' - 'https://urllib3.readthedocs.org/en/latest/security.html ' - '(This warning will only appear once by default.)'), + 'https://urllib3.readthedocs.org/en/latest/security.html'), InsecureRequestWarning) - """ def connection_from_url(url, **kw): diff --git a/lib/requests/packages/urllib3/contrib/pyopenssl.py b/lib/requests/packages/urllib3/contrib/pyopenssl.py index 374b544f..b2c34a89 100644 --- a/lib/requests/packages/urllib3/contrib/pyopenssl.py +++ b/lib/requests/packages/urllib3/contrib/pyopenssl.py @@ -29,7 +29,7 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. Activating this module also has the positive side effect of disabling SSL/TLS -encryption in Python 2 (see `CRIME attack`_). +compression in Python 2 (see `CRIME attack`_). If you want to configure the default list of supported cipher suites, you can set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. @@ -38,8 +38,6 @@ Module Variables ---------------- :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: - ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) @@ -68,17 +66,15 @@ __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] HAS_SNI = SUBJ_ALT_NAME_SUPPORT # Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} + try: - _openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, - } + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) except AttributeError: - _openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, - } + pass _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, @@ -87,22 +83,7 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -# A secure default. -# Sources for more information on TLS ciphers: -# -# - https://wiki.mozilla.org/Security/Server_Side_TLS -# - https://www.ssllabs.com/projects/best-practices/index.html -# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ -# -# The general intent is: -# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), -# - prefer ECDHE over DHE for better performance, -# - prefer any AES-GCM over any AES-CBC for better performance and security, -# - use 3DES as fallback which is secure but slow, -# - disable NULL authentication, MD5 MACs and DSS for security reasons. -DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ - "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ - "!aNULL:!MD5:!DSS" +DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS orig_util_HAS_SNI = util.HAS_SNI @@ -193,6 +174,11 @@ class WrappedSocket(object): return b'' else: raise + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise except OpenSSL.SSL.WantReadError: rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) @@ -206,8 +192,21 @@ class WrappedSocket(object): def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - return self.connection.sendall(data) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): if self._makefile_refs < 1: @@ -255,6 +254,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) @@ -282,7 +282,9 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: - select.select([sock], [], []) + rd, _, _ = select.select([sock], [], [], sock.gettimeout()) + if not rd: + raise timeout('select timed out') continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/lib/requests/packages/urllib3/exceptions.py b/lib/requests/packages/urllib3/exceptions.py index 7519ba98..31bda1c0 100644 --- a/lib/requests/packages/urllib3/exceptions.py +++ b/lib/requests/packages/urllib3/exceptions.py @@ -72,11 +72,8 @@ class MaxRetryError(RequestError): def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %r)" % reason - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -141,6 +138,12 @@ class LocationParseError(LocationValueError): self.location = location +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + class SecurityWarning(HTTPWarning): "Warned when perfoming security reducing actions" pass @@ -154,3 +157,13 @@ class InsecureRequestWarning(SecurityWarning): class SystemTimeWarning(SecurityWarning): "Warned when system time is suspected to be wrong" pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + "Response needs to be chunked in order to read it as chunks." + pass diff --git a/lib/requests/packages/urllib3/poolmanager.py b/lib/requests/packages/urllib3/poolmanager.py index 515dc962..b8d1e745 100644 --- a/lib/requests/packages/urllib3/poolmanager.py +++ b/lib/requests/packages/urllib3/poolmanager.py @@ -8,7 +8,7 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError +from .exceptions import LocationValueError, MaxRetryError from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -64,6 +64,14 @@ class PoolManager(RequestMethods): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + def _new_pool(self, scheme, host, port): """ Create a new :class:`ConnectionPool` based on host, port and scheme. @@ -167,7 +175,14 @@ class PoolManager(RequestMethods): if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - kw['retries'] = retries.increment(method, redirect_location) + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries kw['redirect'] = redirect log.info("Redirecting %s -> %s" % (url, redirect_location)) diff --git a/lib/requests/packages/urllib3/request.py b/lib/requests/packages/urllib3/request.py index 51fe2386..b08d6c92 100644 --- a/lib/requests/packages/urllib3/request.py +++ b/lib/requests/packages/urllib3/request.py @@ -118,18 +118,24 @@ class RequestMethods(object): which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ - if encode_multipart: - body, content_type = encode_multipart_formdata( - fields or {}, boundary=multipart_boundary) - else: - body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') - if headers is None: headers = self.headers - headers_ = {'Content-Type': content_type} - headers_.update(headers) + extra_kw = {'headers': {}} - return self.urlopen(method, url, body=body, headers=headers_, - **urlopen_kw) + if fields: + if 'body' in urlopen_kw: + raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) + + return self.urlopen(method, url, **extra_kw) diff --git a/lib/requests/packages/urllib3/response.py b/lib/requests/packages/urllib3/response.py index e69de957..f1ea9bb5 100644 --- a/lib/requests/packages/urllib3/response.py +++ b/lib/requests/packages/urllib3/response.py @@ -1,15 +1,20 @@ +try: + import http.client as httplib +except ImportError: + import httplib import zlib import io from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import ProtocolError, DecodeError, ReadTimeoutError -from .packages.six import string_types as basestring, binary_type +from .exceptions import ( + ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked +) +from .packages.six import string_types as basestring, binary_type, PY3 from .connection import HTTPException, BaseSSLError from .util.response import is_fp_closed - class DeflateDecoder(object): def __init__(self): @@ -21,6 +26,9 @@ class DeflateDecoder(object): return getattr(self._obj, name) def decompress(self, data): + if not data: + return data + if not self._first_try: return self._obj.decompress(data) @@ -36,9 +44,23 @@ class DeflateDecoder(object): self._data = None +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + def _get_decoder(mode): if mode == 'gzip': - return zlib.decompressobj(16 + zlib.MAX_WBITS) + return GzipDecoder() return DeflateDecoder() @@ -76,9 +98,10 @@ class HTTPResponse(io.IOBase): strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = HTTPHeaderDict() - if headers: - self.headers.update(headers) + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) self.status = status self.version = version self.reason = reason @@ -100,8 +123,17 @@ class HTTPResponse(io.IOBase): if hasattr(body, 'read'): self._fp = body - if preload_content and not self._body: - self._body = self.read(decode_content=decode_content) + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get('transfer-encoding', '') + if tr_enc.lower() == "chunked": + self.chunked = True + + # We certainly don't want to preload content when the response is chunked. + if not self.chunked: + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) def get_redirect_location(self): """ @@ -140,6 +172,36 @@ class HTTPResponse(io.IOBase): """ return self._fp_bytes_read + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessar. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + + return data + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -161,12 +223,7 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + self._init_decoder() if decode_content is None: decode_content = self.decode_content @@ -202,7 +259,7 @@ class HTTPResponse(io.IOBase): except BaseSSLError as e: # FIXME: Is there a better way to differentiate between SSLErrors? - if not 'read operation timed out' in str(e): # Defensive: + if 'read operation timed out' not in str(e): # Defensive: # This shouldn't happen but just in case we're missing an edge # case, let's avoid swallowing SSL errors. raise @@ -215,17 +272,7 @@ class HTTPResponse(io.IOBase): self._fp_bytes_read += len(data) - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data @@ -252,11 +299,16 @@ class HTTPResponse(io.IOBase): If True, will attempt to decode the body based on the 'content-encoding' header. """ - while not is_fp_closed(self._fp): - data = self.read(amt=amt, decode_content=decode_content) + self._init_decoder() + if self.chunked: + for line in self.read_chunked(amt): + yield self._decode(line, decode_content, True) + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) - if data: - yield data + if data: + yield data @classmethod def from_httplib(ResponseCls, r, **response_kw): @@ -267,14 +319,16 @@ class HTTPResponse(io.IOBase): Remaining parameters are passed to the HTTPResponse constructor, along with ``original_response=r``. """ - - headers = HTTPHeaderDict() - for k, v in r.getheaders(): - headers.add(k, v) + headers = r.msg + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) - return ResponseCls(body=r, + resp = ResponseCls(body=r, headers=headers, status=r.status, version=r.version, @@ -282,6 +336,7 @@ class HTTPResponse(io.IOBase): strict=strict, original_response=r, **response_kw) + return resp # Backwards-compatibility methods for httplib.HTTPResponse def getheaders(self): @@ -331,3 +386,59 @@ class HTTPResponse(io.IOBase): else: b[:len(temp)] = temp return len(temp) + + def read_chunked(self, amt=None): + # FIXME: Rewrite this method and make it a class with + # a better structured logic. + if not self.chunked: + raise ResponseNotChunked("Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing.") + while True: + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is None: + line = self._fp.fp.readline() + line = line.decode() + # See RFC 7230: Chunked Transfer Coding. + i = line.find(';') + if i >= 0: + line = line[:i] # Strip chunk-extensions. + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(''.join(line)) + if self.chunk_left == 0: + break + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + yield chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + yield value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + yield value + else: # amt > self.chunk_left + yield self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + self.release_conn() + diff --git a/lib/requests/packages/urllib3/util/connection.py b/lib/requests/packages/urllib3/util/connection.py index 2156993a..859aec6e 100644 --- a/lib/requests/packages/urllib3/util/connection.py +++ b/lib/requests/packages/urllib3/util/connection.py @@ -82,6 +82,7 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, err = _ if sock is not None: sock.close() + sock = None if err is not None: raise err diff --git a/lib/requests/packages/urllib3/util/retry.py b/lib/requests/packages/urllib3/util/retry.py index eb560dfc..7e0959df 100644 --- a/lib/requests/packages/urllib3/util/retry.py +++ b/lib/requests/packages/urllib3/util/retry.py @@ -2,10 +2,11 @@ import time import logging from ..exceptions import ( - ProtocolError, ConnectTimeoutError, - ReadTimeoutError, MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, ) from ..packages import six @@ -36,7 +37,6 @@ class Retry(object): Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. - :param int total: Total number of retries to allow. Takes precedence over other counts. @@ -184,13 +184,13 @@ class Retry(object): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we can't - assume that the server did not process any of it. + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) def is_forced_retry(self, method, status_code): - """ Is this method/response retryable? (Based on method/codes whitelists) + """ Is this method/status code retryable? (Based on method/codes whitelists) """ if self.method_whitelist and method.upper() not in self.method_whitelist: return False @@ -198,8 +198,7 @@ class Retry(object): return self.status_forcelist and status_code in self.status_forcelist def is_exhausted(self): - """ Are we out of retries? - """ + """ Are we out of retries? """ retry_counts = (self.total, self.connect, self.read, self.redirect) retry_counts = list(filter(None, retry_counts)) if not retry_counts: @@ -230,6 +229,7 @@ class Retry(object): connect = self.connect read = self.read redirect = self.redirect + cause = 'unknown' if error and self._is_connection_error(error): # Connect retry? @@ -251,10 +251,16 @@ class Retry(object): # Redirect retry? if redirect is not None: redirect -= 1 + cause = 'too many redirects' else: - # FIXME: Nothing changed, scenario doesn't make sense. + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) new_retry = self.new( total=total, @@ -262,7 +268,7 @@ class Retry(object): _observed_errors=_observed_errors) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error) + raise MaxRetryError(_pool, url, error or ResponseError(cause)) log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) diff --git a/lib/requests/packages/urllib3/util/ssl_.py b/lib/requests/packages/urllib3/util/ssl_.py index 9cfe2d2a..b846d42c 100644 --- a/lib/requests/packages/urllib3/util/ssl_.py +++ b/lib/requests/packages/urllib3/util/ssl_.py @@ -1,21 +1,107 @@ from binascii import hexlify, unhexlify -from hashlib import md5, sha1 +from hashlib import md5, sha1, sha256 -from ..exceptions import SSLError +from ..exceptions import SSLError, InsecurePlatformWarning +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import warnings + try: # Test for SSL features - SSLContext = None - HAS_SNI = False - import ssl from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? from ssl import HAS_SNI # Has SNI? except ImportError: pass +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' + '!eNULL:!MD5' +) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/security.html' + '#insecureplatformwarning.', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + def assert_fingerprint(cert, fingerprint): """ Checks if given fingerprint matches the supplied certificate. @@ -30,7 +116,8 @@ def assert_fingerprint(cert, fingerprint): # this digest. hashfunc_map = { 16: md5, - 20: sha1 + 20: sha1, + 32: sha256, } fingerprint = fingerprint.replace(':', '').lower() @@ -91,42 +178,103 @@ def resolve_ssl_version(candidate): return candidate -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` +def create_urllib3_context(ssl_version=None, cert_reqs=None, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/lib/requests/packages/urllib3/util/url.py b/lib/requests/packages/urllib3/util/url.py index 487d456c..b2ec834f 100644 --- a/lib/requests/packages/urllib3/util/url.py +++ b/lib/requests/packages/urllib3/util/url.py @@ -40,6 +40,48 @@ class Url(namedtuple('Url', url_attrs)): return '%s:%d' % (self.host, self.port) return self.host + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url def split_first(s, delims): """ @@ -84,7 +126,7 @@ def parse_url(url): Example:: >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) >>> parse_url('/foo?bar') @@ -162,7 +204,6 @@ def parse_url(url): return Url(scheme, auth, host, port, path, query, fragment) - def get_host(url): """ Deprecated. Use :func:`.parse_url` instead. diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index c2f42b14..820919ee 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -36,6 +36,8 @@ from .status_codes import codes # formerly defined here, reexposed here for backward compatibility from .models import REDIRECT_STATI +REDIRECT_CACHE_SIZE = 1000 + def merge_setting(request_setting, session_setting, dict_class=OrderedDict): """ @@ -88,7 +90,7 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): class SessionRedirectMixin(object): def resolve_redirects(self, resp, req, stream=False, timeout=None, - verify=True, cert=None, proxies=None): + verify=True, cert=None, proxies=None, **adapter_kwargs): """Receives a Response. Returns a generator of Responses.""" i = 0 @@ -129,7 +131,7 @@ class SessionRedirectMixin(object): # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. - if not urlparse(url).netloc: + if not parsed.netloc: url = urljoin(resp.url, requote_uri(url)) else: url = requote_uri(url) @@ -169,7 +171,10 @@ class SessionRedirectMixin(object): except KeyError: pass - extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) + # Extract any cookies sent on the response to the cookiejar + # in the new request. Because we've mutated our copied prepared + # request, use the old one that we haven't yet touched. + extract_cookies_to_jar(prepared_request._cookies, req, resp.raw) prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) @@ -188,6 +193,7 @@ class SessionRedirectMixin(object): cert=cert, proxies=proxies, allow_redirects=False, + **adapter_kwargs ) extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) @@ -274,7 +280,7 @@ class Session(SessionRedirectMixin): __attrs__ = [ 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'prefetch', 'adapters', 'stream', 'trust_env', - 'max_redirects', 'redirect_cache' + 'max_redirects', ] def __init__(self): @@ -329,7 +335,7 @@ class Session(SessionRedirectMixin): self.mount('http://', HTTPAdapter()) # Only store 1000 redirects to prevent using infinite memory - self.redirect_cache = RecentlyUsedContainer(1000) + self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) def __enter__(self): return self @@ -555,10 +561,6 @@ class Session(SessionRedirectMixin): # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') - timeout = kwargs.get('timeout') - verify = kwargs.get('verify') - cert = kwargs.get('cert') - proxies = kwargs.get('proxies') hooks = request.hooks # Get the appropriate adapter to use @@ -586,12 +588,7 @@ class Session(SessionRedirectMixin): extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. - gen = self.resolve_redirects(r, request, - stream=stream, - timeout=timeout, - verify=verify, - cert=cert, - proxies=proxies) + gen = self.resolve_redirects(r, request, **kwargs) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] @@ -660,12 +657,19 @@ class Session(SessionRedirectMixin): self.adapters[key] = self.adapters.pop(key) def __getstate__(self): - return dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + state['redirect_cache'] = dict(self.redirect_cache) + return state def __setstate__(self, state): + redirect_cache = state.pop('redirect_cache', {}) for attr, value in state.items(): setattr(self, attr, value) + self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) + for redirect, to in redirect_cache.items(): + self.redirect_cache[redirect] = to + def session(): """Returns a :class:`Session` for context-management.""" diff --git a/lib/requests/utils.py b/lib/requests/utils.py index aa5c140e..8fba62dd 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -25,7 +25,8 @@ from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, - builtin_str, getproxies, proxy_bypass, urlunparse) + builtin_str, getproxies, proxy_bypass, urlunparse, + basestring) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict from .exceptions import InvalidURL @@ -115,7 +116,8 @@ def get_netrc_auth(url): def guess_filename(obj): """Tries to guess the filename of the given object.""" name = getattr(obj, 'name', None) - if name and name[0] != '<' and name[-1] != '>': + if (name and isinstance(name, basestring) and name[0] != '<' and + name[-1] != '>'): return os.path.basename(name) @@ -418,10 +420,18 @@ def requote_uri(uri): This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ - # Unquote only the unreserved characters - # Then quote only illegal characters (do not quote reserved, unreserved, - # or '%') - return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") + safe_with_percent = "!#$%&'()*+,/:;=?@[]~" + safe_without_percent = "!#$&'()*+,/:;=?@[]~" + try: + # Unquote only the unreserved characters + # Then quote only illegal characters (do not quote reserved, + # unreserved, or '%') + return quote(unquote_unreserved(uri), safe=safe_with_percent) + except InvalidURL: + # We couldn't unquote the given URI, so let's try quoting it, but + # there may be unquoted '%'s in the URI. We need to make sure they're + # properly quoted so they do not cause issues elsewhere. + return quote(uri, safe=safe_without_percent) def address_in_network(ip, net):