From 556e92a730c93ee0ba6d46849386b6ecbbf50b67 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Thu, 20 Nov 2014 02:00:00 +0000 Subject: [PATCH] Update Requests library 2.3.0 to 2.4.3 (9dc6602). --- CHANGES.md | 1 + lib/requests/__init__.py | 8 +- lib/requests/adapters.py | 114 +-- lib/requests/api.py | 21 +- lib/requests/auth.py | 23 +- lib/requests/certs.py | 13 +- lib/requests/compat.py | 6 +- lib/requests/exceptions.py | 21 +- lib/requests/models.py | 120 ++-- lib/requests/packages/urllib3/__init__.py | 24 +- lib/requests/packages/urllib3/_collections.py | 112 ++- lib/requests/packages/urllib3/connection.py | 204 ++++-- .../packages/urllib3/connectionpool.py | 292 +++++--- .../packages/urllib3/contrib/ntlmpool.py | 6 - .../packages/urllib3/contrib/pyopenssl.py | 285 +++----- lib/requests/packages/urllib3/exceptions.py | 53 +- lib/requests/packages/urllib3/fields.py | 28 +- lib/requests/packages/urllib3/filepost.py | 15 +- .../packages/urllib3/packages/ordered_dict.py | 1 - .../packages/ssl_match_hostname/__init__.py | 2 +- lib/requests/packages/urllib3/poolmanager.py | 43 +- lib/requests/packages/urllib3/request.py | 44 +- lib/requests/packages/urllib3/response.py | 115 ++-- lib/requests/packages/urllib3/util.py | 648 ------------------ .../packages/urllib3/util/__init__.py | 24 + .../packages/urllib3/util/connection.py | 97 +++ lib/requests/packages/urllib3/util/request.py | 71 ++ .../packages/urllib3/util/response.py | 22 + lib/requests/packages/urllib3/util/retry.py | 279 ++++++++ lib/requests/packages/urllib3/util/ssl_.py | 132 ++++ lib/requests/packages/urllib3/util/timeout.py | 240 +++++++ lib/requests/packages/urllib3/util/url.py | 171 +++++ lib/requests/sessions.py | 214 ++++-- lib/requests/status_codes.py | 3 +- lib/requests/structures.py | 28 +- lib/requests/utils.py | 84 ++- 36 files changed, 2177 insertions(+), 1387 deletions(-) delete mode 100644 lib/requests/packages/urllib3/util.py create mode 100644 lib/requests/packages/urllib3/util/__init__.py create mode 100644 lib/requests/packages/urllib3/util/connection.py create mode 100644 lib/requests/packages/urllib3/util/request.py create mode 100644 lib/requests/packages/urllib3/util/response.py create mode 100644 lib/requests/packages/urllib3/util/retry.py create mode 100644 lib/requests/packages/urllib3/util/ssl_.py create mode 100644 lib/requests/packages/urllib3/util/timeout.py create mode 100644 lib/requests/packages/urllib3/util/url.py diff --git a/CHANGES.md b/CHANGES.md index 939b3d6d..8ad0cb50 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -28,6 +28,7 @@ * Add Config Post Processing failed downloads Sabnzbd setup guide * Fix Config Post Processing "Anime name pattern" custom javascript validation * Add check that SSLv3 is available before use by requests lib +* Update Requests library 2.3.0 to 2.4.3 (9dc6602) [develop changelog] diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index bba19002..d5e1956e 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -13,7 +13,7 @@ Requests is an HTTP library, written in Python, for human beings. Basic GET usage: >>> import requests - >>> r = requests.get('http://python.org') + >>> r = requests.get('https://www.python.org') >>> r.status_code 200 >>> 'Python is a programming language' in r.content @@ -22,7 +22,7 @@ usage: ... or POST: >>> payload = dict(key1='value1', key2='value2') - >>> r = requests.post("http://httpbin.org/post", data=payload) + >>> r = requests.post('http://httpbin.org/post', data=payload) >>> print(r.text) { ... @@ -42,8 +42,8 @@ is at . """ __title__ = 'requests' -__version__ = '2.3.0' -__build__ = 0x020300 +__version__ = '2.4.3' +__build__ = 0x020403 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index 0f297ab2..40088900 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -9,23 +9,26 @@ and maintain connections. """ import socket -import copy from .models import Response +from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce -from .compat import urlparse, basestring, urldefrag, unquote +from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - except_on_missing_scheme, get_auth_from_url) + prepend_scheme_if_needed, get_auth_from_url, urldefragauth) from .structures import CaseInsensitiveDict -from .packages.urllib3.exceptions import MaxRetryError -from .packages.urllib3.exceptions import TimeoutError -from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ConnectTimeoutError from .packages.urllib3.exceptions import HTTPError as _HTTPError +from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import ProxyError as _ProxyError +from .packages.urllib3.exceptions import ProtocolError +from .packages.urllib3.exceptions import ReadTimeoutError +from .packages.urllib3.exceptions import SSLError as _SSLError from .cookies import extract_cookies_to_jar -from .exceptions import ConnectionError, Timeout, SSLError, ProxyError +from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, + ProxyError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False @@ -57,13 +60,15 @@ class HTTPAdapter(BaseAdapter): :param pool_connections: The number of urllib3 connection pools to cache. :param pool_maxsize: The maximum number of connections to save in the pool. :param int max_retries: The maximum number of retries each connection - should attempt. Note, this applies only to failed connections and - timeouts, never to requests where the server returns a response. + should attempt. Note, this applies only to failed DNS lookups, socket + connections and connection timeouts, never to requests where data has + made it to the server. By default, Requests does not retry failed + connections. :param pool_block: Whether the connection pool should block for connections. Usage:: - >>> import lib.requests + >>> import requests >>> s = requests.Session() >>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> s.mount('http://', a) @@ -102,14 +107,17 @@ class HTTPAdapter(BaseAdapter): self.init_poolmanager(self._pool_connections, self._pool_maxsize, block=self._pool_block) - def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK): - """Initializes a urllib3 PoolManager. This method should not be called - from user code, and is only exposed for use when subclassing the + def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs): + """Initializes a urllib3 PoolManager. + + This method should not be called from user code, and is only + exposed for use when subclassing the :class:`HTTPAdapter `. :param connections: The number of urllib3 connection pools to cache. :param maxsize: The maximum number of connections to save in the pool. :param block: Block when no free connections are available. + :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager. """ # save these values for pickling self._pool_connections = connections @@ -117,7 +125,30 @@ class HTTPAdapter(BaseAdapter): self._pool_block = block self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block) + block=block, strict=True, **pool_kwargs) + + def proxy_manager_for(self, proxy, **proxy_kwargs): + """Return urllib3 ProxyManager for the given proxy. + + This method should not be called from user code, and is only + exposed for use when subclassing the + :class:`HTTPAdapter `. + + :param proxy: The proxy to return a urllib3 ProxyManager for. + :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager. + :returns: ProxyManager + """ + if not proxy in self.proxy_manager: + proxy_headers = self.proxy_headers(proxy) + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers, + num_pools=self._pool_connections, + maxsize=self._pool_maxsize, + block=self._pool_block, + **proxy_kwargs) + + return self.proxy_manager[proxy] def cert_verify(self, conn, url, verify, cert): """Verify a SSL certificate. This method should not be called from user @@ -204,18 +235,9 @@ class HTTPAdapter(BaseAdapter): proxy = proxies.get(urlparse(url.lower()).scheme) if proxy: - except_on_missing_scheme(proxy) - proxy_headers = self.proxy_headers(proxy) - - if not proxy in self.proxy_manager: - self.proxy_manager[proxy] = proxy_from_url( - proxy, - proxy_headers=proxy_headers, - num_pools=self._pool_connections, - maxsize=self._pool_maxsize, - block=self._pool_block) - - conn = self.proxy_manager[proxy].connection_from_url(url) + proxy = prepend_scheme_if_needed(proxy, 'http') + proxy_manager = self.proxy_manager_for(proxy) + conn = proxy_manager.connection_from_url(url) else: # Only scheme should be lower case parsed = urlparse(url) @@ -250,7 +272,7 @@ class HTTPAdapter(BaseAdapter): proxy = proxies.get(scheme) if proxy and scheme != 'https': - url, _ = urldefrag(request.url) + url = urldefragauth(request.url) else: url = request.path_url @@ -297,7 +319,10 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` being sent. :param stream: (optional) Whether to stream the request content. - :param timeout: (optional) The timeout on the request. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout `_) tuple. + :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. @@ -311,7 +336,18 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) - timeout = TimeoutSauce(connect=timeout, read=timeout) + if isinstance(timeout, tuple): + try: + connect, read = timeout + timeout = TimeoutSauce(connect=connect, read=read) + except ValueError as e: + # this may raise a string formatting error. + err = ("Invalid timeout {0}. Pass a (connect, read) " + "timeout tuple, or a single float to set " + "both timeouts to the same value".format(timeout)) + raise ValueError(err) + else: + timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: @@ -324,7 +360,7 @@ class HTTPAdapter(BaseAdapter): assert_same_host=False, preload_content=False, decode_content=False, - retries=self.max_retries, + retries=Retry(self.max_retries, read=False), timeout=timeout ) @@ -369,10 +405,13 @@ class HTTPAdapter(BaseAdapter): # All is well, return the connection to the pool. conn._put_conn(low_conn) - except socket.error as sockerr: - raise ConnectionError(sockerr, request=request) + except (ProtocolError, socket.error) as err: + raise ConnectionError(err, request=request) except MaxRetryError as e: + if isinstance(e.reason, ConnectTimeoutError): + raise ConnectTimeout(e, request=request) + raise ConnectionError(e, request=request) except _ProxyError as e: @@ -381,14 +420,9 @@ class HTTPAdapter(BaseAdapter): except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e, request=request) - elif isinstance(e, TimeoutError): - raise Timeout(e, request=request) + elif isinstance(e, ReadTimeoutError): + raise ReadTimeout(e, request=request) else: raise - r = self.build_response(request, resp) - - if not stream: - r.content - - return r \ No newline at end of file + return self.build_response(request, resp) diff --git a/lib/requests/api.py b/lib/requests/api.py index 01d853d5..1469b05c 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -22,12 +22,17 @@ def request(method, url, **kwargs): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. + :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request in seconds. + :param timeout: (optional) How long to wait for the server to send data + before giving up, as a float, or a (`connect timeout, read timeout + `_) tuple. + :type timeout: float or tuple :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. @@ -41,7 +46,12 @@ def request(method, url, **kwargs): """ session = sessions.Session() - return session.request(method=method, url=url, **kwargs) + response = session.request(method=method, url=url, **kwargs) + # By explicitly closing the session, we avoid leaving sockets open which + # can trigger a ResourceWarning in some cases, and look like a memory leak + # in others. + session.close() + return response def get(url, **kwargs): @@ -77,15 +87,16 @@ def head(url, **kwargs): return request('head', url, **kwargs) -def post(url, data=None, **kwargs): +def post(url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. """ - return request('post', url, data=data, **kwargs) + return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): diff --git a/lib/requests/auth.py b/lib/requests/auth.py index 9f831b7a..618a902a 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -16,7 +16,8 @@ from base64 import b64encode from .compat import urlparse, str from .cookies import extract_cookies_to_jar -from .utils import parse_dict_header +from .utils import parse_dict_header, to_native_string +from .status_codes import codes CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' @@ -25,7 +26,11 @@ CONTENT_TYPE_MULTI_PART = 'multipart/form-data' def _basic_auth_str(username, password): """Returns a Basic Auth string.""" - return 'Basic ' + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip().decode('latin1') + authstr = 'Basic ' + to_native_string( + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip() + ) + + return authstr class AuthBase(object): @@ -146,6 +151,11 @@ class HTTPDigestAuth(AuthBase): return 'Digest %s' % (base) + def handle_redirect(self, r, **kwargs): + """Reset num_401_calls counter on redirects.""" + if r.is_redirect: + setattr(self, 'num_401_calls', 1) + def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" @@ -178,7 +188,7 @@ class HTTPDigestAuth(AuthBase): return _r - setattr(self, 'num_401_calls', 1) + setattr(self, 'num_401_calls', num_401_calls + 1) return r def __call__(self, r): @@ -188,6 +198,11 @@ class HTTPDigestAuth(AuthBase): try: self.pos = r.body.tell() except AttributeError: - pass + # In the case of HTTPDigestAuth being reused and the body of + # the previous request was a file-like object, pos has the + # file position of the previous body. Ensure it's set to + # None. + self.pos = None r.register_hook('response', self.handle_401) + r.register_hook('response', self.handle_redirect) return r diff --git a/lib/requests/certs.py b/lib/requests/certs.py index bc008261..07e64750 100644 --- a/lib/requests/certs.py +++ b/lib/requests/certs.py @@ -11,14 +11,15 @@ If you are packaging Requests, e.g., for a Linux distribution or a managed environment, you can change the definition of where() to return a separately packaged CA bundle. """ - import os.path - -def where(): - """Return the preferred certificate bundle.""" - # vendored bundle inside Requests - return os.path.join(os.path.dirname(__file__), 'cacert.pem') +try: + from certifi import where +except ImportError: + def where(): + """Return the preferred certificate bundle.""" + # vendored bundle inside Requests + return os.path.join(os.path.dirname(__file__), 'cacert.pem') if __name__ == '__main__': print(where()) diff --git a/lib/requests/compat.py b/lib/requests/compat.py index bdf10d6a..be5a1ed6 100644 --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -75,7 +75,9 @@ is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. try: import simplejson as json -except ImportError: +except (ImportError, SyntaxError): + # simplejson does not support Python 3.2, it thows a SyntaxError + # because of u'...' Unicode literals. import json # --------- @@ -90,7 +92,6 @@ if is_py2: from Cookie import Morsel from StringIO import StringIO from .packages.urllib3.packages.ordered_dict import OrderedDict - from httplib import IncompleteRead builtin_str = str bytes = str @@ -106,7 +107,6 @@ elif is_py3: from http.cookies import Morsel from io import StringIO from collections import OrderedDict - from http.client import IncompleteRead builtin_str = str str = str diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py index a4ee9d63..34c7a0db 100644 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -44,7 +44,23 @@ class SSLError(ConnectionError): class Timeout(RequestException): - """The request timed out.""" + """The request timed out. + + Catching this error will catch both + :exc:`~requests.exceptions.ConnectTimeout` and + :exc:`~requests.exceptions.ReadTimeout` errors. + """ + + +class ConnectTimeout(ConnectionError, Timeout): + """The request timed out while trying to connect to the remote server. + + Requests that produced this error are safe to retry. + """ + + +class ReadTimeout(Timeout): + """The server did not send any data in the allotted amount of time.""" class URLRequired(RequestException): @@ -73,3 +89,6 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): """Failed to decode response content""" + +class StreamConsumedError(RequestException, TypeError): + """The content for this response was already consumed""" diff --git a/lib/requests/models.py b/lib/requests/models.py index e2fa09f8..2370b67f 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -19,31 +19,36 @@ from .cookies import cookiejar_from_dict, get_cookie_header from .packages.urllib3.fields import RequestField from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url -from .packages.urllib3.exceptions import DecodeError +from .packages.urllib3.exceptions import ( + DecodeError, ReadTimeoutError, ProtocolError) from .exceptions import ( - HTTPError, RequestException, MissingSchema, InvalidURL, - ChunkedEncodingError, ContentDecodingError) + HTTPError, RequestException, MissingSchema, InvalidURL, + ChunkedEncodingError, ContentDecodingError, ConnectionError, + StreamConsumedError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, json, builtin_str, basestring, IncompleteRead) + is_py2, chardet, json, builtin_str, basestring) from .status_codes import codes #: The set of HTTP status codes that indicate an automatically #: processable redirect. REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_redirect, # 307 + codes.permanent_redirect, # 308 ) DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 +json_dumps = json.dumps + class RequestEncodingMixin(object): @property @@ -187,7 +192,8 @@ class Request(RequestHooksMixin): :param url: URL to send. :param headers: dictionary of headers to send. :param files: dictionary of {filename: fileobject} files to multipart upload. - :param data: the body to attach the request. If a dictionary is provided, form-encoding will take place. + :param data: the body to attach to the request. If a dictionary is provided, form-encoding will take place. + :param json: json for the body to attach to the request (if data is not specified). :param params: dictionary of URL parameters to append to the URL. :param auth: Auth handler or (user, pass) tuple. :param cookies: dictionary or CookieJar of cookies to attach to this request. @@ -210,7 +216,8 @@ class Request(RequestHooksMixin): params=None, auth=None, cookies=None, - hooks=None): + hooks=None, + json=None): # Default empty dicts for dict params. data = [] if data is None else data @@ -228,6 +235,7 @@ class Request(RequestHooksMixin): self.headers = headers self.files = files self.data = data + self.json = json self.params = params self.auth = auth self.cookies = cookies @@ -244,6 +252,7 @@ class Request(RequestHooksMixin): headers=self.headers, files=self.files, data=self.data, + json=self.json, params=self.params, auth=self.auth, cookies=self.cookies, @@ -287,14 +296,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.hooks = default_hooks() def prepare(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None): + data=None, params=None, auth=None, cookies=None, hooks=None, + json=None): """Prepares the entire request with the given parameters.""" self.prepare_method(method) self.prepare_url(url, params) self.prepare_headers(headers) self.prepare_cookies(cookies) - self.prepare_body(data, files) + self.prepare_body(data, files, json) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -309,8 +319,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): p = PreparedRequest() p.method = self.method p.url = self.url - p.headers = self.headers.copy() - p._cookies = self._cookies.copy() + p.headers = self.headers.copy() if self.headers is not None else None + p._cookies = self._cookies.copy() if self._cookies is not None else None p.body = self.body p.hooks = self.hooks return p @@ -324,15 +334,18 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. - try: - url = unicode(url) - except NameError: - # We're on Python 3. - url = str(url) - except UnicodeDecodeError: - pass + #: We're unable to blindy call unicode/str functions + #: as this will include the bytestring indicator (b'') + #: on python 3.x. + #: https://github.com/kennethreitz/requests/pull/2238 + if isinstance(url, bytes): + url = url.decode('utf8') + else: + url = unicode(url) if is_py2 else str(url) - # Don't do any URL preparation for oddball schemes + # Don't do any URL preparation for non-HTTP schemes like `mailto`, + # `data` etc to work around exceptions from `url_parse`, which + # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return @@ -395,7 +408,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): else: self.headers = CaseInsensitiveDict() - def prepare_body(self, data, files): + def prepare_body(self, data, files, json=None): """Prepares the given HTTP body data.""" # Check if file, fo, generator, iterator. @@ -406,11 +419,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): content_type = None length = None + if json is not None: + content_type = 'application/json' + body = json_dumps(json) + is_stream = all([ hasattr(data, '__iter__'), - not isinstance(data, basestring), - not isinstance(data, list), - not isinstance(data, dict) + not isinstance(data, (basestring, list, tuple, dict)) ]) try: @@ -433,9 +448,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if files: (body, content_type) = self._encode_files(files, data) else: - if data: + if data and json is None: body = self._encode_params(data) - if isinstance(data, str) or isinstance(data, builtin_str) or hasattr(data, 'read'): + if isinstance(data, basestring) or hasattr(data, 'read'): content_type = None else: content_type = 'application/x-www-form-urlencoded' @@ -443,7 +458,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_content_length(body) # Add content-type if it wasn't explicitly provided. - if (content_type) and (not 'content-type' in self.headers): + if content_type and ('content-type' not in self.headers): self.headers['Content-Type'] = content_type self.body = body @@ -457,7 +472,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): l = super_len(body) if l: self.headers['Content-Length'] = builtin_str(l) - elif self.method not in ('GET', 'HEAD'): + elif (self.method not in ('GET', 'HEAD')) and (self.headers.get('Content-Length') is None): self.headers['Content-Length'] = '0' def prepare_auth(self, auth, url=''): @@ -558,6 +573,10 @@ class Response(object): #: and the arrival of the response (as a timedelta) self.elapsed = datetime.timedelta(0) + #: The :class:`PreparedRequest ` object to which this + #: is a response. + self.request = None + def __getstate__(self): # Consume everything; accessing the content attribute makes # sure the content has been fully read. @@ -607,6 +626,11 @@ class Response(object): """ return ('location' in self.headers and self.status_code in REDIRECT_STATI) + @property + def is_permanent_redirect(self): + """True if this Response one of the permanant versions of redirect""" + return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + @property def apparent_encoding(self): """The apparent encoding, provided by the chardet library""" @@ -618,21 +642,22 @@ class Response(object): large responses. The chunk size is the number of bytes it should read into memory. This is not necessarily the length of each item returned as decoding can take place. - """ - if self._content_consumed: - # simulate reading small chunks of the content - return iter_slices(self._content, chunk_size) + If decode_unicode is True, content will be decoded using the best + available encoding based on the response. + """ def generate(): try: # Special case for urllib3. try: for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk - except IncompleteRead as e: + except ProtocolError as e: raise ChunkedEncodingError(e) except DecodeError as e: raise ContentDecodingError(e) + except ReadTimeoutError as e: + raise ConnectionError(e) except AttributeError: # Standard file-like object. while True: @@ -643,14 +668,21 @@ class Response(object): self._content_consumed = True - gen = generate() + if self._content_consumed and isinstance(self._content, bool): + raise StreamConsumedError() + # simulate reading small chunks of the content + reused_chunks = iter_slices(self._content, chunk_size) + + stream_chunks = generate() + + chunks = reused_chunks if self._content_consumed else stream_chunks if decode_unicode: - gen = stream_decode_response_unicode(gen, self) + chunks = stream_decode_response_unicode(chunks, self) - return gen + return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): + def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None, delimiter=None): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -662,7 +694,11 @@ class Response(object): if pending is not None: chunk = pending + chunk - lines = chunk.splitlines() + + if delimiter: + lines = chunk.split(delimiter) + else: + lines = chunk.splitlines() if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]: pending = lines.pop() @@ -793,8 +829,8 @@ class Response(object): raise HTTPError(http_error_msg, response=self) def close(self): - """Closes the underlying file descriptor and releases the connection - back to the pool. + """Releases the connection back to the pool. Once this method has been + called the underlying ``raw`` object must not be accessed again. *Note: Should not normally need to be called explicitly.* """ diff --git a/lib/requests/packages/urllib3/__init__.py b/lib/requests/packages/urllib3/__init__.py index 73071f70..4b36b5ae 100644 --- a/lib/requests/packages/urllib3/__init__.py +++ b/lib/requests/packages/urllib3/__init__.py @@ -1,9 +1,3 @@ -# urllib3/__init__.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ urllib3 - Thread-safe connection pooling and re-using. """ @@ -23,7 +17,10 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host, Timeout +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry # Set default logging handler to avoid "No handler found" warnings. @@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s' % __name__) return handler # ... Clean up. del NullHandler + + +# Set security warning to only go off once by default. +import warnings +warnings.simplefilter('module', exceptions.SecurityWarning) + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/lib/requests/packages/urllib3/_collections.py b/lib/requests/packages/urllib3/_collections.py index 5907b0dc..d77ebb8d 100644 --- a/lib/requests/packages/urllib3/_collections.py +++ b/lib/requests/packages/urllib3/_collections.py @@ -1,10 +1,4 @@ -# urllib3/_collections.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -from collections import MutableMapping +from collections import Mapping, MutableMapping try: from threading import RLock except ImportError: # Platform-specific: No threads available @@ -20,9 +14,10 @@ try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict +from .packages.six import itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] _Null = object() @@ -101,3 +96,104 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: return self._container.keys() + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + + If you want to access the raw headers with their original casing + for debugging purposes you can access the private ``._data`` attribute + which is a normal python ``dict`` that maps the case-insensitive key to a + list of tuples stored as (case-sensitive-original-name, value). Using the + structure from above as our example: + + >>> headers._data + {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], + 'content-length': [('content-length', '7')]} + """ + + def __init__(self, headers=None, **kwargs): + self._data = {} + if headers is None: + headers = {} + self.update(headers, **kwargs) + + def add(self, key, value): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + self._data.setdefault(key.lower(), []).append((key, value)) + + def getlist(self, key): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + return self[key].split(', ') if key in self else [] + + def copy(self): + h = HTTPHeaderDict() + for key in self._data: + for rawkey, value in self._data[key]: + h.add(rawkey, value) + return h + + def __eq__(self, other): + if not isinstance(other, Mapping): + return False + other = HTTPHeaderDict(other) + return dict((k1, self[k1]) for k1 in self._data) == \ + dict((k2, other[k2]) for k2 in other._data) + + def __getitem__(self, key): + values = self._data[key.lower()] + return ', '.join(value[1] for value in values) + + def __setitem__(self, key, value): + self._data[key.lower()] = [(key, value)] + + def __delitem__(self, key): + del self._data[key.lower()] + + def __len__(self): + return len(self._data) + + def __iter__(self): + for headers in itervalues(self._data): + yield headers[0][0] + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/lib/requests/packages/urllib3/connection.py b/lib/requests/packages/urllib3/connection.py index c7d5b77d..c6e1959a 100644 --- a/lib/requests/packages/urllib3/connection.py +++ b/lib/requests/packages/urllib3/connection.py @@ -1,88 +1,146 @@ -# urllib3/connection.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +import datetime +import sys import socket from socket import timeout as SocketTimeout +import warnings -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection as _HTTPSConnection - except ImportError: - from httplib import HTTPSConnection as _HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, + SystemTimeWarning, ) from .packages.ssl_match_hostname import match_hostname -from .util import ( - assert_fingerprint, +from .packages import six + +from .util.ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + assert_fingerprint, ) +from .util import connection + port_by_scheme = { 'http': 80, 'https': 443, } +RECENT_DATE = datetime.date(2014, 1, 1) + class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + default_port = port_by_scheme['http'] - # By default, disable Nagle's Algorithm. - tcp_nodelay = 1 + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it + """ Establish a socket connection and set nodelay settings on it. - :return: a new socket connection + :return: New socket connection. """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + try: - conn = socket.create_connection( - (self.host, self.port), - self.timeout, - self.source_address, - ) - except AttributeError: # Python 2.6 - conn = socket.create_connection( - (self.host, self.port), - self.timeout, - ) - conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + return conn def _prepare_conn(self, conn): self.sock = conn + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 def connect(self): conn = self._new_conn() @@ -93,15 +151,18 @@ class HTTPSConnection(HTTPConnection): default_port = port_by_scheme['https'] def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - try: - HTTPConnection.__init__(self, host, port, strict, timeout, source_address) - except TypeError: # Python 2.6 - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): + + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) + self.key_file = key_file self.cert_file = cert_file + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + def connect(self): conn = self._new_conn() self._prepare_conn(conn) @@ -116,6 +177,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + assert_fingerprint = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -130,46 +192,52 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout, - ) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. + hostname = self.host if getattr(self, '_tunnel_host', None): - self.sock = sock + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif resolved_cert_reqs != ssl.CERT_NONE \ + and self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or hostname) + + self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED + or self.assert_fingerprint is not None) if ssl: diff --git a/lib/requests/packages/urllib3/connectionpool.py b/lib/requests/packages/urllib3/connectionpool.py index 243d700e..9cc2a955 100644 --- a/lib/requests/packages/urllib3/connectionpool.py +++ b/lib/requests/packages/urllib3/connectionpool.py @@ -1,16 +1,12 @@ -# urllib3/connectionpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import errno import logging +import sys +import warnings from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -19,14 +15,16 @@ except ImportError: from .exceptions import ( ClosedPoolError, - ConnectTimeoutError, + ProtocolError, EmptyPoolError, HostChangedError, + LocationValueError, MaxRetryError, + ProxyError, + ReadTimeoutError, SSLError, TimeoutError, - ReadTimeoutError, - ProxyError, + InsecureRequestWarning, ) from .packages.ssl_match_hostname import CertificateError from .packages import six @@ -38,12 +36,11 @@ from .connection import ( ) from .request import RequestMethods from .response import HTTPResponse -from .util import ( - assert_fingerprint, - get_host, - is_connection_dropped, - Timeout, -) + +from .util.connection import is_connection_dropped +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host xrange = six.moves.xrange @@ -52,8 +49,8 @@ log = logging.getLogger(__name__) _Default = object() -## Pool objects +## Pool objects class ConnectionPool(object): """ Base class for all connection pools, such as @@ -64,10 +61,11 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - # httplib doesn't like it when we include brackets in ipv6 addresses - host = host.strip('[]') + if not host: + raise LocationValueError("No host specified.") - self.host = host + # httplib doesn't like it when we include brackets in ipv6 addresses + self.host = host.strip('[]') self.port = port def __str__(self): @@ -77,6 +75,7 @@ class ConnectionPool(object): # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -121,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Headers to include with all requests, unless other headers are given explicitly. + :param retries: + Retry configuration to use by default with requests in this pool. + :param _proxy: Parsed proxy URL, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" @@ -128,6 +130,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. """ scheme = 'http' @@ -135,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None): + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict - # This is for backwards compatibility and can be removed once a timeout - # can only be set to a Timeout object if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries self.pool = self.QueueCls(maxsize) self.block = block @@ -161,6 +171,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) def _new_conn(self): """ @@ -170,17 +187,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return conn def _get_conn(self, timeout=None): @@ -199,7 +208,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -213,6 +222,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -232,19 +246,26 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: conn.close() + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -276,23 +297,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_requests += 1 timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout - try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + # Trigger any extra validation we need to do. + self._validate_conn(conn) + + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr - if hasattr(conn, 'sock'): + if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -300,18 +319,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except SocketTimeout: raise ReadTimeoutError( @@ -323,17 +341,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # http://bugs.python.org/issue10272 if 'timed out' in str(e) or \ 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise - except SocketError as e: # Platform-specific: Python 2 + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise @@ -358,7 +376,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() except Empty: - pass # Done. + pass # Done. def is_same_host(self, url): """ @@ -379,7 +397,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen(self, method, url, body=None, headers=None, retries=3, + def urlopen(self, method, url, body=None, headers=None, retries=None, redirect=True, assert_same_host=True, timeout=_Default, pool_timeout=None, release_conn=None, **response_kw): """ @@ -413,11 +431,25 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): these headers completely replace any pool-specific headers. :param retries: - Number of retries to allow before raising a MaxRetryError exception. + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -451,15 +483,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: - raise MaxRetryError(self, url) + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - raise HostChangedError(self, url, retries - 1) + raise HostChangedError(self, url, retries) conn = None @@ -470,11 +502,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): headers = headers.copy() headers.update(self.proxy_headers) + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: - # Request a connection from the queue + # Request a connection from the queue. conn = self._get_conn(timeout=pool_timeout) - # Make the request on the httplib connection object + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, timeout=timeout, body=body, headers=headers) @@ -497,38 +533,35 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue + # Timed out by queue. raise EmptyPoolError(self, "No pool connections are available.") - except BaseSSLError as e: + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except TimeoutError as e: - # Connection broken, discard. - conn = None - # Save the error off for retry logic. + stacktrace = sys.exc_info()[2] + if isinstance(e, SocketError) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) + + retries = retries.increment(method, url, error=e, + _pool=self, _stacktrace=stacktrace) + retries.sleep() + + # Keep track of the error for the retry warning. err = e - if retries == 0: - raise - - except (HTTPException, SocketError) as e: - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e - - if retries == 0: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) - else: - raise MaxRetryError(self, url, e) - finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -538,9 +571,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) - return self.urlopen(method, url, body, headers, retries - 1, + log.warning("Retrying (%r) after connection " + "broken by '%r': %s" % (retries, err, url)) + return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, release_conn=release_conn, **response_kw) @@ -550,11 +583,31 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if redirect_location: if response.status == 303: method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) + + # Check if we should retry the HTTP response. + if retries.is_forced_retry(method, status_code=response.status): + retries = retries.increment(method, url, response=response, _pool=self) + retries.sleep() + log.info("Forced retry: %s" % url) + return self.urlopen(method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -581,15 +634,17 @@ class HTTPSConnectionPool(HTTPConnectionPool): ConnectionCls = HTTPSConnection def __init__(self, host, port=None, - strict=False, timeout=None, maxsize=1, - block=False, headers=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers) + block, headers, retries, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -619,7 +674,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. conn.connect() @@ -645,20 +705,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.org/en/latest/security.html ' + '(This warning will only appear once by default.)'), + InsecureRequestWarning) + def connection_from_url(url, **kw): """ @@ -675,7 +745,7 @@ def connection_from_url(url, **kw): :class:`.ConnectionPool`. Useful for specifying things like timeout, maxsize, headers, etc. - Example: :: + Example:: >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') diff --git a/lib/requests/packages/urllib3/contrib/ntlmpool.py b/lib/requests/packages/urllib3/contrib/ntlmpool.py index b8cd9330..c6b266f5 100644 --- a/lib/requests/packages/urllib3/contrib/ntlmpool.py +++ b/lib/requests/packages/urllib3/contrib/ntlmpool.py @@ -1,9 +1,3 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ NTLM authenticating pool, contributed by erikcederstran diff --git a/lib/requests/packages/urllib3/contrib/pyopenssl.py b/lib/requests/packages/urllib3/contrib/pyopenssl.py index 9486b1b4..24de9e40 100644 --- a/lib/requests/packages/urllib3/contrib/pyopenssl.py +++ b/lib/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI_-support for Python 2. +'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. This needs the following packages installed: @@ -6,9 +9,15 @@ This needs the following packages installed: * ndg-httpsclient (tested with 0.3.2) * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + + pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: try: import urllib3.contrib.pyopenssl @@ -29,24 +38,26 @@ Module Variables ---------------- :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 - EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES - !MD5 !EXP !PSK !SRP !DSS'`` + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) ''' -from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +try: + from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT + from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +except SyntaxError as e: + raise ImportError(e) + import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from pyasn1.type import univ, constraint -from socket import _fileobject +from socket import _fileobject, timeout import ssl import select -from cStringIO import StringIO from .. import connection from .. import util @@ -57,18 +68,11 @@ __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] HAS_SNI = SUBJ_ALT_NAME_SUPPORT # Map from urllib3 to PyOpenSSL compatible parameter-values. -try: - _openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, - } -except AttributeError: - _openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, - } - +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, @@ -76,12 +80,22 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -# Default SSL/TLS cipher list. -# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ -# configuring-apache-nginx-and-openssl-for-forward-secrecy -DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ - 'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ - 'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" orig_util_HAS_SNI = util.HAS_SNI @@ -144,184 +158,43 @@ def get_subj_alt_name(peer_cert): return dns_name -class fileobject(_fileobject): - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except OpenSSL.SSL.WantReadError: - continue - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - class WrappedSocket(object): - '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + '''API-compatibility wrapper for Python OpenSSL's Connection-class. - def __init__(self, connection, socket): + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return fileobject(self.connection, mode, bufsize) + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd, wd, ed = select.select( + [self.socket], [], [], self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv(*args, **kwargs) + else: + return data def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -330,7 +203,10 @@ class WrappedSocket(object): return self.connection.sendall(data) def close(self): - return self.connection.shutdown() + if self._makefile_refs < 1: + return self.connection.shutdown() + else: + self._makefile_refs -= 1 def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() @@ -353,6 +229,15 @@ class WrappedSocket(object): ] } + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + def _verify_callback(cnx, x509, err_no, err_depth, return_code): return err_no == 0 @@ -373,6 +258,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() # Disable TLS compression to migitate CRIME attack (issue #309) OP_NO_COMPRESSION = 0x20000 diff --git a/lib/requests/packages/urllib3/exceptions.py b/lib/requests/packages/urllib3/exceptions.py index 98ef9abc..7519ba98 100644 --- a/lib/requests/packages/urllib3/exceptions.py +++ b/lib/requests/packages/urllib3/exceptions.py @@ -1,9 +1,3 @@ -# urllib3/exceptions.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - ## Base Exceptions @@ -11,6 +5,11 @@ class HTTPError(Exception): "Base exception used by this module." pass +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + class PoolError(HTTPError): "Base exception for errors caused within a pool." @@ -49,17 +48,33 @@ class DecodeError(HTTPError): pass +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." + pass + + +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + ## Leaf Exceptions class MaxRetryError(RequestError): - "Raised when the maximum number of retries is exceeded." + """Raised when the maximum number of retries is exceeded. + + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ def __init__(self, pool, url, reason=None): self.reason = reason message = "Max retries exceeded with url: %s" % url if reason: - message += " (Caused by %s: %s)" % (type(reason), reason) + message += " (Caused by %r)" % reason else: message += " (Caused by redirect)" @@ -111,7 +126,12 @@ class ClosedPoolError(PoolError): pass -class LocationParseError(ValueError, HTTPError): +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): "Raised when get_host or similar fails to parse the URL input." def __init__(self, location): @@ -119,3 +139,18 @@ class LocationParseError(ValueError, HTTPError): HTTPError.__init__(self, message) self.location = location + + +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class InsecureRequestWarning(SecurityWarning): + "Warned when making an unverified HTTPS request." + pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass diff --git a/lib/requests/packages/urllib3/fields.py b/lib/requests/packages/urllib3/fields.py index ed017657..c853f8d5 100644 --- a/lib/requests/packages/urllib3/fields.py +++ b/lib/requests/packages/urllib3/fields.py @@ -1,9 +1,3 @@ -# urllib3/fields.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import email.utils import mimetypes @@ -15,7 +9,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ @@ -78,9 +72,10 @@ class RequestField(object): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +120,8 @@ class RequestField(object): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +153,8 @@ class RequestField(object): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. @@ -172,6 +168,10 @@ class RequestField(object): """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/lib/requests/packages/urllib3/filepost.py b/lib/requests/packages/urllib3/filepost.py index e8b30bdd..0fbf488d 100644 --- a/lib/requests/packages/urllib3/filepost.py +++ b/lib/requests/packages/urllib3/filepost.py @@ -1,11 +1,4 @@ -# urllib3/filepost.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,10 +31,10 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): diff --git a/lib/requests/packages/urllib3/packages/ordered_dict.py b/lib/requests/packages/urllib3/packages/ordered_dict.py index 7f8ee154..4479363c 100644 --- a/lib/requests/packages/urllib3/packages/ordered_dict.py +++ b/lib/requests/packages/urllib3/packages/ordered_dict.py @@ -2,7 +2,6 @@ # Passes Python2.7's test suite and incorporates all the latest updates. # Copyright 2009 Raymond Hettinger, released under the MIT License. # http://code.activestate.com/recipes/576693/ - try: from thread import get_ident as _get_ident except ImportError: diff --git a/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 3aa5b2e1..dd59a75f 100644 --- a/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,7 +7,7 @@ except ImportError: from backports.ssl_match_hostname import CertificateError, match_hostname except ImportError: # Our vendored copy - from _implementation import CertificateError, match_hostname + from ._implementation import CertificateError, match_hostname # Not needed, but documenting what we provide. __all__ = ('CertificateError', 'match_hostname') diff --git a/lib/requests/packages/urllib3/poolmanager.py b/lib/requests/packages/urllib3/poolmanager.py index f18ff2bb..515dc962 100644 --- a/lib/requests/packages/urllib3/poolmanager.py +++ b/lib/requests/packages/urllib3/poolmanager.py @@ -1,9 +1,3 @@ -# urllib3/poolmanager.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import logging try: # Python 3 @@ -14,8 +8,10 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme +from .exceptions import LocationValueError from .request import RequestMethods -from .util import parse_url +from .util.url import parse_url +from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -49,7 +45,7 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example: :: + Example:: >>> manager = PoolManager(num_pools=2) >>> r = manager.request('GET', 'http://google.com/') @@ -102,10 +98,11 @@ class PoolManager(RequestMethods): ``urllib3.connectionpool.port_by_scheme``. """ + if not host: + raise LocationValueError("No host specified.") + scheme = scheme or 'http' - port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) with self.pools.lock: @@ -118,6 +115,7 @@ class PoolManager(RequestMethods): # Make a fresh ConnectionPool of the desired type pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool + return pool def connection_from_url(self, url): @@ -161,13 +159,18 @@ class PoolManager(RequestMethods): # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' - log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + kw['retries'] = retries.increment(method, redirect_location) kw['redirect'] = redirect + + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, **kw) @@ -208,12 +211,16 @@ class ProxyManager(PoolManager): if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) + + assert proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % proxy.scheme + self.proxy = proxy self.proxy_headers = proxy_headers or {} - assert self.proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw) @@ -248,10 +255,10 @@ class ProxyManager(PoolManager): # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. - kw['headers'] = self._set_proxy_headers(url, kw.get('headers', - self.headers)) + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) - return super(ProxyManager, self).urlopen(method, url, redirect, **kw) + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) def proxy_from_url(url, **kw): diff --git a/lib/requests/packages/urllib3/request.py b/lib/requests/packages/urllib3/request.py index 2a92cc20..51fe2386 100644 --- a/lib/requests/packages/urllib3/request.py +++ b/lib/requests/packages/urllib3/request.py @@ -1,9 +1,3 @@ -# urllib3/request.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - try: from urllib.parse import urlencode except ImportError: @@ -26,8 +20,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -51,7 +45,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -61,8 +55,8 @@ class RequestMethods(object): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. """ @@ -70,12 +64,12 @@ class RequestMethods(object): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -94,18 +88,18 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where - the MIME type is optional. For example: :: + the MIME type is optional. For example:: fields = { 'foo': 'bar', @@ -119,17 +113,17 @@ class RequestMethods(object): When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. - Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) + body, content_type = encode_multipart_formdata( + fields or {}, boundary=multipart_boundary) else: body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') + 'application/x-www-form-urlencoded') if headers is None: headers = self.headers diff --git a/lib/requests/packages/urllib3/response.py b/lib/requests/packages/urllib3/response.py index 6a1fe1a7..e69de957 100644 --- a/lib/requests/packages/urllib3/response.py +++ b/lib/requests/packages/urllib3/response.py @@ -1,21 +1,14 @@ -# urllib3/response.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -import logging import zlib import io +from socket import timeout as SocketTimeout -from .exceptions import DecodeError +from ._collections import HTTPHeaderDict +from .exceptions import ProtocolError, DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type -from .util import is_fp_closed +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed -log = logging.getLogger(__name__) - class DeflateDecoder(object): @@ -55,7 +48,10 @@ class HTTPResponse(io.IOBase): HTTP Response container. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. Extra parameters for behaviour not present in httplib.HTTPResponse: @@ -79,7 +75,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -87,11 +86,14 @@ class HTTPResponse(io.IOBase): self.decode_content = decode_content self._decoder = None - self._body = body if body and isinstance(body, basestring) else None + self._body = None self._fp = None self._original_response = original_response self._fp_bytes_read = 0 + if body and isinstance(body, (basestring, binary_type)): + self._body = body + self._pool = pool self._connection = connection @@ -159,8 +161,8 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -174,23 +176,42 @@ class HTTPResponse(io.IOBase): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. - self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if not 'read operation timed out' in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) self._fp_bytes_read += len(data) @@ -200,8 +221,7 @@ class HTTPResponse(io.IOBase): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." % content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -238,7 +258,6 @@ class HTTPResponse(io.IOBase): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -249,17 +268,9 @@ class HTTPResponse(io.IOBase): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) @@ -301,7 +312,7 @@ class HTTPResponse(io.IOBase): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): @@ -309,4 +320,14 @@ class HTTPResponse(io.IOBase): return self._fp.flush() def readable(self): + # This method is required for `io` module compatibility. return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) diff --git a/lib/requests/packages/urllib3/util.py b/lib/requests/packages/urllib3/util.py deleted file mode 100644 index bd266317..00000000 --- a/lib/requests/packages/urllib3/util.py +++ /dev/null @@ -1,648 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. - select = False - -try: # Test for SSL features - SSLContext = None - HAS_SNI = False - - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): - """ - Retrieve the current time, this function is mocked out in unit testing. - """ - return time.time() - - -class Timeout(object): - """ - Utility object for storing timeout values. - - Example usage: - - .. code-block:: python - - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc - - :param connect: - The maximum amount of time to wait for a connection attempt to a server - to succeed. Omitting the parameter will default the connect timeout to - the system default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout for connection attempts. - - :type connect: integer, float, or None - - :param read: - The maximum amount of time to wait between consecutive - read operations for a response from the server. Omitting - the parameter will default the read timeout to the system - default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout. - - :type read: integer, float, or None - - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - - .. note:: - - Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. - - In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, - not the total amount of time for the request to return a complete - response. For most requests, the timeout is raised because the server - has not sent the first byte in the specified time. This is not always - the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. - """ - - #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): - self._connect = self._validate_timeout(connect, 'connect') - self._read = self._validate_timeout(read, 'read') - self.total = self._validate_timeout(total, 'total') - self._start_connect = None - - def __str__(self): - return '%s(connect=%r, read=%r, total=%r)' % ( - type(self).__name__, self._connect, self._read, self.total) - - - @classmethod - def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid - - :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero - """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: - return value - - try: - float(value) - except (TypeError, ValueError): - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - try: - if value < 0: - raise ValueError("Attempted to set %s timeout to %s, but the " - "timeout cannot be set to a value less " - "than 0." % (name, value)) - except TypeError: # Python 3 - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - return value - - @classmethod - def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. - - The timeout value used by httplib.py sets the same timeout on the - connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. - - :param timeout: The legacy timeout value - :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object - :rtype: :class:`Timeout` - """ - return Timeout(read=timeout, connect=timeout) - - def clone(self): - """ Create a copy of the timeout object - - Timeout properties are stored per-pool but each request needs a fresh - Timeout object to ensure each one has its own start/stop configured. - - :return: a copy of the timeout object - :rtype: :class:`Timeout` - """ - # We can't use copy.deepcopy because that will also create a new object - # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to - # detect the user default. - return Timeout(connect=self._connect, read=self._read, - total=self.total) - - def start_connect(self): - """ Start the timeout clock, used during a connect() attempt - - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to start a timer that has been started already. - """ - if self._start_connect is not None: - raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() - return self._start_connect - - def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. - - :return: the elapsed time - :rtype: float - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to get duration for a timer that hasn't been started. - """ - if self._start_connect is None: - raise TimeoutStateError("Can't get connect duration for timer " - "that has not started.") - return current_time() - self._start_connect - - @property - def connect_timeout(self): - """ Get the value to use when setting a connection timeout. - - This will be a positive float or integer, the value None - (never timeout), or the default system timeout. - - :return: the connect timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - """ - if self.total is None: - return self._connect - - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: - return self.total - - return min(self._connect, self.total) - - @property - def read_timeout(self): - """ Get the value for the read timeout. - - This assumes some time has elapsed in the connection timeout and - computes the read timeout appropriately. - - If self.total is set, the read timeout is dependent on the amount of - time taken by the connect timeout. If the connection time has not been - established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be - raised. - - :return: the value to use for the read timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` - has not yet been called on this object. - """ - if (self.total is not None and - self.total is not self.DEFAULT_TIMEOUT and - self._read is not None and - self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. - if self._start_connect is None: - return self._read - return max(0, min(self.total - self.get_connect_duration(), - self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: - return max(0, self.total - self.get_connect_duration()) - else: - return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example: :: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example: :: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. - - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - port = int(port) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - if proxy_basic_auth: - headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') - - return headers - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine - return False - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except SocketError: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. - Defaults to :data:`ssl.CERT_NONE`. - If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. - """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - - fingerprint = fingerprint.replace(':', '').lower() - - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - hashfunc = hashfunc_map[digest_length] - - cert_digest = hashfunc(cert).digest() - - if not cert_digest == fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. - """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) - return obj.fp is None - - return obj.closed - - -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError - raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) diff --git a/lib/requests/packages/urllib3/util/__init__.py b/lib/requests/packages/urllib3/util/__init__.py new file mode 100644 index 00000000..8becc814 --- /dev/null +++ b/lib/requests/packages/urllib3/util/__init__.py @@ -0,0 +1,24 @@ +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) + +from .retry import Retry +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/lib/requests/packages/urllib3/util/connection.py b/lib/requests/packages/urllib3/util/connection.py new file mode 100644 index 00000000..2156993a --- /dev/null +++ b/lib/requests/packages/urllib3/util/connection.py @@ -0,0 +1,97 @@ +import socket +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return True + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except socket.error: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/lib/requests/packages/urllib3/util/request.py b/lib/requests/packages/urllib3/util/request.py new file mode 100644 index 00000000..bc64f6b1 --- /dev/null +++ b/lib/requests/packages/urllib3/util/request.py @@ -0,0 +1,71 @@ +from base64 import b64encode + +from ..packages.six import b + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None, disable_cache=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(b(proxy_basic_auth)).decode('utf-8') + + if disable_cache: + headers['cache-control'] = 'no-cache' + + return headers diff --git a/lib/requests/packages/urllib3/util/response.py b/lib/requests/packages/urllib3/util/response.py new file mode 100644 index 00000000..45fff552 --- /dev/null +++ b/lib/requests/packages/urllib3/util/response.py @@ -0,0 +1,22 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). + return obj.fp is None + except AttributeError: + pass + + raise ValueError("Unable to determine whether fp is closed.") diff --git a/lib/requests/packages/urllib3/util/retry.py b/lib/requests/packages/urllib3/util/retry.py new file mode 100644 index 00000000..eb560dfc --- /dev/null +++ b/lib/requests/packages/urllib3/util/retry.py @@ -0,0 +1,279 @@ +import time +import logging + +from ..exceptions import ( + ProtocolError, + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we can't + assume that the server did not process any of it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? + """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + + else: + # FIXME: Nothing changed, scenario doesn't make sense. + _observed_errors += 1 + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/lib/requests/packages/urllib3/util/ssl_.py b/lib/requests/packages/urllib3/util/ssl_.py new file mode 100644 index 00000000..9cfe2d2a --- /dev/null +++ b/lib/requests/packages/urllib3/util/ssl_.py @@ -0,0 +1,132 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) + + if odd or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) diff --git a/lib/requests/packages/urllib3/util/timeout.py b/lib/requests/packages/urllib3/util/timeout.py new file mode 100644 index 00000000..ea7027f3 --- /dev/null +++ b/lib/requests/packages/urllib3/util/timeout.py @@ -0,0 +1,240 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() + +def current_time(): + """ + Retrieve the current time. This function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ Timeout configuration. + + Timeouts can be defined as a default for a pool:: + + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not trigger, even though the request will take + several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/lib/requests/packages/urllib3/util/url.py b/lib/requests/packages/urllib3/util/url.py new file mode 100644 index 00000000..487d456c --- /dev/null +++ b/lib/requests/packages/urllib3/util/url.py @@ -0,0 +1,171 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example:: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example:: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + if not url: + # Empty + return Url() + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError(url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 425db22c..c2f42b14 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -12,18 +12,24 @@ import os from collections import Mapping from datetime import datetime -from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str +from .auth import _basic_auth_str +from .compat import cookielib, OrderedDict, urljoin, urlparse from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook from .utils import to_key_val_list, default_headers, to_native_string -from .exceptions import TooManyRedirects, InvalidSchema +from .exceptions import ( + TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) +from .packages.urllib3._collections import RecentlyUsedContainer from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter -from .utils import requote_uri, get_environ_proxies, get_netrc_auth +from .utils import ( + requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, + get_auth_from_url +) from .status_codes import codes @@ -86,11 +92,21 @@ class SessionRedirectMixin(object): """Receives a Response. Returns a generator of Responses.""" i = 0 + hist = [] # keep track of history while resp.is_redirect: prepared_request = req.copy() - resp.content # Consume socket so it can be released + if i > 0: + # Update history and keep track of redirects. + hist.append(resp) + new_hist = list(hist) + resp.history = new_hist + + try: + resp.content # Consume socket so it can be released + except (ChunkedEncodingError, ContentDecodingError, RuntimeError): + resp.raw.read(decode_content=False) if i >= self.max_redirects: raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects) @@ -110,7 +126,7 @@ class SessionRedirectMixin(object): parsed = urlparse(url) url = parsed.geturl() - # Facilitate non-RFC2616-compliant 'location' headers + # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. if not urlparse(url).netloc: @@ -119,8 +135,11 @@ class SessionRedirectMixin(object): url = requote_uri(url) prepared_request.url = to_native_string(url) + # Cache the url, unless it redirects to itself. + if resp.is_permanent_redirect and req.url != prepared_request.url: + self.redirect_cache[req.url] = prepared_request.url - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 + # http://tools.ietf.org/html/rfc7231#section-6.4.4 if (resp.status_code == codes.see_other and method != 'HEAD'): method = 'GET' @@ -138,7 +157,7 @@ class SessionRedirectMixin(object): prepared_request.method = method # https://github.com/kennethreitz/requests/issues/1084 - if resp.status_code not in (codes.temporary, codes.resume): + if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): if 'Content-Length' in prepared_request.headers: del prepared_request.headers['Content-Length'] @@ -154,22 +173,15 @@ class SessionRedirectMixin(object): prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) - if 'Authorization' in headers: - # If we get redirected to a new host, we should strip out any - # authentication headers. - original_parsed = urlparse(resp.request.url) - redirect_parsed = urlparse(url) + # Rebuild auth and proxy information. + proxies = self.rebuild_proxies(prepared_request, proxies) + self.rebuild_auth(prepared_request, resp) - if (original_parsed.hostname != redirect_parsed.hostname): - del headers['Authorization'] - - # .netrc might have more auth for us. - new_auth = get_netrc_auth(url) if self.trust_env else None - if new_auth is not None: - prepared_request.prepare_auth(new_auth) + # Override the original request. + req = prepared_request resp = self.send( - prepared_request, + req, stream=stream, timeout=timeout, verify=verify, @@ -183,6 +195,68 @@ class SessionRedirectMixin(object): i += 1 yield resp + def rebuild_auth(self, prepared_request, response): + """ + When being redirected we may want to strip authentication from the + request to avoid leaking credentials. This method intelligently removes + and reapplies authentication where possible to avoid credential loss. + """ + headers = prepared_request.headers + url = prepared_request.url + + if 'Authorization' in headers: + # If we get redirected to a new host, we should strip out any + # authentication headers. + original_parsed = urlparse(response.request.url) + redirect_parsed = urlparse(url) + + if (original_parsed.hostname != redirect_parsed.hostname): + del headers['Authorization'] + + # .netrc might have more auth for us on our new host. + new_auth = get_netrc_auth(url) if self.trust_env else None + if new_auth is not None: + prepared_request.prepare_auth(new_auth) + + return + + def rebuild_proxies(self, prepared_request, proxies): + """ + This method re-evaluates the proxy configuration by considering the + environment variables. If we are redirected to a URL covered by + NO_PROXY, we strip the proxy configuration. Otherwise, we set missing + proxy keys for this URL (in case they were stripped by a previous + redirect). + + This method also replaces the Proxy-Authorization header where + necessary. + """ + headers = prepared_request.headers + url = prepared_request.url + scheme = urlparse(url).scheme + new_proxies = proxies.copy() if proxies is not None else {} + + if self.trust_env and not should_bypass_proxies(url): + environ_proxies = get_environ_proxies(url) + + proxy = environ_proxies.get(scheme) + + if proxy: + new_proxies.setdefault(scheme, environ_proxies[scheme]) + + if 'Proxy-Authorization' in headers: + del headers['Proxy-Authorization'] + + try: + username, password = get_auth_from_url(new_proxies[scheme]) + except KeyError: + username, password = None, None + + if username and password: + headers['Proxy-Authorization'] = _basic_auth_str(username, password) + + return new_proxies + class Session(SessionRedirectMixin): """A Requests session. @@ -198,9 +272,10 @@ class Session(SessionRedirectMixin): """ __attrs__ = [ - 'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks', - 'params', 'verify', 'cert', 'prefetch', 'adapters', 'stream', - 'trust_env', 'max_redirects'] + 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'prefetch', 'adapters', 'stream', 'trust_env', + 'max_redirects', 'redirect_cache' + ] def __init__(self): @@ -253,6 +328,9 @@ class Session(SessionRedirectMixin): self.mount('https://', HTTPAdapter()) self.mount('http://', HTTPAdapter()) + # Only store 1000 redirects to prevent using infinite memory + self.redirect_cache = RecentlyUsedContainer(1000) + def __enter__(self): return self @@ -290,6 +368,7 @@ class Session(SessionRedirectMixin): url=request.url, files=request.files, data=request.data, + json=request.json, headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), @@ -311,7 +390,8 @@ class Session(SessionRedirectMixin): hooks=None, stream=None, verify=None, - cert=None): + cert=None, + json=None): """Constructs a :class:`Request `, prepares it and sends it. Returns :class:`Response ` object. @@ -321,17 +401,22 @@ class Session(SessionRedirectMixin): string for the :class:`Request`. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the + :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of 'filename': file-like-objects + :param files: (optional) Dictionary of ``'filename': file-like-objects`` for multipart encoding upload. :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the - request in seconds. - :param allow_redirects: (optional) Boolean. Set to True by default. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout `_) tuple. + :type timeout: float or tuple + :param allow_redirects: (optional) Set to True by default. + :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param stream: (optional) whether to immediately download the response @@ -342,7 +427,7 @@ class Session(SessionRedirectMixin): If Tuple, ('cert', 'key') pair. """ - method = builtin_str(method) + method = to_native_string(method) # Create the Request. req = Request( @@ -351,6 +436,7 @@ class Session(SessionRedirectMixin): headers = headers, files = files, data = data or {}, + json = json, params = params or {}, auth = auth, cookies = cookies, @@ -360,36 +446,16 @@ class Session(SessionRedirectMixin): proxies = proxies or {} - # Gather clues from the surrounding environment. - if self.trust_env: - # Set environment's proxies. - env_proxies = get_environ_proxies(url) or {} - for (k, v) in env_proxies.items(): - proxies.setdefault(k, v) - - # Look for configuration. - if not verify and verify is not False: - verify = os.environ.get('REQUESTS_CA_BUNDLE') - - # Curl compatibility. - if not verify and verify is not False: - verify = os.environ.get('CURL_CA_BUNDLE') - - # Merge all the kwargs. - proxies = merge_setting(proxies, self.proxies) - stream = merge_setting(stream, self.stream) - verify = merge_setting(verify, self.verify) - cert = merge_setting(cert, self.cert) + settings = self.merge_environment_settings( + prep.url, proxies, stream, verify, cert + ) # Send the request. send_kwargs = { - 'stream': stream, 'timeout': timeout, - 'verify': verify, - 'cert': cert, - 'proxies': proxies, 'allow_redirects': allow_redirects, } + send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) return resp @@ -424,15 +490,16 @@ class Session(SessionRedirectMixin): kwargs.setdefault('allow_redirects', False) return self.request('HEAD', url, **kwargs) - def post(self, url, data=None, **kwargs): + def post(self, url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. """ - return self.request('POST', url, data=data, **kwargs) + return self.request('POST', url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): """Sends a PUT request. Returns :class:`Response` object. @@ -477,6 +544,14 @@ class Session(SessionRedirectMixin): if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') + checked_urls = set() + while request.url in self.redirect_cache: + checked_urls.add(request.url) + new_url = self.redirect_cache.get(request.url) + if new_url in checked_urls: + break + request.url = new_url + # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') @@ -527,10 +602,37 @@ class Session(SessionRedirectMixin): history.insert(0, r) # Get the last request made r = history.pop() - r.history = tuple(history) + r.history = history + + if not stream: + r.content return r + def merge_environment_settings(self, url, proxies, stream, verify, cert): + """Check the environment and merge it with some settings.""" + # Gather clues from the surrounding environment. + if self.trust_env: + # Set environment's proxies. + env_proxies = get_environ_proxies(url) or {} + for (k, v) in env_proxies.items(): + proxies.setdefault(k, v) + + # Look for requests environment configuration and be compatible + # with cURL. + if verify is True or verify is None: + verify = (os.environ.get('REQUESTS_CA_BUNDLE') or + os.environ.get('CURL_CA_BUNDLE')) + + # Merge all the kwargs. + proxies = merge_setting(proxies, self.proxies) + stream = merge_setting(stream, self.stream) + verify = merge_setting(verify, self.verify) + cert = merge_setting(cert, self.cert) + + return {'verify': verify, 'proxies': proxies, 'stream': stream, + 'cert': cert} + def get_adapter(self, url): """Returns the appropriate connnection adapter for the given URL.""" for (prefix, adapter) in self.adapters.items(): diff --git a/lib/requests/status_codes.py b/lib/requests/status_codes.py index ed7a8660..e0887f21 100644 --- a/lib/requests/status_codes.py +++ b/lib/requests/status_codes.py @@ -30,7 +30,8 @@ _codes = { 305: ('use_proxy',), 306: ('switch_proxy',), 307: ('temporary_redirect', 'temporary_moved', 'temporary'), - 308: ('resume_incomplete', 'resume'), + 308: ('permanent_redirect', + 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 # Client Error. 400: ('bad_request', 'bad'), diff --git a/lib/requests/structures.py b/lib/requests/structures.py index a1759137..3e5f2faa 100644 --- a/lib/requests/structures.py +++ b/lib/requests/structures.py @@ -8,30 +8,7 @@ Data structures that power Requests. """ -import os import collections -from itertools import islice - - -class IteratorProxy(object): - """docstring for IteratorProxy""" - def __init__(self, i): - self.i = i - # self.i = chain.from_iterable(i) - - def __iter__(self): - return self.i - - def __len__(self): - if hasattr(self.i, '__len__'): - return len(self.i) - if hasattr(self.i, 'len'): - return self.i.len - if hasattr(self.i, 'fileno'): - return os.fstat(self.i.fileno()).st_size - - def read(self, n): - return "".join(islice(self.i, None, n)) class CaseInsensitiveDict(collections.MutableMapping): @@ -46,7 +23,7 @@ class CaseInsensitiveDict(collections.MutableMapping): case of the last key to be set, and ``iter(instance)``, ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` will contain case-sensitive keys. However, querying and contains - testing is case insensitive: + testing is case insensitive:: cid = CaseInsensitiveDict() cid['Accept'] = 'application/json' @@ -106,8 +83,7 @@ class CaseInsensitiveDict(collections.MutableMapping): return CaseInsensitiveDict(self._store.values()) def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) - + return str(dict(self.items())) class LookupDict(dict): """Dictionary lookup object.""" diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 4d648bc5..aa5c140e 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -19,15 +19,16 @@ import re import sys import socket import struct +import warnings from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, - builtin_str, getproxies, proxy_bypass) + builtin_str, getproxies, proxy_bypass, urlunparse) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import MissingSchema, InvalidURL +from .exceptions import InvalidURL _hush_pyflakes = (RequestsCookieJar,) @@ -61,7 +62,7 @@ def super_len(o): return os.fstat(fileno).st_size if hasattr(o, 'getvalue'): - # e.g. BytesIO, cStringIO.StringI + # e.g. BytesIO, cStringIO.StringIO return len(o.getvalue()) @@ -287,6 +288,11 @@ def get_encodings_from_content(content): :param content: bytestring to extract encodings from. """ + warnings.warn(( + 'In requests 3.0, get_encodings_from_content will be removed. For ' + 'more information, please see the discussion on issue #2266. (This' + ' warning should only appear once.)'), + DeprecationWarning) charset_re = re.compile(r']', flags=re.I) pragma_re = re.compile(r']', flags=re.I) @@ -351,12 +357,14 @@ def get_unicode_from_response(r): Tried: 1. charset from content-type - - 2. every encodings from ```` - - 3. fall back and replace all unicode characters + 2. fall back and replace all unicode characters """ + warnings.warn(( + 'In requests 3.0, get_unicode_from_response will be removed. For ' + 'more information, please see the discussion on issue #2266. (This' + ' warning should only appear once.)'), + DeprecationWarning) tried_encodings = [] @@ -466,9 +474,10 @@ def is_valid_cidr(string_network): return True -def get_environ_proxies(url): - """Return a dict of environment proxies.""" - +def should_bypass_proxies(url): + """ + Returns whether we should bypass proxies or not. + """ get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL @@ -486,13 +495,13 @@ def get_environ_proxies(url): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): if address_in_network(ip, proxy_ip): - return {} + return True else: for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. - return {} + return True # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. @@ -506,12 +515,16 @@ def get_environ_proxies(url): bypass = False if bypass: - return {} + return True - # If we get here, we either didn't have no_proxy set or we're not going - # anywhere that no_proxy applies to, and the system settings don't require - # bypassing the proxy for the current URL. - return getproxies() + return False + +def get_environ_proxies(url): + """Return a dict of environment proxies.""" + if should_bypass_proxies(url): + return {} + else: + return getproxies() def default_user_agent(name="python-requests"): @@ -549,7 +562,8 @@ def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), 'Accept-Encoding': ', '.join(('gzip', 'deflate')), - 'Accept': '*/*' + 'Accept': '*/*', + 'Connection': 'keep-alive', }) @@ -564,7 +578,7 @@ def parse_header_links(value): replace_chars = " '\"" - for val in value.split(","): + for val in re.split(", *<", value): try: url, params = val.split(";", 1) except ValueError: @@ -622,13 +636,18 @@ def guess_json_utf(data): return None -def except_on_missing_scheme(url): - """Given a URL, raise a MissingSchema exception if the scheme is missing. - """ - scheme, netloc, path, params, query, fragment = urlparse(url) +def prepend_scheme_if_needed(url, new_scheme): + '''Given a URL that may or may not have a scheme, prepend the given scheme. + Does not replace a present scheme with the one provided as an argument.''' + scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) - if not scheme: - raise MissingSchema('Proxy URLs must have explicit schemes.') + # urlparse is a finicky beast, and sometimes decides that there isn't a + # netloc present. Assume that it's being over-cautious, and switch netloc + # and path if urlparse decided there was no netloc. + if not netloc: + netloc, path = path, netloc + + return urlunparse((scheme, netloc, path, params, query, fragment)) def get_auth_from_url(url): @@ -661,3 +680,18 @@ def to_native_string(string, encoding='ascii'): out = string.decode(encoding) return out + + +def urldefragauth(url): + """ + Given a url remove the fragment and the authentication part + """ + scheme, netloc, path, params, query, fragment = urlparse(url) + + # see func:`prepend_scheme_if_needed` + if not netloc: + netloc, path = path, netloc + + netloc = netloc.rsplit('@', 1)[-1] + + return urlunparse((scheme, netloc, path, params, query, ''))