Merge branch 'release/3.30.0'

JackDandy 2023-09-23 17:20:08 +01:00
commit 07bb660e77
146 changed files with 7725 additions and 6545 deletions


@ -1,4 +1,25 @@
### 3.29.11 (2023-09-22 23:00:00 UTC)
### 3.30.0 (2023-09-23 17:20:00 UTC)
* Update Beautiful Soup 4.11.1 (r642) to 4.12.2
* Update certifi 2023.05.07 to 2023.07.22
* Update CacheControl 0.12.11 (c05ef9e) to 0.13.1 (783a338)
* Update feedparser 6.0.10 (859ac57) to 6.0.10 (9865dec)
* Update filelock 3.12.0 (b4713c9) to 3.12.4 (c1163ae)
* Update idna library 3.4 (37c7d9b) to 3.4 (cab054c)
* Update Msgpack 1.0.5 (0516c2c) to 1.0.6 (e1d3d5d)
* Update package resource API 67.5.1 (f51eccd) to 68.1.2 (1ef36f2)
* Update Requests library 2.29.0 (87d63de) to 2.31.0 (8812812)
* Update soupsieve 2.3.2.post1 (792d566) to 2.4.1 (2e66beb)
* Update Tornado Web Server 6.3.2 (e3aa6c5) to 6.3.3 (e4d6984)
* Update urllib3 1.26.15 (25cca389) to 2.0.5 (d9f85a7)
* Add thefuzz 0.19.0 (c2cd4f4) as a replacement with fallback to fuzzywuzzy 0.18.0 (2188520)
* Fix regex that was not using py312 notation
* Change sort backlog and manual segment search results episode number
* Change sort episodes when set to wanted on display show page
* Add search of grouped options in shows drop down at view-show
### 3.29.11 (2023-09-22 23:00:00 UTC)
* Fix pytvmaze country handling in NetworkBase
* Update issue template
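
A minimal sketch of the thefuzz-to-fuzzywuzzy fallback described in the changelog above (illustrative only; SickGear's actual wiring may differ):

try:
    # preferred: thefuzz, the maintained fork (same API as fuzzywuzzy)
    from thefuzz import fuzz
except ImportError:
    # fallback: the legacy fuzzywuzzy package
    from fuzzywuzzy import fuzz

print(fuzz.ratio('The Expanse', 'Expanse, The'))  # similarity score, 0..100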


@ -190,7 +190,7 @@
trakt_played='most played this month', trakt_played_period_year='most played this year',
trakt_collected='most collected this month', trakt_collected_period_year='most collected this year',
trakt_recommended='recommended', trakt_watchlist='watchlist')
#set $trakt_mode = $trakt_modes.get(re.sub('[\?=]', '_', $sg_var('TRAKT_MRU')), 'trends, tailored suggestions')
#set $trakt_mode = $trakt_modes.get(re.sub(r'[\?=]', '_', $sg_var('TRAKT_MRU')), 'trends, tailored suggestions')
<li><a href="$sbRoot/add-shows/trakt-default/" tabindex="$tab#set $tab += 1#"><i class="sgicon-trakt"></i>Trakt Cards
<div class="menu-item-desc opacity60">$trakt_mode...</div></a></li>
#set $imdb_func = $sg_str('IMDB_MRU').split('-')


@ -23,7 +23,83 @@ $(document).ready(function() {
}
return $('<span class="ended"><span class="label" title="">ended</span> <i>' + data.text + '</i></span>');
}
select$.select2({templateResult: populateItem, templateSelection:populateItem});
// https://github.com/bevacqua/fuzzysearch
function fuzzysearch(needle, haystack) {
var hlen = haystack.length;
var nlen = needle.length;
if (nlen > hlen) {
return false;
}
if (nlen === hlen) {
return needle === haystack;
}
outer: for (var i = 0, j = 0; i < nlen; i++) {
var nch = needle.charCodeAt(i);
while (j < hlen) {
if (haystack.charCodeAt(j++) === nch) {
continue outer;
}
}
return false;
}
return true;
}
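// note: despite its name, /\W/gui strips every non-word character, not just whitespace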
const white_space_regex = /\W/gui;
function sel_matcher(params, data) {
// If there are no search terms, return all of the data
if ($.trim(params.term) === '') {
return data;
}
// Do not display the item if there is no 'text' property
if (typeof data.text === 'undefined') {
return null;
}
// `params.term` should be the term that is used for searching
var param_term = params.term.toLowerCase().trim().replace(white_space_regex, '');
if ('undefined' !== typeof data.children) {
// `data.children` contains options to match against
var filteredChildren = [];
$.each(data.children, function (idx, child) {
// `child.text` is the text that is displayed for the data object
var param_data = child.text.toLowerCase().trim().replace(white_space_regex, '');
if (fuzzysearch(param_term, param_data)) {
filteredChildren.push(child);
}
});
// If any of the group's children match,
// then set the matched children on the group and return the group object
if (filteredChildren.length) {
var modifiedData = $.extend({}, data, true);
modifiedData.children = filteredChildren;
// You can return modified objects from here
// This includes matching the `children` how you want in nested data sets
return modifiedData;
}
}
// `data.text` is the text that is displayed for the data object
var param_data = data.text.toLowerCase().trim().replace(white_space_regex, '');
if (fuzzysearch(param_term, param_data)) {
var modifiedData = $.extend({}, data, true);
// You can return modified objects from here
// This includes matching the `children` how you want in nested data sets
return modifiedData;
}
// Return `null` if the term should not be displayed
return null;
}
select$.select2({templateResult: populateItem, templateSelection:populateItem, matcher: sel_matcher});
$('#prevShow, #nextShow').on('click', function() {
var select$ = $('#pickShow'),
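
For reference, the in-order character scan used by fuzzysearch() above can be sketched in Python; this port is illustrative, the shipped code is the JavaScript above:

def fuzzysearch(needle: str, haystack: str) -> bool:
    """True if the characters of needle appear, in order, within haystack."""
    if len(needle) > len(haystack):
        return False
    if len(needle) == len(haystack):
        return needle == haystack
    chars = iter(haystack)  # 'ch in chars' consumes the iterator up to and including ch
    return all(ch in chars for ch in needle)

assert fuzzysearch('xfls', 'the x-files')
assert not fuzzysearch('sflx', 'the x-files')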


@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
provides methods and Pythonic idioms that make it easy to navigate,
search, and modify the parse tree.
Beautiful Soup works with Python 3.5 and up. It works better if lxml
Beautiful Soup works with Python 3.6 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.11.1"
__copyright__ = "Copyright (c) 2004-2022 Leonard Richardson"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@ -38,11 +38,13 @@ from .builder import (
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
CSS,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
@ -211,7 +213,7 @@ class BeautifulSoup(Tag):
warnings.warn(
'The "%s" argument to the BeautifulSoup constructor '
'has been renamed to "%s."' % (old_name, new_name),
DeprecationWarning
DeprecationWarning, stacklevel=3
)
return kwargs.pop(old_name)
return None
@ -348,26 +350,50 @@ class BeautifulSoup(Tag):
self.markup = None
self.builder.soup = None
def __copy__(self):
"""Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
copy = type(self)(
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
def _clone(self):
"""Create a new BeautifulSoup object with the same TreeBuilder,
but not associated with any markup.
# Although we encoded the tree to UTF-8, that may not have
# been the encoding of the original markup. Set the copy's
# .original_encoding to reflect the original object's
# .original_encoding.
copy.original_encoding = self.original_encoding
return copy
This is the first step of the deepcopy process.
"""
clone = type(self)("", None, self.builder)
# Keep track of the encoding of the original document,
# since we won't be parsing it again.
clone.original_encoding = self.original_encoding
return clone
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None
d['builder'] = type(self.builder)
# Store the contents as a Unicode string.
d['contents'] = []
d['markup'] = self.decode()
# If _most_recent_element is present, it's a Tag object left
# over from initial parse. It might not be picklable and we
# don't need it.
if '_most_recent_element' in d:
del d['_most_recent_element']
return d
def __setstate__(self, state):
# If necessary, restore the TreeBuilder by looking it up.
self.__dict__ = state
if isinstance(self.builder, type):
self.builder = self.builder()
elif not self.builder:
# We don't know which builder was used to build this
# parse tree, so use a default we know is always available.
self.builder = HTMLParserTreeBuilder()
self.builder.soup = self
self.reset()
self._feed()
return state
@classmethod
def _decode_markup(cls, markup):
"""Ensure `markup` is bytes so it's safe to send into warnings.warn.
@ -405,7 +431,8 @@ class BeautifulSoup(Tag):
'The input looks more like a URL than markup. You may want to use'
' an HTTP client like requests to get the document behind'
' the URL, and feed that document to Beautiful Soup.',
MarkupResemblesLocatorWarning
MarkupResemblesLocatorWarning,
stacklevel=3
)
return True
return False
@ -436,7 +463,7 @@ class BeautifulSoup(Tag):
'The input looks more like a filename than markup. You may'
' want to open this file and pass the filehandle into'
' Beautiful Soup.',
MarkupResemblesLocatorWarning
MarkupResemblesLocatorWarning, stacklevel=3
)
return True
return False
@ -467,6 +494,7 @@ class BeautifulSoup(Tag):
self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = []
self.string_container_stack = []
self._most_recent_element = None
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@ -748,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@ -775,7 +803,7 @@ class BeautifulSoup(Tag):
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
@ -789,7 +817,7 @@ class BeautifulStoneSoup(BeautifulSoup):
warnings.warn(
'The BeautifulStoneSoup class is deprecated. Instead of using '
'it, pass features="xml" into the BeautifulSoup constructor.',
DeprecationWarning
DeprecationWarning, stacklevel=2
)
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
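
With the __getstate__/__setstate__ changes above, a soup now round-trips through pickle by storing its markup plus builder (or builder class) and re-parsing on load; a small check, assuming bs4 4.12:

import pickle
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>Hello <b>world</b></p>', 'html.parser')
restored = pickle.loads(pickle.dumps(soup))  # __setstate__ re-parses the stored markup
print(restored.b.string)  # world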


@ -70,7 +70,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# ATM because the html5lib TreeBuilder doesn't use
# UnicodeDammit.
if exclude_encodings:
warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
warnings.warn(
"You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.",
stacklevel=3
)
# html5lib only parses HTML, so if it's given XML that's worth
# noting.
@ -81,7 +84,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# These methods are defined by Beautiful Soup.
def feed(self, markup):
if self.soup.parse_only is not None:
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
warnings.warn(
"You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
stacklevel=4
)
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
self.underlying_builder.parser = parser
extra_kwargs = dict()


@ -10,30 +10,9 @@ __all__ = [
from html.parser import HTMLParser
try:
from html.parser import HTMLParseError
except ImportError as e:
# HTMLParseError is removed in Python 3.5. Since it can never be
# thrown in 3.5, we can just define our own class as a placeholder.
class HTMLParseError(Exception):
pass
import sys
import warnings
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument, which we'd like to set to False. Unfortunately,
# http://bugs.python.org/issue13273 makes strict=True a better bet
# before Python 3.2.3.
#
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from ..element import (
CData,
Comment,
@ -45,6 +24,7 @@ from ..dammit import EntitySubstitution, UnicodeDammit
from ..builder import (
DetectsXMLParsedAsHTML,
ParserRejectedMarkup,
HTML,
HTMLTreeBuilder,
STRICT,
@ -91,18 +71,21 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self._initialize_xml_detector()
def error(self, msg):
"""In Python 3, HTMLParser subclasses must implement error(), although
this requirement doesn't appear to be documented.
In Python 2, HTMLParser implements error() by raising an exception,
which we don't want to do.
In any event, this method is called only on very strange
markup and our best strategy is to pretend it didn't happen
and keep going.
"""
warnings.warn(msg)
def error(self, message):
# NOTE: This method is required so long as Python 3.9 is
# supported. The corresponding code is removed from HTMLParser
# in 3.5, but not removed from ParserBase until 3.10.
# https://github.com/python/cpython/issues/76025
#
# The original implementation turned the error into a warning,
# but in every case I discovered, this made HTMLParser
# immediately crash with an error message that was less
# helpful than the warning. The new implementation makes it
# more clear that html.parser just can't parse this
# markup. The 3.10 implementation does the same, though it
# raises AssertionError rather than calling a method. (We
# catch this error and wrap it in a ParserRejectedMarkup.)
raise ParserRejectedMarkup(message)
def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag.
@ -203,9 +186,10 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
:param name: Character number, possibly in hexadecimal.
"""
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed in all supported versions.
# http://bugs.python.org/issue13633
# TODO: This was originally a workaround for a bug in
# HTMLParser. (http://bugs.python.org/issue13633) The bug has
# been fixed, but removing this code still makes some
# Beautiful Soup tests fail. This needs investigation.
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
@ -333,9 +317,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser_args = parser_args or []
parser_kwargs = parser_kwargs or {}
parser_kwargs.update(extra_parser_kwargs)
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
parser_kwargs['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
parser_kwargs['convert_charrefs'] = False
self.parser_args = (parser_args, parser_kwargs)
@ -397,103 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
parser.already_closed_empty_element = []
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
import re
attrfind_tolerant = re.compile(
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
from html.parser import tagfind, attrfind
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = rawdata[i+1:k].lower()
while k < endpos:
if self.strict:
m = attrfind.match(rawdata, k)
else:
m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = self.unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
if self.strict:
self.error("junk characters in start tag: %r"
% (rawdata[k:endpos][:20],))
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
CONSTRUCTOR_TAKES_STRICT = True

lib/bs4/css.py (new file, 280 lines)

@ -0,0 +1,280 @@
"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
class CSS(object):
"""A proxy object against the soupsieve library, to simplify its
CSS selector API.
Acquire this object through the .css attribute on the
BeautifulSoup object, or on the Tag you want to use as the
starting point for a CSS selector.
The main advantage of doing this is that the tag to be selected
against doesn't need to be explicitly specified in the function
calls, since it's already scoped to a tag.
"""
def __init__(self, tag, api=soupsieve):
"""Constructor.
You don't need to instantiate this class yourself; instead,
access the .css attribute on the BeautifulSoup object, or on
the Tag you want to use as the starting point for your CSS
selector.
:param tag: All CSS selectors will use this as their starting
point.
:param api: A plug-in replacement for the soupsieve module,
designed mainly for use in tests.
"""
if api is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
self.api = api
self.tag = tag
def escape(self, ident):
"""Escape a CSS identifier.
This is a simple wrapper around soupselect.escape(). See the
documentation for that function for more information.
"""
if soupsieve is None:
raise NotImplementedError(
"Cannot escape CSS identifiers because the soupsieve package is not installed."
)
return self.api.escape(ident)
def _ns(self, ns, select):
"""Normalize a dictionary of namespaces."""
if not isinstance(select, self.api.SoupSieve) and ns is None:
# If the selector is a precompiled pattern, it already has
# a namespace context compiled in, which cannot be
# replaced.
ns = self.tag._namespaces
return ns
def _rs(self, results):
"""Normalize a list of results to a Resultset.
A ResultSet is more consistent with the rest of Beautiful
Soup's API, and ResultSet.__getattr__ has a helpful error
message if you try to treat a list of results as a single
result (a common mistake).
"""
# Import here to avoid circular import
from .element import ResultSet
return ResultSet(None, results)
def compile(self, select, namespaces=None, flags=0, **kwargs):
"""Pre-compile a selector and return the compiled object.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.compile() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.compile() method.
:return: A precompiled selector object.
:rtype: soupsieve.SoupSieve
"""
return self.api.compile(
select, self._ns(namespaces, select), flags, **kwargs
)
def select_one(self, select, namespaces=None, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag and return the
first result.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select_one()
method.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select_one() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select_one() method.
:return: A Tag, or None if the selector has no match.
:rtype: bs4.element.Tag
"""
return self.api.select_one(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
if limit is None:
limit = 0
return self._rs(
self.api.select(
select, self.tag, self._ns(namespaces, select), limit, flags,
**kwargs
)
)
def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.iselect()
method. It is the same as select(), but it returns a generator
instead of a list.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.iselect() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.iselect() method.
:return: A generator
:rtype: types.GeneratorType
"""
return self.api.iselect(
select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
)
def closest(self, select, namespaces=None, flags=0, **kwargs):
"""Find the Tag closest to this one that matches the given selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.closest()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.closest() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.closest() method.
:return: A Tag, or None if there is no match.
:rtype: bs4.Tag
"""
return self.api.closest(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def match(self, select, namespaces=None, flags=0, **kwargs):
"""Check whether this Tag matches the given CSS selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.match()
method.
:param: a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.match() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.match() method.
:return: True if this Tag matches the selector; False otherwise.
:rtype: bool
"""
return self.api.match(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def filter(self, select, namespaces=None, flags=0, **kwargs):
"""Filter this Tag's direct children based on the given CSS selector.
This uses the Soup Sieve library. It works the same way as
passing this Tag into that library's soupsieve.filter()
method. For more information, see the documentation for
soupsieve.filter().
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.filter() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.filter() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
return self._rs(
self.api.filter(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
)
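
Typical use of the new CSS proxy, per the docstrings above (a short sketch):

from bs4 import BeautifulSoup

soup = BeautifulSoup('<div><p class="a">one</p><p>two</p></div>', 'html.parser')
print(soup.css.select_one('p.a').text)           # one
print([p.text for p in soup.css.select('p')])    # ['one', 'two']
print(soup.p.css.closest('div').name)            # div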


@ -59,21 +59,6 @@ def diagnose(data):
if hasattr(data, 'read'):
data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
print(('"%s" looks like a filename. Reading data from the file.' % data))
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
print("")
for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser))


@ -8,14 +8,8 @@ except ImportError as e:
import re
import sys
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from .css import CSS
from .formatter import (
Formatter,
HTMLFormatter,
@ -154,6 +148,11 @@ class PageElement(object):
NavigableString, Tag, etc. are all subclasses of PageElement.
"""
# In general, we can't tell just by looking at an element whether
# it's contained in an XML document or an HTML document. But for
# Tags (q.v.) we can store this information at parse time.
known_xml = None
def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None):
"""Sets up the initial relations between this element and
@ -496,13 +495,16 @@ class PageElement(object):
def extend(self, tags):
"""Appends the given PageElements to this one's contents.
:param tags: A list of PageElements.
:param tags: A list of PageElements. If a single Tag is
provided instead, this PageElement's contents will be extended
with that Tag's contents.
"""
if isinstance(tags, Tag):
# Calling self.append() on another tag's contents will change
# the list we're iterating over. Make a list that won't
# change.
tags = list(tags.contents)
tags = tags.contents
if isinstance(tags, list):
# Moving items around the tree may change their position in
# the original list. Make a list that won't change.
tags = list(tags)
for tag in tags:
self.append(tag)
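
The extend() change above means a Tag argument now donates its children to the target; a sketch of the new behaviour:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<ul id="a"><li>1</li></ul><ul id="b"><li>2</li></ul>',
                     'html.parser')
a, b = soup.find(id='a'), soup.find(id='b')
a.extend(b)   # moves b's children into a
print(a)      # <ul id="a"><li>1</li><li>2</li></ul>
print(b)      # <ul id="b"></ul>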
@ -586,8 +588,9 @@ class PageElement(object):
:kwargs: A dictionary of filters on attribute values.
:return: A ResultSet containing PageElements.
"""
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(name, attrs, string, limit, self.next_elements,
**kwargs)
_stacklevel=_stacklevel+1, **kwargs)
findAllNext = find_all_next # BS3
def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
@ -624,8 +627,11 @@ class PageElement(object):
:return: A ResultSet of PageElements.
:rtype: bs4.element.ResultSet
"""
return self._find_all(name, attrs, string, limit,
self.next_siblings, **kwargs)
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(
name, attrs, string, limit,
self.next_siblings, _stacklevel=_stacklevel+1, **kwargs
)
findNextSiblings = find_next_siblings # BS3
fetchNextSiblings = find_next_siblings # BS2
@ -663,8 +669,11 @@ class PageElement(object):
:return: A ResultSet of PageElements.
:rtype: bs4.element.ResultSet
"""
return self._find_all(name, attrs, string, limit, self.previous_elements,
**kwargs)
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(
name, attrs, string, limit, self.previous_elements,
_stacklevel=_stacklevel+1, **kwargs
)
findAllPrevious = find_all_previous # BS3
fetchPrevious = find_all_previous # BS2
@ -702,8 +711,11 @@ class PageElement(object):
:return: A ResultSet of PageElements.
:rtype: bs4.element.ResultSet
"""
return self._find_all(name, attrs, string, limit,
self.previous_siblings, **kwargs)
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(
name, attrs, string, limit,
self.previous_siblings, _stacklevel=_stacklevel+1, **kwargs
)
findPreviousSiblings = find_previous_siblings # BS3
fetchPreviousSiblings = find_previous_siblings # BS2
@ -724,7 +736,7 @@ class PageElement(object):
# NOTE: We can't use _find_one because findParents takes a different
# set of arguments.
r = None
l = self.find_parents(name, attrs, 1, **kwargs)
l = self.find_parents(name, attrs, 1, _stacklevel=3, **kwargs)
if l:
r = l[0]
return r
@ -744,8 +756,9 @@ class PageElement(object):
:return: A PageElement.
:rtype: bs4.element.Tag | bs4.element.NavigableString
"""
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(name, attrs, None, limit, self.parents,
**kwargs)
_stacklevel=_stacklevel+1, **kwargs)
findParents = find_parents # BS3
fetchParents = find_parents # BS2
@ -771,19 +784,20 @@ class PageElement(object):
def _find_one(self, method, name, attrs, string, **kwargs):
r = None
l = method(name, attrs, string, 1, **kwargs)
l = method(name, attrs, string, 1, _stacklevel=4, **kwargs)
if l:
r = l[0]
return r
def _find_all(self, name, attrs, string, limit, generator, **kwargs):
"Iterates over a generator looking for things that match."
_stacklevel = kwargs.pop('_stacklevel', 3)
if string is None and 'text' in kwargs:
string = kwargs.pop('text')
warnings.warn(
"The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
DeprecationWarning
DeprecationWarning, stacklevel=_stacklevel
)
if isinstance(name, SoupStrainer):
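
The _stacklevel plumbing above exists so that the deprecation warning for the old text= argument is attributed to the caller rather than to bs4 internals:

import warnings
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>hi</p>', 'html.parser')
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    soup.find_all('p', text='hi')  # deprecated; use string='hi'
print(caught[0].filename)  # points at this script, not bs4/element.py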
@ -926,11 +940,6 @@ class NavigableString(str, PageElement):
PREFIX = ''
SUFFIX = ''
# We can't tell just by looking at a string whether it's contained
# in an XML document or an HTML document.
known_xml = None
def __new__(cls, value):
"""Create a new NavigableString.
@ -946,12 +955,22 @@ class NavigableString(str, PageElement):
u.setup()
return u
def __copy__(self):
def __deepcopy__(self, memo, recursive=False):
"""A copy of a NavigableString has the same contents and class
as the original, but it is not connected to the parse tree.
:param recursive: This parameter is ignored; it's only defined
so that NavigableString.__deepcopy__ implements the same
signature as Tag.__deepcopy__.
"""
return type(self)(self)
def __copy__(self):
"""A copy of a NavigableString can only be a deep copy, because
only one PageElement can occupy a given place in a parse tree.
"""
return self.__deepcopy__({})
def __getnewargs__(self):
return (str(self),)
@ -1296,22 +1315,57 @@ class Tag(PageElement):
parserClass = _alias("parser_class") # BS3
def __copy__(self):
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
def __deepcopy__(self, memo, recursive=True):
"""A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
Its contents are a copy of the old Tag's contents.
"""
clone = self._clone()
if recursive:
# Clone this tag's descendants recursively, but without
# making any recursive function calls.
tag_stack = [clone]
for event, element in self._event_stream(self.descendants):
if event is Tag.END_ELEMENT_EVENT:
# Stop appending incoming Tags to the Tag that was
# just closed.
tag_stack.pop()
else:
descendant_clone = element.__deepcopy__(
memo, recursive=False
)
# Add to its parent's .contents
tag_stack[-1].append(descendant_clone)
if event is Tag.START_ELEMENT_EVENT:
# Add the Tag itself to the stack so that its
# children will be .appended to it.
tag_stack.append(descendant_clone)
return clone
def __copy__(self):
"""A copy of a Tag must always be a deep copy, because a Tag's
children can only have one parent at a time.
"""
return self.__deepcopy__({})
def _clone(self):
"""Create a new Tag just like this one, but with no
contents and unattached to any parse tree.
This is the first step in the deepcopy process.
"""
clone = type(self)(
None, self.builder, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
sourceline=self.sourceline, sourcepos=self.sourcepos,
can_be_empty_element=self.can_be_empty_element,
cdata_list_attributes=self.cdata_list_attributes,
preserve_whitespace_tags=self.preserve_whitespace_tags
preserve_whitespace_tags=self.preserve_whitespace_tags,
interesting_string_types=self.interesting_string_types
)
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
for child in self.contents:
clone.append(child.__copy__())
return clone
@property
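
After these changes, copy.copy() and copy.deepcopy() behave identically for Tags and NavigableStrings, since an element can occupy only one place in one tree; for example:

import copy
from bs4 import BeautifulSoup

soup = BeautifulSoup('<div><p>text</p></div>', 'html.parser')
clone = copy.copy(soup.p)   # delegates to __deepcopy__
print(clone.parent)         # None, detached from the original tree
print(clone == soup.p)      # True, same name, attributes and contents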
@ -1558,7 +1612,7 @@ class Tag(PageElement):
'.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
name=tag_name
),
DeprecationWarning
DeprecationWarning, stacklevel=2
)
return self.find(tag_name)
# We special case contents to avoid recursion.
@ -1634,28 +1688,178 @@ class Tag(PageElement):
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
"""Render a Unicode representation of this PageElement and its
contents.
:param indent_level: Each line of the rendering will be
indented this many spaces. Used internally in
recursive calls while pretty-printing.
:param eventual_encoding: The tag is destined to be
encoded into this encoding. This method is _not_
responsible for performing that encoding. This information
is passed in so that it can be substituted in if the
document contains a <META> tag that mentions the document's
encoding.
:param formatter: A Formatter object, or a string naming one of
the standard formatters.
"""
formatter="minimal",
iterator=None):
pieces = []
# First off, turn a non-Formatter `formatter` into a Formatter
# object. This will stop the lookup from happening over and
# over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
if indent_level is True:
indent_level = 0
# The currently active tag that put us into string literal
# mode. Until this element is closed, children will be treated
# as string literals and not pretty-printed. String literal
# mode is turned on immediately after this tag begins, and
# turned off immediately before it's closed. This means there
# will be whitespace before and after the tag itself.
string_literal_tag = None
for event, element in self._event_stream(iterator):
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
piece = element._format_tag(
eventual_encoding, formatter, opening=True
)
elif event is Tag.END_ELEMENT_EVENT:
piece = element._format_tag(
eventual_encoding, formatter, opening=False
)
if indent_level is not None:
indent_level -= 1
else:
piece = element.output_ready(formatter)
# Now we need to apply the 'prettiness' -- extra
# whitespace before and/or after this tag. This can get
# complicated because certain tags, like <pre> and
# <script>, can't be prettified, since adding whitespace would
# change the meaning of the content.
# The default behavior is to add whitespace before and
# after an element when string literal mode is off, and to
# leave things as they are when string literal mode is on.
if string_literal_tag:
indent_before = indent_after = False
else:
indent_before = indent_after = True
# The only time the behavior is more complex than that is
# when we encounter an opening or closing tag that might
# put us into or out of string literal mode.
if (event is Tag.START_ELEMENT_EVENT
and not string_literal_tag
and not element._should_pretty_print()):
# We are about to enter string literal mode. Add
# whitespace before this tag, but not after. We
# will stay in string literal mode until this tag
# is closed.
indent_before = True
indent_after = False
string_literal_tag = element
elif (event is Tag.END_ELEMENT_EVENT
and element is string_literal_tag):
# We are about to exit string literal mode by closing
# the tag that sent us into that mode. Add whitespace
# after this tag, but not before.
indent_before = False
indent_after = True
string_literal_tag = None
# Now we know whether to add whitespace before and/or
# after this element.
if indent_level is not None:
if (indent_before or indent_after):
if isinstance(element, NavigableString):
piece = piece.strip()
if piece:
piece = self._indent_string(
piece, indent_level, formatter,
indent_before, indent_after
)
if event == Tag.START_ELEMENT_EVENT:
indent_level += 1
pieces.append(piece)
return "".join(pieces)
# Names for the different events yielded by _event_stream
START_ELEMENT_EVENT = object()
END_ELEMENT_EVENT = object()
EMPTY_ELEMENT_EVENT = object()
STRING_ELEMENT_EVENT = object()
def _event_stream(self, iterator=None):
"""Yield a sequence of events that can be used to reconstruct the DOM
for this element.
This lets us recreate the nested structure of this element
(e.g. when formatting it as a string) without using recursive
method calls.
This is similar in concept to the SAX API, but it's a simpler
interface designed for internal use. The events are different
from SAX and the arguments associated with the events are Tags
and other Beautiful Soup objects.
:param iterator: An alternate iterator to use when traversing
the tree.
"""
tag_stack = []
iterator = iterator or self.self_and_descendants
for c in iterator:
# If the parent of the element we're about to yield is not
# the tag currently on the stack, it means that the tag on
# the stack closed before this element appeared.
while tag_stack and c.parent != tag_stack[-1]:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
if isinstance(c, Tag):
if c.is_empty_element:
yield Tag.EMPTY_ELEMENT_EVENT, c
else:
yield Tag.START_ELEMENT_EVENT, c
tag_stack.append(c)
continue
else:
yield Tag.STRING_ELEMENT_EVENT, c
while tag_stack:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
def _indent_string(self, s, indent_level, formatter,
indent_before, indent_after):
"""Add indentation whitespace before and/or after a string.
:param s: The string to amend with whitespace.
:param indent_level: The indentation level; affects how much
whitespace goes before the string.
:param indent_before: Whether or not to add whitespace
before the string.
:param indent_after: Whether or not to add whitespace
(a newline) after the string.
"""
space_before = ''
if indent_before and indent_level:
space_before = (formatter.indent * indent_level)
space_after = ''
if indent_after:
space_after = "\n"
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.
closing_slash = ''
if not opening:
closing_slash = '/'
# Then an optional namespace prefix.
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
# Then a list of attribute values, if this is an opening tag.
attribute_string = ''
if opening:
attributes = formatter.attributes(self)
attrs = []
for key, val in attributes:
@ -1677,63 +1881,19 @@ class Tag(PageElement):
str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
closeTag = ''
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
if self.is_empty_element:
close = formatter.void_element_close_prefix or ''
else:
closeTag = '</%s%s>' % (prefix, self.name)
pretty_print = self._should_pretty_print(indent_level)
space = ''
indent_space = ''
if indent_level is not None:
indent_space = (formatter.indent * (indent_level - 1))
if pretty_print:
space = indent_space
indent_contents = indent_level + 1
else:
indent_contents = None
contents = self.decode_contents(
indent_contents, eventual_encoding, formatter
)
if self.hidden:
# This is the 'document root' object.
s = contents
else:
s = []
attribute_string = ''
if attrs:
attribute_string = ' ' + ' '.join(attrs)
if indent_level is not None:
# Even if this particular tag is not pretty-printed,
# we should indent up to the start of the tag.
s.append(indent_space)
s.append('<%s%s%s%s>' % (
prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
if pretty_print and contents and contents[-1] != "\n":
s.append("\n")
if pretty_print and closeTag:
s.append(space)
s.append(closeTag)
if indent_level is not None and closeTag and self.next_sibling:
# Even if this particular tag is not pretty-printed,
# we're now done with the tag, and we should add a
# newline if appropriate.
s.append("\n")
s = ''.join(s)
return s
def _should_pretty_print(self, indent_level):
# Then an optional closing slash (for a void element in an
# XML document).
void_element_closing_slash = ''
if self.is_empty_element:
void_element_closing_slash = formatter.void_element_close_prefix or ''
# Put it all together.
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
def _should_pretty_print(self, indent_level=1):
"""Should this tag be pretty-printed?
Most of them should, but some (such as <pre> in HTML
@ -1784,32 +1944,8 @@ class Tag(PageElement):
the standard Formatters.
"""
# First off, turn a string formatter into a Formatter object. This
# will stop the lookup from happening over and over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
pretty_print = (indent_level is not None)
s = []
for c in self:
text = None
if isinstance(c, NavigableString):
text = c.output_ready(formatter)
elif isinstance(c, Tag):
s.append(c.decode(indent_level, eventual_encoding,
formatter))
preserve_whitespace = (
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
)
if text and indent_level and not preserve_whitespace:
text = text.strip()
if text:
if pretty_print and not preserve_whitespace:
s.append(formatter.indent * (indent_level - 1))
s.append(text)
if pretty_print and not preserve_whitespace:
s.append("\n")
return ''.join(s)
return self.decode(indent_level, eventual_encoding, formatter,
iterator=self.descendants)
def encode_contents(
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
@ -1862,7 +1998,8 @@ class Tag(PageElement):
:rtype: bs4.element.Tag | bs4.element.NavigableString
"""
r = None
l = self.find_all(name, attrs, recursive, string, 1, **kwargs)
l = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3,
**kwargs)
if l:
r = l[0]
return r
@ -1889,7 +2026,9 @@ class Tag(PageElement):
generator = self.descendants
if not recursive:
generator = self.children
return self._find_all(name, attrs, string, limit, generator, **kwargs)
_stacklevel = kwargs.pop('_stacklevel', 2)
return self._find_all(name, attrs, string, limit, generator,
_stacklevel=_stacklevel+1, **kwargs)
findAll = find_all # BS3
findChildren = find_all # BS2
@ -1903,6 +2042,18 @@ class Tag(PageElement):
# return iter() to make the purpose of the method clear
return iter(self.contents) # XXX This seems to be untested.
@property
def self_and_descendants(self):
"""Iterate over this PageElement and its children in a
breadth-first sequence.
:yield: A sequence of PageElements.
"""
if not self.hidden:
yield self
for i in self.descendants:
yield i
@property
def descendants(self):
"""Iterate over all children of this PageElement in a
@ -1929,16 +2080,13 @@ class Tag(PageElement):
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param kwargs: Keyword arguments to be passed into SoupSieve's
:param kwargs: Keyword arguments to be passed into Soup Sieve's
soupsieve.select() method.
:return: A Tag.
:rtype: bs4.element.Tag
"""
value = self.select(selector, namespaces, 1, **kwargs)
if value:
return value[0]
return None
return self.css.select_one(selector, namespaces, **kwargs)
def select(self, selector, namespaces=None, limit=None, **kwargs):
"""Perform a CSS selection operation on the current element.
@ -1960,21 +2108,12 @@ class Tag(PageElement):
:return: A ResultSet of Tags.
:rtype: bs4.element.ResultSet
"""
if namespaces is None:
namespaces = self._namespaces
return self.css.select(selector, namespaces, limit, **kwargs)
if limit is None:
limit = 0
if soupsieve is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
# We do this because it's more consistent and because
# ResultSet.__getattr__ has a helpful error message.
return ResultSet(None, results)
@property
def css(self):
"""Return an interface to the CSS selector API."""
return CSS(self)
# Old names for backwards compatibility
def childGenerator(self):
@ -1993,7 +2132,7 @@ class Tag(PageElement):
"""
warnings.warn(
'has_key is deprecated. Use has_attr(key) instead.',
DeprecationWarning
DeprecationWarning, stacklevel=2
)
return self.has_attr(key)
@ -2024,7 +2163,7 @@ class SoupStrainer(object):
string = kwargs.pop('text')
warnings.warn(
"The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
DeprecationWarning
DeprecationWarning, stacklevel=2
)
self.name = self._normalize_search_value(name)
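
Internally, the decode() rewrite earlier in this file walks a SAX-like event stream instead of recursing; _event_stream is a private API, but conceptually:

from bs4 import BeautifulSoup
from bs4.element import Tag

soup = BeautifulSoup('<div><p>hi</p><br/></div>', 'html.parser')
labels = {Tag.START_ELEMENT_EVENT: 'start', Tag.END_ELEMENT_EVENT: 'end',
          Tag.EMPTY_ELEMENT_EVENT: 'empty', Tag.STRING_ELEMENT_EVENT: 'string'}
for event, element in soup.div._event_stream():  # private API, shown for illustration
    print(labels[event], getattr(element, 'name', repr(element)))
# start div / start p / string 'hi' / end p / empty br / end div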


@ -149,14 +149,14 @@ class HTMLFormatter(Formatter):
"""A generic Formatter for HTML."""
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
class XMLFormatter(Formatter):
"""A generic Formatter for XML."""
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
# Set up aliases for the default formatters.


@ -8,11 +8,21 @@ Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.12.11"
__version__ = "0.13.1"
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController
from cachecontrol.wrapper import CacheControl
__all__ = [
"__author__",
"__email__",
"__version__",
"CacheControlAdapter",
"CacheController",
"CacheControl",
]
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())
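
The package entry point is unchanged in spirit; typical use of the re-exported names (a sketch against cachecontrol 0.13.1):

import requests
from cachecontrol import CacheControl

sess = CacheControl(requests.Session())
sess.get('https://example.com/')          # fetched from the network
resp = sess.get('https://example.com/')   # may now be served from cache
print(getattr(resp, 'from_cache', False))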


@ -1,8 +1,11 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING
import requests
@ -10,16 +13,19 @@ from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.controller import logger
from argparse import ArgumentParser
if TYPE_CHECKING:
from argparse import Namespace
from cachecontrol.controller import CacheController
def setup_logging():
def setup_logging() -> None:
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)
def get_session():
def get_session() -> requests.Session:
adapter = CacheControlAdapter(
DictCache(), cache_etags=True, serializer=None, heuristic=None
)
@ -27,17 +33,17 @@ def get_session():
sess.mount("http://", adapter)
sess.mount("https://", adapter)
sess.cache_controller = adapter.controller
sess.cache_controller = adapter.controller # type: ignore[attr-defined]
return sess
def get_args():
def get_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args()
def main(args=None):
def main() -> None:
args = get_args()
sess = get_session()
@ -48,10 +54,13 @@ def main(args=None):
setup_logging()
# try setting the cache
sess.cache_controller.cache_response(resp.request, resp.raw)
cache_controller: CacheController = (
sess.cache_controller # type: ignore[attr-defined]
)
cache_controller.cache_response(resp.request, resp.raw)
# Now try to get it
if sess.cache_controller.cached_request(resp.request):
if cache_controller.cached_request(resp.request):
print("Cached!")
else:
print("Not cached :(")


@ -1,16 +1,26 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import types
import functools
import types
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping
from requests.adapters import HTTPAdapter
from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
from .cache import DictCache
from .filewrapper import CallbackFileWrapper
from cachecontrol.cache import DictCache
from cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController
from cachecontrol.filewrapper import CallbackFileWrapper
if TYPE_CHECKING:
from requests import PreparedRequest, Response
from urllib3 import HTTPResponse
from cachecontrol.cache import BaseCache
from cachecontrol.heuristics import BaseHeuristic
from cachecontrol.serialize import Serializer
class CacheControlAdapter(HTTPAdapter):
@ -18,16 +28,16 @@ class CacheControlAdapter(HTTPAdapter):
def __init__(
self,
cache=None,
cache_etags=True,
controller_class=None,
serializer=None,
heuristic=None,
cacheable_methods=None,
*args,
**kw
):
super(CacheControlAdapter, self).__init__(*args, **kw)
cache: BaseCache | None = None,
cache_etags: bool = True,
controller_class: type[CacheController] | None = None,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
cacheable_methods: Collection[str] | None = None,
*args: Any,
**kw: Any,
) -> None:
super().__init__(*args, **kw)
self.cache = DictCache() if cache is None else cache
self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ("GET",)
@ -37,7 +47,16 @@ class CacheControlAdapter(HTTPAdapter):
self.cache, cache_etags=cache_etags, serializer=serializer
)
def send(self, request, cacheable_methods=None, **kw):
def send(
self,
request: PreparedRequest,
stream: bool = False,
timeout: None | float | tuple[float, float] | tuple[float, None] = None,
verify: bool | str = True,
cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None,
proxies: Mapping[str, str] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
@ -54,13 +73,17 @@ class CacheControlAdapter(HTTPAdapter):
# check for etags and add headers if appropriate
request.headers.update(self.controller.conditional_headers(request))
resp = super(CacheControlAdapter, self).send(request, **kw)
resp = super().send(request, stream, timeout, verify, cert, proxies)
return resp
def build_response(
self, request, response, from_cache=False, cacheable_methods=None
):
self,
request: PreparedRequest,
response: HTTPResponse,
from_cache: bool = False,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Build a response by making a request or using the cache.
@ -102,36 +125,37 @@ class CacheControlAdapter(HTTPAdapter):
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
response._fp,
response._fp = CallbackFileWrapper( # type: ignore[attr-defined]
response._fp, # type: ignore[attr-defined]
functools.partial(
self.controller.cache_response, request, response
),
)
if response.chunked:
super_update_chunk_length = response._update_chunk_length
super_update_chunk_length = response._update_chunk_length # type: ignore[attr-defined]
def _update_chunk_length(self):
def _update_chunk_length(self: HTTPResponse) -> None:
super_update_chunk_length()
if self.chunk_left == 0:
self._fp._close()
self._fp._close() # type: ignore[attr-defined]
response._update_chunk_length = types.MethodType(
response._update_chunk_length = types.MethodType( # type: ignore[attr-defined]
_update_chunk_length, response
)
resp = super(CacheControlAdapter, self).build_response(request, response)
resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call]
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
assert request.url is not None
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
resp.from_cache = from_cache
resp.from_cache = from_cache # type: ignore[attr-defined]
return resp
def close(self):
def close(self) -> None:
self.cache.close()
super(CacheControlAdapter, self).close()
super().close() # type: ignore[no-untyped-call]


@ -6,38 +6,46 @@
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from __future__ import annotations
from threading import Lock
from typing import IO, TYPE_CHECKING, MutableMapping
if TYPE_CHECKING:
from datetime import datetime
class BaseCache(object):
def get(self, key):
class BaseCache:
def get(self, key: str) -> bytes | None:
raise NotImplementedError()
def set(self, key, value, expires=None):
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
raise NotImplementedError()
def delete(self, key):
def delete(self, key: str) -> None:
raise NotImplementedError()
def close(self):
def close(self) -> None:
pass
class DictCache(BaseCache):
def __init__(self, init_dict=None):
def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None:
self.lock = Lock()
self.data = init_dict or {}
def get(self, key):
def get(self, key: str) -> bytes | None:
return self.data.get(key, None)
def set(self, key, value, expires=None):
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
with self.lock:
self.data.update({key: value})
def delete(self, key):
def delete(self, key: str) -> None:
with self.lock:
if key in self.data:
self.data.pop(key)
@ -55,10 +63,11 @@ class SeparateBodyBaseCache(BaseCache):
Similarly, the body should be loaded separately via ``get_body()``.
"""
def set_body(self, key, body):
def set_body(self, key: str, body: bytes) -> None:
raise NotImplementedError()
def get_body(self, key):
def get_body(self, key: str) -> IO[bytes] | None:
"""
Return the body as file-like object.
"""


@ -2,8 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0
from .file_cache import FileCache, SeparateBodyFileCache
from .redis_cache import RedisCache
from cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache
from cachecontrol.caches.redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]


@ -1,22 +1,25 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import gc
import hashlib
import os
from textwrap import dedent
from typing import IO, TYPE_CHECKING, Union
from pathlib import Path
from ..cache import BaseCache, SeparateBodyBaseCache
from ..controller import CacheController
from cachecontrol.cache import BaseCache, SeparateBodyBaseCache
from cachecontrol.controller import CacheController
try:
FileNotFoundError
except NameError:
# py2.X
FileNotFoundError = (IOError, OSError)
if TYPE_CHECKING:
from datetime import datetime
from filelock import BaseFileLock
def _secure_open_write(filename, fmode):
def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
# We only want to write to this file, so open it in write only mode
flags = os.O_WRONLY
@@ -40,7 +43,7 @@ def _secure_open_write(filename, fmode):
try:
os.remove(filename)
gc.collect(2)
except (IOError, OSError):
except OSError:
# The file must not exist already, so we can just skip ahead to opening
pass
@@ -63,23 +66,23 @@ class _FileCacheMixin:
def __init__(
self,
directory,
forever=False,
filemode=0o0600,
dirmode=0o0700,
lock_class=None,
):
directory: Union[str, Path],
forever: bool = False,
filemode: int = 0o0600,
dirmode: int = 0o0700,
lock_class: type[BaseFileLock] | None = None,
) -> None:
try:
if lock_class is None:
from filelock import FileLock
lock_class = FileLock
except ImportError:
notice = dedent(
"""
NOTE: In order to use the FileCache you must have
filelock installed. You can install it via pip:
pip install filelock
pip install cachecontrol[filecache]
"""
)
raise ImportError(notice)
@@ -91,17 +94,17 @@ class _FileCacheMixin:
self.lock_class = lock_class
@staticmethod
def encode(x):
def encode(x: str) -> str:
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name):
def _fn(self, name: str) -> str:
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key):
def get(self, key: str) -> bytes | None:
name = self._fn(key)
try:
with open(name, "rb") as fh:
@@ -110,18 +113,20 @@ class _FileCacheMixin:
except FileNotFoundError:
return None
def set(self, key, value, expires=None):
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
name = self._fn(key)
self._write(name, value)
def _write(self, path, data: bytes):
def _write(self, path: str, data: bytes) -> None:
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(path), self.dirmode)
except (IOError, OSError):
except OSError:
pass
with self.lock_class(path + ".lock"):
@@ -129,7 +134,7 @@ class _FileCacheMixin:
with _secure_open_write(path, self.filemode) as fh:
fh.write(data)
def _delete(self, key, suffix):
def _delete(self, key: str, suffix: str) -> None:
name = self._fn(key) + suffix
if not self.forever:
try:
@@ -144,7 +149,7 @@ class FileCache(_FileCacheMixin, BaseCache):
downloads.
"""
def delete(self, key):
def delete(self, key: str) -> None:
self._delete(key, "")
@@ -154,23 +159,23 @@ class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
peak memory usage.
"""
def get_body(self, key):
def get_body(self, key: str) -> IO[bytes] | None:
name = self._fn(key) + ".body"
try:
return open(name, "rb")
except FileNotFoundError:
return None
def set_body(self, key, body):
def set_body(self, key: str, body: bytes) -> None:
name = self._fn(key) + ".body"
self._write(name, body)
def delete(self, key):
def delete(self, key: str) -> None:
self._delete(key, "")
self._delete(key, ".body")
def url_to_file_path(url, filecache):
def url_to_file_path(url: str, filecache: FileCache) -> str:
"""Return the file cache path based on the URL.
This does not ensure the file exists!

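A short sketch of the sharding scheme that `_fn()` above implements (sha224 digest, first five hex characters become nested directories); the directory and key here are illustrative only:

import hashlib
import os

def cache_path(directory: str, name: str) -> str:
    # mirrors _FileCacheMixin._fn: hash the key, shard by its first five characters
    hashed = hashlib.sha224(name.encode()).hexdigest()
    parts = list(hashed[:5]) + [hashed]
    return os.path.join(directory, *parts)

print(cache_path("web_cache", "https://example.com/feed"))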

@@ -1,39 +1,48 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from __future__ import division
from datetime import datetime
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from cachecontrol.cache import BaseCache
if TYPE_CHECKING:
from redis import Redis
class RedisCache(BaseCache):
def __init__(self, conn):
def __init__(self, conn: Redis[bytes]) -> None:
self.conn = conn
def get(self, key):
def get(self, key: str) -> bytes | None:
return self.conn.get(key)
def set(self, key, value, expires=None):
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
if not expires:
self.conn.set(key, value)
elif isinstance(expires, datetime):
expires = expires - datetime.utcnow()
self.conn.setex(key, int(expires.total_seconds()), value)
now_utc = datetime.now(timezone.utc)
if expires.tzinfo is None:
now_utc = now_utc.replace(tzinfo=None)
delta = expires - now_utc
self.conn.setex(key, int(delta.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
def delete(self, key):
def delete(self, key: str) -> None:
self.conn.delete(key)
def clear(self):
def clear(self) -> None:
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self):
def close(self) -> None:
"""Redis uses connection pooling, no need to close the connection."""
pass
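
The rewritten set() above swaps `datetime.utcnow()` for an aware `datetime.now(timezone.utc)`, downgrading to naive only when the caller passed a naive deadline. A standalone sketch of that TTL computation (the one-hour deadline is illustrative):

from datetime import datetime, timedelta, timezone

expires = datetime.now(timezone.utc) + timedelta(hours=1)   # aware deadline
now_utc = datetime.now(timezone.utc)
if expires.tzinfo is None:       # naive deadline: compare like with like
    now_utc = now_utc.replace(tzinfo=None)
ttl = int((expires - now_utc).total_seconds())              # seconds handed to SETEX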


@@ -1,32 +0,0 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
try:
from urllib.parse import urljoin
except ImportError:
from urlparse import urljoin
try:
import cPickle as pickle
except ImportError:
import pickle
# Handle the case where the requests module has been patched to not have
# urllib3 bundled as part of its source.
try:
from requests.packages.urllib3.response import HTTPResponse
except ImportError:
from urllib3.response import HTTPResponse
try:
from requests.packages.urllib3.util import is_fp_closed
except ImportError:
from urllib3.util import is_fp_closed
# Replicate some six behaviour
try:
text_type = unicode
except NameError:
text_type = str


@@ -5,17 +5,27 @@
"""
The httplib2 algorithms ported for use with requests.
"""
from __future__ import annotations
import calendar
import logging
import re
import calendar
import time
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping
from requests.structures import CaseInsensitiveDict
from .cache import DictCache, SeparateBodyBaseCache
from .serialize import Serializer
from cachecontrol.cache import DictCache, SeparateBodyBaseCache
from cachecontrol.serialize import Serializer
if TYPE_CHECKING:
from typing import Literal
from requests import PreparedRequest
from urllib3 import HTTPResponse
from cachecontrol.cache import BaseCache
logger = logging.getLogger(__name__)
@@ -24,20 +34,26 @@ URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
PERMANENT_REDIRECT_STATUSES = (301, 308)
def parse_uri(uri):
def parse_uri(uri: str) -> tuple[str, str, str, str, str]:
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
match = URI.match(uri)
assert match is not None
groups = match.groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
class CacheController:
"""An interface to see if request should cached or not."""
def __init__(
self, cache=None, cache_etags=True, serializer=None, status_codes=None
self,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
status_codes: Collection[int] | None = None,
):
self.cache = DictCache() if cache is None else cache
self.cache_etags = cache_etags
@@ -45,7 +61,7 @@ class CacheController(object):
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
@classmethod
def _urlnorm(cls, uri):
def _urlnorm(cls, uri: str) -> str:
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
@@ -65,10 +81,10 @@ class CacheController(object):
return defrag_uri
@classmethod
def cache_url(cls, uri):
def cache_url(cls, uri: str) -> str:
return cls._urlnorm(uri)
def parse_cache_control(self, headers):
def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]:
known_directives = {
# https://tools.ietf.org/html/rfc7234#section-5.2
"max-age": (int, True),
@@ -87,7 +103,7 @@ class CacheController(object):
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
retval = {}
retval: dict[str, int | None] = {}
for cc_directive in cc_headers.split(","):
if not cc_directive.strip():
@@ -122,11 +138,12 @@ class CacheController(object):
return retval
def _load_from_cache(self, request):
def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
"""
Load a cached response, or return None if it's not available.
"""
cache_url = request.url
assert cache_url is not None
cache_data = self.cache.get(cache_url)
if cache_data is None:
logger.debug("No cache entry available")
@@ -142,11 +159,12 @@ class CacheController(object):
logger.warning("Cache entry deserialization failed, entry ignored")
return result
def cached_request(self, request):
def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
@@ -182,7 +200,7 @@ class CacheController(object):
logger.debug(msg)
return resp
headers = CaseInsensitiveDict(resp.headers)
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if not headers or "date" not in headers:
if "etag" not in headers:
# Without date or etag, the cached response can never be used
@@ -193,7 +211,9 @@ class CacheController(object):
return False
now = time.time()
date = calendar.timegm(parsedate_tz(headers["date"]))
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, now - date)
logger.debug("Current age based on date: %i", current_age)
@@ -207,28 +227,30 @@ class CacheController(object):
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
if "max-age" in resp_cc:
freshness_lifetime = resp_cc["max-age"]
max_age = resp_cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif "expires" in headers:
expires = parsedate_tz(headers["expires"])
if expires is not None:
expire_time = calendar.timegm(expires) - date
expire_time = calendar.timegm(expires[:6]) - date
freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
# Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
if "max-age" in cc:
freshness_lifetime = cc["max-age"]
max_age = cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug(
"Freshness lifetime from request max-age: %i", freshness_lifetime
)
if "min-fresh" in cc:
min_fresh = cc["min-fresh"]
min_fresh = cc.get("min-fresh")
if min_fresh is not None:
# adjust our current age by our min fresh
current_age += min_fresh
logger.debug("Adjusted current age from min-fresh: %i", current_age)
@@ -247,12 +269,12 @@ class CacheController(object):
# return the original handler
return False
def conditional_headers(self, request):
def conditional_headers(self, request: PreparedRequest) -> dict[str, str]:
resp = self._load_from_cache(request)
new_headers = {}
if resp:
headers = CaseInsensitiveDict(resp.headers)
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if "etag" in headers:
new_headers["If-None-Match"] = headers["ETag"]
@@ -262,7 +284,14 @@ class CacheController(object):
return new_headers
def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
def _cache_set(
self,
cache_url: str,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
expires_time: int | None = None,
) -> None:
"""
Store the data in the cache.
"""
@@ -285,7 +314,13 @@ class CacheController(object):
expires=expires_time,
)
def cache_response(self, request, response, body=None, status_codes=None):
def cache_response(
self,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
status_codes: Collection[int] | None = None,
) -> None:
"""
Algorithm for caching requests.
@@ -300,10 +335,14 @@ class CacheController(object):
)
return
response_headers = CaseInsensitiveDict(response.headers)
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if "date" in response_headers:
date = calendar.timegm(parsedate_tz(response_headers["date"]))
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
else:
date = 0
@@ -322,6 +361,7 @@ class CacheController(object):
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
@@ -354,11 +394,11 @@ class CacheController(object):
if response_headers.get("expires"):
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires) - date
expires_time = calendar.timegm(expires[:6]) - date
expires_time = max(expires_time, 14 * 86400)
logger.debug("etag object cached for {0} seconds".format(expires_time))
logger.debug(f"etag object cached for {expires_time} seconds")
logger.debug("Caching due to etag")
self._cache_set(cache_url, request, response, body, expires_time)
@@ -372,11 +412,14 @@ class CacheController(object):
# is no date header then we can't do anything about expiring
# the cache.
elif "date" in response_headers:
date = calendar.timegm(parsedate_tz(response_headers["date"]))
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
# cache when there is a max-age > 0
if "max-age" in cc and cc["max-age"] > 0:
max_age = cc.get("max-age")
if max_age is not None and max_age > 0:
logger.debug("Caching b/c date exists and max-age > 0")
expires_time = cc["max-age"]
expires_time = max_age
self._cache_set(
cache_url,
request,
@@ -391,12 +434,12 @@ class CacheController(object):
if response_headers["expires"]:
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires) - date
expires_time = calendar.timegm(expires[:6]) - date
else:
expires_time = None
logger.debug(
"Caching b/c of expires header. expires in {0} seconds".format(
"Caching b/c of expires header. expires in {} seconds".format(
expires_time
)
)
@@ -408,13 +451,16 @@ class CacheController(object):
expires_time,
)
def update_cached_response(self, request, response):
def update_cached_response(
self, request: PreparedRequest, response: HTTPResponse
) -> HTTPResponse:
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
cached_response = self._load_from_cache(request)
@@ -432,11 +478,11 @@ class CacheController(object):
excluded_headers = ["content-length"]
cached_response.headers.update(
dict(
(k, v)
for k, v in response.headers.items()
{
k: v
for k, v in response.headers.items() # type: ignore[no-untyped-call]
if k.lower() not in excluded_headers
)
}
)
# we want a 200 b/c we have content via the cache
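
The freshness check diffed above reduces to: take `date` from the Date header (note the new `[:6]` slice into the `parsedate_tz` result), compute the response's current age, then compare it against a lifetime taken from max-age or Expires. A standalone sketch with illustrative header values:

import calendar
import time
from email.utils import parsedate_tz

headers = {"date": "Sat, 23 Sep 2023 12:00:00 GMT", "cache-control": "max-age=300"}

time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None               # mirrors the assertion added above
date = calendar.timegm(time_tuple[:6])
current_age = max(0, time.time() - date)
freshness_lifetime = 300                    # parsed from max-age
is_fresh = freshness_lifetime > current_age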


@@ -1,12 +1,17 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from tempfile import NamedTemporaryFile
import mmap
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Callable
if TYPE_CHECKING:
from http.client import HTTPResponse
class CallbackFileWrapper(object):
class CallbackFileWrapper:
"""
Small wrapper around a fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
@@ -25,12 +30,14 @@ class CallbackFileWrapper(object):
performance impact.
"""
def __init__(self, fp, callback):
def __init__(
self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
) -> None:
self.__buf = NamedTemporaryFile("rb+", delete=True)
self.__fp = fp
self.__callback = callback
def __getattr__(self, name):
def __getattr__(self, name: str) -> Any:
# The vaguaries of garbage collection means that self.__fp is
# not always set. By using __getattribute__ and the private
# name[0] allows looking up the attribute value and raising an
@@ -42,7 +49,7 @@ class CallbackFileWrapper(object):
fp = self.__getattribute__("_CallbackFileWrapper__fp")
return getattr(fp, name)
def __is_fp_closed(self):
def __is_fp_closed(self) -> bool:
try:
return self.__fp.fp is None
@@ -50,7 +57,8 @@ class CallbackFileWrapper(object):
pass
try:
return self.__fp.closed
closed: bool = self.__fp.closed
return closed
except AttributeError:
pass
@@ -59,7 +67,7 @@ class CallbackFileWrapper(object):
# TODO: Add some logging here...
return False
def _close(self):
def _close(self) -> None:
if self.__callback:
if self.__buf.tell() == 0:
# Empty file:
@@ -86,8 +94,8 @@ class CallbackFileWrapper(object):
# Important when caching big files.
self.__buf.close()
def read(self, amt=None):
data = self.__fp.read(amt)
def read(self, amt: int | None = None) -> bytes:
data: bytes = self.__fp.read(amt)
if data:
# We may be dealing with b'', a sign that things are over:
# it's passed e.g. after we've already closed self.__buf.
@@ -97,8 +105,8 @@ class CallbackFileWrapper(object):
return data
def _safe_read(self, amt):
data = self.__fp._safe_read(amt)
def _safe_read(self, amt: int) -> bytes:
data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined]
if amt == 2 and data == b"\r\n":
# urllib executes this read to toss the CRLF at the end
# of the chunk.
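
Conceptually, CallbackFileWrapper tees every read() into a side buffer and hands the buffered bytes to the callback once the underlying fp is exhausted. A toy illustration of the tee pattern only (not the class's actual NamedTemporaryFile plumbing):

from io import BytesIO

fp = BytesIO(b"payload")
buf = BytesIO()

while chunk := fp.read(3):     # tee each read into the side buffer
    buf.write(chunk)

captured = buf.getvalue()      # what the close callback would receive
assert captured == b"payload"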


@@ -1,29 +1,31 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Mapping
from datetime import datetime, timedelta
if TYPE_CHECKING:
from urllib3 import HTTPResponse
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta, date=None):
date = date or datetime.utcnow()
def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
date = date or datetime.now(timezone.utc)
return date + delta
def datetime_to_header(dt):
def datetime_to_header(dt: datetime) -> str:
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic(object):
def warning(self, response):
class BaseHeuristic:
def warning(self, response: HTTPResponse) -> str | None:
"""
Return a valid 1xx warning header value describing the cache
adjustments.
@@ -34,7 +36,7 @@ class BaseHeuristic(object):
"""
return '110 - "Response is Stale"'
def update_headers(self, response):
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
@@ -43,7 +45,7 @@ class BaseHeuristic(object):
"""
return {}
def apply(self, response):
def apply(self, response: HTTPResponse) -> HTTPResponse:
updated_headers = self.update_headers(response)
if updated_headers:
@@ -61,12 +63,12 @@ class OneDayCache(BaseHeuristic):
future.
"""
def update_headers(self, response):
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
headers = {}
if "expires" not in response.headers:
date = parsedate(response.headers["date"])
expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc)) # type: ignore[misc]
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
@@ -77,14 +79,14 @@ class ExpiresAfter(BaseHeuristic):
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw):
def __init__(self, **kw: Any) -> None:
self.delta = timedelta(**kw)
def update_headers(self, response):
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
expires = expire_after(self.delta)
return {"expires": datetime_to_header(expires), "cache-control": "public"}
def warning(self, response):
def warning(self, response: HTTPResponse) -> str | None:
tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta
@@ -101,12 +103,23 @@ class LastModified(BaseHeuristic):
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr.
"""
cacheable_by_default_statuses = {
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
200,
203,
204,
206,
300,
301,
404,
405,
410,
414,
501,
}
def update_headers(self, resp):
headers = resp.headers
def update_headers(self, resp: HTTPResponse) -> dict[str, str]:
headers: Mapping[str, str] = resp.headers
if "expires" in headers:
return {}
@@ -120,9 +133,11 @@ class LastModified(BaseHeuristic):
if "date" not in headers or "last-modified" not in headers:
return {}
date = calendar.timegm(parsedate_tz(headers["date"]))
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
last_modified = parsedate(headers["last-modified"])
if date is None or last_modified is None:
if last_modified is None:
return {}
now = time.time()
@@ -135,5 +150,5 @@ class LastModified(BaseHeuristic):
expires = date + freshness_lifetime
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp):
def warning(self, resp: HTTPResponse) -> str | None:
return None
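
A heuristic like the ones above is attached by mounting an adapter, per cachecontrol's documented pattern; for example, to cache every response for a day:

import requests
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.heuristics import ExpiresAfter

sess = requests.Session()
sess.mount("https://", CacheControlAdapter(heuristic=ExpiresAfter(days=1)))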


@@ -1,105 +1,91 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import base64
import io
import json
import zlib
from typing import IO, TYPE_CHECKING, Any, Mapping, cast
import msgpack
from requests.structures import CaseInsensitiveDict
from urllib3 import HTTPResponse
from .compat import HTTPResponse, pickle, text_type
if TYPE_CHECKING:
from requests import PreparedRequest
def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii"))
class Serializer:
serde_version = "4"
def _b64_decode_str(s):
return _b64_decode_bytes(s).decode("utf8")
_default_body_read = object()
class Serializer(object):
def dumps(self, request, response, body=None):
response_headers = CaseInsensitiveDict(response.headers)
def dumps(
self,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
) -> bytes:
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if body is None:
# When a body isn't passed in, we'll read the response. We
# also update the response with a new file handler to be
# sure it acts as though it was never read.
body = response.read(decode_content=False)
response._fp = io.BytesIO(body)
response._fp = io.BytesIO(body) # type: ignore[attr-defined]
response.length_remaining = len(body)
# NOTE: This is all a bit weird, but it's really important that on
# Python 2.x these objects are unicode and not str, even when
# they contain only ascii. The problem here is that msgpack
# understands the difference between unicode and bytes and we
# have it set to differentiate between them, however Python 2
# doesn't know the difference. Forcing these to unicode will be
# enough to have msgpack know the difference.
data = {
u"response": {
u"body": body, # Empty bytestring if body is stored separately
u"headers": dict(
(text_type(k), text_type(v)) for k, v in response.headers.items()
),
u"status": response.status,
u"version": response.version,
u"reason": text_type(response.reason),
u"strict": response.strict,
u"decode_content": response.decode_content,
"response": {
"body": body, # Empty bytestring if body is stored separately
"headers": {str(k): str(v) for k, v in response.headers.items()}, # type: ignore[no-untyped-call]
"status": response.status,
"version": response.version,
"reason": str(response.reason),
"decode_content": response.decode_content,
}
}
# Construct our vary headers
data[u"vary"] = {}
if u"vary" in response_headers:
varied_headers = response_headers[u"vary"].split(",")
data["vary"] = {}
if "vary" in response_headers:
varied_headers = response_headers["vary"].split(",")
for header in varied_headers:
header = text_type(header).strip()
header = str(header).strip()
header_value = request.headers.get(header, None)
if header_value is not None:
header_value = text_type(header_value)
data[u"vary"][header] = header_value
header_value = str(header_value)
data["vary"][header] = header_value
return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)])
def loads(self, request, data, body_file=None):
def serialize(self, data: dict[str, Any]) -> bytes:
return cast(bytes, msgpack.dumps(data, use_bin_type=True))
def loads(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
# Short circuit if we've been given an empty set of data
if not data:
return
return None
# Determine what version of the serializer the data was serialized
# with
try:
ver, data = data.split(b",", 1)
except ValueError:
ver = b"cc=0"
# Previous versions of this library supported other serialization
# formats, but these have all been removed.
if not data.startswith(f"cc={self.serde_version},".encode()):
return None
# Make sure that our "ver" is actually a version and isn't a false
# positive from a , being in the data stream.
if ver[:3] != b"cc=":
data = ver + data
ver = b"cc=0"
data = data[5:]
return self._loads_v4(request, data, body_file)
# Get the version number out of the cc=N
ver = ver.split(b"=", 1)[-1].decode("ascii")
# Dispatch to the actual load method for the given version
try:
return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
return
def prepare_response(self, request, cached, body_file=None):
def prepare_response(
self,
request: PreparedRequest,
cached: Mapping[str, Any],
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
@@ -108,23 +94,26 @@ class Serializer(object):
# This case is also handled in the controller code when creating
# a cache entry, but is left here for backwards compatibility.
if "*" in cached.get("vary", {}):
return
return None
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return
return None
body_raw = cached["response"].pop("body")
headers = CaseInsensitiveDict(data=cached["response"]["headers"])
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
data=cached["response"]["headers"]
)
if headers.get("transfer-encoding", "") == "chunked":
headers.pop("transfer-encoding")
cached["response"]["headers"] = headers
try:
body: IO[bytes]
if body_file is None:
body = io.BytesIO(body_raw)
else:
@@ -138,53 +127,20 @@ class Serializer(object):
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode("utf8"))
# Discard any `strict` parameter serialized by older version of cachecontrol.
cached["response"].pop("strict", None)
return HTTPResponse(body=body, preload_content=False, **cached["response"])
def _loads_v0(self, request, data, body_file=None):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
def _loads_v1(self, request, data, body_file=None):
try:
cached = pickle.loads(data)
except ValueError:
return
return self.prepare_response(request, cached, body_file)
def _loads_v2(self, request, data, body_file=None):
assert body_file is None
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except (ValueError, zlib.error):
return
# We need to decode the items that we've base64 encoded
cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
cached["response"]["headers"] = dict(
(_b64_decode_str(k), _b64_decode_str(v))
for k, v in cached["response"]["headers"].items()
)
cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
cached["vary"] = dict(
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached, body_file)
def _loads_v3(self, request, data, body_file):
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
def _loads_v4(self, request, data, body_file=None):
def _loads_v4(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
return
return None
return self.prepare_response(request, cached, body_file)
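
With the `_loads_v0` through `_loads_v3` shims removed, `loads()` accepts only the current `cc=4` prefix and treats anything else as a miss. A sketch of that check (the payload is a stand-in for a real msgpack blob):

serde_version = "4"
data = b"cc=4,payload"                   # hypothetical serialized cache entry
prefix = f"cc={serde_version},".encode()

if data.startswith(prefix):
    payload = data[len(prefix):]         # data[5:] for the one-digit version above
else:
    payload = None                       # legacy cc=0..cc=3 entries become cache misses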


@@ -1,22 +1,32 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from .adapter import CacheControlAdapter
from .cache import DictCache
from typing import TYPE_CHECKING, Collection
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
if TYPE_CHECKING:
import requests
from cachecontrol.cache import BaseCache
from cachecontrol.controller import CacheController
from cachecontrol.heuristics import BaseHeuristic
from cachecontrol.serialize import Serializer
def CacheControl(
sess,
cache=None,
cache_etags=True,
serializer=None,
heuristic=None,
controller_class=None,
adapter_class=None,
cacheable_methods=None,
):
sess: requests.Session,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
controller_class: type[CacheController] | None = None,
adapter_class: type[CacheControlAdapter] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> requests.Session:
cache = DictCache() if cache is None else cache
adapter_class = adapter_class or CacheControlAdapter
adapter = adapter_class(

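Typical use of this wrapper is unchanged by the annotations; a sketch pairing it with the file-backed cache (the cache directory name here is illustrative):

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

sess = CacheControl(requests.Session(), cache=FileCache(".web_cache"))
resp = sess.get("https://example.com/")   # subsequent GETs may be answered from cache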

@@ -1,4 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2023.05.07"
__version__ = "2023.07.22"


@@ -791,34 +791,6 @@ uLjbvrW5KfnaNwUASZQDhETnv0Mxz3WLJdH0pmT1kvarBes96aULNmLazAZfNou2
XjG4Kvte9nHfRCaexOYNkbQudZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E=
-----END CERTIFICATE-----
# Issuer: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Subject: CN=Hongkong Post Root CA 1 O=Hongkong Post
# Label: "Hongkong Post Root CA 1"
# Serial: 1000
# MD5 Fingerprint: a8:0d:6f:39:78:b9:43:6d:77:42:6d:98:5a:cc:23:ca
# SHA1 Fingerprint: d6:da:a8:20:8d:09:d2:15:4d:24:b5:2f:cb:34:6e:b2:58:b2:8a:58
# SHA256 Fingerprint: f9:e6:7d:33:6c:51:00:2a:c0:54:c6:32:02:2d:66:dd:a2:e7:e3:ff:f1:0a:d0:61:ed:31:d8:bb:b4:10:cf:b2
-----BEGIN CERTIFICATE-----
MIIDMDCCAhigAwIBAgICA+gwDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UEBhMCSEsx
FjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3Qg
Um9vdCBDQSAxMB4XDTAzMDUxNTA1MTMxNFoXDTIzMDUxNTA0NTIyOVowRzELMAkG
A1UEBhMCSEsxFjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdr
b25nIFBvc3QgUm9vdCBDQSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
AQEArP84tulmAknjorThkPlAj3n54r15/gK97iSSHSL22oVyaf7XPwnU3ZG1ApzQ
jVrhVcNQhrkpJsLj2aDxaQMoIIBFIi1WpztUlVYiWR8o3x8gPW2iNr4joLFutbEn
PzlTCeqrauh0ssJlXI6/fMN4hM2eFvz1Lk8gKgifd/PFHsSaUmYeSF7jEAaPIpjh
ZY4bXSNmO7ilMlHIhqqhqZ5/dpTCpmy3QfDVyAY45tQM4vM7TG1QjMSDJ8EThFk9
nnV0ttgCXjqQesBCNnLsak3c78QA3xMYV18meMjWCnl3v/evt3a5pQuEF10Q6m/h
q5URX208o1xNg1vysxmKgIsLhwIDAQABoyYwJDASBgNVHRMBAf8ECDAGAQH/AgED
MA4GA1UdDwEB/wQEAwIBxjANBgkqhkiG9w0BAQUFAAOCAQEADkbVPK7ih9legYsC
mEEIjEy82tvuJxuC52pF7BaLT4Wg87JwvVqWuspube5Gi27nKi6Wsxkz67SfqLI3
7piol7Yutmcn1KZJ/RyTZXaeQi/cImyaT/JaFTmxcdcrUehtHJjA2Sr0oYJ71clB
oiMBdDhViw+5LmeiIAQ32pwL0xch4I+XeTRvhEgCIDMb5jREn5Fw9IBehEPCKdJs
EhTkYY2sEJCehFC78JZvRZ+K88psT/oROhUVRsPNH4NbLUES7VBnQRM9IauUiqpO
fMGx+6fWtScvl6tu4B3i0RwsH0Ti/L6RoZz71ilTc4afU9hDDl3WY4JxHYB0yvbi
AmvZWg==
-----END CERTIFICATE-----
# Issuer: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Subject: CN=SecureSign RootCA11 O=Japan Certification Services, Inc.
# Label: "SecureSign RootCA11"
@@ -1676,50 +1648,6 @@ HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx
SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY=
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi
# Label: "E-Tugra Certification Authority"
# Serial: 7667447206703254355
# MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49
# SHA1 Fingerprint: 51:c6:e7:08:49:06:6e:f3:92:d4:5c:a0:0d:6d:a3:62:8f:c3:52:39
# SHA256 Fingerprint: b0:bf:d5:2b:b0:d7:d9:bd:92:bf:5d:4d:c1:3d:a2:55:c0:2c:54:2f:37:83:65:ea:89:39:11:f5:5e:55:f2:3c
-----BEGIN CERTIFICATE-----
MIIGSzCCBDOgAwIBAgIIamg+nFGby1MwDQYJKoZIhvcNAQELBQAwgbIxCzAJBgNV
BAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBC
aWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhpem1ldGxlcmkgQS7Fni4xJjAkBgNV
BAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBNZXJrZXppMSgwJgYDVQQDDB9FLVR1
Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTEzMDMwNTEyMDk0OFoXDTIz
MDMwMzEyMDk0OFowgbIxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+
BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhp
em1ldGxlcmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBN
ZXJrZXppMSgwJgYDVQQDDB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA4vU/kwVRHoViVF56C/UY
B4Oufq9899SKa6VjQzm5S/fDxmSJPZQuVIBSOTkHS0vdhQd2h8y/L5VMzH2nPbxH
D5hw+IyFHnSOkm0bQNGZDbt1bsipa5rAhDGvykPL6ys06I+XawGb1Q5KCKpbknSF
Q9OArqGIW66z6l7LFpp3RMih9lRozt6Plyu6W0ACDGQXwLWTzeHxE2bODHnv0ZEo
q1+gElIwcxmOj+GMB6LDu0rw6h8VqO4lzKRG+Bsi77MOQ7osJLjFLFzUHPhdZL3D
k14opz8n8Y4e0ypQBaNV2cvnOVPAmJ6MVGKLJrD3fY185MaeZkJVgkfnsliNZvcH
fC425lAcP9tDJMW/hkd5s3kc91r0E+xs+D/iWR+V7kI+ua2oMoVJl0b+SzGPWsut
dEcf6ZG33ygEIqDUD13ieU/qbIWGvaimzuT6w+Gzrt48Ue7LE3wBf4QOXVGUnhMM
ti6lTPk5cDZvlsouDERVxcr6XQKj39ZkjFqzAQqptQpHF//vkUAqjqFGOjGY5RH8
zLtJVor8udBhmm9lbObDyz51Sf6Pp+KJxWfXnUYTTjF2OySznhFlhqt/7x3U+Lzn
rFpct1pHXFXOVbQicVtbC/DP3KBhZOqp12gKY6fgDT+gr9Oq0n7vUaDmUStVkhUX
U8u3Zg5mTPj5dUyQ5xJwx0UCAwEAAaNjMGEwHQYDVR0OBBYEFC7j27JJ0JxUeVz6
Jyr+zE7S6E5UMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAULuPbsknQnFR5
XPonKv7MTtLoTlQwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBCwUAA4ICAQAF
Nzr0TbdF4kV1JI+2d1LoHNgQk2Xz8lkGpD4eKexd0dCrfOAKkEh47U6YA5n+KGCR
HTAduGN8qOY1tfrTYXbm1gdLymmasoR6d5NFFxWfJNCYExL/u6Au/U5Mh/jOXKqY
GwXgAEZKgoClM4so3O0409/lPun++1ndYYRP0lSWE2ETPo+Aab6TR7U1Q9Jauz1c
77NCR807VRMGsAnb/WP2OogKmW9+4c4bU2pEZiNRCHu8W1Ki/QY3OEBhj0qWuJA3
+GbHeJAAFS6LrVE1Uweoa2iu+U48BybNCAVwzDk/dr2l02cmAYamU9JgO3xDf1WK
vJUawSg5TB9D0pH0clmKuVb8P7Sd2nCcdlqMQ1DujjByTd//SffGqWfZbawCEeI6
FiWnWAjLb1NBnEg4R2gz0dfHj9R0IdTDBZB6/86WiLEVKV0jq9BgoRJP3vQXzTLl
yb/IQ639Lo7xr+L0mPoSHyDYwKcMhcWQ9DstliaxLL5Mq+ux0orJ23gTDx4JnW2P
AJ8C2sH6H3p6CcRK5ogql5+Ji/03X186zjhZhkuvcQu02PJwT58yE+Owp1fl2tpD
y4Q08ijE6m30Ku/Ba3ba+367hTzSU8JNvnHhRdH9I2cNE3X7z2VnIp2usAnRCf8d
NL/+I5c30jn6PQ0GC7TbO6Orb1wdtn7os4I07QZcJA==
-----END CERTIFICATE-----
# Issuer: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Subject: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center
# Label: "T-TeleSec GlobalRoot Class 2"
@@ -4397,73 +4325,6 @@ ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG
BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA RSA v3"
# Serial: 75951268308633135324246244059508261641472512052
# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4
# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9
# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2
-----BEGIN CERTIFICATE-----
MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL
BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt
VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw
JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw
OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG
QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1
Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD
QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7
7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx
uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8
7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/
rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL
l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG
wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4
znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO
M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK
5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH
nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo
DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy
tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL
BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ
6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18
Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ
3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk
vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9
9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ
mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA
VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF
9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM
moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8
bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ
-----END CERTIFICATE-----
# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center
# Label: "E-Tugra Global Root CA ECC v3"
# Serial: 218504919822255052842371958738296604628416471745
# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64
# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84
# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13
-----BEGIN CERTIFICATE-----
MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw
gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn
cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD
VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2
NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r
YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh
IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF
Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ
KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK
fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB
Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C
MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp
ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6
7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx
vmjkI6TZraE3
-----END CERTIFICATE-----
# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD.
# Label: "Security Communication RootCA3"
@@ -4587,3 +4448,188 @@ AgEGMAoGCCqGSM49BAMDA2gAMGUCMBq8W9f+qdJUDkpd0m2xQNz0Q9XSSpkZElaA
94M04TVOSG0ED1cxMDAtsaqdAzjbBgIxAMvMh1PLet8gUXOQwKhbYdDFUDn9hf7B
43j4ptZLvZuHjw/l1lOWqzzIQNph91Oj9w==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root E46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root E46"
# Serial: 88989738453351742415770396670917916916
# MD5 Fingerprint: 28:23:f8:b2:98:5c:37:16:3b:3e:46:13:4e:b0:b3:01
# SHA1 Fingerprint: ec:8a:39:6c:40:f0:2e:bc:42:75:d4:9f:ab:1c:1a:5b:67:be:d2:9a
# SHA256 Fingerprint: c9:0f:26:f0:fb:1b:40:18:b2:22:27:51:9b:5c:a2:b5:3e:2c:a5:b3:be:5c:f1:8e:fe:1b:ef:47:38:0c:53:83
-----BEGIN CERTIFICATE-----
MIICOjCCAcGgAwIBAgIQQvLM2htpN0RfFf51KBC49DAKBggqhkjOPQQDAzBfMQsw
CQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1T
ZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwHhcN
MjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEYMBYG
A1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1YmxpYyBT
ZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBFNDYwdjAQBgcqhkjOPQIBBgUrgQQA
IgNiAAR2+pmpbiDt+dd34wc7qNs9Xzjoq1WmVk/WSOrsfy2qw7LFeeyZYX8QeccC
WvkEN/U0NSt3zn8gj1KjAIns1aeibVvjS5KToID1AZTc8GgHHs3u/iVStSBDHBv+
6xnOQ6OjQjBAMB0GA1UdDgQWBBTRItpMWfFLXyY4qp3W7usNw/upYTAOBgNVHQ8B
Af8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNnADBkAjAn7qRa
qCG76UeXlImldCBteU/IvZNeWBj7LRoAasm4PdCkT0RHlAFWovgzJQxC36oCMB3q
4S6ILuH5px0CMk7yn2xVdOOurvulGu7t0vzCAxHrRVxgED1cf5kDW21USAGKcw==
-----END CERTIFICATE-----
# Issuer: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Subject: CN=Sectigo Public Server Authentication Root R46 O=Sectigo Limited
# Label: "Sectigo Public Server Authentication Root R46"
# Serial: 156256931880233212765902055439220583700
# MD5 Fingerprint: 32:10:09:52:00:d5:7e:6c:43:df:15:c0:b1:16:93:e5
# SHA1 Fingerprint: ad:98:f9:f3:e4:7d:75:3b:65:d4:82:b3:a4:52:17:bb:6e:f5:e4:38
# SHA256 Fingerprint: 7b:b6:47:a6:2a:ee:ac:88:bf:25:7a:a5:22:d0:1f:fe:a3:95:e0:ab:45:c7:3f:93:f6:56:54:ec:38:f2:5a:06
-----BEGIN CERTIFICATE-----
MIIFijCCA3KgAwIBAgIQdY39i658BwD6qSWn4cetFDANBgkqhkiG9w0BAQwFADBf
MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQD
Ey1TZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYw
HhcNMjEwMzIyMDAwMDAwWhcNNDYwMzIxMjM1OTU5WjBfMQswCQYDVQQGEwJHQjEY
MBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTYwNAYDVQQDEy1TZWN0aWdvIFB1Ymxp
YyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gUm9vdCBSNDYwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQCTvtU2UnXYASOgHEdCSe5jtrch/cSV1UgrJnwUUxDa
ef0rty2k1Cz66jLdScK5vQ9IPXtamFSvnl0xdE8H/FAh3aTPaE8bEmNtJZlMKpnz
SDBh+oF8HqcIStw+KxwfGExxqjWMrfhu6DtK2eWUAtaJhBOqbchPM8xQljeSM9xf
iOefVNlI8JhD1mb9nxc4Q8UBUQvX4yMPFF1bFOdLvt30yNoDN9HWOaEhUTCDsG3X
ME6WW5HwcCSrv0WBZEMNvSE6Lzzpng3LILVCJ8zab5vuZDCQOc2TZYEhMbUjUDM3
IuM47fgxMMxF/mL50V0yeUKH32rMVhlATc6qu/m1dkmU8Sf4kaWD5QazYw6A3OAS
VYCmO2a0OYctyPDQ0RTp5A1NDvZdV3LFOxxHVp3i1fuBYYzMTYCQNFu31xR13NgE
SJ/AwSiItOkcyqex8Va3e0lMWeUgFaiEAin6OJRpmkkGj80feRQXEgyDet4fsZfu
+Zd4KKTIRJLpfSYFplhym3kT2BFfrsU4YjRosoYwjviQYZ4ybPUHNs2iTG7sijbt
8uaZFURww3y8nDnAtOFr94MlI1fZEoDlSfB1D++N6xybVCi0ITz8fAr/73trdf+L
HaAZBav6+CuBQug4urv7qv094PPK306Xlynt8xhW6aWWrL3DkJiy4Pmi1KZHQ3xt
zwIDAQABo0IwQDAdBgNVHQ4EFgQUVnNYZJX5khqwEioEYnmhQBWIIUkwDgYDVR0P
AQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAC9c
mTz8Bl6MlC5w6tIyMY208FHVvArzZJ8HXtXBc2hkeqK5Duj5XYUtqDdFqij0lgVQ
YKlJfp/imTYpE0RHap1VIDzYm/EDMrraQKFz6oOht0SmDpkBm+S8f74TlH7Kph52
gDY9hAaLMyZlbcp+nv4fjFg4exqDsQ+8FxG75gbMY/qB8oFM2gsQa6H61SilzwZA
Fv97fRheORKkU55+MkIQpiGRqRxOF3yEvJ+M0ejf5lG5Nkc/kLnHvALcWxxPDkjB
JYOcCj+esQMzEhonrPcibCTRAUH4WAP+JWgiH5paPHxsnnVI84HxZmduTILA7rpX
DhjvLpr3Etiga+kFpaHpaPi8TD8SHkXoUsCjvxInebnMMTzD9joiFgOgyY9mpFui
TdaBJQbpdqQACj7LzTWb4OE4y2BThihCQRxEV+ioratF4yUQvNs+ZUH7G6aXD+u5
dHn5HrwdVw1Hr8Mvn4dGp+smWg9WY7ViYG4A++MnESLn/pmPNPW56MORcr3Ywx65
LvKRRFHQV80MNNVIIb/bE/FmJUNS0nAiNs2fxBx1IK1jcmMGDw4nztJqDby1ORrp
0XZ60Vzk50lJLVU3aPAaOpg+VBeHVOmmJ1CJeyAvP/+/oYtKR5j/K3tJPsMpRmAY
QqszKbrAKbkTidOIijlBO8n9pu0f9GBj39ItVQGL
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS RSA Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS RSA Root CA 2022"
# Serial: 148535279242832292258835760425842727825
# MD5 Fingerprint: d8:4e:c6:59:30:d8:fe:a0:d6:7a:5a:2c:2c:69:78:da
# SHA1 Fingerprint: ec:2c:83:40:72:af:26:95:10:ff:0e:f2:03:ee:31:70:f6:78:9d:ca
# SHA256 Fingerprint: 8f:af:7d:2e:2c:b4:70:9b:b8:e0:b3:36:66:bf:75:a5:dd:45:b5:de:48:0f:8e:a8:d4:bf:e6:be:bc:17:f2:ed
-----BEGIN CERTIFICATE-----
MIIFiTCCA3GgAwIBAgIQb77arXO9CEDii02+1PdbkTANBgkqhkiG9w0BAQsFADBO
MQswCQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQD
DBxTU0wuY29tIFRMUyBSU0EgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzQyMloX
DTQ2MDgxOTE2MzQyMVowTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jw
b3JhdGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgUlNBIFJvb3QgQ0EgMjAyMjCC
AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANCkCXJPQIgSYT41I57u9nTP
L3tYPc48DRAokC+X94xI2KDYJbFMsBFMF3NQ0CJKY7uB0ylu1bUJPiYYf7ISf5OY
t6/wNr/y7hienDtSxUcZXXTzZGbVXcdotL8bHAajvI9AI7YexoS9UcQbOcGV0ins
S657Lb85/bRi3pZ7QcacoOAGcvvwB5cJOYF0r/c0WRFXCsJbwST0MXMwgsadugL3
PnxEX4MN8/HdIGkWCVDi1FW24IBydm5MR7d1VVm0U3TZlMZBrViKMWYPHqIbKUBO
L9975hYsLfy/7PO0+r4Y9ptJ1O4Fbtk085zx7AGL0SDGD6C1vBdOSHtRwvzpXGk3
R2azaPgVKPC506QVzFpPulJwoxJF3ca6TvvC0PeoUidtbnm1jPx7jMEWTO6Af77w
dr5BUxIzrlo4QqvXDz5BjXYHMtWrifZOZ9mxQnUjbvPNQrL8VfVThxc7wDNY8VLS
+YCk8OjwO4s4zKTGkH8PnP2L0aPP2oOnaclQNtVcBdIKQXTbYxE3waWglksejBYS
d66UNHsef8JmAOSqg+qKkK3ONkRN0VHpvB/zagX9wHQfJRlAUW7qglFA35u5CCoG
AtUjHBPW6dvbxrB6y3snm/vg1UYk7RBLY0ulBY+6uB0rpvqR4pJSvezrZ5dtmi2f
gTIFZzL7SAg/2SW4BCUvAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0j
BBgwFoAU+y437uOEeicuzRk1sTN8/9REQrkwHQYDVR0OBBYEFPsuN+7jhHonLs0Z
NbEzfP/UREK5MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAjYlt
hEUY8U+zoO9opMAdrDC8Z2awms22qyIZZtM7QbUQnRC6cm4pJCAcAZli05bg4vsM
QtfhWsSWTVTNj8pDU/0quOr4ZcoBwq1gaAafORpR2eCNJvkLTqVTJXojpBzOCBvf
R4iyrT7gJ4eLSYwfqUdYe5byiB0YrrPRpgqU+tvT5TgKa3kSM/tKWTcWQA673vWJ
DPFs0/dRa1419dvAJuoSc06pkZCmF8NsLzjUo3KUQyxi4U5cMj29TH0ZR6LDSeeW
P4+a0zvkEdiLA9z2tmBVGKaBUfPhqBVq6+AL8BQx1rmMRTqoENjwuSfr98t67wVy
lrXEj5ZzxOhWc5y8aVFjvO9nHEMaX3cZHxj4HCUp+UmZKbaSPaKDN7EgkaibMOlq
bLQjk2UEqxHzDh1TJElTHaE/nUiSEeJ9DU/1172iWD54nR4fK/4huxoTtrEoZP2w
AgDHbICivRZQIA9ygV/MlP+7mea6kMvq+cYMwq7FGc4zoWtcu358NFcXrfA/rs3q
r5nsLFR+jM4uElZI7xc7P0peYNLcdDa8pUNjyw9bowJWCZ4kLOGGgYz+qxcs+sji
Mho6/4UIyYOf8kpIEFR3N+2ivEC+5BB09+Rbu7nzifmPQdjH5FCQNYA+HLhNkNPU
98OwoX6EyneSMSy4kLGCenROmxMmtNVQZlR4rmA=
-----END CERTIFICATE-----
# Issuer: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Subject: CN=SSL.com TLS ECC Root CA 2022 O=SSL Corporation
# Label: "SSL.com TLS ECC Root CA 2022"
# Serial: 26605119622390491762507526719404364228
# MD5 Fingerprint: 99:d7:5c:f1:51:36:cc:e9:ce:d9:19:2e:77:71:56:c5
# SHA1 Fingerprint: 9f:5f:d9:1a:54:6d:f5:0c:71:f0:ee:7a:bd:17:49:98:84:73:e2:39
# SHA256 Fingerprint: c3:2f:fd:9f:46:f9:36:d1:6c:36:73:99:09:59:43:4b:9a:d6:0a:af:bb:9e:7c:f3:36:54:f1:44:cc:1b:a1:43
-----BEGIN CERTIFICATE-----
MIICOjCCAcCgAwIBAgIQFAP1q/s3ixdAW+JDsqXRxDAKBggqhkjOPQQDAzBOMQsw
CQYDVQQGEwJVUzEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMSUwIwYDVQQDDBxT
U0wuY29tIFRMUyBFQ0MgUm9vdCBDQSAyMDIyMB4XDTIyMDgyNTE2MzM0OFoXDTQ2
MDgxOTE2MzM0N1owTjELMAkGA1UEBhMCVVMxGDAWBgNVBAoMD1NTTCBDb3Jwb3Jh
dGlvbjElMCMGA1UEAwwcU1NMLmNvbSBUTFMgRUNDIFJvb3QgQ0EgMjAyMjB2MBAG
ByqGSM49AgEGBSuBBAAiA2IABEUpNXP6wrgjzhR9qLFNoFs27iosU8NgCTWyJGYm
acCzldZdkkAZDsalE3D07xJRKF3nzL35PIXBz5SQySvOkkJYWWf9lCcQZIxPBLFN
SeR7T5v15wj4A4j3p8OSSxlUgaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAfBgNVHSME
GDAWgBSJjy+j6CugFFR781a4Jl9nOAuc0DAdBgNVHQ4EFgQUiY8vo+groBRUe/NW
uCZfZzgLnNAwDgYDVR0PAQH/BAQDAgGGMAoGCCqGSM49BAMDA2gAMGUCMFXjIlbp
15IkWE8elDIPDAI2wv2sdDJO4fscgIijzPvX6yv/N33w7deedWo1dlJF4AIxAMeN
b0Igj762TVntd00pxCAgRWSGOlDGxK0tk/UYfXLtqc/ErFc2KAhl3zx5Zn6g6g==
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA ECC TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA ECC TLS 2021"
# Serial: 81873346711060652204712539181482831616
# MD5 Fingerprint: 16:9f:ad:f1:70:ad:79:d6:ed:29:b4:d1:c5:79:70:a8
# SHA1 Fingerprint: 9e:bc:75:10:42:b3:02:f3:81:f4:f7:30:62:d4:8f:c3:a7:51:b2:dd
# SHA256 Fingerprint: b2:fa:e5:3e:14:cc:d7:ab:92:12:06:47:01:ae:27:9c:1d:89:88:fa:cb:77:5f:a8:a0:08:91:4e:66:39:88:a8
-----BEGIN CERTIFICATE-----
MIICFTCCAZugAwIBAgIQPZg7pmY9kGP3fiZXOATvADAKBggqhkjOPQQDAzBMMS4w
LAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgRUNDIFRMUyAyMDIxMQ0w
CwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTI2MjNaFw00MTA0
MTcwOTI2MjJaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBDQSBF
Q0MgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMHYwEAYHKoZI
zj0CAQYFK4EEACIDYgAEloZYKDcKZ9Cg3iQZGeHkBQcfl+3oZIK59sRxUM6KDP/X
tXa7oWyTbIOiaG6l2b4siJVBzV3dscqDY4PMwL502eCdpO5KTlbgmClBk1IQ1SQ4
AjJn8ZQSb+/Xxd4u/RmAo0IwQDAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBR2
KCXWfeBmmnoJsmo7jjPXNtNPojAOBgNVHQ8BAf8EBAMCAYYwCgYIKoZIzj0EAwMD
aAAwZQIwW5kp85wxtolrbNa9d+F851F+uDrNozZffPc8dz7kUK2o59JZDCaOMDtu
CCrCp1rIAjEAmeMM56PDr9NJLkaCI2ZdyQAUEv049OGYa3cpetskz2VAv9LcjBHo
9H1/IISpQuQo
-----END CERTIFICATE-----
# Issuer: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Subject: CN=Atos TrustedRoot Root CA RSA TLS 2021 O=Atos
# Label: "Atos TrustedRoot Root CA RSA TLS 2021"
# Serial: 111436099570196163832749341232207667876
# MD5 Fingerprint: d4:d3:46:b8:9a:c0:9c:76:5d:9e:3a:c3:b9:99:31:d2
# SHA1 Fingerprint: 18:52:3b:0d:06:37:e4:d6:3a:df:23:e4:98:fb:5b:16:fb:86:74:48
# SHA256 Fingerprint: 81:a9:08:8e:a5:9f:b3:64:c5:48:a6:f8:55:59:09:9b:6f:04:05:ef:bf:18:e5:32:4e:c9:f4:57:ba:00:11:2f
-----BEGIN CERTIFICATE-----
MIIFZDCCA0ygAwIBAgIQU9XP5hmTC/srBRLYwiqipDANBgkqhkiG9w0BAQwFADBM
MS4wLAYDVQQDDCVBdG9zIFRydXN0ZWRSb290IFJvb3QgQ0EgUlNBIFRMUyAyMDIx
MQ0wCwYDVQQKDARBdG9zMQswCQYDVQQGEwJERTAeFw0yMTA0MjIwOTIxMTBaFw00
MTA0MTcwOTIxMDlaMEwxLjAsBgNVBAMMJUF0b3MgVHJ1c3RlZFJvb3QgUm9vdCBD
QSBSU0EgVExTIDIwMjExDTALBgNVBAoMBEF0b3MxCzAJBgNVBAYTAkRFMIICIjAN
BgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAtoAOxHm9BYx9sKOdTSJNy/BBl01Z
4NH+VoyX8te9j2y3I49f1cTYQcvyAh5x5en2XssIKl4w8i1mx4QbZFc4nXUtVsYv
Ye+W/CBGvevUez8/fEc4BKkbqlLfEzfTFRVOvV98r61jx3ncCHvVoOX3W3WsgFWZ
kmGbzSoXfduP9LVq6hdKZChmFSlsAvFr1bqjM9xaZ6cF4r9lthawEO3NUDPJcFDs
GY6wx/J0W2tExn2WuZgIWWbeKQGb9Cpt0xU6kGpn8bRrZtkh68rZYnxGEFzedUln
nkL5/nWpo63/dgpnQOPF943HhZpZnmKaau1Fh5hnstVKPNe0OwANwI8f4UDErmwh
3El+fsqyjW22v5MvoVw+j8rtgI5Y4dtXz4U2OLJxpAmMkokIiEjxQGMYsluMWuPD
0xeqqxmjLBvk1cbiZnrXghmmOxYsL3GHX0WelXOTwkKBIROW1527k2gV+p2kHYzy
geBYBr3JtuP2iV2J+axEoctr+hbxx1A9JNr3w+SH1VbxT5Aw+kUJWdo0zuATHAR8
ANSbhqRAvNncTFd+rrcztl524WWLZt+NyteYr842mIycg5kDcPOvdO3GDjbnvezB
c6eUWsuSZIKmAMFwoW4sKeFYV+xafJlrJaSQOoD0IJ2azsct+bJLKZWD6TWNp0lI
pw9MGZHQ9b8Q4HECAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU
dEmZ0f+0emhFdcN+tNzMzjkz2ggwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEB
DAUAA4ICAQAjQ1MkYlxt/T7Cz1UAbMVWiLkO3TriJQ2VSpfKgInuKs1l+NsW4AmS
4BjHeJi78+xCUvuppILXTdiK/ORO/auQxDh1MoSf/7OwKwIzNsAQkG8dnK/haZPs
o0UvFJ/1TCplQ3IM98P4lYsU84UgYt1UU90s3BiVaU+DR3BAM1h3Egyi61IxHkzJ
qM7F78PRreBrAwA0JrRUITWXAdxfG/F851X6LWh3e9NpzNMOa7pNdkTWwhWaJuyw
xfW70Xp0wmzNxbVe9kzmWy2B27O3Opee7c9GslA9hGCZcbUztVdF5kJHdWoOsAgM
rr3e97sPWD2PAzHoPYJQyi9eDF20l74gNAf0xBLh7tew2VktafcxBPTy+av5EzH4
AXcOPUIjJsyacmdRIXrMPIWo6iFqO9taPKU0nprALN+AnCng33eU0aKAQv9qTFsR
0PXNor6uzFFcw9VUewyu1rkGd4Di7wcaaMxZUa1+XGdrudviB0JbuAEFWDlN5LuY
o7Ey7Nmj1m+UI/87tyll5gfp77YZ6ufCOB0yiJA8EytuzO+rdwY0d4RPcuSBhPm5
dDTedk+SKlOxJTnbPP/lPqYO5Wue/9vsL3SD3460s6neFE3/MaNFcyT6lSnMEpcE
oji2jbDwN/zIIX8/syQbPYtuzE2wFg2WHYMfRsCbvUOZ58SWLs5fyQ==
-----END CERTIFICATE-----


@@ -335,7 +335,7 @@ def convert_to_utf8(
# How much to read from a binary file in order to detect encoding.
# In inital tests, 4k was enough for ~160 mostly-English feeds;
# In initial tests, 4k was enough for ~160 mostly-English feeds;
# 64k seems like a safe margin.
CONVERT_FILE_PREFIX_LEN = 2**16


@@ -152,7 +152,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
:rtype: None
"""
data = re.sub(r"<!((?!DOCTYPE|--|\[))", r"&lt;!\1", data, re.IGNORECASE)
data = re.sub(r"<!((?!DOCTYPE|--|\[))", r"&lt;!\1", data, flags=re.IGNORECASE)
data = re.sub(r"<([^<>\s]+?)\s*/>", self._shorttag_replace, data)
data = data.replace("&#39;", "'")
data = data.replace("&#34;", '"')


@@ -192,6 +192,7 @@ class XMLParserMixin(
self.incontributor = 0
self.inpublisher = 0
self.insource = 0
self.isentrylink = 0
self.sourcedata = FeedParserDict()
self.contentparams = FeedParserDict()
@@ -233,7 +234,7 @@ class XMLParserMixin(
if isinstance(baseuri, bytes):
baseuri = baseuri.decode(self.encoding, "ignore")
# ensure that self.baseuri is always an absolute URI that
# uses a whitelisted URI scheme (e.g. not `javscript:`)
# uses a whitelisted URI scheme (e.g. not `javascript:`)
if self.baseuri:
self.baseuri = make_safe_absolute_uri(self.baseuri, baseuri) or self.baseuri
else:
@@ -624,6 +625,7 @@ class XMLParserMixin(
# unhandled character references. fix this special case.
output = output.replace("&amp;", "&")
output = re.sub("&([A-Za-z0-9_]+);", r"&\g<1>", output)
if self.isentrylink or not self.entries[-1].get(element):
self.entries[-1][element] = output
if output:
self.entries[-1]["links"][-1]["href"] = output


@@ -361,21 +361,24 @@ class Namespace:
attrs_d = self._enforce_href(attrs_d)
if "href" in attrs_d:
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
if (
attrs_d.get("rel") == "alternate"
and self.map_content_type(attrs_d.get("type")) in self.html_types
):
self.isentrylink = 1
expecting_text = self.infeed or self.inentry or self.insource
context.setdefault("links", [])
if not (self.inentry and self.inimage):
context["links"].append(FeedParserDict(attrs_d))
if "href" in attrs_d:
if (
attrs_d.get("rel") == "alternate"
and self.map_content_type(attrs_d.get("type")) in self.html_types
):
if self.isentrylink:
context["link"] = attrs_d["href"]
else:
self.push("link", expecting_text)
def _end_link(self):
self.pop("link")
self.isentrylink = 0
def _start_guid(self, attrs_d):
self.guidislink = attrs_d.get("ispermalink", "true") == "true"
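
The `isentrylink` flag added above makes an entry's `link` stick to its rel="alternate" HTML link instead of being overwritten by whichever link element is parsed last. A hedged illustration (the feed content is made up):

import feedparser

xml = """<feed xmlns="http://www.w3.org/2005/Atom"><entry>
<link rel="alternate" type="text/html" href="https://example.com/ep"/>
<link rel="enclosure" type="audio/mpeg" href="https://example.com/ep.mp3"/>
</entry></feed>"""

d = feedparser.parse(xml)
print(d.entries[0].link)   # expected with this fix: https://example.com/ep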


@@ -24,7 +24,7 @@ __version__: str = version
if sys.platform == "win32": # pragma: win32 cover
_FileLock: type[BaseFileLock] = WindowsFileLock
else: # pragma: win32 no cover
else: # pragma: win32 no cover # noqa: PLR5501
if has_fcntl:
_FileLock: type[BaseFileLock] = UnixFileLock
else:
@@ -32,7 +32,7 @@ else: # pragma: win32 no cover
if warnings is not None:
warnings.warn("only soft file lock is available", stacklevel=2)
if TYPE_CHECKING:
if TYPE_CHECKING: # noqa: SIM108
FileLock = SoftFileLock
else:
#: Alias for the lock, which should be used for the current platform.


@@ -8,11 +8,20 @@ import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass
from threading import local
from types import TracebackType
from typing import Any
from typing import TYPE_CHECKING, Any
from ._error import Timeout
if TYPE_CHECKING:
import sys
from types import TracebackType
if sys.version_info >= (3, 11): # pragma: no cover (py311+)
from typing import Self
else: # pragma: no cover (<py311)
from typing_extensions import Self
_LOGGER = logging.getLogger("filelock")
@@ -30,18 +39,16 @@ class AcquireReturnProxy:
def __exit__(
self,
exc_type: type[BaseException] | None, # noqa: U100
exc_value: BaseException | None, # noqa: U100
traceback: TracebackType | None, # noqa: U100
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> None:
self.lock.release()
@dataclass
class FileLockContext:
"""
A dataclass which holds the context for a ``BaseFileLock`` object.
"""
"""A dataclass which holds the context for a ``BaseFileLock`` object."""
# The context is held in a separate class to allow optional use of thread local storage via the
# ThreadLocalFileContext class.
@@ -63,9 +70,7 @@ class FileLockContext:
class ThreadLocalFileContext(FileLockContext, local):
"""
A thread local version of the ``FileLockContext`` class.
"""
"""A thread local version of the ``FileLockContext`` class."""
class BaseFileLock(ABC, contextlib.ContextDecorator):
@@ -73,10 +78,10 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
def __init__(
self,
lock_file: str | os.PathLike[Any],
lock_file: str | os.PathLike[str],
timeout: float = -1,
mode: int = 0o644,
thread_local: bool = True,
thread_local: bool = True, # noqa: FBT001, FBT002
) -> None:
"""
Create a new lock object.
@@ -151,9 +156,7 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
@property
def lock_counter(self) -> int:
"""
:return: The number of times this lock has been acquired (but not yet released).
"""
""":return: The number of times this lock has been acquired (but not yet released)."""
return self._context.lock_counter
def acquire(
@@ -218,13 +221,12 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
if self.is_locked:
_LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
break
elif blocking is False:
if blocking is False:
_LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
raise Timeout(lock_filename)
elif 0 <= timeout < time.perf_counter() - start_time:
raise Timeout(lock_filename) # noqa: TRY301
if 0 <= timeout < time.perf_counter() - start_time:
_LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
raise Timeout(lock_filename)
else:
raise Timeout(lock_filename) # noqa: TRY301
msg = "Lock %s not acquired on %s, waiting %s seconds ..."
_LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
time.sleep(poll_interval)
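The rewritten branch above separates three outcomes of one poll pass: acquired, non-blocking failure, and timeout. A minimal caller-side sketch, assuming the public filelock API (the lock-file name is illustrative):

    from filelock import FileLock, Timeout

    lock = FileLock("demo.txt.lock")
    try:
        with lock.acquire(timeout=5):   # exercises the 0 <= timeout < elapsed branch
            print("lock held, do work")
    except Timeout:
        print("could not acquire the lock within 5 seconds")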
@@ -233,7 +235,7 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
raise
return AcquireReturnProxy(lock=self)
def release(self, force: bool = False) -> None:
def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002
"""
Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also
note, that the lock file itself is not automatically deleted.
@@ -251,7 +253,7 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
self._context.lock_counter = 0
_LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
def __enter__(self) -> BaseFileLock:
def __enter__(self) -> Self:
"""
Acquire the lock.
@@ -262,9 +264,9 @@ class BaseFileLock(ABC, contextlib.ContextDecorator):
def __exit__(
self,
exc_type: type[BaseException] | None, # noqa: U100
exc_value: BaseException | None, # noqa: U100
traceback: TracebackType | None, # noqa: U100
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> None:
"""
Release the lock.

View file

@@ -3,7 +3,7 @@ from __future__ import annotations
from typing import Any
class Timeout(TimeoutError):
class Timeout(TimeoutError): # noqa: N818
"""Raised when the lock could not be acquired in *timeout* seconds."""
def __init__(self, lock_file: str) -> None:

View file

@@ -2,10 +2,12 @@ from __future__ import annotations
import os
import sys
from contextlib import suppress
from errno import EACCES, EEXIST
from pathlib import Path
from ._api import BaseFileLock
from ._util import raise_on_not_writable_file
from ._util import ensure_directory_exists, raise_on_not_writable_file
class SoftFileLock(BaseFileLock):
@@ -13,6 +15,7 @@ class SoftFileLock(BaseFileLock):
def _acquire(self) -> None:
raise_on_not_writable_file(self.lock_file)
ensure_directory_exists(self.lock_file)
# first check for exists and read-only mode as the open will mask this case as EEXIST
flags = (
os.O_WRONLY # open for writing only
@@ -32,12 +35,11 @@ class SoftFileLock(BaseFileLock):
self._context.lock_file_fd = file_handler
def _release(self) -> None:
os.close(self._context.lock_file_fd) # type: ignore # the lock file is definitely not None
assert self._context.lock_file_fd is not None # noqa: S101
os.close(self._context.lock_file_fd) # the lock file is definitely not None
self._context.lock_file_fd = None
try:
os.remove(self.lock_file)
except OSError: # the file is already deleted and that's what we want
pass
with suppress(OSError): # the file is already deleted and that's what we want
Path(self.lock_file).unlink()
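The release path above trades try/except-pass for contextlib.suppress plus pathlib; the two idioms are equivalent, as this small sketch shows (file name illustrative):

    import contextlib
    from pathlib import Path

    try:                                # old idiom
        Path("demo.lock").unlink()
    except OSError:
        pass

    with contextlib.suppress(OSError):  # new idiom used in the diff
        Path("demo.lock").unlink()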
__all__ = [

View file

@@ -2,10 +2,12 @@ from __future__ import annotations
import os
import sys
from contextlib import suppress
from errno import ENOSYS
from typing import cast
from ._api import BaseFileLock
from ._util import ensure_directory_exists
#: a flag to indicate if the fcntl API is available
has_fcntl = False
@@ -32,18 +34,18 @@ else: # pragma: win32 no cover
"""Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
def _acquire(self) -> None:
ensure_directory_exists(self.lock_file)
open_flags = os.O_RDWR | os.O_CREAT | os.O_TRUNC
fd = os.open(self.lock_file, open_flags, self._context.mode)
try:
with suppress(PermissionError): # This lock is not owned by this UID
os.fchmod(fd, self._context.mode)
except PermissionError:
pass # This lock is not owned by this UID
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError as exception:
os.close(fd)
if exception.errno == ENOSYS: # NotImplemented error
raise NotImplementedError("FileSystem does not appear to support flock; user SoftFileLock instead")
msg = "FileSystem does not appear to support flock; user SoftFileLock instead"
raise NotImplementedError(msg) from exception
else:
self._context.lock_file_fd = fd
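For reference, a standalone sketch of the non-blocking flock pattern the hunk implements (POSIX only; the lock-file name is illustrative):

    import fcntl
    import os

    fd = os.open("demo.lock", os.O_RDWR | os.O_CREAT | os.O_TRUNC, 0o644)
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)  # OSError if already held
    except OSError:
        os.close(fd)
        print("lock is busy")
    else:
        print("lock acquired")
        fcntl.flock(fd, fcntl.LOCK_UN)
        os.close(fd)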

View file

@@ -4,6 +4,7 @@ import os
import stat
import sys
from errno import EACCES, EISDIR
from pathlib import Path
def raise_on_not_writable_file(filename: str) -> None:
@@ -12,12 +13,12 @@ def raise_on_not_writable_file(filename: str) -> None:
This is done so files that will never be writable can be separated from
files that are writable but currently locked
:param filename: file to check
:raises OSError: as if the file was opened for writing
:raises OSError: as if the file was opened for writing.
"""
try:
file_stat = os.stat(filename) # use stat to do exists + can write to check without race condition
try: # use stat to do exists + can write to check without race condition
file_stat = os.stat(filename) # noqa: PTH116
except OSError:
return None # swallow does not exist or other errors
return # swallow does not exist or other errors
if file_stat.st_mtime != 0: # if os.stat returns but modification is zero that's an invalid os.stat - ignore it
if not (file_stat.st_mode & stat.S_IWUSR):
@@ -27,11 +28,20 @@ def raise_on_not_writable_file(filename: str) -> None:
if sys.platform == "win32": # pragma: win32 cover
# On Windows, this is PermissionError
raise PermissionError(EACCES, "Permission denied", filename)
else: # pragma: win32 no cover
else: # pragma: win32 no cover # noqa: RET506
# On linux / macOS, this is IsADirectoryError
raise IsADirectoryError(EISDIR, "Is a directory", filename)
def ensure_directory_exists(filename: Path | str) -> None:
"""
Ensure the directory containing the file exists (create it if necessary)
:param filename: file.
"""
Path(filename).parent.mkdir(parents=True, exist_ok=True)
__all__ = [
"raise_on_not_writable_file",
"ensure_directory_exists",
]

View file

@@ -2,11 +2,13 @@ from __future__ import annotations
import os
import sys
from contextlib import suppress
from errno import EACCES
from pathlib import Path
from typing import cast
from ._api import BaseFileLock
from ._util import raise_on_not_writable_file
from ._util import ensure_directory_exists, raise_on_not_writable_file
if sys.platform == "win32": # pragma: win32 cover
import msvcrt
@@ -16,6 +18,7 @@ if sys.platform == "win32": # pragma: win32 cover
def _acquire(self) -> None:
raise_on_not_writable_file(self.lock_file)
ensure_directory_exists(self.lock_file)
flags = (
os.O_RDWR # open for read and write
| os.O_CREAT # create file if not exists
@@ -42,11 +45,8 @@ if sys.platform == "win32": # pragma: win32 cover
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
os.close(fd)
try:
os.remove(self.lock_file)
# Probably another instance of the application has acquired the file lock.
except OSError:
pass
with suppress(OSError): # Probably another instance of the application has acquired the file lock.
Path(self.lock_file).unlink()
else: # pragma: win32 no cover

View file

@@ -1,4 +1,4 @@
# file generated by setuptools_scm
# don't change, don't track in version control
__version__ = version = '3.12.0'
__version_tuple__ = version_tuple = (3, 12, 0)
__version__ = version = '3.12.4'
__version_tuple__ = version_tuple = (3, 12, 4)

View file

@@ -32,7 +32,7 @@ def number_list(s):
if len(l) == 2:
# it is an episode interval, return all numbers in between
return range(l[0], l[1]+1)
return list(range(l[0], l[1]+1))
return l
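The wrap in list() matters because Python 3 range objects are lazy; a caller that concatenates or mutates the result (an assumption about number_list's callers) needs a real list:

    print(range(2, 5) == [2, 3, 4])   # False: a range is not a list
    print(list(range(2, 5)) + [9])    # [2, 3, 4, 9] works as expected
    # range(2, 5) + [9] would raise TypeError on Python 3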

View file

@@ -1,7 +1,7 @@
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re
from typing import Tuple, Optional
from typing import Any, Tuple, Optional
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
@@ -26,24 +26,24 @@ class Codec(codecs.Codec):
return decode(data), len(data)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return "", 0
return b'', 0
labels = _unicode_dots_re.split(data)
trailing_dot = ''
trailing_dot = b''
if labels:
if not labels[-1]:
trailing_dot = '.'
trailing_dot = b'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = '.'
trailing_dot = b'.'
result = []
size = 0
@@ -54,18 +54,21 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
size += len(label)
# Join with U+002E
result_str = '.'.join(result) + trailing_dot # type: ignore
result_bytes = b'.'.join(result) + trailing_dot
size += len(trailing_dot)
return result_str, size
return result_bytes, size
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore
def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return ('', 0)
if not isinstance(data, str):
data = str(data, 'ascii')
labels = _unicode_dots_re.split(data)
trailing_dot = ''
if labels:
@@ -99,13 +102,11 @@ class StreamReader(Codec, codecs.StreamReader):
pass
def getregentry(name: str) -> Optional[codecs.CodecInfo]:
if name != 'idna' and name != 'idna2008':
def search_function(name: str) -> Optional[codecs.CodecInfo]:
if name != 'idna2008':
return None
# Compatibility as a search_function for codecs.register()
return codecs.CodecInfo(
name='idna2008',
name=name,
encode=Codec().encode, # type: ignore
decode=Codec().decode, # type: ignore
incrementalencoder=IncrementalEncoder,
@ -114,4 +115,4 @@ def getregentry(name: str) -> Optional[codecs.CodecInfo]:
streamreader=StreamReader,
)
codecs.register(getregentry)
codecs.register(search_function)
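Registering search_function at import time makes the codec available to the normal str/bytes machinery; a small sketch, assuming this idna version is importable:

    import idna.codec  # noqa: F401 -- the import itself registers 'idna2008'

    print('bücher.example'.encode('idna2008'))          # b'xn--bcher-kva.example'
    print(b'xn--bcher-kva.example'.decode('idna2008'))  # 'bücher.example'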

View file

@@ -338,9 +338,9 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False
def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
if isinstance(s, (bytes, bytearray)):
if not isinstance(s, str):
try:
s = s.decode('ascii')
s = str(s, 'ascii')
except UnicodeDecodeError:
raise IDNAError('should pass a unicode string to the function rather than a byte string.')
if uts46:
@@ -372,8 +372,8 @@ def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool =
def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
try:
if isinstance(s, (bytes, bytearray)):
s = s.decode('ascii')
if not isinstance(s, str):
s = str(s, 'ascii')
except UnicodeDecodeError:
raise IDNAError('Invalid ASCII in A-label')
if uts46:
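The inverted isinstance checks above normalise any non-str input through str(s, 'ascii') before processing, so str, bytes and bytearray are all accepted; a brief sketch of the module-level API:

    import idna

    print(idna.encode('bücher.example'))           # b'xn--bcher-kva.example'
    print(idna.encode(bytearray(b'example.com')))  # b'example.com'
    print(idna.decode(b'xn--bcher-kva.example'))   # 'bücher.example'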

View file

@@ -1834,7 +1834,6 @@ codepoint_classes = {
0xa7d50000a7d6,
0xa7d70000a7d8,
0xa7d90000a7da,
0xa7f20000a7f5,
0xa7f60000a7f8,
0xa7fa0000a828,
0xa82c0000a82d,
@@ -1907,9 +1906,7 @@ codepoint_classes = {
0x1060000010737,
0x1074000010756,
0x1076000010768,
0x1078000010786,
0x10787000107b1,
0x107b2000107bb,
0x1078000010781,
0x1080000010806,
0x1080800010809,
0x1080a00010836,

View file

@@ -1,16 +1,14 @@
# coding: utf-8
from .exceptions import *
from .ext import ExtType, Timestamp
import os
import sys
version = (1, 0, 5)
__version__ = "1.0.5"
version = (1, 0, 6, "rc", 1)
__version__ = "1.0.6rc1"
if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2:
if os.environ.get("MSGPACK_PUREPYTHON"):
from .fallback import Packer, unpackb, Unpacker
else:
try:

View file

@@ -1 +1 @@
version = (1, 0, 4)
version = (1, 0, 6)

View file

@@ -1,23 +1,8 @@
# coding: utf-8
from collections import namedtuple
import datetime
import sys
import struct
PY2 = sys.version_info[0] == 2
if PY2:
int_types = (int, long)
_utc = None
else:
int_types = int
try:
_utc = datetime.timezone.utc
except AttributeError:
_utc = datetime.timezone(datetime.timedelta(0))
class ExtType(namedtuple("ExtType", "code data")):
"""ExtType represents ext type in msgpack."""
@@ -28,14 +13,15 @@ class ExtType(namedtuple("ExtType", "code data")):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
return super().__new__(cls, code, data)
class Timestamp(object):
class Timestamp:
"""Timestamp represents the Timestamp extension type in msgpack.
When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python
msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`.
When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`.
When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and
unpack `Timestamp`.
This class is immutable: Do not override seconds and nanoseconds.
"""
@@ -53,31 +39,25 @@ class Timestamp(object):
Number of nanoseconds to add to `seconds` to get fractional time.
Maximum is 999_999_999. Default is 0.
Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns.
Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns.
"""
if not isinstance(seconds, int_types):
if not isinstance(seconds, int):
raise TypeError("seconds must be an integer")
if not isinstance(nanoseconds, int_types):
if not isinstance(nanoseconds, int):
raise TypeError("nanoseconds must be an integer")
if not (0 <= nanoseconds < 10**9):
raise ValueError(
"nanoseconds must be a non-negative integer less than 999999999."
)
raise ValueError("nanoseconds must be a non-negative integer less than 999999999.")
self.seconds = seconds
self.nanoseconds = nanoseconds
def __repr__(self):
"""String representation of Timestamp."""
return "Timestamp(seconds={0}, nanoseconds={1})".format(
self.seconds, self.nanoseconds
)
return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})"
def __eq__(self, other):
"""Check for equality with another Timestamp object"""
if type(other) is self.__class__:
return (
self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
)
return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
return False
def __ne__(self, other):
@@ -140,7 +120,7 @@ class Timestamp(object):
"""Create a Timestamp from posix timestamp in seconds.
:param unix_float: Posix timestamp in seconds.
:type unix_float: int or float.
:type unix_float: int or float
"""
seconds = int(unix_sec // 1)
nanoseconds = int((unix_sec % 1) * 10**9)
@@ -174,20 +154,15 @@ class Timestamp(object):
def to_datetime(self):
"""Get the timestamp as a UTC datetime.
Python 2 is not supported.
:rtype: datetime.
:rtype: `datetime.datetime`
"""
return datetime.datetime.fromtimestamp(0, _utc) + datetime.timedelta(
seconds=self.to_unix()
)
utc = datetime.timezone.utc
return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix())
@staticmethod
def from_datetime(dt):
"""Create a Timestamp from datetime with tzinfo.
Python 2 is not supported.
:rtype: Timestamp
"""
return Timestamp.from_unix(dt.timestamp())
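A quick roundtrip sketch for the Timestamp helpers shown above:

    import datetime
    from msgpack.ext import Timestamp

    ts = Timestamp.from_unix(1695480000.25)
    print(ts.seconds, ts.nanoseconds)           # 1695480000 250000000
    dt = ts.to_datetime()                       # timezone-aware UTC datetime
    print(dt.tzinfo is datetime.timezone.utc)   # True
    print(Timestamp.from_datetime(dt) == ts)    # True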

View file

@@ -4,39 +4,6 @@ import sys
import struct
PY2 = sys.version_info[0] == 2
if PY2:
int_types = (int, long)
def dict_iteritems(d):
return d.iteritems()
else:
int_types = int
unicode = str
xrange = range
def dict_iteritems(d):
return d.items()
if sys.version_info < (3, 5):
# Ugly hack...
RecursionError = RuntimeError
def _is_recursionerror(e):
return (
len(e.args) == 1
and isinstance(e.args[0], str)
and e.args[0].startswith("maximum recursion depth exceeded")
)
else:
def _is_recursionerror(e):
return True
if hasattr(sys, "pypy_version_info"):
# StringIO is slow on PyPy, StringIO is faster. However: PyPy's own
# StringBuilder is fastest.
@@ -48,7 +15,7 @@ if hasattr(sys, "pypy_version_info"):
from __pypy__.builders import StringBuilder
USING_STRINGBUILDER = True
class StringIO(object):
class StringIO:
def __init__(self, s=b""):
if s:
self.builder = StringBuilder(len(s))
@@ -125,24 +92,13 @@ def unpackb(packed, **kwargs):
ret = unpacker._unpack()
except OutOfData:
raise ValueError("Unpack failed: incomplete input")
except RecursionError as e:
if _is_recursionerror(e):
except RecursionError:
raise StackError
raise
if unpacker._got_extradata():
raise ExtraData(ret, unpacker._get_extradata())
return ret
if sys.version_info < (2, 7, 6):
def _unpack_from(f, b, o=0):
"""Explicit type cast for legacy struct.unpack_from"""
return struct.unpack_from(f, bytes(b), o)
else:
_unpack_from = struct.unpack_from
_NO_FORMAT_USED = ""
_MSGPACK_HEADERS = {
0xC4: (1, _NO_FORMAT_USED, TYPE_BIN),
@@ -176,14 +132,14 @@ _MSGPACK_HEADERS = {
}
class Unpacker(object):
class Unpacker:
"""Streaming unpacker.
Arguments:
:param file_like:
File-like object having `.read(n)` method.
If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
If specified, unpacker reads serialized data from it and `.feed()` is not usable.
:param int read_size:
Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
@@ -202,17 +158,17 @@ class Unpacker(object):
0 - Timestamp
1 - float (Seconds from the EPOCH)
2 - int (Nanoseconds from the EPOCH)
3 - datetime.datetime (UTC). Python 2 is not supported.
3 - datetime.datetime (UTC).
:param bool strict_map_key:
If true (default), only str or bytes are accepted for map (dict) keys.
:param callable object_hook:
:param object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
(See also simplejson)
:param callable object_pairs_hook:
:param object_pairs_hook:
When specified, it should be callable.
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
@@ -359,9 +315,7 @@ class Unpacker(object):
if object_pairs_hook is not None and not callable(object_pairs_hook):
raise TypeError("`object_pairs_hook` is not callable")
if object_hook is not None and object_pairs_hook is not None:
raise TypeError(
"object_pairs_hook and object_hook are mutually " "exclusive"
)
raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
if not callable(ext_hook):
raise TypeError("`ext_hook` is not callable")
@@ -453,20 +407,18 @@ class Unpacker(object):
n = b & 0b00011111
typ = TYPE_RAW
if n > self._max_str_len:
raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif b & 0b11110000 == 0b10010000:
n = b & 0b00001111
typ = TYPE_ARRAY
if n > self._max_array_len:
raise ValueError(
"%s exceeds max_array_len(%s)" % (n, self._max_array_len)
)
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif b & 0b11110000 == 0b10000000:
n = b & 0b00001111
typ = TYPE_MAP
if n > self._max_map_len:
raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len))
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
elif b == 0xC0:
obj = None
elif b == 0xC2:
@@ -477,65 +429,61 @@ class Unpacker(object):
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
n = _unpack_from(fmt, self._buffer, self._buff_i)[0]
n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_bin_len:
raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})")
obj = self._read(n)
elif 0xC7 <= b <= 0xC9:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
L, n = _unpack_from(fmt, self._buffer, self._buff_i)
L, n = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if L > self._max_ext_len:
raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})")
obj = self._read(L)
elif 0xCA <= b <= 0xD3:
size, fmt = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
obj = _unpack_from(fmt, self._buffer, self._buff_i)[0]
obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
obj = self._buffer[self._buff_i]
self._buff_i += size
elif 0xD4 <= b <= 0xD8:
size, fmt, typ = _MSGPACK_HEADERS[b]
if self._max_ext_len < size:
raise ValueError(
"%s exceeds max_ext_len(%s)" % (size, self._max_ext_len)
)
raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})")
self._reserve(size + 1)
n, obj = _unpack_from(fmt, self._buffer, self._buff_i)
n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size + 1
elif 0xD9 <= b <= 0xDB:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
(n,) = _unpack_from(fmt, self._buffer, self._buff_i)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_str_len:
raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif 0xDC <= b <= 0xDD:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = _unpack_from(fmt, self._buffer, self._buff_i)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_array_len:
raise ValueError(
"%s exceeds max_array_len(%s)" % (n, self._max_array_len)
)
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif 0xDE <= b <= 0xDF:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = _unpack_from(fmt, self._buffer, self._buff_i)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_map_len:
raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len))
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
else:
raise FormatError("Unknown header: 0x%x" % b)
return typ, n, obj
@@ -554,12 +502,12 @@ class Unpacker(object):
# TODO should we eliminate the recursion?
if typ == TYPE_ARRAY:
if execute == EX_SKIP:
for i in xrange(n):
for i in range(n):
# TODO check whether we need to call `list_hook`
self._unpack(EX_SKIP)
return
ret = newlist_hint(n)
for i in xrange(n):
for i in range(n):
ret.append(self._unpack(EX_CONSTRUCT))
if self._list_hook is not None:
ret = self._list_hook(ret)
@@ -567,25 +515,22 @@ class Unpacker(object):
return ret if self._use_list else tuple(ret)
if typ == TYPE_MAP:
if execute == EX_SKIP:
for i in xrange(n):
for i in range(n):
# TODO check whether we need to call hooks
self._unpack(EX_SKIP)
self._unpack(EX_SKIP)
return
if self._object_pairs_hook is not None:
ret = self._object_pairs_hook(
(self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT))
for _ in xrange(n)
(self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n)
)
else:
ret = {}
for _ in xrange(n):
for _ in range(n):
key = self._unpack(EX_CONSTRUCT)
if self._strict_map_key and type(key) not in (unicode, bytes):
raise ValueError(
"%s is not allowed for map key" % str(type(key))
)
if not PY2 and type(key) is str:
if self._strict_map_key and type(key) not in (str, bytes):
raise ValueError("%s is not allowed for map key" % str(type(key)))
if isinstance(key, str):
key = sys.intern(key)
ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
@@ -659,7 +604,7 @@ class Unpacker(object):
return self._stream_offset
class Packer(object):
class Packer:
"""
MessagePack Packer
@@ -671,7 +616,8 @@ class Packer(object):
Packer's constructor has some keyword arguments:
:param callable default:
:param default:
When specified, it should be callable.
Convert user type to builtin type that Packer supports.
See also simplejson's document.
@@ -698,7 +644,6 @@ class Packer(object):
If set to true, datetime with tzinfo is packed into Timestamp type.
Note that the tzinfo is stripped in the timestamp.
You can get UTC datetime with `timestamp=3` option of the Unpacker.
(Python 2 is not supported).
:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
@@ -743,8 +688,6 @@ class Packer(object):
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._buffer = StringIO()
if PY2 and datetime:
raise ValueError("datetime is not supported in Python 2")
self._datetime = bool(datetime)
self._unicode_errors = unicode_errors or "strict"
if default is not None:
@@ -774,7 +717,7 @@ class Packer(object):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if check(obj, int_types):
if check(obj, int):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
@@ -806,7 +749,7 @@ class Packer(object):
raise ValueError("%s is too large" % type(obj).__name__)
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, unicode):
if check(obj, str):
obj = obj.encode("utf-8", self._unicode_errors)
n = len(obj)
if n >= 2**32:
@@ -855,13 +798,11 @@ class Packer(object):
if check(obj, list_types):
n = len(obj)
self._pack_array_header(n)
for i in xrange(n):
for i in range(n):
self._pack(obj[i], nest_limit - 1)
return
if check(obj, dict):
return self._pack_map_pairs(
len(obj), dict_iteritems(obj), nest_limit - 1
)
return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1)
if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None:
obj = Timestamp.from_datetime(obj)
@@ -874,9 +815,9 @@ class Packer(object):
continue
if self._datetime and check(obj, _DateTime):
raise ValueError("Cannot serialize %r where tzinfo=None" % (obj,))
raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None")
raise TypeError("Cannot serialize %r" % (obj,))
raise TypeError(f"Cannot serialize {obj!r}")
def pack(self, obj):
try:
@@ -963,7 +904,7 @@ class Packer(object):
def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
self._pack_map_header(n)
for (k, v) in pairs:
for k, v in pairs:
self._pack(k, nest_limit - 1)
self._pack(v, nest_limit - 1)
@@ -1004,7 +945,7 @@ class Packer(object):
def getbuffer(self):
"""Return view of internal buffer."""
if USING_STRINGBUILDER or PY2:
if USING_STRINGBUILDER:
return memoryview(self.bytes())
else:
return self._buffer.getbuffer()
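An end-to-end sketch of the datetime/timestamp options the hunks above document (exact equality can be limited by float rounding of the epoch seconds):

    import datetime
    import msgpack

    now = datetime.datetime.now(datetime.timezone.utc)
    packed = msgpack.packb({"when": now}, datetime=True)  # tz-aware -> Timestamp
    obj = msgpack.unpackb(packed, timestamp=3)            # 3 -> UTC datetime
    print(obj["when"], now)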

View file

@@ -13,11 +13,8 @@ The package resource API is designed to work with normal filesystem packages,
.zip files and with custom PEP 302 loaders that support the ``get_data()``
method.
This module is deprecated. Users are directed to
`importlib.resources <https://docs.python.org/3/library/importlib.resources.html>`_
and
`importlib.metadata <https://docs.python.org/3/library/importlib.metadata.html>`_
instead.
This module is deprecated. Users are directed to :mod:`importlib.resources`,
:mod:`importlib.metadata` and :pypi:`packaging` instead.
"""
import sys
@@ -118,7 +115,12 @@ _namespace_handlers = None
_namespace_packages = None
warnings.warn("pkg_resources is deprecated as an API", DeprecationWarning)
warnings.warn(
"pkg_resources is deprecated as an API. "
"See https://setuptools.pypa.io/en/latest/pkg_resources.html",
DeprecationWarning,
stacklevel=2,
)
_PEP440_FALLBACK = re.compile(r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I)
@@ -1416,7 +1418,7 @@ def _forgiving_version(version):
match = _PEP440_FALLBACK.search(version)
if match:
safe = match["safe"]
rest = version[len(safe):]
rest = version[len(safe) :]
else:
safe = "0"
rest = version
@@ -1659,10 +1661,9 @@ is not allowed.
# for compatibility, warn; in future
# raise ValueError(msg)
warnings.warn(
issue_warning(
msg[:-1] + " and will raise exceptions in a future release.",
DeprecationWarning,
stacklevel=4,
)
def _get(self, path):
@@ -3046,6 +3047,9 @@ class Distribution:
except ValueError:
issue_warning("Unbuilt egg for " + repr(self))
return False
except SystemError:
# TODO: remove this except clause when python/cpython#103632 is fixed.
return False
return True
def clone(self, **kw):
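The reworded warning points at the replacements; a minimal migration sketch using only stdlib APIs (the package name "mypackage" is illustrative):

    from importlib.metadata import version
    from importlib.resources import files

    print(version("setuptools"))  # was pkg_resources.get_distribution("setuptools").version
    # files("mypackage") / "data.txt" replaces the pkg_resources.resource_* helpers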

View file

@@ -4,6 +4,7 @@ import inspect
import collections
import types
import itertools
import warnings
import pkg_resources.extern.more_itertools
@@ -266,11 +267,33 @@ def result_invoke(action):
return wrap
def call_aside(f, *args, **kwargs):
def invoke(f, *args, **kwargs):
"""
Call a function for its side effect after initialization.
>>> @call_aside
The benefit of using the decorator instead of simply invoking a function
after defining it is that it makes explicit the author's intent for the
function to be called immediately. Whereas if one simply calls the
function immediately, it's less obvious if that was intentional or
incidental. It also avoids repeating the name - the two actions, defining
the function and calling it immediately are modeled separately, but linked
by the decorator construct.
The benefit of having a function construct (opposed to just invoking some
behavior inline) is to serve as a scope in which the behavior occurs. It
avoids polluting the global namespace with local variables, provides an
anchor on which to attach documentation (docstring), keeps the behavior
logically separated (instead of conceptually separated or not separated at
all), and provides potential to re-use the behavior for testing or other
purposes.
This function is named as a pithy way to communicate, "call this function
primarily for its side effect", or "while defining this function, also
take it aside and call it". It exists because there's no Python construct
for "define and call" (nor should there be, as decorators serve this need
just fine). The behavior happens immediately and synchronously.
>>> @invoke
... def func(): print("called")
called
>>> func()
@@ -278,7 +301,7 @@ def call_aside(f, *args, **kwargs):
Use functools.partial to pass parameters to the initial call
>>> @functools.partial(call_aside, name='bingo')
>>> @functools.partial(invoke, name='bingo')
... def func(name): print("called with", name)
called with bingo
"""
@@ -286,6 +309,14 @@ def call_aside(f, *args, **kwargs):
return f
def call_aside(*args, **kwargs):
"""
Deprecated name for invoke.
"""
warnings.warn("call_aside is deprecated, use invoke", DeprecationWarning)
return invoke(*args, **kwargs)
class Throttler:
"""
Rate-limit a function (or other callable)

View file

@@ -3,4 +3,4 @@
from .more import * # noqa
from .recipes import * # noqa
__version__ = '9.0.0'
__version__ = '9.1.0'

View file

@@ -68,6 +68,7 @@ __all__ = [
'exactly_n',
'filter_except',
'first',
'gray_product',
'groupby_transform',
'ichunked',
'iequals',
@@ -658,6 +659,7 @@ def distinct_permutations(iterable, r=None):
[(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
"""
# Algorithm: https://w.wiki/Qai
def _full(A):
while True:
@@ -1301,7 +1303,7 @@ def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
[[0], [2], [4, 5, 6, 7, 8, 9]]
By default, the delimiting items are not included in the output.
The include them, set *keep_separator* to ``True``.
To include them, set *keep_separator* to ``True``.
>>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
[['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
@@ -1391,7 +1393,9 @@ def split_after(iterable, pred, maxsplit=-1):
if pred(item) and buf:
yield buf
if maxsplit == 1:
yield list(it)
buf = list(it)
if buf:
yield buf
return
buf = []
maxsplit -= 1
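The rewritten tail handling stops split_after from emitting a spurious empty group when maxsplit is exhausted and the input ends on a separator; a sketch, assuming this more-itertools version:

    from more_itertools import split_after

    print(list(split_after('one1two2', lambda c: c.isdigit(), maxsplit=1)))
    # [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
    print(list(split_after('one1', lambda c: c.isdigit(), maxsplit=1)))
    # [['o', 'n', 'e', '1']] -- previously followed by a bogus []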
@@ -2914,6 +2918,7 @@ def make_decorator(wrapping_func, result_index=0):
'7'
"""
# See https://sites.google.com/site/bbayles/index/decorator_factory for
# notes on how this works.
def decorator(*wrapping_args, **wrapping_kwargs):
@@ -3464,7 +3469,6 @@ def _sample_unweighted(iterable, k):
next_index = k + floor(log(random()) / log(1 - W))
for index, element in enumerate(iterable, k):
if index == next_index:
reservoir[randrange(k)] = element
# The new W is the largest in a sample of k U(0, `old_W`) numbers
@@ -4283,7 +4287,6 @@ def minmax(iterable_or_value, *others, key=None, default=_marker):
lo_key = hi_key = key(lo)
for x, y in zip_longest(it, it, fillvalue=lo):
x_key, y_key = key(x), key(y)
if y_key < x_key:
@@ -4344,3 +4347,45 @@ def constrained_batches(
if batch:
yield tuple(batch)
def gray_product(*iterables):
"""Like :func:`itertools.product`, but return tuples in an order such
that only one element in the generated tuple changes from one iteration
to the next.
>>> list(gray_product('AB','CD'))
[('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')]
This function consumes all of the input iterables before producing output.
If any of the input iterables have fewer than two items, ``ValueError``
is raised.
For information on the algorithm, see
`this section <https://www-cs-faculty.stanford.edu/~knuth/fasc2a.ps.gz>`__
of Donald Knuth's *The Art of Computer Programming*.
"""
all_iterables = tuple(tuple(x) for x in iterables)
iterable_count = len(all_iterables)
for iterable in all_iterables:
if len(iterable) < 2:
raise ValueError("each iterable must have two or more items")
# This is based on "Algorithm H" from section 7.2.1.1, page 20.
# a holds the indexes of the source iterables for the n-tuple to be yielded
# f is the array of "focus pointers"
# o is the array of "directions"
a = [0] * iterable_count
f = list(range(iterable_count + 1))
o = [1] * iterable_count
while True:
yield tuple(all_iterables[i][a[i]] for i in range(iterable_count))
j = f[0]
f[0] = 0
if j == iterable_count:
break
a[j] = a[j] + o[j]
if a[j] == 0 or a[j] == len(all_iterables[j]) - 1:
o[j] = -o[j]
f[j] = f[j + 1]
f[j + 1] = j + 1
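A contrast sketch makes the ordering property concrete: successive gray_product tuples differ in exactly one position, unlike itertools.product:

    from itertools import product
    from more_itertools import gray_product

    print(list(product('AB', 'CD')))
    # [('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D')]
    print(list(gray_product('AB', 'CD')))
    # [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')]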

View file

@@ -1,26 +1,25 @@
"""Stubs for more_itertools.more"""
from __future__ import annotations
from types import TracebackType
from typing import (
Any,
Callable,
Container,
Dict,
ContextManager,
Generic,
Hashable,
Iterable,
Iterator,
List,
Optional,
overload,
Reversible,
Sequence,
Sized,
Tuple,
Union,
Type,
TypeVar,
type_check_only,
)
from types import TracebackType
from typing_extensions import ContextManager, Protocol, Type, overload
from typing_extensions import Protocol
# Type and type variable definitions
_T = TypeVar('_T')
@@ -31,7 +30,7 @@ _V = TypeVar('_V')
_W = TypeVar('_W')
_T_co = TypeVar('_T_co', covariant=True)
_GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[object]])
_Raisable = Union[BaseException, 'Type[BaseException]']
_Raisable = BaseException | Type[BaseException]
@type_check_only
class _SizedIterable(Protocol[_T_co], Sized, Iterable[_T_co]): ...
@@ -39,23 +38,25 @@ class _SizedIterable(Protocol[_T_co], Sized, Iterable[_T_co]): ...
@type_check_only
class _SizedReversible(Protocol[_T_co], Sized, Reversible[_T_co]): ...
@type_check_only
class _SupportsSlicing(Protocol[_T_co]):
def __getitem__(self, __k: slice) -> _T_co: ...
def chunked(
iterable: Iterable[_T], n: Optional[int], strict: bool = ...
) -> Iterator[List[_T]]: ...
iterable: Iterable[_T], n: int | None, strict: bool = ...
) -> Iterator[list[_T]]: ...
@overload
def first(iterable: Iterable[_T]) -> _T: ...
@overload
def first(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ...
def first(iterable: Iterable[_T], default: _U) -> _T | _U: ...
@overload
def last(iterable: Iterable[_T]) -> _T: ...
@overload
def last(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ...
def last(iterable: Iterable[_T], default: _U) -> _T | _U: ...
@overload
def nth_or_last(iterable: Iterable[_T], n: int) -> _T: ...
@overload
def nth_or_last(
iterable: Iterable[_T], n: int, default: _U
) -> Union[_T, _U]: ...
def nth_or_last(iterable: Iterable[_T], n: int, default: _U) -> _T | _U: ...
class peekable(Generic[_T], Iterator[_T]):
def __init__(self, iterable: Iterable[_T]) -> None: ...
@@ -64,13 +65,13 @@ class peekable(Generic[_T], Iterator[_T]):
@overload
def peek(self) -> _T: ...
@overload
def peek(self, default: _U) -> Union[_T, _U]: ...
def peek(self, default: _U) -> _T | _U: ...
def prepend(self, *items: _T) -> None: ...
def __next__(self) -> _T: ...
@overload
def __getitem__(self, index: int) -> _T: ...
@overload
def __getitem__(self, index: slice) -> List[_T]: ...
def __getitem__(self, index: slice) -> list[_T]: ...
def consumer(func: _GenFn) -> _GenFn: ...
def ilen(iterable: Iterable[object]) -> int: ...
@@ -80,42 +81,42 @@ def with_iter(
) -> Iterator[_T]: ...
def one(
iterable: Iterable[_T],
too_short: Optional[_Raisable] = ...,
too_long: Optional[_Raisable] = ...,
too_short: _Raisable | None = ...,
too_long: _Raisable | None = ...,
) -> _T: ...
def raise_(exception: _Raisable, *args: Any) -> None: ...
def strictly_n(
iterable: Iterable[_T],
n: int,
too_short: Optional[_GenFn] = ...,
too_long: Optional[_GenFn] = ...,
) -> List[_T]: ...
too_short: _GenFn | None = ...,
too_long: _GenFn | None = ...,
) -> list[_T]: ...
def distinct_permutations(
iterable: Iterable[_T], r: Optional[int] = ...
) -> Iterator[Tuple[_T, ...]]: ...
iterable: Iterable[_T], r: int | None = ...
) -> Iterator[tuple[_T, ...]]: ...
def intersperse(
e: _U, iterable: Iterable[_T], n: int = ...
) -> Iterator[Union[_T, _U]]: ...
def unique_to_each(*iterables: Iterable[_T]) -> List[List[_T]]: ...
) -> Iterator[_T | _U]: ...
def unique_to_each(*iterables: Iterable[_T]) -> list[list[_T]]: ...
@overload
def windowed(
seq: Iterable[_T], n: int, *, step: int = ...
) -> Iterator[Tuple[Optional[_T], ...]]: ...
) -> Iterator[tuple[_T | None, ...]]: ...
@overload
def windowed(
seq: Iterable[_T], n: int, fillvalue: _U, step: int = ...
) -> Iterator[Tuple[Union[_T, _U], ...]]: ...
def substrings(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ...
) -> Iterator[tuple[_T | _U, ...]]: ...
def substrings(iterable: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ...
def substrings_indexes(
seq: Sequence[_T], reverse: bool = ...
) -> Iterator[Tuple[Sequence[_T], int, int]]: ...
) -> Iterator[tuple[Sequence[_T], int, int]]: ...
class bucket(Generic[_T, _U], Container[_U]):
def __init__(
self,
iterable: Iterable[_T],
key: Callable[[_T], _U],
validator: Optional[Callable[[object], object]] = ...,
validator: Callable[[object], object] | None = ...,
) -> None: ...
def __contains__(self, value: object) -> bool: ...
def __iter__(self) -> Iterator[_U]: ...
@@ -123,109 +124,105 @@ class bucket(Generic[_T, _U], Container[_U]):
def spy(
iterable: Iterable[_T], n: int = ...
) -> Tuple[List[_T], Iterator[_T]]: ...
) -> tuple[list[_T], Iterator[_T]]: ...
def interleave(*iterables: Iterable[_T]) -> Iterator[_T]: ...
def interleave_longest(*iterables: Iterable[_T]) -> Iterator[_T]: ...
def interleave_evenly(
iterables: List[Iterable[_T]], lengths: Optional[List[int]] = ...
iterables: list[Iterable[_T]], lengths: list[int] | None = ...
) -> Iterator[_T]: ...
def collapse(
iterable: Iterable[Any],
base_type: Optional[type] = ...,
levels: Optional[int] = ...,
base_type: type | None = ...,
levels: int | None = ...,
) -> Iterator[Any]: ...
@overload
def side_effect(
func: Callable[[_T], object],
iterable: Iterable[_T],
chunk_size: None = ...,
before: Optional[Callable[[], object]] = ...,
after: Optional[Callable[[], object]] = ...,
before: Callable[[], object] | None = ...,
after: Callable[[], object] | None = ...,
) -> Iterator[_T]: ...
@overload
def side_effect(
func: Callable[[List[_T]], object],
func: Callable[[list[_T]], object],
iterable: Iterable[_T],
chunk_size: int,
before: Optional[Callable[[], object]] = ...,
after: Optional[Callable[[], object]] = ...,
before: Callable[[], object] | None = ...,
after: Callable[[], object] | None = ...,
) -> Iterator[_T]: ...
def sliced(
seq: Sequence[_T], n: int, strict: bool = ...
) -> Iterator[Sequence[_T]]: ...
seq: _SupportsSlicing[_T], n: int, strict: bool = ...
) -> Iterator[_T]: ...
def split_at(
iterable: Iterable[_T],
pred: Callable[[_T], object],
maxsplit: int = ...,
keep_separator: bool = ...,
) -> Iterator[List[_T]]: ...
) -> Iterator[list[_T]]: ...
def split_before(
iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ...
) -> Iterator[List[_T]]: ...
) -> Iterator[list[_T]]: ...
def split_after(
iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ...
) -> Iterator[List[_T]]: ...
) -> Iterator[list[_T]]: ...
def split_when(
iterable: Iterable[_T],
pred: Callable[[_T, _T], object],
maxsplit: int = ...,
) -> Iterator[List[_T]]: ...
) -> Iterator[list[_T]]: ...
def split_into(
iterable: Iterable[_T], sizes: Iterable[Optional[int]]
) -> Iterator[List[_T]]: ...
iterable: Iterable[_T], sizes: Iterable[int | None]
) -> Iterator[list[_T]]: ...
@overload
def padded(
iterable: Iterable[_T],
*,
n: Optional[int] = ...,
n: int | None = ...,
next_multiple: bool = ...,
) -> Iterator[Optional[_T]]: ...
) -> Iterator[_T | None]: ...
@overload
def padded(
iterable: Iterable[_T],
fillvalue: _U,
n: Optional[int] = ...,
n: int | None = ...,
next_multiple: bool = ...,
) -> Iterator[Union[_T, _U]]: ...
) -> Iterator[_T | _U]: ...
@overload
def repeat_last(iterable: Iterable[_T]) -> Iterator[_T]: ...
@overload
def repeat_last(
iterable: Iterable[_T], default: _U
) -> Iterator[Union[_T, _U]]: ...
def distribute(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ...
def repeat_last(iterable: Iterable[_T], default: _U) -> Iterator[_T | _U]: ...
def distribute(n: int, iterable: Iterable[_T]) -> list[Iterator[_T]]: ...
@overload
def stagger(
iterable: Iterable[_T],
offsets: _SizedIterable[int] = ...,
longest: bool = ...,
) -> Iterator[Tuple[Optional[_T], ...]]: ...
) -> Iterator[tuple[_T | None, ...]]: ...
@overload
def stagger(
iterable: Iterable[_T],
offsets: _SizedIterable[int] = ...,
longest: bool = ...,
fillvalue: _U = ...,
) -> Iterator[Tuple[Union[_T, _U], ...]]: ...
) -> Iterator[tuple[_T | _U, ...]]: ...
class UnequalIterablesError(ValueError):
def __init__(
self, details: Optional[Tuple[int, int, int]] = ...
) -> None: ...
def __init__(self, details: tuple[int, int, int] | None = ...) -> None: ...
@overload
def zip_equal(__iter1: Iterable[_T1]) -> Iterator[Tuple[_T1]]: ...
def zip_equal(__iter1: Iterable[_T1]) -> Iterator[tuple[_T1]]: ...
@overload
def zip_equal(
__iter1: Iterable[_T1], __iter2: Iterable[_T2]
) -> Iterator[Tuple[_T1, _T2]]: ...
) -> Iterator[tuple[_T1, _T2]]: ...
@overload
def zip_equal(
__iter1: Iterable[_T],
__iter2: Iterable[_T],
__iter3: Iterable[_T],
*iterables: Iterable[_T],
) -> Iterator[Tuple[_T, ...]]: ...
) -> Iterator[tuple[_T, ...]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T1],
@@ -233,7 +230,7 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: None = None,
) -> Iterator[Tuple[Optional[_T1]]]: ...
) -> Iterator[tuple[_T1 | None]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T1],
@@ -242,7 +239,7 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: None = None,
) -> Iterator[Tuple[Optional[_T1], Optional[_T2]]]: ...
) -> Iterator[tuple[_T1 | None, _T2 | None]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T],
@@ -252,7 +249,7 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: None = None,
) -> Iterator[Tuple[Optional[_T], ...]]: ...
) -> Iterator[tuple[_T | None, ...]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T1],
@@ -260,7 +257,7 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: _U,
) -> Iterator[Tuple[Union[_T1, _U]]]: ...
) -> Iterator[tuple[_T1 | _U]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T1],
@@ -269,7 +266,7 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: _U,
) -> Iterator[Tuple[Union[_T1, _U], Union[_T2, _U]]]: ...
) -> Iterator[tuple[_T1 | _U, _T2 | _U]]: ...
@overload
def zip_offset(
__iter1: Iterable[_T],
@@ -279,82 +276,80 @@ def zip_offset(
offsets: _SizedIterable[int],
longest: bool = ...,
fillvalue: _U,
) -> Iterator[Tuple[Union[_T, _U], ...]]: ...
) -> Iterator[tuple[_T | _U, ...]]: ...
def sort_together(
iterables: Iterable[Iterable[_T]],
key_list: Iterable[int] = ...,
key: Optional[Callable[..., Any]] = ...,
key: Callable[..., Any] | None = ...,
reverse: bool = ...,
) -> List[Tuple[_T, ...]]: ...
def unzip(iterable: Iterable[Sequence[_T]]) -> Tuple[Iterator[_T], ...]: ...
def divide(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ...
) -> list[tuple[_T, ...]]: ...
def unzip(iterable: Iterable[Sequence[_T]]) -> tuple[Iterator[_T], ...]: ...
def divide(n: int, iterable: Iterable[_T]) -> list[Iterator[_T]]: ...
def always_iterable(
obj: object,
base_type: Union[
type, Tuple[Union[type, Tuple[Any, ...]], ...], None
] = ...,
base_type: type | tuple[type | tuple[Any, ...], ...] | None = ...,
) -> Iterator[Any]: ...
def adjacent(
predicate: Callable[[_T], bool],
iterable: Iterable[_T],
distance: int = ...,
) -> Iterator[Tuple[bool, _T]]: ...
) -> Iterator[tuple[bool, _T]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: None = None,
valuefunc: None = None,
reducefunc: None = None,
) -> Iterator[Tuple[_T, Iterator[_T]]]: ...
) -> Iterator[tuple[_T, Iterator[_T]]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: None,
reducefunc: None,
) -> Iterator[Tuple[_U, Iterator[_T]]]: ...
) -> Iterator[tuple[_U, Iterator[_T]]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: None,
valuefunc: Callable[[_T], _V],
reducefunc: None,
) -> Iterable[Tuple[_T, Iterable[_V]]]: ...
) -> Iterable[tuple[_T, Iterable[_V]]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: Callable[[_T], _V],
reducefunc: None,
) -> Iterable[Tuple[_U, Iterator[_V]]]: ...
) -> Iterable[tuple[_U, Iterator[_V]]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: None,
valuefunc: None,
reducefunc: Callable[[Iterator[_T]], _W],
) -> Iterable[Tuple[_T, _W]]: ...
) -> Iterable[tuple[_T, _W]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: None,
reducefunc: Callable[[Iterator[_T]], _W],
) -> Iterable[Tuple[_U, _W]]: ...
) -> Iterable[tuple[_U, _W]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: None,
valuefunc: Callable[[_T], _V],
reducefunc: Callable[[Iterable[_V]], _W],
) -> Iterable[Tuple[_T, _W]]: ...
) -> Iterable[tuple[_T, _W]]: ...
@overload
def groupby_transform(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: Callable[[_T], _V],
reducefunc: Callable[[Iterable[_V]], _W],
) -> Iterable[Tuple[_U, _W]]: ...
) -> Iterable[tuple[_U, _W]]: ...
class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]):
@overload
@@ -375,22 +370,22 @@ class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]):
def __len__(self) -> int: ...
def __reduce__(
self,
) -> Tuple[Type[numeric_range[_T, _U]], Tuple[_T, _T, _U]]: ...
) -> tuple[Type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ...
def __repr__(self) -> str: ...
def __reversed__(self) -> Iterator[_T]: ...
def count(self, value: _T) -> int: ...
def index(self, value: _T) -> int: ... # type: ignore
def count_cycle(
iterable: Iterable[_T], n: Optional[int] = ...
) -> Iterable[Tuple[int, _T]]: ...
iterable: Iterable[_T], n: int | None = ...
) -> Iterable[tuple[int, _T]]: ...
def mark_ends(
iterable: Iterable[_T],
) -> Iterable[Tuple[bool, bool, _T]]: ...
) -> Iterable[tuple[bool, bool, _T]]: ...
def locate(
iterable: Iterable[object],
pred: Callable[..., Any] = ...,
window_size: Optional[int] = ...,
window_size: int | None = ...,
) -> Iterator[int]: ...
def lstrip(
iterable: Iterable[_T], pred: Callable[[_T], object]
@@ -403,9 +398,7 @@ def strip(
) -> Iterator[_T]: ...
class islice_extended(Generic[_T], Iterator[_T]):
def __init__(
self, iterable: Iterable[_T], *args: Optional[int]
) -> None: ...
def __init__(self, iterable: Iterable[_T], *args: int | None) -> None: ...
def __iter__(self) -> islice_extended[_T]: ...
def __next__(self) -> _T: ...
def __getitem__(self, index: slice) -> islice_extended[_T]: ...
@@ -420,7 +413,7 @@ def difference(
func: Callable[[_T, _T], _U] = ...,
*,
initial: None = ...,
) -> Iterator[Union[_T, _U]]: ...
) -> Iterator[_T | _U]: ...
@overload
def difference(
iterable: Iterable[_T], func: Callable[[_T, _T], _U] = ..., *, initial: _U
@@ -436,7 +429,7 @@ class SequenceView(Generic[_T], Sequence[_T]):
class seekable(Generic[_T], Iterator[_T]):
def __init__(
self, iterable: Iterable[_T], maxlen: Optional[int] = ...
self, iterable: Iterable[_T], maxlen: int | None = ...
) -> None: ...
def __iter__(self) -> seekable[_T]: ...
def __next__(self) -> _T: ...
@@ -444,20 +437,20 @@ class seekable(Generic[_T], Iterator[_T]):
@overload
def peek(self) -> _T: ...
@overload
def peek(self, default: _U) -> Union[_T, _U]: ...
def peek(self, default: _U) -> _T | _U: ...
def elements(self) -> SequenceView[_T]: ...
def seek(self, index: int) -> None: ...
class run_length:
@staticmethod
def encode(iterable: Iterable[_T]) -> Iterator[Tuple[_T, int]]: ...
def encode(iterable: Iterable[_T]) -> Iterator[tuple[_T, int]]: ...
@staticmethod
def decode(iterable: Iterable[Tuple[_T, int]]) -> Iterator[_T]: ...
def decode(iterable: Iterable[tuple[_T, int]]) -> Iterator[_T]: ...
def exactly_n(
iterable: Iterable[_T], n: int, predicate: Callable[[_T], object] = ...
) -> bool: ...
def circular_shifts(iterable: Iterable[_T]) -> List[Tuple[_T, ...]]: ...
def circular_shifts(iterable: Iterable[_T]) -> list[tuple[_T, ...]]: ...
def make_decorator(
wrapping_func: Callable[..., _U], result_index: int = ...
) -> Callable[..., Callable[[Callable[..., Any]], Callable[..., _U]]]: ...
@@ -467,44 +460,44 @@ def map_reduce(
keyfunc: Callable[[_T], _U],
valuefunc: None = ...,
reducefunc: None = ...,
) -> Dict[_U, List[_T]]: ...
) -> dict[_U, list[_T]]: ...
@overload
def map_reduce(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: Callable[[_T], _V],
reducefunc: None = ...,
) -> Dict[_U, List[_V]]: ...
) -> dict[_U, list[_V]]: ...
@overload
def map_reduce(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: None = ...,
reducefunc: Callable[[List[_T]], _W] = ...,
) -> Dict[_U, _W]: ...
reducefunc: Callable[[list[_T]], _W] = ...,
) -> dict[_U, _W]: ...
@overload
def map_reduce(
iterable: Iterable[_T],
keyfunc: Callable[[_T], _U],
valuefunc: Callable[[_T], _V],
reducefunc: Callable[[List[_V]], _W],
) -> Dict[_U, _W]: ...
reducefunc: Callable[[list[_V]], _W],
) -> dict[_U, _W]: ...
def rlocate(
iterable: Iterable[_T],
pred: Callable[..., object] = ...,
window_size: Optional[int] = ...,
window_size: int | None = ...,
) -> Iterator[int]: ...
def replace(
iterable: Iterable[_T],
pred: Callable[..., object],
substitutes: Iterable[_U],
count: Optional[int] = ...,
count: int | None = ...,
window_size: int = ...,
) -> Iterator[Union[_T, _U]]: ...
def partitions(iterable: Iterable[_T]) -> Iterator[List[List[_T]]]: ...
) -> Iterator[_T | _U]: ...
def partitions(iterable: Iterable[_T]) -> Iterator[list[list[_T]]]: ...
def set_partitions(
iterable: Iterable[_T], k: Optional[int] = ...
) -> Iterator[List[List[_T]]]: ...
iterable: Iterable[_T], k: int | None = ...
) -> Iterator[list[list[_T]]]: ...
class time_limited(Generic[_T], Iterator[_T]):
def __init__(
@@ -515,16 +508,16 @@ class time_limited(Generic[_T], Iterator[_T]):
@overload
def only(
iterable: Iterable[_T], *, too_long: Optional[_Raisable] = ...
) -> Optional[_T]: ...
iterable: Iterable[_T], *, too_long: _Raisable | None = ...
) -> _T | None: ...
@overload
def only(
iterable: Iterable[_T], default: _U, too_long: Optional[_Raisable] = ...
) -> Union[_T, _U]: ...
iterable: Iterable[_T], default: _U, too_long: _Raisable | None = ...
) -> _T | _U: ...
def ichunked(iterable: Iterable[_T], n: int) -> Iterator[Iterator[_T]]: ...
def distinct_combinations(
iterable: Iterable[_T], r: int
) -> Iterator[Tuple[_T, ...]]: ...
) -> Iterator[tuple[_T, ...]]: ...
def filter_except(
validator: Callable[[Any], object],
iterable: Iterable[_T],
@@ -539,16 +532,16 @@ def map_if(
iterable: Iterable[Any],
pred: Callable[[Any], bool],
func: Callable[[Any], Any],
func_else: Optional[Callable[[Any], Any]] = ...,
func_else: Callable[[Any], Any] | None = ...,
) -> Iterator[Any]: ...
def sample(
iterable: Iterable[_T],
k: int,
weights: Optional[Iterable[float]] = ...,
) -> List[_T]: ...
weights: Iterable[float] | None = ...,
) -> list[_T]: ...
def is_sorted(
iterable: Iterable[_T],
key: Optional[Callable[[_T], _U]] = ...,
key: Callable[[_T], _U] | None = ...,
reverse: bool = False,
strict: bool = False,
) -> bool: ...
@@ -566,10 +559,10 @@ class callback_iter(Generic[_T], Iterator[_T]):
def __enter__(self) -> callback_iter[_T]: ...
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> Optional[bool]: ...
exc_type: Type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> bool | None: ...
def __iter__(self) -> callback_iter[_T]: ...
def __next__(self) -> _T: ...
def _reader(self) -> Iterator[_T]: ...
@@ -580,15 +573,15 @@ def windowed_complete(
def windowed_complete(
iterable: Iterable[_T], n: int
) -> Iterator[Tuple[_T, ...]]: ...
) -> Iterator[tuple[_T, ...]]: ...
def all_unique(
iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ...
iterable: Iterable[_T], key: Callable[[_T], _U] | None = ...
) -> bool: ...
def nth_product(index: int, *args: Iterable[_T]) -> Tuple[_T, ...]: ...
def nth_product(index: int, *args: Iterable[_T]) -> tuple[_T, ...]: ...
def nth_permutation(
iterable: Iterable[_T], r: int, index: int
) -> Tuple[_T, ...]: ...
def value_chain(*args: Union[_T, Iterable[_T]]) -> Iterable[_T]: ...
) -> tuple[_T, ...]: ...
def value_chain(*args: _T | Iterable[_T]) -> Iterable[_T]: ...
def product_index(element: Iterable[_T], *args: Iterable[_T]) -> int: ...
def combination_index(
element: Iterable[_T], iterable: Iterable[_T]
@@ -603,22 +596,20 @@ class countable(Generic[_T], Iterator[_T]):
def __iter__(self) -> countable[_T]: ...
def __next__(self) -> _T: ...
def chunked_even(iterable: Iterable[_T], n: int) -> Iterator[List[_T]]: ...
def chunked_even(iterable: Iterable[_T], n: int) -> Iterator[list[_T]]: ...
def zip_broadcast(
*objects: Union[_T, Iterable[_T]],
scalar_types: Union[
type, Tuple[Union[type, Tuple[Any, ...]], ...], None
] = ...,
*objects: _T | Iterable[_T],
scalar_types: type | tuple[type | tuple[Any, ...], ...] | None = ...,
strict: bool = ...,
) -> Iterable[Tuple[_T, ...]]: ...
) -> Iterable[tuple[_T, ...]]: ...
def unique_in_window(
iterable: Iterable[_T], n: int, key: Optional[Callable[[_T], _U]] = ...
iterable: Iterable[_T], n: int, key: Callable[[_T], _U] | None = ...
) -> Iterator[_T]: ...
def duplicates_everseen(
iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ...
iterable: Iterable[_T], key: Callable[[_T], _U] | None = ...
) -> Iterator[_T]: ...
def duplicates_justseen(
iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ...
iterable: Iterable[_T], key: Callable[[_T], _U] | None = ...
) -> Iterator[_T]: ...
class _SupportsLessThan(Protocol):
@@ -629,38 +620,38 @@ _SupportsLessThanT = TypeVar("_SupportsLessThanT", bound=_SupportsLessThan)
@overload
def minmax(
iterable_or_value: Iterable[_SupportsLessThanT], *, key: None = None
) -> Tuple[_SupportsLessThanT, _SupportsLessThanT]: ...
) -> tuple[_SupportsLessThanT, _SupportsLessThanT]: ...
@overload
def minmax(
iterable_or_value: Iterable[_T], *, key: Callable[[_T], _SupportsLessThan]
) -> Tuple[_T, _T]: ...
) -> tuple[_T, _T]: ...
@overload
def minmax(
iterable_or_value: Iterable[_SupportsLessThanT],
*,
key: None = None,
default: _U,
) -> Union[_U, Tuple[_SupportsLessThanT, _SupportsLessThanT]]: ...
) -> _U | tuple[_SupportsLessThanT, _SupportsLessThanT]: ...
@overload
def minmax(
iterable_or_value: Iterable[_T],
*,
key: Callable[[_T], _SupportsLessThan],
default: _U,
) -> Union[_U, Tuple[_T, _T]]: ...
) -> _U | tuple[_T, _T]: ...
@overload
def minmax(
iterable_or_value: _SupportsLessThanT,
__other: _SupportsLessThanT,
*others: _SupportsLessThanT,
) -> Tuple[_SupportsLessThanT, _SupportsLessThanT]: ...
) -> tuple[_SupportsLessThanT, _SupportsLessThanT]: ...
@overload
def minmax(
iterable_or_value: _T,
__other: _T,
*others: _T,
key: Callable[[_T], _SupportsLessThan],
) -> Tuple[_T, _T]: ...
) -> tuple[_T, _T]: ...
def longest_common_prefix(
iterables: Iterable[Iterable[_T]],
) -> Iterator[_T]: ...
@@ -668,7 +659,8 @@ def iequals(*iterables: Iterable[object]) -> bool: ...
def constrained_batches(
iterable: Iterable[object],
max_size: int,
max_count: Optional[int] = ...,
max_count: int | None = ...,
get_len: Callable[[_T], object] = ...,
strict: bool = ...,
) -> Iterator[Tuple[_T]]: ...
) -> Iterator[tuple[_T]]: ...
def gray_product(*iterables: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ...
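The stub churn above is syntax modernisation only: PEP 585 builtin generics and PEP 604 unions replace the typing aliases, with no behavioural change. For example:

    from typing import List, Optional

    old_style: Optional[List[int]] = None   # pre-9.1.0 stub spelling
    new_style: list[int] | None = None      # 9.1.0 stub spelling (py3.10+ syntax)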

View file

@@ -9,6 +9,7 @@ Some backward-compatible usability improvements have been made.
"""
import math
import operator
import warnings
from collections import deque
from collections.abc import Sized
@@ -21,12 +22,14 @@ from itertools import (
cycle,
groupby,
islice,
product,
repeat,
starmap,
tee,
zip_longest,
)
from random import randrange, sample, choice
from sys import hexversion
__all__ = [
'all_equal',
@ -36,9 +39,12 @@ __all__ = [
'convolve',
'dotproduct',
'first_true',
'factor',
'flatten',
'grouper',
'iter_except',
'iter_index',
'matmul',
'ncycles',
'nth',
'nth_combination',
@ -62,6 +68,7 @@ __all__ = [
'tabulate',
'tail',
'take',
'transpose',
'triplewise',
'unique_everseen',
'unique_justseen',
@ -808,6 +815,35 @@ def polynomial_from_roots(roots):
]
def iter_index(iterable, value, start=0):
"""Yield the index of each place in *iterable* that *value* occurs,
beginning with index *start*.
See :func:`locate` for a more general means of finding the indexes
associated with particular values.
>>> list(iter_index('AABCADEAF', 'A'))
[0, 1, 4, 7]
"""
try:
seq_index = iterable.index
except AttributeError:
# Slow path for general iterables
it = islice(iterable, start, None)
for i, element in enumerate(it, start):
if element is value or element == value:
yield i
else:
# Fast path for sequences
i = start - 1
try:
while True:
i = seq_index(value, i + 1)
yield i
except ValueError:
pass
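# Editor's sketch (not upstream code): a list takes the .index() fast path,
# a bare iterator takes the enumerate() slow path; both honour *start*.
#
#     list(iter_index('AABCADEAF', 'A', start=2))   # -> [4, 7]
#     list(iter_index(iter('AABCADEAF'), 'A'))      # -> [0, 1, 4, 7]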
def sieve(n):
"""Yield the primes less than n.
@ -815,13 +851,13 @@ def sieve(n):
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
"""
isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x)))
data = bytearray((0, 1)) * (n // 2)
data[:3] = 0, 0, 0
limit = isqrt(n) + 1
data = bytearray([1]) * n
data[:2] = 0, 0
for p in compress(range(limit), data):
data[p + p : n : p] = bytearray(len(range(p + p, n, p)))
return compress(count(), data)
data[p * p : n : p + p] = bytes(len(range(p * p, n, p + p)))
data[2] = 1
return iter_index(data, 1) if n > 2 else iter([])
def batched(iterable, n):
@ -833,9 +869,62 @@ def batched(iterable, n):
This recipe is from the ``itertools`` docs. This library also provides
:func:`chunked`, which has a different implementation.
"""
if hexversion >= 0x30C00A0: # Python 3.12.0a0
warnings.warn(
(
'batched will be removed in a future version of '
'more-itertools. Use the standard library '
'itertools.batched function instead'
),
DeprecationWarning,
)
it = iter(iterable)
while True:
batch = list(islice(it, n))
if not batch:
break
yield batch
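# Usage sketch (editor's note): on Python >= 3.12 this call also triggers
# the DeprecationWarning installed above.
#
#     list(batched('ABCDEFG', 3))   # -> [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]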
def transpose(it):
"""Swap the rows and columns of the input.
>>> list(transpose([(1, 2, 3), (11, 22, 33)]))
[(1, 11), (2, 22), (3, 33)]
The caller should ensure that the dimensions of the input are compatible.
"""
# TODO: when 3.9 goes end-of-life, add strict=True to this.
return zip(*it)
def matmul(m1, m2):
"""Multiply two matrices.
>>> list(matmul([(7, 5), (3, 5)], [(2, 5), (7, 9)]))
[[49, 80], [41, 60]]
The caller should ensure that the dimensions of the input matrices are
compatible with each other.
"""
n = len(m2[0])
return batched(starmap(dotproduct, product(m1, transpose(m2))), n)
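# Worked example of the docstring case (editor's note): with n = len(m2[0]) = 2,
# transpose(m2) yields the columns (2, 7) and (5, 9); product(m1, ...) pairs
# every row of m1 with every column, so starmap(dotproduct, ...) emits the flat
# stream 49, 80, 41, 60, and batched(..., 2) regroups it into [[49, 80], [41, 60]].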
def factor(n):
"""Yield the prime factors of n.
>>> list(factor(360))
[2, 2, 2, 3, 3, 5]
"""
isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x)))
for prime in sieve(isqrt(n) + 1):
while True:
quotient, remainder = divmod(n, prime)
if remainder:
break
yield prime
n = quotient
if n == 1:
return
if n >= 2:
yield n
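# Edge cases (editor's note): trial divisors only run up to isqrt(n), so a
# prime remainder is emitted by the final `if n >= 2` branch.
#
#     list(factor(97))   # -> [97]; no prime <= isqrt(97) divides 97
#     list(factor(1))    # -> []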

View file

@ -1,110 +1,119 @@
"""Stubs for more_itertools.recipes"""
from __future__ import annotations
from typing import (
Any,
Callable,
Iterable,
Iterator,
List,
Optional,
overload,
Sequence,
Tuple,
Type,
TypeVar,
Union,
)
from typing_extensions import overload, Type
# Type and type variable definitions
_T = TypeVar('_T')
_U = TypeVar('_U')
def take(n: int, iterable: Iterable[_T]) -> List[_T]: ...
def take(n: int, iterable: Iterable[_T]) -> list[_T]: ...
def tabulate(
function: Callable[[int], _T], start: int = ...
) -> Iterator[_T]: ...
def tail(n: int, iterable: Iterable[_T]) -> Iterator[_T]: ...
def consume(iterator: Iterable[object], n: Optional[int] = ...) -> None: ...
def consume(iterator: Iterable[object], n: int | None = ...) -> None: ...
@overload
def nth(iterable: Iterable[_T], n: int) -> Optional[_T]: ...
def nth(iterable: Iterable[_T], n: int) -> _T | None: ...
@overload
def nth(iterable: Iterable[_T], n: int, default: _U) -> Union[_T, _U]: ...
def nth(iterable: Iterable[_T], n: int, default: _U) -> _T | _U: ...
def all_equal(iterable: Iterable[object]) -> bool: ...
def quantify(
iterable: Iterable[_T], pred: Callable[[_T], bool] = ...
) -> int: ...
def pad_none(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ...
def padnone(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ...
def pad_none(iterable: Iterable[_T]) -> Iterator[_T | None]: ...
def padnone(iterable: Iterable[_T]) -> Iterator[_T | None]: ...
def ncycles(iterable: Iterable[_T], n: int) -> Iterator[_T]: ...
def dotproduct(vec1: Iterable[object], vec2: Iterable[object]) -> object: ...
def flatten(listOfLists: Iterable[Iterable[_T]]) -> Iterator[_T]: ...
def repeatfunc(
func: Callable[..., _U], times: Optional[int] = ..., *args: Any
func: Callable[..., _U], times: int | None = ..., *args: Any
) -> Iterator[_U]: ...
def pairwise(iterable: Iterable[_T]) -> Iterator[Tuple[_T, _T]]: ...
def pairwise(iterable: Iterable[_T]) -> Iterator[tuple[_T, _T]]: ...
def grouper(
iterable: Iterable[_T],
n: int,
incomplete: str = ...,
fillvalue: _U = ...,
) -> Iterator[Tuple[Union[_T, _U], ...]]: ...
) -> Iterator[tuple[_T | _U, ...]]: ...
def roundrobin(*iterables: Iterable[_T]) -> Iterator[_T]: ...
def partition(
pred: Optional[Callable[[_T], object]], iterable: Iterable[_T]
) -> Tuple[Iterator[_T], Iterator[_T]]: ...
def powerset(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ...
pred: Callable[[_T], object] | None, iterable: Iterable[_T]
) -> tuple[Iterator[_T], Iterator[_T]]: ...
def powerset(iterable: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ...
def unique_everseen(
iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ...
iterable: Iterable[_T], key: Callable[[_T], _U] | None = ...
) -> Iterator[_T]: ...
def unique_justseen(
iterable: Iterable[_T], key: Optional[Callable[[_T], object]] = ...
iterable: Iterable[_T], key: Callable[[_T], object] | None = ...
) -> Iterator[_T]: ...
@overload
def iter_except(
func: Callable[[], _T],
exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
exception: Type[BaseException] | tuple[Type[BaseException], ...],
first: None = ...,
) -> Iterator[_T]: ...
@overload
def iter_except(
func: Callable[[], _T],
exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
exception: Type[BaseException] | tuple[Type[BaseException], ...],
first: Callable[[], _U],
) -> Iterator[Union[_T, _U]]: ...
) -> Iterator[_T | _U]: ...
@overload
def first_true(
iterable: Iterable[_T], *, pred: Optional[Callable[[_T], object]] = ...
) -> Optional[_T]: ...
iterable: Iterable[_T], *, pred: Callable[[_T], object] | None = ...
) -> _T | None: ...
@overload
def first_true(
iterable: Iterable[_T],
default: _U,
pred: Optional[Callable[[_T], object]] = ...,
) -> Union[_T, _U]: ...
pred: Callable[[_T], object] | None = ...,
) -> _T | _U: ...
def random_product(
*args: Iterable[_T], repeat: int = ...
) -> Tuple[_T, ...]: ...
) -> tuple[_T, ...]: ...
def random_permutation(
iterable: Iterable[_T], r: Optional[int] = ...
) -> Tuple[_T, ...]: ...
def random_combination(iterable: Iterable[_T], r: int) -> Tuple[_T, ...]: ...
iterable: Iterable[_T], r: int | None = ...
) -> tuple[_T, ...]: ...
def random_combination(iterable: Iterable[_T], r: int) -> tuple[_T, ...]: ...
def random_combination_with_replacement(
iterable: Iterable[_T], r: int
) -> Tuple[_T, ...]: ...
) -> tuple[_T, ...]: ...
def nth_combination(
iterable: Iterable[_T], r: int, index: int
) -> Tuple[_T, ...]: ...
def prepend(value: _T, iterator: Iterable[_U]) -> Iterator[Union[_T, _U]]: ...
) -> tuple[_T, ...]: ...
def prepend(value: _T, iterator: Iterable[_U]) -> Iterator[_T | _U]: ...
def convolve(signal: Iterable[_T], kernel: Iterable[_T]) -> Iterator[_T]: ...
def before_and_after(
predicate: Callable[[_T], bool], it: Iterable[_T]
) -> Tuple[Iterator[_T], Iterator[_T]]: ...
def triplewise(iterable: Iterable[_T]) -> Iterator[Tuple[_T, _T, _T]]: ...
) -> tuple[Iterator[_T], Iterator[_T]]: ...
def triplewise(iterable: Iterable[_T]) -> Iterator[tuple[_T, _T, _T]]: ...
def sliding_window(
iterable: Iterable[_T], n: int
) -> Iterator[Tuple[_T, ...]]: ...
def subslices(iterable: Iterable[_T]) -> Iterator[List[_T]]: ...
def polynomial_from_roots(roots: Sequence[int]) -> List[int]: ...
) -> Iterator[tuple[_T, ...]]: ...
def subslices(iterable: Iterable[_T]) -> Iterator[list[_T]]: ...
def polynomial_from_roots(roots: Sequence[int]) -> list[int]: ...
def iter_index(
iterable: Iterable[object],
value: Any,
start: int | None = ...,
) -> Iterator[int]: ...
def sieve(n: int) -> Iterator[int]: ...
def batched(
iterable: Iterable[_T],
n: int,
) -> Iterator[List[_T]]: ...
) -> Iterator[list[_T]]: ...
def transpose(
it: Iterable[Iterable[_T]],
) -> tuple[Iterator[_T], ...]: ...
def matmul(m1: Sequence[_T], m2: Sequence[_T]) -> Iterator[list[_T]]: ...
def factor(n: int) -> Iterator[int]: ...

View file

@ -6,7 +6,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "23.0"
__version__ = "23.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"

View file

@ -14,6 +14,8 @@ EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
try:

View file

@ -163,7 +163,11 @@ def _parse_extras(tokenizer: Tokenizer) -> List[str]:
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"):
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
@ -203,7 +207,11 @@ def _parse_specifier(tokenizer: Tokenizer) -> str:
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
@ -217,7 +225,20 @@ def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
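# Effect of the two new trail checks, sketched via the public API (editor's
# note; the package name is illustrative):
#
#     from packaging.requirements import Requirement, InvalidRequirement
#     Requirement("example==1.0.*")      # prefix match is fine with ==
#     try:
#         Requirement("example>=1.0.*")  # .* with >= now fails fast
#     except InvalidRequirement as exc:
#         print(exc)  # begins ".* suffix can only be used with `==` or `!=` operators"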
@ -254,7 +275,11 @@ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")

View file

@ -78,6 +78,8 @@ DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
@ -167,21 +169,23 @@ class Tokenizer:
)
@contextlib.contextmanager
def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]:
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield open_position is not None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected closing {close_token}",
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)
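# Editor's sketch of the friendlier error: an unclosed bracket such as
# Requirement("example[extra") now reports
# "Expected matching RIGHT_BRACKET for LEFT_BRACKET, after extras"
# instead of the old "Expected closing RIGHT_BRACKET".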

View file

@ -8,7 +8,14 @@ import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker
from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
@ -189,7 +196,7 @@ class Marker:
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(parse_marker(marker))
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#

View file

@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast
if sys.version_info >= (3, 8): # pragma: no cover
from typing import TypedDict
else: # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:
class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way, then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field has a key with a plural name.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: List[str]
summary: str
description: str
keywords: List[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: List[str]
download_url: str
classifiers: List[str]
requires: List[str]
provides: List[str]
obsoletes: List[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: List[str]
provides_dist: List[str]
obsoletes_dist: List[str]
requires_python: str
requires_external: List[str]
project_urls: Dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: List[str]
# Metadata 2.2 - PEP 643
dynamic: List[str]
# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685; just some edge cases were
# tightened up to provide better interoperability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_STRING_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about is data that does
# not have a ',' at all to split the label from the value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in an accumulating dict.
#
# The other potential issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
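# Parsing sketch (editor's note):
#
#     _parse_project_urls(["Homepage, https://example.com",
#                          "Tracker, https://example.com/issues"])
#     # -> {'Homepage': 'https://example.com',
#     #     'Tracker': 'https://example.com/issues'}
#
# A duplicate label raises KeyError, which parse_email() below catches and
# routes the offending values into its `unparsed` dict.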
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
"""Parse a distribution's metadata.
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
unparsed: Dict[str, List[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name)
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all()),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibake happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores its data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: List[Tuple[bytes, Optional[str]]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_STRING_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
# addition to getting it from the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
# We need to cast our `raw` to a metadata, because a TypedDict only supports
# literal key names, while we're computing our key names on purpose; however,
# the way this function is implemented, our `TypedDict` can only have valid
# key names.
return cast(RawMetadata, raw), unparsed
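# Minimal usage sketch of the new entry point (editor's note; assumes the
# module is importable as packaging.metadata):
#
#     raw, unparsed = parse_email(
#         "Metadata-Version: 2.1\n"
#         "Name: example\n"
#         "Keywords: http,metadata\n"
#         "\n"
#         "Long description lives in the body.\n"
#     )
#     raw["keywords"]     # -> ['http', 'metadata']
#     raw["description"]  # -> 'Long description lives in the body.\n'
#     unparsed            # -> {}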

View file

@ -5,7 +5,7 @@
import urllib.parse
from typing import Any, List, Optional, Set
from ._parser import parse_requirement
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
@ -32,7 +32,7 @@ class Requirement:
def __init__(self, requirement_string: str) -> None:
try:
parsed = parse_requirement(requirement_string)
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e

View file

@ -252,7 +252,8 @@ class Specifier(BaseSpecifier):
# Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases
@property
# https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
@property # type: ignore[override]
def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just
# blindly use that.
@ -398,7 +399,9 @@ class Specifier(BaseSpecifier):
# We need special logic to handle prefix matching
if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment.
normalized_prospective = canonicalize_version(prospective.public)
normalized_prospective = canonicalize_version(
prospective.public, strip_trailing_zero=False
)
# Get the normalized version string ignoring the trailing .*
normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
# Split the spec out by dots, and pretend that there is an implicit

View file

@ -111,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
@ -120,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _abi3_applies(python_version: PythonVersion) -> bool:

View file

@ -10,7 +10,7 @@
import collections
import itertools
import re
from typing import Callable, Optional, SupportsInt, Tuple, Union
from typing import Any, Callable, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
@ -63,7 +63,7 @@ class InvalidVersion(ValueError):
class _BaseVersion:
_key: CmpKey
_key: Tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
@ -179,6 +179,7 @@ class Version(_BaseVersion):
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.

View file

@ -1,4 +1,4 @@
packaging==23.0
packaging==23.1
platformdirs==2.6.2
# required for platformdirs on Python < 3.8

View file

@ -66,10 +66,10 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
# Check urllib3 for compatibility.
major, minor, patch = urllib3_version # noqa: F811
major, minor, patch = int(major), int(minor), int(patch)
# urllib3 >= 1.21.1, <= 1.26
assert major == 1
# urllib3 >= 1.21.1
assert major >= 1
if major == 1:
assert minor >= 21
assert minor <= 26
# Check charset_normalizer for compatibility.
if chardet_version:

View file

@ -5,10 +5,10 @@
__title__ = "requests"
__description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io"
__version__ = "2.29.0"
__build__ = 0x022900
__version__ = "2.31.0"
__build__ = 0x023100
__author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0"
__license__ = "Apache-2.0"
__copyright__ = "Copyright Kenneth Reitz"
__cake__ = "\u2728 \U0001f370 \u2728"

View file

@ -193,7 +193,6 @@ class HTTPAdapter(BaseAdapter):
num_pools=connections,
maxsize=maxsize,
block=block,
strict=True,
**pool_kwargs,
)
@ -248,7 +247,6 @@ class HTTPAdapter(BaseAdapter):
:param cert: The SSL certificate to verify.
"""
if url.lower().startswith("https") and verify:
cert_loc = None
# Allow self-specified cert location.

View file

@ -25,7 +25,7 @@ def request(method, url, **kwargs):
:param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
:param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content_type'`` is a string
defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
to add for the file.
:param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.

View file

@ -258,7 +258,6 @@ class HTTPDigestAuth(AuthBase):
s_auth = r.headers.get("www-authenticate", "")
if "digest" in s_auth.lower() and self._thread_local.num_401_calls < 2:
self._thread_local.num_401_calls += 1
pat = re.compile(r"digest ", flags=re.IGNORECASE)
self._thread_local.chal = parse_dict_header(pat.sub("", s_auth, count=1))

View file

@ -2,7 +2,7 @@
requests.cookies
~~~~~~~~~~~~~~~~
Compatibility code to be able to use `cookielib.CookieJar` with requests.
Compatibility code to be able to use `http.cookiejar.CookieJar` with requests.
requests.utils imports from here, so be careful with imports.
"""
@ -23,7 +23,7 @@ except ImportError:
class MockRequest:
"""Wraps a `requests.Request` to mimic a `urllib2.Request`.
The code in `cookielib.CookieJar` expects this interface in order to correctly
The code in `http.cookiejar.CookieJar` expects this interface in order to correctly
manage cookie policies, i.e., determine whether a cookie can be set, given the
domains of the request and the cookie.
@ -76,7 +76,7 @@ class MockRequest:
return self._r.headers.get(name, self._new_headers.get(name, default))
def add_header(self, key, val):
"""cookielib has no legitimate use for this method; add it back if you find one."""
"""cookiejar has no legitimate use for this method; add it back if you find one."""
raise NotImplementedError(
"Cookie headers should be added with add_unredirected_header()"
)
@ -104,11 +104,11 @@ class MockResponse:
"""Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.
...what? Basically, expose the parsed HTTP headers from the server response
the way `cookielib` expects to see them.
the way `http.cookiejar` expects to see them.
"""
def __init__(self, headers):
"""Make a MockResponse for `cookielib` to read.
"""Make a MockResponse for `cookiejar` to read.
:param headers: a httplib.HTTPMessage or analogous carrying the headers
"""
@ -124,7 +124,7 @@ class MockResponse:
def extract_cookies_to_jar(jar, request, response):
"""Extract the cookies from the response into a CookieJar.
:param jar: cookielib.CookieJar (not necessarily a RequestsCookieJar)
:param jar: http.cookiejar.CookieJar (not necessarily a RequestsCookieJar)
:param request: our own requests.Request object
:param response: urllib3.HTTPResponse object
"""
@ -174,7 +174,7 @@ class CookieConflictError(RuntimeError):
class RequestsCookieJar(cookielib.CookieJar, MutableMapping):
"""Compatibility class; is a cookielib.CookieJar, but exposes a dict
"""Compatibility class; is a http.cookiejar.CookieJar, but exposes a dict
interface.
This is the CookieJar we create by default for requests and sessions that
@ -341,7 +341,7 @@ class RequestsCookieJar(cookielib.CookieJar, MutableMapping):
self.set(name, value)
def __delitem__(self, name):
"""Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s
"""Deletes a cookie given a name. Wraps ``http.cookiejar.CookieJar``'s
``remove_cookie_by_name()``.
"""
remove_cookie_by_name(self, name)

View file

@ -170,7 +170,7 @@ class RequestEncodingMixin:
)
)
for (k, v) in files:
for k, v in files:
# support for explicit filename
ft = None
fh = None
@ -268,7 +268,6 @@ class Request(RequestHooksMixin):
hooks=None,
json=None,
):
# Default empty dicts for dict params.
data = [] if data is None else data
files = [] if files is None else files
@ -277,7 +276,7 @@ class Request(RequestHooksMixin):
hooks = {} if hooks is None else hooks
self.hooks = default_hooks()
for (k, v) in list(hooks.items()):
for k, v in list(hooks.items()):
self.register_hook(event=k, hook=v)
self.method = method
@ -865,7 +864,6 @@ class Response:
for chunk in self.iter_content(
chunk_size=chunk_size, decode_unicode=decode_unicode
):
if pending is not None:
chunk = pending + chunk

View file

@ -262,7 +262,6 @@ class SessionRedirectMixin:
if yield_requests:
yield req
else:
resp = self.send(
req,
stream=stream,
@ -324,7 +323,9 @@ class SessionRedirectMixin:
except KeyError:
username, password = None, None
if username and password:
# urllib3 handles proxy authorization for us in the standard adapter.
# Avoid appending this to TLS tunneled requests where it may be leaked.
if not scheme.startswith("https") and username and password:
headers["Proxy-Authorization"] = _basic_auth_str(username, password)
return new_proxies
@ -387,7 +388,6 @@ class Session(SessionRedirectMixin):
]
def __init__(self):
#: A case-insensitive dictionary of headers to be sent on each
#: :class:`Request <Request>` sent from this
#: :class:`Session <Session>`.
@ -543,6 +543,8 @@ class Session(SessionRedirectMixin):
:type allow_redirects: bool
:param proxies: (optional) Dictionary mapping protocol or protocol and
hostname to the URL of the proxy.
:param hooks: (optional) Dictionary mapping hook name to one event or
list of events; each event must be callable.
:param stream: (optional) whether to immediately download the response
content. Defaults to ``False``.
:param verify: (optional) Either a boolean, in which case it controls whether we verify
@ -709,7 +711,6 @@ class Session(SessionRedirectMixin):
# Persist cookies
if r.history:
# If the hooks create history then we want those cookies too
for resp in r.history:
extract_cookies_to_jar(self.cookies, resp.request, resp.raw)
@ -757,7 +758,7 @@ class Session(SessionRedirectMixin):
# Set environment's proxies.
no_proxy = proxies.get("no_proxy") if proxies is not None else None
env_proxies = get_environ_proxies(url, no_proxy=no_proxy)
for (k, v) in env_proxies.items():
for k, v in env_proxies.items():
proxies.setdefault(k, v)
# Look for requests environment configuration
@ -783,8 +784,7 @@ class Session(SessionRedirectMixin):
:rtype: requests.adapters.BaseAdapter
"""
for (prefix, adapter) in self.adapters.items():
for prefix, adapter in self.adapters.items():
if url.lower().startswith(prefix.lower()):
return adapter

View file

@ -466,11 +466,7 @@ def dict_from_cookiejar(cj):
:rtype: dict
"""
cookie_dict = {}
for cookie in cj:
cookie_dict[cookie.name] = cookie.value
cookie_dict = {cookie.name: cookie.value for cookie in cj}
return cookie_dict
@ -767,6 +763,7 @@ def should_bypass_proxies(url, no_proxy):
:rtype: bool
"""
# Prioritize lowercase environment variables over uppercase
# to keep a consistent behaviour with other http projects (curl, wget).
def get_proxy(key):

View file

@ -1071,7 +1071,8 @@ def scantree(path, # type: AnyStr
include=None, # type: Optional[AnyStr, List[AnyStr]]
follow_symlinks=False, # type: bool
filter_kind=None, # type: Optional[bool]
recurse=True # type: bool
recurse=True, # type: bool
exclude_folders_with_files=None # type: Optional[List[AnyStr]]
):
# type: (...) -> Generator[DirEntry, None, None]
"""Yield DirEntry objects for given path. Returns without yield if path fails sanity check
@ -1082,6 +1083,7 @@ def scantree(path, # type: AnyStr
:param follow_symlinks: Follow symlinks
:param filter_kind: None to yield everything, True yields directories, False yields files
:param recurse: Recursively scan the tree
:param exclude_folders_with_files: exclude folders that contain the listed file(s)
"""
if isinstance(path, string_types) and path and os.path.isdir(path):
rc_exc, rc_inc = [re.compile(rx % '|'.join(
@ -1093,6 +1095,11 @@ def scantree(path, # type: AnyStr
no_filter = any([None is filter_kind, filter_kind and is_dir, not filter_kind and is_file])
if (rc_exc.search(entry.name), True)[not exclude] and (rc_inc.search(entry.name), True)[not include] \
and (no_filter or (not filter_kind and is_dir and recurse)):
if is_dir and exclude_folders_with_files and any(os.path.isfile(os.path.join(entry.path, e_f))
for e_f in exclude_folders_with_files):
logger.debug(f'Ignoring folder: "{entry.path}", because it contains an exclude file'
f' "{", ".join(exclude_folders_with_files)}"')
continue
if recurse and is_dir:
for subentry in scantree(entry.path, exclude, include, follow_symlinks, filter_kind, recurse):
yield subentry
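# Hedged usage sketch of the new parameter (the path and the '.nomedia'
# marker file are hypothetical):
#
#     for entry in scantree('/media/tv', filter_kind=False,
#                           exclude_folders_with_files=['.nomedia']):
#         ...  # folders holding a '.nomedia' file are skipped, not recursed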

View file

@ -32,7 +32,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable
from typing import Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,17 +45,14 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
if isinstance(pattern, SoupSieve):
if flags:
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
@ -65,7 +62,12 @@ def compile( # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern
return cp._cached_css_compile(pattern, ns, cs, flags)
return cp._cached_css_compile(
pattern,
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
ct.CustomSelectors(custom) if custom is not None else custom,
flags
)
def purge() -> None:
@ -77,10 +79,10 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""
@ -91,10 +93,10 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
@ -105,10 +107,10 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Filter list of nodes."""
@ -119,10 +121,10 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""
@ -133,11 +135,11 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Select the specified tags."""
@ -148,11 +150,11 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""

View file

@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 5, 0, "final", post=1)
__version_info__ = Version(2, 4, 1, "final")
__version__ = __version_info__._get_canonical()

View file

@ -6,7 +6,7 @@ import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
def get_children(
self,
el: bs4.Tag,
start: Optional[int] = None,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
return parent
@staticmethod
def get_tag_name(el: bs4.Tag) -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""
return cast(Optional[str], el.name)
return cast('str | None', el.name)
@staticmethod
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""
return cast(Optional[str], el.prefix)
return cast('str | None', el.prefix)
@staticmethod
def get_uri(el: bs4.Tag) -> Optional[str]:
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""
return cast(Optional[str], el.namespace)
return cast('str | None', el.namespace)
@classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
cls,
el: bs4.Tag,
name: str,
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""
value = default
@ -348,7 +348,7 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""
parsed = None # type: Optional[tuple[float, ...]]
parsed = None # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
self,
selectors: ct.SelectorList,
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: bs4.Tag) -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
self,
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[str | Sequence[str]]:
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""
value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""
match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
"""Match element if it contains text."""
match = True
content = None # type: Optional[str | Sequence[str]]
content = None # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional[bs4.Tag]:
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""
current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""

View file

@ -7,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Match, Any, Iterator, cast
from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
return self.name
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern]
self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
pseudo = None
@ -372,14 +372,14 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
@ -462,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""
@ -723,7 +723,7 @@ class CSSParser:
if postfix == '_child':
if m.group('of'):
# Parse the rest of `of S`.
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN | FLG_FORGIVE)
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
else:
# Use default `*|*` for `of S`.
nth_sel = CSS_NTH_OF_S_DEFAULT
@ -753,7 +753,7 @@ class CSSParser:
if name == ':not':
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
flags |= FLG_RELATIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
@ -777,11 +777,6 @@ class CSSParser:
if not combinator:
combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR
@ -1070,18 +1065,8 @@ class CSSParser:
selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive:
if is_relative:
# Handle relative selectors pseudo-classes with empty slots like `:has()`
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
sel.rel_type = rel_type
sel.no_match = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
elif is_forgive and (not selectors or not relations):
# Handle normal pseudo-classes with empty slots like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)

View file

@ -2,7 +2,7 @@
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional[SelectorTag]
tag: SelectorTag | None
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
rel_type: str | None
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional[SelectorTag],
tag: SelectorTag | None,
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
rel_type: str | None,
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")
name: str
prefix: Optional[str]
prefix: str | None
def __init__(self, name: str, prefix: Optional[str]) -> None:
def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize."""
super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None
xml_type_pattern: Pattern[str] | None
def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None,
xml_type_pattern: Pattern[str] | None
) -> None:
"""Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
def __init__(
self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False,
is_html: bool = False
) -> None:

View file

@ -3,7 +3,7 @@ from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional
from typing import Callable, Any
DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize."""
self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
col = 1
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]
offset = None # type: int | None
# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
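The Optional-to-`X | None` rewrites throughout these modules rely on `from __future__ import annotations` (imported at the top of each file), which stores annotations as strings instead of evaluating them, so the PEP 604 union syntax parses on interpreters older than Python 3.10; `# type:` comments like the one above are read only by type checkers and are safe for the same reason. A minimal standalone sketch of the idea:
from __future__ import annotations  # annotations kept as strings, never evaluated at def time
def syntax_check(pattern: str | None = None) -> int | None:
    # parses on Python 3.7+ despite the 3.10-era "|" union syntax in the annotations
    return None if pattern is None else len(pattern)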

1
lib/thefuzz/__init__.py Normal file
View file

@ -0,0 +1 @@
__version__ = '0.19.0'

160
lib/thefuzz/fuzz.py Normal file
View file

@ -0,0 +1,160 @@
#!/usr/bin/env python
from rapidfuzz.fuzz import (
ratio as _ratio,
partial_ratio as _partial_ratio,
token_set_ratio as _token_set_ratio,
token_sort_ratio as _token_sort_ratio,
partial_token_set_ratio as _partial_token_set_ratio,
partial_token_sort_ratio as _partial_token_sort_ratio,
WRatio as _WRatio,
QRatio as _QRatio,
)
from . import utils
###########################
# Basic Scoring Functions #
###########################
def _rapidfuzz_scorer(scorer, s1, s2, force_ascii, full_process):
"""
Wrapper around a rapidfuzz scorer to stay compatible with the thefuzz API.
"""
if full_process:
if s1 is None or s2 is None:
return 0
s1 = utils.full_process(s1, force_ascii=force_ascii)
s2 = utils.full_process(s2, force_ascii=force_ascii)
return int(round(scorer(s1, s2)))
def ratio(s1, s2):
return _rapidfuzz_scorer(_ratio, s1, s2, False, False)
def partial_ratio(s1, s2):
"""
Return the ratio of the most similar substring
as a number between 0 and 100.
"""
return _rapidfuzz_scorer(_partial_ratio, s1, s2, False, False)
##############################
# Advanced Scoring Functions #
##############################
# Sorted Token
# find all alphanumeric tokens in the string
# sort those tokens and take ratio of resulting joined strings
# controls for unordered string elements
def token_sort_ratio(s1, s2, force_ascii=True, full_process=True):
"""
Return a measure of the sequences' similarity between 0 and 100
but sorting the tokens before comparing.
"""
return _rapidfuzz_scorer(_token_sort_ratio, s1, s2, force_ascii, full_process)
def partial_token_sort_ratio(s1, s2, force_ascii=True, full_process=True):
"""
Return the ratio of the most similar substring as a number between
0 and 100 but sorting the tokens before comparing.
"""
return _rapidfuzz_scorer(
_partial_token_sort_ratio, s1, s2, force_ascii, full_process
)
def token_set_ratio(s1, s2, force_ascii=True, full_process=True):
return _rapidfuzz_scorer(_token_set_ratio, s1, s2, force_ascii, full_process)
def partial_token_set_ratio(s1, s2, force_ascii=True, full_process=True):
return _rapidfuzz_scorer(
_partial_token_set_ratio, s1, s2, force_ascii, full_process
)
###################
# Combination API #
###################
# q is for quick
def QRatio(s1, s2, force_ascii=True, full_process=True):
"""
Quick ratio comparison between two strings.
Runs full_process from utils on both strings
Short circuits if either of the strings is empty after processing.
:param s1:
:param s2:
:param force_ascii: Allow only ASCII characters (Default: True)
:param full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
:return: similarity ratio
"""
return _rapidfuzz_scorer(_QRatio, s1, s2, force_ascii, full_process)
def UQRatio(s1, s2, full_process=True):
"""
Unicode quick ratio
Calls QRatio with force_ascii set to False
:param s1:
:param s2:
:return: similarity ratio
"""
return QRatio(s1, s2, force_ascii=False, full_process=full_process)
# w is for weighted
def WRatio(s1, s2, force_ascii=True, full_process=True):
"""
Return a measure of the sequences' similarity between 0 and 100, using different algorithms.
**Steps in the order they occur**
#. Run full_process from utils on both strings
#. Short circuit if this makes either string empty
#. Take the ratio of the two processed strings (fuzz.ratio)
#. Run checks to compare the length of the strings
* If one of the strings is more than 1.5 times as long as the other
use partial_ratio comparisons - scale partial results by 0.9
(this makes sure only full results can return 100)
* If one of the strings is over 8 times as long as the other
instead scale by 0.6
#. Run the other ratio functions
* if using partial ratio functions call partial_ratio,
partial_token_sort_ratio and partial_token_set_ratio
scale all of these by the ratio based on length
* otherwise call token_sort_ratio and token_set_ratio
* all token based comparisons are scaled by 0.95
(on top of any partial scalars)
#. Take the highest value from these results
round it and return it as an integer.
:param s1:
:param s2:
:param force_ascii: Allow only ASCII characters
:type force_ascii: bool
:param full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
:return:
"""
return _rapidfuzz_scorer(_WRatio, s1, s2, force_ascii, full_process)
def UWRatio(s1, s2, full_process=True):
"""
Return a measure of the sequences' similarity between 0 and 100,
using different algorithms. Same as WRatio but preserving unicode.
"""
return WRatio(s1, s2, force_ascii=False, full_process=full_process)
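For orientation, a minimal usage sketch of the module above; the scores shown are indicative, since thefuzz now delegates to rapidfuzz and rounds the result:
from thefuzz import fuzz
fuzz.ratio("this is a test", "this is a test!")          # ~97: near-identical strings
fuzz.partial_ratio("this is a test", "this is a test!")  # 100: best matching substring
fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")  # 100: token order ignored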

10
lib/thefuzz/fuzz.pyi Normal file
View file

@ -0,0 +1,10 @@
def ratio(s1: str, s2: str) -> int: ...
def partial_ratio(s1: str, s2: str) -> int: ...
def token_sort_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def partial_token_sort_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def token_set_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def partial_token_set_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def QRatio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def UQRatio(s1: str, s2: str, full_process: bool = ...) -> int: ...
def WRatio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ...
def UWRatio(s1: str, s2: str, full_process: bool = ...) -> int: ...

309
lib/thefuzz/process.py Normal file
View file

@ -0,0 +1,309 @@
#!/usr/bin/env python
from . import fuzz
from . import utils
import logging
from rapidfuzz import fuzz as rfuzz
from rapidfuzz import process as rprocess
_logger = logging.getLogger(__name__)
default_scorer = fuzz.WRatio
default_processor = utils.full_process
def _get_processor(processor, scorer):
"""
thefuzz runs both the default preprocessing of the function and the preprocessing
function passed into process.* while rapidfuzz only runs the one passed into
process.*. This function wraps the processor to mimic thefuzz's behavior
"""
if scorer not in (fuzz.WRatio, fuzz.QRatio,
fuzz.token_set_ratio, fuzz.token_sort_ratio,
fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
fuzz.UWRatio, fuzz.UQRatio):
return processor
if not processor:
return utils.full_process
def wrapper(s):
return utils.full_process(processor(s))
return wrapper
# This lowers the scorers back to the scorers used in rapidfuzz,
# which allows rapidfuzz to perform more optimizations behind the scenes.
# These mapped scorers are the same with two exceptions
# - default processor
# - result is not rounded
# these two exceptions need to be taken into account in the implementation
_scorer_lowering = {
fuzz.ratio: rfuzz.ratio,
fuzz.partial_ratio: rfuzz.partial_ratio,
fuzz.token_set_ratio: rfuzz.token_set_ratio,
fuzz.token_sort_ratio: rfuzz.token_sort_ratio,
fuzz.partial_token_set_ratio: rfuzz.partial_token_set_ratio,
fuzz.partial_token_sort_ratio: rfuzz.partial_token_sort_ratio,
fuzz.WRatio: rfuzz.WRatio,
fuzz.QRatio: rfuzz.QRatio,
fuzz.UWRatio: rfuzz.WRatio,
fuzz.UQRatio: rfuzz.QRatio,
}
def _get_scorer(scorer):
"""
rapidfuzz scorers require the score_cutoff argument to be available
This generates a compatible wrapper function
"""
def wrapper(s1, s2, score_cutoff=0):
return scorer(s1, s2)
return _scorer_lowering.get(scorer, wrapper)
def _preprocess_query(query, processor):
processed_query = processor(query) if processor else query
if len(processed_query) == 0:
_logger.warning("Applied processor reduces input query to empty string, "
"all comparisons will have score 0. "
f"[Query: \'{query}\']")
return processed_query
def extractWithoutOrder(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0):
"""
Select the best match in a list or dictionary of choices.
Find best matches in a list or dictionary of choices, return a
generator of tuples containing the match and its score. If a dictionary
is used, also returns the key for each match.
Arguments:
query: An object representing the thing we want to find.
choices: An iterable or dictionary-like object containing choices
to be matched against the query. Dictionary arguments of
{key: value} pairs will attempt to match the query against
each value.
processor: Optional function of the form f(a) -> b, where a is the query or
individual choice and b is the choice to be used in matching.
This can be used to match against, say, the first element of
a list:
lambda x: x[0]
Defaults to thefuzz.utils.full_process().
scorer: Optional function for scoring matches between the query and
an individual processed choice. This should be a function
of the form f(query, choice) -> int.
By default, fuzz.WRatio() is used and expects both query and
choice to be strings.
score_cutoff: Optional argument for score threshold. No matches with
a score less than this number will be returned. Defaults to 0.
Returns:
Generator of tuples containing the match and its score.
If a list is used for choices, then the result will be 2-tuples.
If a dictionary is used, then the result will be 3-tuples containing
the key for each match.
For example, searching for 'bird' in the dictionary
{'bard': 'train', 'dog': 'man'}
may return
('train', 22, 'bard'), ('man', 0, 'dog')
"""
is_mapping = hasattr(choices, "items")
is_lowered = scorer in _scorer_lowering
query = _preprocess_query(query, processor)
it = rprocess.extract_iter(
query, choices,
processor=_get_processor(processor, scorer),
scorer=_get_scorer(scorer),
score_cutoff=score_cutoff
)
for choice, score, key in it:
if is_lowered:
score = int(round(score))
yield (choice, score, key) if is_mapping else (choice, score)
def extract(query, choices, processor=default_processor, scorer=default_scorer, limit=5):
"""
Select the best match in a list or dictionary of choices.
Find best matches in a list or dictionary of choices, return a
list of tuples containing the match and its score. If a dictionary
is used, also returns the key for each match.
Arguments:
query: An object representing the thing we want to find.
choices: An iterable or dictionary-like object containing choices
to be matched against the query. Dictionary arguments of
{key: value} pairs will attempt to match the query against
each value.
processor: Optional function of the form f(a) -> b, where a is the query or
individual choice and b is the choice to be used in matching.
This can be used to match against, say, the first element of
a list:
lambda x: x[0]
Defaults to thefuzz.utils.full_process().
scorer: Optional function for scoring matches between the query and
an individual processed choice. This should be a function
of the form f(query, choice) -> int.
By default, fuzz.WRatio() is used and expects both query and
choice to be strings.
limit: Optional maximum for the number of elements returned. Defaults
to 5.
Returns:
List of tuples containing the match and its score.
If a list is used for choices, then the result will be 2-tuples.
If a dictionary is used, then the result will be 3-tuples containing
the key for each match.
For example, searching for 'bird' in the dictionary
{'bard': 'train', 'dog': 'man'}
may return
[('train', 22, 'bard'), ('man', 0, 'dog')]
"""
return extractBests(query, choices, processor=processor, scorer=scorer, limit=limit)
def extractBests(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0, limit=5):
"""
Get a list of the best matches to a collection of choices.
Convenience function for getting the choices with best scores.
Args:
query: A string to match against
choices: A list or dictionary of choices, suitable for use with
extract().
processor: Optional function for transforming choices before matching.
See extract().
scorer: Scoring function for extract().
score_cutoff: Optional argument for score threshold. No matches with
a score less than this number will be returned. Defaults to 0.
limit: Optional maximum for the number of elements returned. Defaults
to 5.
Returns: A list of (match, score) tuples.
"""
is_mapping = hasattr(choices, "items")
is_lowered = scorer in _scorer_lowering
query = _preprocess_query(query, processor)
results = rprocess.extract(
query, choices,
processor=_get_processor(processor, scorer),
scorer=_get_scorer(scorer),
score_cutoff=score_cutoff,
limit=limit
)
for i, (choice, score, key) in enumerate(results):
if is_lowered:
score = int(round(score))
results[i] = (choice, score, key) if is_mapping else (choice, score)
return results
def extractOne(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0):
"""
Find the single best match above a score in a list of choices.
This is a convenience method which returns the single best choice.
See extract() for the full arguments list.
Args:
query: A string to match against
choices: A list or dictionary of choices, suitable for use with
extract().
processor: Optional function for transforming choices before matching.
See extract().
scorer: Scoring function for extract().
score_cutoff: Optional argument for score threshold. If the best
match is found, but it is not greater than this number, then
return None anyway ("not a good enough match"). Defaults to 0.
Returns:
A tuple containing a single match and its score, if a match
was found that was above score_cutoff. Otherwise, returns None.
"""
is_mapping = hasattr(choices, "items")
is_lowered = scorer in _scorer_lowering
query = _preprocess_query(query, processor)
res = rprocess.extractOne(
query, choices,
processor=_get_processor(processor, scorer),
scorer=_get_scorer(scorer),
score_cutoff=score_cutoff
)
if res is None:
return res
choice, score, key = res
if is_lowered:
score = int(round(score))
return (choice, score, key) if is_mapping else (choice, score)
def dedupe(contains_dupes, threshold=70, scorer=fuzz.token_set_ratio):
"""
This convenience function takes a list of strings containing duplicates and uses fuzzy matching to identify
and remove duplicates. Specifically, it uses process.extract to identify duplicates that
score greater than a user-defined threshold. It then returns the longest item from each duplicate group,
on the assumption that the longest string carries the most entity information, and breaks
string-length ties with an alphabetical sort.
Note: as the threshold DECREASES the number of duplicates that are found INCREASES. This means that the
returned deduplicated list will likely be shorter. Raise the threshold for dedupe to be less
sensitive.
Args:
contains_dupes: A list of strings that we would like to dedupe.
threshold: the numerical score (0-100) at which we expect to find duplicates.
Defaults to 70 out of 100
scorer: Optional function for scoring matches between the query and
an individual processed choice. This should be a function
of the form f(query, choice) -> int.
By default, fuzz.token_set_ratio() is used and expects both query and
choice to be strings.
Returns:
A deduplicated list. For example:
In: contains_dupes = ['Frodo Baggin', 'Frodo Baggins', 'F. Baggins', 'Samwise G.', 'Gandalf', 'Bilbo Baggins']
In: dedupe(contains_dupes)
Out: ['Frodo Baggins', 'Samwise G.', 'Bilbo Baggins', 'Gandalf']
"""
deduped = set()
for item in contains_dupes:
matches = extractBests(item, contains_dupes, scorer=scorer, score_cutoff=threshold, limit=None)
deduped.add(max(matches, key=lambda x: (len(x[0]), x[0]))[0])
return list(deduped) if len(deduped) != len(contains_dupes) else contains_dupes
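A brief usage sketch for the module above (the choices list and the exact scores are illustrative):
from thefuzz import process
choices = ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"]
process.extract("new york jets", choices, limit=2)  # e.g. [('New York Jets', 100), ('New York Giants', 79)]
process.extractOne("cowboys", choices)              # e.g. ('Dallas Cowboys', 90)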

17
lib/thefuzz/process.pyi Normal file
View file

@ -0,0 +1,17 @@
from collections.abc import Mapping
import typing
from typing import Any, Callable, Union, Tuple, Generator, TypeVar, Sequence
ChoicesT = Union[Mapping[str, str], Sequence[str]]
T = TypeVar('T')
ProcessorT = Union[Callable[[str, bool], str], Callable[[Any], Any]]
ScorerT = Callable[[str, str, bool, bool], int]
@typing.overload
def extractWithoutOrder(query: str, choices: Mapping[str, str], processor: ProcessorT, scorer: ScorerT, score_cutoff: int = ...) -> Generator[Tuple[str, int, str], None, None]: ...
@typing.overload
def extractWithoutOrder(query: str, choices: Sequence[str], processor: ProcessorT, scorer: ScorerT, score_cutoff: int = ...) -> Generator[Tuple[str, int], None, None]: ...

22
lib/thefuzz/utils.py Normal file
View file

@ -0,0 +1,22 @@
from rapidfuzz.utils import default_process as _default_process
translation_table = {i: None for i in range(128, 256)} # ascii dammit!
def ascii_only(s):
return s.translate(translation_table)
def full_process(s, force_ascii=False):
"""
Process string by
-- removing all but letters and numbers
-- trimming whitespace
-- forcing to lower case
If force_ascii == True, also force-convert to ASCII first
"""
if force_ascii:
s = ascii_only(str(s))
return _default_process(s)
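Illustrative behaviour of the helper above; the exact output depends on rapidfuzz's default_process, which lower-cases, strips non-alphanumerics and trims:
from thefuzz.utils import full_process
full_process(" Fuzzy Wuzzy! ")           # 'fuzzy wuzzy' - punctuation out, lower-cased, trimmed
full_process("Héllo", force_ascii=True)  # 'hllo' - codepoints >= 128 dropped before processing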

3
lib/thefuzz/utils.pyi Normal file
View file

@ -0,0 +1,3 @@
def ascii_only(s: str) -> str: ...
def full_process(s: str, force_ascii: bool = ...) -> str: ...

View file

@ -22,8 +22,8 @@
# is zero for an official release, positive for a development branch,
# or negative for a release candidate or beta (after the base version
# number has been incremented)
version = "6.3.2"
version_info = (6, 3, 2, 0)
version = "6.3.3"
version_info = (6, 3, 3, 0)
import importlib
import typing

View file

@ -442,7 +442,7 @@ class HTTP1Connection(httputil.HTTPConnection):
):
self._expected_content_remaining = 0
elif "Content-Length" in headers:
self._expected_content_remaining = int(headers["Content-Length"])
self._expected_content_remaining = parse_int(headers["Content-Length"])
else:
self._expected_content_remaining = None
# TODO: headers are supposed to be of type str, but we still have some
@ -618,7 +618,7 @@ class HTTP1Connection(httputil.HTTPConnection):
headers["Content-Length"] = pieces[0]
try:
content_length = int(headers["Content-Length"]) # type: Optional[int]
content_length: Optional[int] = parse_int(headers["Content-Length"])
except ValueError:
# Handles non-integer Content-Length value.
raise httputil.HTTPInputError(
@ -668,7 +668,10 @@ class HTTP1Connection(httputil.HTTPConnection):
total_size = 0
while True:
chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64)
chunk_len = int(chunk_len_str.strip(), 16)
try:
chunk_len = parse_hex_int(native_str(chunk_len_str[:-2]))
except ValueError:
raise httputil.HTTPInputError("invalid chunk size")
if chunk_len == 0:
crlf = await self.stream.read_bytes(2)
if crlf != b"\r\n":
@ -846,3 +849,21 @@ class HTTP1ServerConnection(object):
await asyncio.sleep(0)
finally:
delegate.on_close(self)
DIGITS = re.compile(r"[0-9]+")
HEXDIGITS = re.compile(r"[0-9a-fA-F]+")
def parse_int(s: str) -> int:
"""Parse a non-negative integer from a string."""
if DIGITS.fullmatch(s) is None:
raise ValueError("not an integer: %r" % s)
return int(s)
def parse_hex_int(s: str) -> int:
"""Parse a non-negative hexadecimal integer from a string."""
if HEXDIGITS.fullmatch(s) is None:
raise ValueError("not a hexadecimal integer: %r" % s)
return int(s, 16)
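The strict checks matter because Python's built-in int() accepts forms that are invalid in HTTP framing, a mismatch that can enable request smuggling between parsers that disagree. An illustrative comparison, assuming the helpers above are importable from the patched module:
from tornado.http1connection import parse_int
int(" 42 ")      # 42 - surrounding whitespace accepted
int("+42")       # 42 - leading sign accepted
int("4_2")       # 42 - underscore separators accepted
parse_int("42")  # 42 - only a plain run of digits passes DIGITS.fullmatch
# parse_int("+42") raises ValueError: not an integer: '+42'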

View file

@ -1,23 +1,48 @@
"""
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
"""
from __future__ import absolute_import
from __future__ import annotations
# Set default logging handler to avoid "No handler found" warnings.
import logging
import typing
import warnings
from logging import NullHandler
from . import exceptions
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from ._version import __version__
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
from .filepost import encode_multipart_formdata
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .response import BaseHTTPResponse, HTTPResponse
from .util.request import make_headers
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import get_host
# Ensure that Python is compiled with OpenSSL 1.1.1+
# If the 'ssl' module isn't available at all that's
# fine, we only care if the module is available.
try:
import ssl
except ImportError:
pass
else:
if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive:
warnings.warn(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/3020",
exceptions.NotOpenSSLWarning,
)
elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive:
raise ImportError(
"urllib3 v2.0 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/2168"
)
# === NOTE TO REPACKAGERS AND VENDORS ===
# Please delete this block, this logic is only
@ -25,12 +50,12 @@ from .util.url import get_host
# See: https://github.com/urllib3/urllib3/issues/2680
try:
import urllib3_secure_extra # type: ignore # noqa: F401
except ImportError:
except ModuleNotFoundError:
pass
else:
warnings.warn(
"'urllib3[secure]' extra is deprecated and will be removed "
"in a future release of urllib3 2.x. Read more in this issue: "
"in urllib3 v2.1.0. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2680",
category=DeprecationWarning,
stacklevel=2,
@ -42,6 +67,7 @@ __version__ = __version__
__all__ = (
"HTTPConnectionPool",
"HTTPHeaderDict",
"HTTPSConnectionPool",
"PoolManager",
"ProxyManager",
@ -52,15 +78,18 @@ __all__ = (
"connection_from_url",
"disable_warnings",
"encode_multipart_formdata",
"get_host",
"make_headers",
"proxy_from_url",
"request",
"BaseHTTPResponse",
)
logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(level=logging.DEBUG):
def add_stderr_logger(
level: int = logging.DEBUG,
) -> logging.StreamHandler[typing.TextIO]:
"""
Helper for quickly adding a StreamHandler to the logger. Useful for
debugging.
@ -87,16 +116,51 @@ del NullHandler
# mechanisms to silence them.
# SecurityWarning's always go off by default.
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
# SubjectAltNameWarning's should go off once per host
warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
def disable_warnings(category=exceptions.HTTPWarning):
def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None:
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter("ignore", category)
_DEFAULT_POOL = PoolManager()
def request(
method: str,
url: str,
*,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
preload_content: bool | None = True,
decode_content: bool | None = True,
redirect: bool | None = True,
retries: Retry | bool | int | None = None,
timeout: Timeout | float | int | None = 3,
json: typing.Any | None = None,
) -> BaseHTTPResponse:
"""
A convenience, top-level request method. It uses a module-global ``PoolManager`` instance.
Therefore, its side effects could be shared across dependencies relying on it.
To avoid side effects, create a new ``PoolManager`` instance and use it instead.
The method does not accept low-level ``**urlopen_kw`` keyword arguments.
"""
return _DEFAULT_POOL.request(
method,
url,
body=body,
fields=fields,
headers=headers,
preload_content=preload_content,
decode_content=decode_content,
redirect=redirect,
retries=retries,
timeout=timeout,
json=json,
)
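A short usage sketch of this new top-level helper (the URL and payload are illustrative):
import urllib3
resp = urllib3.request("GET", "https://example.com/api", timeout=5)
print(resp.status, resp.headers.get("Content-Type"))
# json= encodes the body and sets Content-Type: application/json unless one is already given
resp = urllib3.request("POST", "https://example.com/api", json={"key": "value"})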

View file

@ -0,0 +1,173 @@
from __future__ import annotations
import typing
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from .util.url import Url
_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str]
class ProxyConfig(typing.NamedTuple):
ssl_context: ssl.SSLContext | None
use_forwarding_for_https: bool
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
class _ResponseOptions(typing.NamedTuple):
# TODO: Remove this in favor of a better
# HTTP request/response lifecycle tracking.
request_method: str
request_url: str
preload_content: bool
decode_content: bool
enforce_content_length: bool
if typing.TYPE_CHECKING:
import ssl
from typing_extensions import Literal, Protocol
from .response import BaseHTTPResponse
class BaseHTTPConnection(Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
host: str
port: int
timeout: None | (
float
) # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved.
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool
proxy_is_verified: bool | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
...
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
...
def connect(self) -> None:
...
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
...
def getresponse(self) -> BaseHTTPResponse:
...
def close(self) -> None:
...
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
class BaseHTTPSConnection(BaseHTTPConnection, Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
# Certificate verification methods
cert_reqs: int | str | None
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
ssl_context: ssl.SSLContext | None
# Trusted CAs
ca_certs: str | None
ca_cert_dir: str | None
ca_cert_data: None | str | bytes
# TLS version
ssl_minimum_version: int | None
ssl_maximum_version: int | None
ssl_version: int | str | None # Deprecated
# Client certificates
cert_file: str | None
key_file: str | None
key_password: str | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
...

View file

@ -1,34 +1,66 @@
from __future__ import absolute_import
try:
from collections.abc import Mapping, MutableMapping
except ImportError:
from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
class RLock:
def __enter__(self):
pass
def __exit__(self, exc_type, exc_value, traceback):
pass
from __future__ import annotations
import typing
from collections import OrderedDict
from enum import Enum, auto
from threading import RLock
if typing.TYPE_CHECKING:
# We can only import Protocol if TYPE_CHECKING because it's a development
# dependency, and is not available at runtime.
from typing_extensions import Protocol
class HasGettableStringKeys(Protocol):
def keys(self) -> typing.Iterator[str]:
...
def __getitem__(self, key: str) -> str:
...
from .exceptions import InvalidHeader
from .packages import six
from .packages.six import iterkeys, itervalues
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
_Null = object()
# Key type
_KT = typing.TypeVar("_KT")
# Value type
_VT = typing.TypeVar("_VT")
# Default type
_DT = typing.TypeVar("_DT")
ValidHTTPHeaderSource = typing.Union[
"HTTPHeaderDict",
typing.Mapping[str, str],
typing.Iterable[typing.Tuple[str, str]],
"HasGettableStringKeys",
]
class RecentlyUsedContainer(MutableMapping):
class _Sentinel(Enum):
not_passed = auto()
def ensure_can_construct_http_header_dict(
potential: object,
) -> ValidHTTPHeaderSource | None:
if isinstance(potential, HTTPHeaderDict):
return potential
elif isinstance(potential, typing.Mapping):
# Full runtime checking of the contents of a Mapping is expensive, so for the
# purposes of typechecking, we assume that any Mapping is the right shape.
return typing.cast(typing.Mapping[str, str], potential)
elif isinstance(potential, typing.Iterable):
# Similarly to Mapping, full runtime checking of the contents of an Iterable is
# expensive, so for the purposes of typechecking, we assume that any Iterable
# is the right shape.
return typing.cast(typing.Iterable[typing.Tuple[str, str]], potential)
elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"):
return typing.cast("HasGettableStringKeys", potential)
else:
return None
class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
"""
Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond
@ -42,69 +74,134 @@ class RecentlyUsedContainer(MutableMapping):
``dispose_func(value)`` is called. Callback which will get called
"""
ContainerCls = OrderedDict
_container: typing.OrderedDict[_KT, _VT]
_maxsize: int
dispose_func: typing.Callable[[_VT], None] | None
lock: RLock
def __init__(self, maxsize=10, dispose_func=None):
def __init__(
self,
maxsize: int = 10,
dispose_func: typing.Callable[[_VT], None] | None = None,
) -> None:
super().__init__()
self._maxsize = maxsize
self.dispose_func = dispose_func
self._container = self.ContainerCls()
self._container = OrderedDict()
self.lock = RLock()
def __getitem__(self, key):
def __getitem__(self, key: _KT) -> _VT:
# Re-insert the item, moving it to the end of the eviction line.
with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key, value):
evicted_value = _Null
def __setitem__(self, key: _KT, value: _VT) -> None:
evicted_item = None
with self.lock:
# Possibly evict the existing value of 'key'
evicted_value = self._container.get(key, _Null)
try:
# If the key exists, we'll overwrite it, which won't change the
# size of the pool. Because accessing a key should move it to
# the end of the eviction line, we pop it out first.
evicted_item = key, self._container.pop(key)
self._container[key] = value
except KeyError:
# When the key does not exist, we insert the value first so that
# evicting works in all cases, including when self._maxsize is 0
self._container[key] = value
# If we didn't evict an existing value, we might have to evict the
# least recently used item from the beginning of the container.
if len(self._container) > self._maxsize:
_key, evicted_value = self._container.popitem(last=False)
# If we didn't evict an existing value, and we've hit our maximum
# size, then we have to evict the least recently used item from
# the beginning of the container.
evicted_item = self._container.popitem(last=False)
if self.dispose_func and evicted_value is not _Null:
# After releasing the lock on the pool, dispose of any evicted value.
if evicted_item is not None and self.dispose_func:
_, evicted_value = evicted_item
self.dispose_func(evicted_value)
def __delitem__(self, key):
def __delitem__(self, key: _KT) -> None:
with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self):
def __len__(self) -> int:
with self.lock:
return len(self._container)
def __iter__(self):
def __iter__(self) -> typing.NoReturn:
raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe."
)
def clear(self):
def clear(self) -> None:
with self.lock:
# Copy pointers to all values, then wipe the mapping
values = list(itervalues(self._container))
values = list(self._container.values())
self._container.clear()
if self.dispose_func:
for value in values:
self.dispose_func(value)
def keys(self):
def keys(self) -> set[_KT]: # type: ignore[override]
with self.lock:
return list(iterkeys(self._container))
return set(self._container.keys())
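A behaviour sketch for the refactored container (keys, values and the print side effect are illustrative):
from urllib3._collections import RecentlyUsedContainer
lru = RecentlyUsedContainer(maxsize=2, dispose_func=lambda v: print("disposed", v))
lru["a"] = 1
lru["b"] = 2
_ = lru["a"]  # a read re-inserts the key at the end of the eviction line
lru["c"] = 3  # evicts "b", the least recently used key -> prints "disposed 2"
lru.keys()    # {'a', 'c'} - now returned as a set rather than a list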
class HTTPHeaderDict(MutableMapping):
class HTTPHeaderDictItemView(typing.Set[typing.Tuple[str, str]]):
"""
HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of
address.
If we directly try to get an item with a particular name, we will get a string
back that is the concatenated version of all the values:
>>> d['X-Header-Name']
'Value1, Value2, Value3'
However, if we iterate over an HTTPHeaderDict's items, we will optionally combine
these values based on whether combine=True was called when building up the dictionary
>>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
>>> d.add("A", "2", combine=True)
>>> d.add("B", "bar")
>>> list(d.items())
[
('A', '1, 2'),
('B', 'foo'),
('B', 'bar'),
]
This class conforms to the interface required by the MutableMapping ABC while
also giving us the nonstandard iteration behavior we want; items with duplicate
keys, ordered by time of first insertion.
"""
_headers: HTTPHeaderDict
def __init__(self, headers: HTTPHeaderDict) -> None:
self._headers = headers
def __len__(self) -> int:
return len(list(self._headers.iteritems()))
def __iter__(self) -> typing.Iterator[tuple[str, str]]:
return self._headers.iteritems()
def __contains__(self, item: object) -> bool:
if isinstance(item, tuple) and len(item) == 2:
passed_key, passed_val = item
if isinstance(passed_key, str) and isinstance(passed_val, str):
return self._headers._has_value_for_header(passed_key, passed_val)
return False
class HTTPHeaderDict(typing.MutableMapping[str, str]):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
@ -138,9 +235,11 @@ class HTTPHeaderDict(MutableMapping):
'7'
"""
def __init__(self, headers=None, **kwargs):
super(HTTPHeaderDict, self).__init__()
self._container = OrderedDict()
_container: typing.MutableMapping[str, list[str]]
def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
super().__init__()
self._container = {} # 'dict' is insert-ordered in Python 3.7+
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
@ -149,123 +248,147 @@ class HTTPHeaderDict(MutableMapping):
if kwargs:
self.extend(kwargs)
def __setitem__(self, key, val):
def __setitem__(self, key: str, val: str) -> None:
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
self._container[key.lower()] = [key, val]
return self._container[key.lower()]
def __getitem__(self, key):
def __getitem__(self, key: str) -> str:
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key):
def __delitem__(self, key: str) -> None:
del self._container[key.lower()]
def __contains__(self, key):
def __contains__(self, key: object) -> bool:
if isinstance(key, str):
return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, "keys"):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
return dict((k.lower(), v) for k, v in self.itermerged()) == dict(
(k.lower(), v) for k, v in other.itermerged()
)
def __ne__(self, other):
def setdefault(self, key: str, default: str = "") -> str:
return super().setdefault(key, default)
def __eq__(self, other: object) -> bool:
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return False
else:
other_as_http_header_dict = type(self)(maybe_constructable)
return {k.lower(): v for k, v in self.itermerged()} == {
k.lower(): v for k, v in other_as_http_header_dict.itermerged()
}
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
if six.PY2: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
def __len__(self) -> int:
return len(self._container)
def __iter__(self):
def __iter__(self) -> typing.Iterator[str]:
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def pop(self, key, default=__marker):
"""D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
"""
# Using the MutableMapping function directly fails due to the private marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
def discard(self, key: str) -> None:
try:
del self[key]
except KeyError:
pass
def add(self, key, val):
def add(self, key: str, val: str, *, combine: bool = False) -> None:
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
If this is called with combine=True, instead of adding a new header value
as a distinct item during iteration, this will instead append the value to
any existing header value with a comma. If no existing header value exists
for the key, then the value will simply be added, ignoring the combine parameter.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
>>> list(headers.items())
[('foo', 'bar'), ('foo', 'baz')]
>>> headers.add('foo', 'quz', combine=True)
>>> list(headers.items())
[('foo', 'bar, baz, quz')]
"""
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
key_lower = key.lower()
new_vals = [key, val]
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# if there are values here, then there is at least the initial
# key/value pair
assert len(vals) >= 2
if combine:
vals[-1] = vals[-1] + ", " + val
else:
vals.append(val)
def extend(self, *args, **kwargs):
def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError(
"extend() takes at most 1 positional "
"arguments ({0} given)".format(len(args))
f"extend() takes at most 1 positional arguments ({len(args)} given)"
)
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, Mapping):
for key in other:
self.add(key, other[key])
elif hasattr(other, "keys"):
for key in other.keys():
self.add(key, other[key])
else:
elif isinstance(other, typing.Mapping):
for key, val in other.items():
self.add(key, val)
elif isinstance(other, typing.Iterable):
other = typing.cast(typing.Iterable[typing.Tuple[str, str]], other)
for key, value in other:
self.add(key, value)
elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
# THIS IS NOT A TYPESAFE BRANCH
# In this branch, the object has a `keys` attr but is not a Mapping or any of
# the other types indicated in the method signature. We do some stuff with
# it as though it partially implements the Mapping interface, but we're not
# doing that stuff safely AT ALL.
for key in other.keys():
self.add(key, other[key])
for key, value in kwargs.items():
self.add(key, value)
def getlist(self, key, default=__marker):
@typing.overload
def getlist(self, key: str) -> list[str]:
...
@typing.overload
def getlist(self, key: str, default: _DT) -> list[str] | _DT:
...
def getlist(
self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
if default is self.__marker:
if default is _Sentinel.not_passed:
# _DT is unbound; empty list is instance of List[str]
return []
# _DT is bound; default is instance of _DT
return default
else:
# _DT may or may not be bound; vals[1:] is instance of List[str], which
# meets our external interface requirement of `Union[List[str], _DT]`.
return vals[1:]
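A short sketch of how the sentinel-based getlist behaves (header names and values are illustrative):
from urllib3 import HTTPHeaderDict
h = HTTPHeaderDict()
h.add("Set-Cookie", "a=1")
h.add("Set-Cookie", "b=2")
h.getlist("set-cookie")             # ['a=1', 'b=2'] - lookup is case-insensitive
h.getlist("missing")                # [] - the sentinel is detected, so an empty list is returned
h.getlist("missing", default=None)  # None - an explicit default wins, even a falsy one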
# Backwards compatibility for httplib
@ -276,62 +399,65 @@ class HTTPHeaderDict(MutableMapping):
# Backwards compatibility for http.cookiejar
get_all = getlist
def __repr__(self):
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
def __repr__(self) -> str:
return f"{type(self).__name__}({dict(self.itermerged())})"
def _copy_from(self, other):
def _copy_from(self, other: HTTPHeaderDict) -> None:
for key in other:
val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
self._container[key.lower()] = [key, *val]
def copy(self):
def copy(self) -> HTTPHeaderDict:
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self):
def iteritems(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
def itermerged(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ", ".join(val[1:])
def items(self):
return list(self.iteritems())
def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
return HTTPHeaderDictItemView(self)
@classmethod
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
obs_fold_continued_leaders = (" ", "\t")
headers = []
def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
if header_name in self:
return potential_value in self._container[header_name.lower()][1:]
return False
for line in message.headers:
if line.startswith(obs_fold_continued_leaders):
if not headers:
# We received a header line that starts with OWS as described
# in RFC-7230 S3.2.4. This indicates a multiline header, but
# there exists no previous header to which we can attach it.
raise InvalidHeader(
"Header continuation with no previous header: %s" % line
)
else:
key, value = headers[-1]
headers[-1] = (key, value + " " + line.strip())
continue
def __ior__(self, other: object) -> HTTPHeaderDict:
# Supports extending a header dict in-place using operator |=
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
self.extend(maybe_constructable)
return self
key, value = line.split(":", 1)
headers.append((key, value.strip()))
def __or__(self, other: object) -> HTTPHeaderDict:
# Supports merging header dicts using operator |
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = self.copy()
result.extend(maybe_constructable)
return result
return cls(headers)
def __ror__(self, other: object) -> HTTPHeaderDict:
# Supports merging header dicts using operator | when other is on left side
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = type(self)(maybe_constructable)
result.extend(self)
return result
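A merging sketch for the new operators (header names and values are illustrative):
from urllib3 import HTTPHeaderDict
h = HTTPHeaderDict({"Accept": "text/html"})
h |= {"Accept": "application/json"}  # __ior__ extends in place via add()
h["Accept"]                          # 'text/html, application/json'
merged = h | {"X-Extra": "1"}        # __or__ leaves h untouched and returns a merged copy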

View file

@ -1,12 +1,23 @@
from __future__ import absolute_import
from __future__ import annotations
from .filepost import encode_multipart_formdata
from .packages.six.moves.urllib.parse import urlencode
import json as _json
import typing
from urllib.parse import urlencode
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .response import BaseHTTPResponse
__all__ = ["RequestMethods"]
_TYPE_ENCODE_URL_FIELDS = typing.Union[
typing.Sequence[typing.Tuple[str, typing.Union[str, bytes]]],
typing.Mapping[str, typing.Union[str, bytes]],
]
class RequestMethods(object):
class RequestMethods:
"""
Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`urllib3.HTTPConnectionPool` and
@ -37,25 +48,34 @@ class RequestMethods(object):
_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
def __init__(self, headers=None):
def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None:
self.headers = headers or {}
def urlopen(
self,
method,
url,
body=None,
headers=None,
encode_multipart=True,
multipart_boundary=None,
**kw
): # Abstract
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**kw: typing.Any,
) -> BaseHTTPResponse: # Abstract
raise NotImplementedError(
"Classes extending RequestMethods must implement "
"their own ``urlopen`` method."
)
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
json: typing.Any | None = None,
**urlopen_kw: typing.Any,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used.
@ -68,18 +88,45 @@ class RequestMethods(object):
"""
method = method.upper()
urlopen_kw["request_url"] = url
if json is not None and body is not None:
raise TypeError(
"request got values for both 'body' and 'json' parameters which are mutually exclusive"
)
if json is not None:
if headers is None:
headers = self.headers.copy() # type: ignore
if not ("content-type" in map(str.lower, headers.keys())):
headers["Content-Type"] = "application/json" # type: ignore
body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode(
"utf-8"
)
if body is not None:
urlopen_kw["body"] = body
if method in self._encode_url_methods:
return self.request_encode_url(
method, url, fields=fields, headers=headers, **urlopen_kw
method,
url,
fields=fields, # type: ignore[arg-type]
headers=headers,
**urlopen_kw,
)
else:
return self.request_encode_body(
method, url, fields=fields, headers=headers, **urlopen_kw
)
def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
def request_encode_url(
self,
method: str,
url: str,
fields: _TYPE_ENCODE_URL_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
@ -87,7 +134,7 @@ class RequestMethods(object):
if headers is None:
headers = self.headers
extra_kw = {"headers": headers}
extra_kw: dict[str, typing.Any] = {"headers": headers}
extra_kw.update(urlopen_kw)
if fields:
@ -97,14 +144,14 @@ class RequestMethods(object):
def request_encode_body(
self,
method,
url,
fields=None,
headers=None,
encode_multipart=True,
multipart_boundary=None,
**urlopen_kw
):
method: str,
url: str,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc.
@ -143,7 +190,8 @@ class RequestMethods(object):
if headers is None:
headers = self.headers
extra_kw = {"headers": {}}
extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)}
body: bytes | str
if fields:
if "body" in urlopen_kw:
@ -157,14 +205,13 @@ class RequestMethods(object):
)
else:
body, content_type = (
urlencode(fields),
urlencode(fields), # type: ignore[arg-type]
"application/x-www-form-urlencoded",
)
extra_kw["body"] = body
extra_kw["headers"] = {"Content-Type": content_type}
extra_kw["headers"].setdefault("Content-Type", content_type)
extra_kw["headers"].update(headers)
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)

View file

@ -1,2 +1,4 @@
# This file is protected via CODEOWNERS
__version__ = "1.26.15"
from __future__ import annotations
__version__ = "2.0.5"

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,36 +0,0 @@
"""
This module provides means to detect the App Engine environment.
"""
import os
def is_appengine():
return is_local_appengine() or is_prod_appengine()
def is_appengine_sandbox():
"""Reports if the app is running in the first generation sandbox.
The second generation runtimes are technically still in a sandbox, but it
is much less restrictive, so generally you shouldn't need to check for it.
see https://cloud.google.com/appengine/docs/standard/runtimes
"""
return is_appengine() and os.environ["APPENGINE_RUNTIME"] == "python27"
def is_local_appengine():
return "APPENGINE_RUNTIME" in os.environ and os.environ.get(
"SERVER_SOFTWARE", ""
).startswith("Development/")
def is_prod_appengine():
return "APPENGINE_RUNTIME" in os.environ and os.environ.get(
"SERVER_SOFTWARE", ""
).startswith("Google App Engine/")
def is_prod_appengine_mvms():
"""Deprecated."""
return False

View file

@ -1,3 +1,5 @@
# type: ignore
"""
This module uses ctypes to bind a whole bunch of functions and constants from
SecureTransport. The goal here is to provide the low-level API to
@ -29,7 +31,8 @@ license and by oscrypto's:
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""
from __future__ import absolute_import
from __future__ import annotations
import platform
from ctypes import (
@ -48,8 +51,6 @@ from ctypes import (
)
from ctypes.util import find_library
from ...packages.six import raise_from
if platform.system() != "Darwin":
raise ImportError("Only macOS is supported")
@ -57,16 +58,16 @@ version = platform.mac_ver()[0]
version_info = tuple(map(int, version.split(".")))
if version_info < (10, 8):
raise OSError(
"Only OS X 10.8 and newer are supported, not %s.%s"
% (version_info[0], version_info[1])
f"Only OS X 10.8 and newer are supported, not {version_info[0]}.{version_info[1]}"
)
def load_cdll(name, macos10_16_path):
def load_cdll(name: str, macos10_16_path: str) -> CDLL:
"""Loads a CDLL by name, falling back to known path on 10.16+"""
try:
# Big Sur is technically 11 but we use 10.16 due to the Big Sur
# beta being labeled as 10.16.
path: str | None
if version_info >= (10, 16):
path = macos10_16_path
else:
@ -75,7 +76,7 @@ def load_cdll(name, macos10_16_path):
raise OSError # Caught and reraised as 'ImportError'
return CDLL(path, use_errno=True)
except OSError:
raise_from(ImportError("The library %s failed to load" % name), None)
raise ImportError(f"The library {name} failed to load") from None
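The replacement above swaps six.raise_from for the native Python 3 idiom; `raise ... from None` suppresses implicit exception chaining so the low-level OSError does not clutter the traceback. A minimal standalone sketch:
try:
    raise OSError("library lookup failed")
except OSError:
    # "from None" hides the OSError context in the resulting ImportError traceback
    raise ImportError("The library X failed to load") from None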
Security = load_cdll(
@ -416,104 +417,14 @@ try:
CoreFoundation.CFStringRef = CFStringRef
CoreFoundation.CFDictionaryRef = CFDictionaryRef
except (AttributeError):
raise ImportError("Error initializing ctypes")
except AttributeError:
raise ImportError("Error initializing ctypes") from None
class CFConst(object):
class CFConst:
"""
A class object that acts as essentially a namespace for CoreFoundation
constants.
"""
kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
class SecurityConst(object):
"""
A class object that acts as essentially a namespace for Security constants.
"""
kSSLSessionOptionBreakOnServerAuth = 0
kSSLProtocol2 = 1
kSSLProtocol3 = 2
kTLSProtocol1 = 4
kTLSProtocol11 = 7
kTLSProtocol12 = 8
# SecureTransport does not support TLS 1.3 even if there's a constant for it
kTLSProtocol13 = 10
kTLSProtocolMaxSupported = 999
kSSLClientSide = 1
kSSLStreamType = 0
kSecFormatPEMSequence = 10
kSecTrustResultInvalid = 0
kSecTrustResultProceed = 1
# This gap is present on purpose: this was kSecTrustResultConfirm, which
# is deprecated.
kSecTrustResultDeny = 3
kSecTrustResultUnspecified = 4
kSecTrustResultRecoverableTrustFailure = 5
kSecTrustResultFatalTrustFailure = 6
kSecTrustResultOtherError = 7
errSSLProtocol = -9800
errSSLWouldBlock = -9803
errSSLClosedGraceful = -9805
errSSLClosedNoNotify = -9816
errSSLClosedAbort = -9806
errSSLXCertChainInvalid = -9807
errSSLCrypto = -9809
errSSLInternal = -9810
errSSLCertExpired = -9814
errSSLCertNotYetValid = -9815
errSSLUnknownRootCert = -9812
errSSLNoRootCert = -9813
errSSLHostNameMismatch = -9843
errSSLPeerHandshakeFail = -9824
errSSLPeerUserCancelled = -9839
errSSLWeakPeerEphemeralDHKey = -9850
errSSLServerAuthCompleted = -9841
errSSLRecordOverflow = -9847
errSecVerifyFailed = -67808
errSecNoTrustSettings = -25263
errSecItemNotFound = -25300
errSecInvalidTrustSettings = -25262
# Cipher suites. We only pick the ones our default cipher string allows.
# Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
TLS_AES_128_GCM_SHA256 = 0x1301
TLS_AES_256_GCM_SHA384 = 0x1302
TLS_AES_128_CCM_8_SHA256 = 0x1305
TLS_AES_128_CCM_SHA256 = 0x1304

View file

@ -7,6 +7,8 @@ CoreFoundation messing about and memory management. The concerns in this module
are almost entirely about trying to avoid memory leaks and providing
appropriate and useful assistance to the higher-level code.
"""
from __future__ import annotations
import base64
import ctypes
import itertools
@ -15,8 +17,20 @@ import re
import ssl
import struct
import tempfile
import typing
from .bindings import CFConst, CoreFoundation, Security
from .bindings import ( # type: ignore[attr-defined]
CFArray,
CFConst,
CFData,
CFDictionary,
CFMutableArray,
CFString,
CFTypeRef,
CoreFoundation,
SecKeychainRef,
Security,
)
# This regular expression is used to grab PEM data out of a PEM bundle.
_PEM_CERTS_RE = re.compile(
@ -24,7 +38,7 @@ _PEM_CERTS_RE = re.compile(
)
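The pattern body is elided by the hunk above; a regex doing the same job could look like this sketch (the exact pattern and the helper name are assumptions, not quoted from the file):

from __future__ import annotations

import base64
import re

# Hypothetical matcher: capture the base64 payload between PEM markers.
_PEM_RE = re.compile(
    b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----",
    re.DOTALL,
)

def certs_from_pem(pem_bundle: bytes) -> list[bytes]:
    # b64decode discards the embedded newlines, yielding DER bytes per cert.
    return [base64.b64decode(m.group(1)) for m in _PEM_RE.finditer(pem_bundle)]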
def _cf_data_from_bytes(bytestring):
def _cf_data_from_bytes(bytestring: bytes) -> CFData:
"""
Given a bytestring, create a CFData object from it. This CFData object must
be CFReleased by the caller.
@ -34,7 +48,9 @@ def _cf_data_from_bytes(bytestring):
)
def _cf_dictionary_from_tuples(tuples):
def _cf_dictionary_from_tuples(
tuples: list[tuple[typing.Any, typing.Any]]
) -> CFDictionary:
"""
Given a list of Python tuples, create an associated CFDictionary.
"""
@ -56,7 +72,7 @@ def _cf_dictionary_from_tuples(tuples):
)
def _cfstr(py_bstr):
def _cfstr(py_bstr: bytes) -> CFString:
"""
Given Python binary data, create a CFString.
The string must be CFReleased by the caller.
@ -70,7 +86,7 @@ def _cfstr(py_bstr):
return cf_str
def _create_cfstring_array(lst):
def _create_cfstring_array(lst: list[bytes]) -> CFMutableArray:
"""
Given a list of Python binary data, create an associated CFMutableArray.
The array must be CFReleased by the caller.
@ -97,11 +113,11 @@ def _create_cfstring_array(lst):
except BaseException as e:
if cf_arr:
CoreFoundation.CFRelease(cf_arr)
raise ssl.SSLError("Unable to allocate array: %s" % (e,))
raise ssl.SSLError(f"Unable to allocate array: {e}") from None
return cf_arr
def _cf_string_to_unicode(value):
def _cf_string_to_unicode(value: CFString) -> str | None:
"""
Creates a Unicode string from a CFString object. Used entirely for error
reporting.
@ -123,10 +139,12 @@ def _cf_string_to_unicode(value):
string = buffer.value
if string is not None:
string = string.decode("utf-8")
return string
return string # type: ignore[no-any-return]
def _assert_no_error(error, exception_class=None):
def _assert_no_error(
error: int, exception_class: type[BaseException] | None = None
) -> None:
"""
Checks the return code and throws an exception if there is an error to
report
@ -138,8 +156,8 @@ def _assert_no_error(error, exception_class=None):
output = _cf_string_to_unicode(cf_error_string)
CoreFoundation.CFRelease(cf_error_string)
if output is None or output == u"":
output = u"OSStatus %s" % error
if output is None or output == "":
output = f"OSStatus {error}"
if exception_class is None:
exception_class = ssl.SSLError
@ -147,7 +165,7 @@ def _assert_no_error(error, exception_class=None):
raise exception_class(output)
def _cert_array_from_pem(pem_bundle):
def _cert_array_from_pem(pem_bundle: bytes) -> CFArray:
"""
Given a bundle of certs in PEM format, turns them into a CFArray of certs
that can be used to validate a cert chain.
@ -193,23 +211,23 @@ def _cert_array_from_pem(pem_bundle):
return cert_array
def _is_cert(item):
def _is_cert(item: CFTypeRef) -> bool:
"""
Returns True if a given CFTypeRef is a certificate.
"""
expected = Security.SecCertificateGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected
return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _is_identity(item):
def _is_identity(item: CFTypeRef) -> bool:
"""
Returns True if a given CFTypeRef is an identity.
"""
expected = Security.SecIdentityGetTypeID()
return CoreFoundation.CFGetTypeID(item) == expected
return CoreFoundation.CFGetTypeID(item) == expected # type: ignore[no-any-return]
def _temporary_keychain():
def _temporary_keychain() -> tuple[SecKeychainRef, str]:
"""
This function creates a temporary Mac keychain that we can use to work with
credentials. This keychain uses a one-time password and a temporary file to
@ -244,7 +262,9 @@ def _temporary_keychain():
return keychain, tempdirectory
def _load_items_from_file(keychain, path):
def _load_items_from_file(
keychain: SecKeychainRef, path: str
) -> tuple[list[CFTypeRef], list[CFTypeRef]]:
"""
Given a single file, loads all the trust objects from it into arrays and
the keychain.
@ -299,7 +319,7 @@ def _load_items_from_file(keychain, path):
return (identities, certificates)
def _load_client_cert_chain(keychain, *paths):
def _load_client_cert_chain(keychain: SecKeychainRef, *paths: str | None) -> CFArray:
"""
Load certificates and maybe keys from a number of files. Has the end goal
of returning a CFArray containing one SecIdentityRef, and then zero or more
@ -335,10 +355,10 @@ def _load_client_cert_chain(keychain, *paths):
identities = []
# Filter out bad paths.
paths = (path for path in paths if path)
filtered_paths = (path for path in paths if path)
try:
for file_path in paths:
for file_path in filtered_paths:
new_identities, new_certs = _load_items_from_file(keychain, file_path)
identities.extend(new_identities)
certificates.extend(new_certs)
@ -383,7 +403,7 @@ TLS_PROTOCOL_VERSIONS = {
}
def _build_tls_unknown_ca_alert(version):
def _build_tls_unknown_ca_alert(version: str) -> bytes:
"""
Builds a TLS alert record for an unknown CA.
"""
@ -395,3 +415,60 @@ def _build_tls_unknown_ca_alert(version):
record_type_alert = 0x15
record = struct.pack(">BBBH", record_type_alert, ver_maj, ver_min, msg_len) + msg
return record
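To make the record layout concrete, here is a worked expansion for TLS 1.2. The two-byte payload (level 2 = fatal, description 48 = unknown_ca) comes from the TLS alert registry; the lines elided by the hunk above are assumed to build it the same way:

import struct

msg = struct.pack(">BB", 2, 48)          # fatal / unknown_ca alert payload
msg_len = len(msg)                       # 2
ver_maj, ver_min = 3, 3                  # record-layer version for TLS 1.2
record = struct.pack(">BBBH", 0x15, ver_maj, ver_min, msg_len) + msg
assert record == b"\x15\x03\x03\x00\x02\x02\x30"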
class SecurityConst:
"""
A class object that acts essentially as a namespace for Security constants.
"""
kSSLSessionOptionBreakOnServerAuth = 0
kSSLProtocol2 = 1
kSSLProtocol3 = 2
kTLSProtocol1 = 4
kTLSProtocol11 = 7
kTLSProtocol12 = 8
# SecureTransport does not support TLS 1.3 even though there is a constant for it
kTLSProtocol13 = 10
kTLSProtocolMaxSupported = 999
kSSLClientSide = 1
kSSLStreamType = 0
kSecFormatPEMSequence = 10
kSecTrustResultInvalid = 0
kSecTrustResultProceed = 1
# This gap is present on purpose: this was kSecTrustResultConfirm, which
# is deprecated.
kSecTrustResultDeny = 3
kSecTrustResultUnspecified = 4
kSecTrustResultRecoverableTrustFailure = 5
kSecTrustResultFatalTrustFailure = 6
kSecTrustResultOtherError = 7
errSSLProtocol = -9800
errSSLWouldBlock = -9803
errSSLClosedGraceful = -9805
errSSLClosedNoNotify = -9816
errSSLClosedAbort = -9806
errSSLXCertChainInvalid = -9807
errSSLCrypto = -9809
errSSLInternal = -9810
errSSLCertExpired = -9814
errSSLCertNotYetValid = -9815
errSSLUnknownRootCert = -9812
errSSLNoRootCert = -9813
errSSLHostNameMismatch = -9843
errSSLPeerHandshakeFail = -9824
errSSLPeerUserCancelled = -9839
errSSLWeakPeerEphemeralDHKey = -9850
errSSLServerAuthCompleted = -9841
errSSLRecordOverflow = -9847
errSecVerifyFailed = -67808
errSecNoTrustSettings = -25263
errSecItemNotFound = -25300
errSecInvalidTrustSettings = -25262

View file

@ -1,314 +0,0 @@
"""
This module provides a pool manager that uses Google App Engine's
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
Example usage::
from urllib3 import PoolManager
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
if is_appengine_sandbox():
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
http = AppEngineManager()
else:
# PoolManager uses a socket-level API behind the scenes
http = PoolManager()
r = http.request('GET', 'https://google.com/')
There are `limitations <https://cloud.google.com/appengine/docs/python/\
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
the best choice for your application. There are three options for using
urllib3 on Google App Engine:
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
cost-effective in many circumstances as long as your usage is within the
limitations.
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
Sockets also have `limitations and restrictions
<https://cloud.google.com/appengine/docs/python/sockets/\
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
To use sockets, be sure to specify the following in your ``app.yaml``::
env_variables:
GAE_USE_SOCKETS_HTTPLIB : 'true'
3. If you are using `App Engine Flexible
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
:class:`PoolManager` without any configuration or special environment variables.
"""
from __future__ import absolute_import
import io
import logging
import warnings
from ..exceptions import (
HTTPError,
HTTPWarning,
MaxRetryError,
ProtocolError,
SSLError,
TimeoutError,
)
from ..packages.six.moves.urllib.parse import urljoin
from ..request import RequestMethods
from ..response import HTTPResponse
from ..util.retry import Retry
from ..util.timeout import Timeout
from . import _appengine_environ
try:
from google.appengine.api import urlfetch
except ImportError:
urlfetch = None
log = logging.getLogger(__name__)
class AppEnginePlatformWarning(HTTPWarning):
pass
class AppEnginePlatformError(HTTPError):
pass
class AppEngineManager(RequestMethods):
"""
Connection manager for Google App Engine sandbox applications.
This manager uses the URLFetch service directly instead of using the
emulated httplib, and is subject to URLFetch limitations as described in
the App Engine documentation `here
<https://cloud.google.com/appengine/docs/python/urlfetch>`_.
Notably it will raise an :class:`AppEnginePlatformError` if:
* URLFetch is not available.
* You attempt to use this on App Engine Flexible, since full socket
support is available there.
* A request is larger than 10 megabytes.
* A response is larger than 32 megabytes.
* You use an unsupported request method such as OPTIONS.
Beyond those cases, it will raise normal urllib3 errors (a usage sketch
follows this docstring).
"""
def __init__(
self,
headers=None,
retries=None,
validate_certificate=True,
urlfetch_retries=True,
):
if not urlfetch:
raise AppEnginePlatformError(
"URLFetch is not available in this environment."
)
warnings.warn(
"urllib3 is using URLFetch on Google App Engine sandbox instead "
"of sockets. To use sockets directly instead of URLFetch see "
"https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
AppEnginePlatformWarning,
)
RequestMethods.__init__(self, headers)
self.validate_certificate = validate_certificate
self.urlfetch_retries = urlfetch_retries
self.retries = retries or Retry.DEFAULT
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
# Return False to re-raise any potential exceptions
return False
def urlopen(
self,
method,
url,
body=None,
headers=None,
retries=None,
redirect=True,
timeout=Timeout.DEFAULT_TIMEOUT,
**response_kw
):
retries = self._get_retries(retries, redirect)
try:
follow_redirects = redirect and retries.redirect != 0 and retries.total
response = urlfetch.fetch(
url,
payload=body,
method=method,
headers=headers or {},
allow_truncated=False,
follow_redirects=self.urlfetch_retries and follow_redirects,
deadline=self._get_absolute_timeout(timeout),
validate_certificate=self.validate_certificate,
)
except urlfetch.DeadlineExceededError as e:
raise TimeoutError(self, e)
except urlfetch.InvalidURLError as e:
if "too large" in str(e):
raise AppEnginePlatformError(
"URLFetch request too large, URLFetch only "
"supports requests up to 10mb in size.",
e,
)
raise ProtocolError(e)
except urlfetch.DownloadError as e:
if "Too many redirects" in str(e):
raise MaxRetryError(self, url, reason=e)
raise ProtocolError(e)
except urlfetch.ResponseTooLargeError as e:
raise AppEnginePlatformError(
"URLFetch response too large, URLFetch only supports"
"responses up to 32mb in size.",
e,
)
except urlfetch.SSLCertificateError as e:
raise SSLError(e)
except urlfetch.InvalidMethodError as e:
raise AppEnginePlatformError(
"URLFetch does not support method: %s" % method, e
)
http_response = self._urlfetch_response_to_http_response(
response, retries=retries, **response_kw
)
# Handle redirect?
redirect_location = redirect and http_response.get_redirect_location()
if redirect_location:
# Check for redirect response
if self.urlfetch_retries and retries.raise_on_redirect:
raise MaxRetryError(self, url, "too many redirects")
else:
if http_response.status == 303:
method = "GET"
try:
retries = retries.increment(
method, url, response=http_response, _pool=self
)
except MaxRetryError:
if retries.raise_on_redirect:
raise MaxRetryError(self, url, "too many redirects")
return http_response
retries.sleep_for_retry(http_response)
log.debug("Redirecting %s -> %s", url, redirect_location)
redirect_url = urljoin(url, redirect_location)
return self.urlopen(
method,
redirect_url,
body,
headers,
retries=retries,
redirect=redirect,
timeout=timeout,
**response_kw
)
# Check if we should retry the HTTP response.
has_retry_after = bool(http_response.headers.get("Retry-After"))
if retries.is_retry(method, http_response.status, has_retry_after):
retries = retries.increment(method, url, response=http_response, _pool=self)
log.debug("Retry: %s", url)
retries.sleep(http_response)
return self.urlopen(
method,
url,
body=body,
headers=headers,
retries=retries,
redirect=redirect,
timeout=timeout,
**response_kw
)
return http_response
def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
if is_prod_appengine():
# Production GAE handles deflate encoding automatically, but does
# not remove the encoding header.
content_encoding = urlfetch_resp.headers.get("content-encoding")
if content_encoding == "deflate":
del urlfetch_resp.headers["content-encoding"]
transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
# We have a full response's content,
# so let's make sure we don't report ourselves as chunked data.
if transfer_encoding == "chunked":
encodings = transfer_encoding.split(",")
encodings.remove("chunked")
urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)
original_response = HTTPResponse(
# In order for decoding to work, we must present the content as
# a file-like object.
body=io.BytesIO(urlfetch_resp.content),
msg=urlfetch_resp.header_msg,
headers=urlfetch_resp.headers,
status=urlfetch_resp.status_code,
**response_kw
)
return HTTPResponse(
body=io.BytesIO(urlfetch_resp.content),
headers=urlfetch_resp.headers,
status=urlfetch_resp.status_code,
original_response=original_response,
**response_kw
)
def _get_absolute_timeout(self, timeout):
if timeout is Timeout.DEFAULT_TIMEOUT:
return None # Defer to URLFetch's default.
if isinstance(timeout, Timeout):
if timeout._read is not None or timeout._connect is not None:
warnings.warn(
"URLFetch does not support granular timeout settings, "
"reverting to total or default URLFetch timeout.",
AppEnginePlatformWarning,
)
return timeout.total
return timeout
def _get_retries(self, retries, redirect):
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
if retries.connect or retries.read or retries.redirect:
warnings.warn(
"URLFetch only supports total retries and does not "
"recognize connect, read, or redirect retry parameters.",
AppEnginePlatformWarning,
)
return retries
# Alias methods from _appengine_environ to maintain public API interface.
is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms

Some files were not shown because too many files have changed in this diff.