Update Beautiful Soup 4.11.1 (r642) → 4.12.2 and soupsieve 2.3.2.post1 (792d566) → 2.4.1 (2e66beb).

2024-11-22 12:55:05 +00:00 · 2023-05-28 13:58:26 +01:00 · 2023-05-28 13:58:26 +01:00 · 997e6955b2
commit 997e6955b2
parent 18370cebab
14 changed files with 794 additions and 475 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -1,4 +1,10 @@
-### 3.29.2 (2023-05-28 07:45:00 UTC)
+### 3.30.0 (2023-0x-xx xx:xx:00 UTC)
 * Update Beautiful Soup 4.11.1 (r642) to 4.12.2
 * Update soupsieve 2.3.2.post1 (792d566) to 2.4.1 (2e66beb)
 ### 3.29.2 (2023-05-28 07:45:00 UTC)
 * Fix find show results returned as newest/oldest that are then sorted z to a
 * Fix add show "TheTVDB via Trakt"
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
 provides methods and Pythonic idioms that make it easy to navigate,
 search, and modify the parse tree.
-Beautiful Soup works with Python 3.5 and up. It works better if lxml
+Beautiful Soup works with Python 3.6 and up. It works better if lxml
 and/or html5lib is installed.
 For more than you ever wanted to know about Beautiful Soup, see the
@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.1"
+__version__ = "4.12.2"
-__copyright__ = "Copyright (c) 2004-2022 Leonard Richardson"
+__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@ -38,11 +38,13 @@ from .builder import (
    builder_registry,
    ParserRejectedMarkup,
    XMLParsedAsHTMLWarning,
    HTMLParserTreeBuilder
 )
 from .dammit import UnicodeDammit
 from .element import (
    CData,
    Comment,
    CSS,
    DEFAULT_OUTPUT_ENCODING,
    Declaration,
    Doctype,
@ -211,7 +213,7 @@ class BeautifulSoup(Tag):
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
                    'has been renamed to "%s."' % (old_name, new_name),
-                    DeprecationWarning
+                    DeprecationWarning, stacklevel=3
                )
                return kwargs.pop(old_name)
            return None
@ -348,26 +350,50 @@ class BeautifulSoup(Tag):
        self.markup = None
        self.builder.soup = None
-    def __copy__(self):
+    def _clone(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
+        """Create a new BeautifulSoup object with the same TreeBuilder,
-        copy = type(self)(
+        but not associated with any markup.
            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
        )
-        # Although we encoded the tree to UTF-8, that may not have
+        This is the first step of the deepcopy process.
-        # been the encoding of the original markup. Set the copy's
+        """
-        # .original_encoding to reflect the original object's
+        clone = type(self)("", None, self.builder)
-        # .original_encoding.
+
-        copy.original_encoding = self.original_encoding
+        # Keep track of the encoding of the original document,
-        return copy
+        # since we won't be parsing it again.
        clone.original_encoding = self.original_encoding
        return clone
    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            d['builder'] = None
+            d['builder'] = type(self.builder)
        # Store the contents as a Unicode string.
        d['contents'] = []
        d['markup'] = self.decode()
        # If _most_recent_element is present, it's a Tag object left
        # over from initial parse. It might not be picklable and we
        # don't need it.
        if '_most_recent_element' in d:
            del d['_most_recent_element']
        return d
    def __setstate__(self, state):
        """Restore this object from a previously saved state dictionary.

        The dictionary becomes this object's ``__dict__``. The ``builder``
        entry is then turned back into a usable tree builder instance, the
        builder is pointed at this object, and ``reset()`` and ``_feed()``
        are called (presumably to rebuild the tree from the stored
        ``markup`` -- neither method is visible here, confirm).

        :param state: The attribute dictionary to restore from.
        :return: The same ``state`` dictionary that was passed in.
        """
        # If necessary, restore the TreeBuilder by looking it up.
        self.__dict__ = state
        if isinstance(self.builder, type):
            # The state holds a builder class rather than an instance;
            # create a fresh instance of that class.
            self.builder = self.builder()
        elif not self.builder:
            # We don't know which builder was used to build this
            # parse tree, so use a default we know is always available.
            self.builder = HTMLParserTreeBuilder()
        # Re-attach the builder to this object before rebuilding.
        self.builder.soup = self
        self.reset()
        self._feed()
        return state
    @classmethod
    def _decode_markup(cls, markup):
        """Ensure `markup` is bytes so it's safe to send into warnings.warn.
@ -405,7 +431,8 @@ class BeautifulSoup(Tag):
                    'The input looks more like a URL than markup. You may want to use'
                    ' an HTTP client like requests to get the document behind'
                    ' the URL, and feed that document to Beautiful Soup.',
-                    MarkupResemblesLocatorWarning
+                    MarkupResemblesLocatorWarning,
                    stacklevel=3
                )
                return True
        return False
@ -436,7 +463,7 @@ class BeautifulSoup(Tag):
                'The input looks more like a filename than markup. You may'
                ' want to open this file and pass the filehandle into'
                ' Beautiful Soup.',
-                MarkupResemblesLocatorWarning
+                MarkupResemblesLocatorWarning, stacklevel=3
            )
            return True
        return False
@ -467,6 +494,7 @@ class BeautifulSoup(Tag):
        self.open_tag_counter = Counter()
        self.preserve_whitespace_tag_stack = []
        self.string_container_stack = []
        self._most_recent_element = None
        self.pushTag(self)
    def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@ -748,7 +776,7 @@ class BeautifulSoup(Tag):
    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
        """Returns a string or Unicode representation of the parse tree
            as an HTML or XML document.
@ -775,7 +803,7 @@ class BeautifulSoup(Tag):
        else:
            indent_level = 0
        return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
@ -789,7 +817,7 @@ class BeautifulStoneSoup(BeautifulSoup):
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.',
-            DeprecationWarning
+            DeprecationWarning, stacklevel=2
        )
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@ -70,7 +70,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
        # ATM because the html5lib TreeBuilder doesn't use
        # UnicodeDammit.
        if exclude_encodings:
-            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+            warnings.warn(
                "You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.",
                stacklevel=3
            )
        # html5lib only parses HTML, so if it's given XML that's worth
        # noting.
@ -81,7 +84,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
    # These methods are defined by Beautiful Soup.
    def feed(self, markup):
        if self.soup.parse_only is not None:
-            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+            warnings.warn(
                "You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
                stacklevel=4
            )
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
        self.underlying_builder.parser = parser
        extra_kwargs = dict()
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@ -10,30 +10,9 @@ __all__ = [
 from html.parser import HTMLParser
 try:
    from html.parser import HTMLParseError
 except ImportError as e:
    # HTMLParseError is removed in Python 3.5. Since it can never be
    # thrown in 3.5, we can just define our own class as a placeholder.
    class HTMLParseError(Exception):
        pass
 import sys
 import warnings
 # Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
 # argument, which we'd like to set to False. Unfortunately,
 # http://bugs.python.org/issue13273 makes strict=True a better bet
 # before Python 3.2.3.
 #
 # At the end of this file, we monkeypatch HTMLParser so that
 # strict=True works well on Python 3.2.2.
 major, minor, release = sys.version_info[:3]
 CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
 CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
 CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
 from ..element import (
    CData,
    Comment,
@ -45,6 +24,7 @@ from ..dammit import EntitySubstitution, UnicodeDammit
 from ..builder import (
    DetectsXMLParsedAsHTML,
    ParserRejectedMarkup,
    HTML,
    HTMLTreeBuilder,
    STRICT,
@ -91,18 +71,21 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
        self._initialize_xml_detector()
-    def error(self, msg):
+    def error(self, message):
-        """In Python 3, HTMLParser subclasses must implement error(), although
+        # NOTE: This method is required so long as Python 3.9 is
-        this requirement doesn't appear to be documented.
+        # supported. The corresponding code is removed from HTMLParser
-
+        # in 3.5, but not removed from ParserBase until 3.10.
-        In Python 2, HTMLParser implements error() by raising an exception,
+        # https://github.com/python/cpython/issues/76025
-        which we don't want to do.
+        #
-
+        # The original implementation turned the error into a warning,
-        In any event, this method is called only on very strange
+        # but in every case I discovered, this made HTMLParser
-        markup and our best strategy is to pretend it didn't happen
+        # immediately crash with an error message that was less
-        and keep going.
+        # helpful than the warning. The new implementation makes it
-        """
+        # more clear that html.parser just can't parse this
-        warnings.warn(msg)
+        # markup. The 3.10 implementation does the same, though it
        # raises AssertionError rather than calling a method. (We
        # catch this error and wrap it in a ParserRejectedMarkup.)
        raise ParserRejectedMarkup(message)
    def handle_startendtag(self, name, attrs):
        """Handle an incoming empty-element tag.
@ -203,9 +186,10 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
        :param name: Character number, possibly in hexadecimal.
        """
-        # XXX workaround for a bug in HTMLParser. Remove this once
+        # TODO: This was originally a workaround for a bug in
-        # it's fixed in all supported versions.
+        # HTMLParser. (http://bugs.python.org/issue13633) The bug has
-        # http://bugs.python.org/issue13633
+        # been fixed, but removing this code still makes some
        # Beautiful Soup tests fail. This needs investigation.
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
@ -333,10 +317,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
        parser_args = parser_args or []
        parser_kwargs = parser_kwargs or {}
        parser_kwargs.update(extra_parser_kwargs)
-        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
+        parser_kwargs['convert_charrefs'] = False
            parser_kwargs['strict'] = False
        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
            parser_kwargs['convert_charrefs'] = False
        self.parser_args = (parser_args, parser_kwargs)
    def prepare_markup(self, markup, user_specified_encoding=None,
@ -397,103 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
        parser.soup = self.soup
        try:
            parser.feed(markup)
-            parser.close()
+        except AssertionError as e:
-        except HTMLParseError as e:
+            # html.parser raises AssertionError in rare cases to
-            warnings.warn(RuntimeWarning(
+            # indicate a fatal problem with the markup, especially
-                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
+            # when there's an error in the doctype declaration.
-            raise e
+            raise ParserRejectedMarkup(e)
        parser.close()
        parser.already_closed_empty_element = []
 # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
 # 3.2.3 code. This ensures they don't treat markup like <p></p> as a
 # string.
 #
 # XXX This code can be removed once most Python 3 users are on 3.2.3.
 if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
    import re
    attrfind_tolerant = re.compile(
        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
    locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
 """, re.VERBOSE)
    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
    from html.parser import tagfind, attrfind
    def parse_starttag(self, i):
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]
        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()
        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            if self.strict:
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos
    def set_cdata_mode(self, elem):
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
    CONSTRUCTOR_TAKES_STRICT = True
--- a/lib/bs4/css.py
+++ b/lib/bs4/css.py
@ -0,0 +1,280 @@
 """Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
 import warnings
 try:
    import soupsieve
 except ImportError as e:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
 class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
            point.
        :param api: A plug-in replacement for the soupsieve module,
            designed mainly for use in tests.
        :raise NotImplementedError: If `api` is None, which is the
            default when the soupsieve package could not be imported.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around the escape() function of the
        configured API (soupsieve.escape() by default). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: Whatever the API's escape() function returns.
        :raise NotImplementedError: If no API is available.
        """
        # Check the API this object was actually configured with, not
        # the module-level soupsieve import: a replacement API passed
        # to the constructor must keep working even when the real
        # soupsieve package is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces supplied by the caller, or None.
        :param select: A CSS selector string or a precompiled selector.
        :return: `ns` unchanged, unless it is None and `select` is not
            a precompiled selector, in which case the namespaces
            recorded on the tag are returned.
        """
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            # Only fall back to the tag's namespaces for selectors
            # that are not precompiled. A precompiled pattern already
            # has a namespace context compiled in, which cannot be
            # replaced.
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results to wrap.
        :return: A ResultSet containing `results`.
        """
        # Import here to avoid circular import
        from .element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.compile() method.
        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select_one() method.
        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0
        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.iselect() method.
        :return: A generator
        :rtype: types.GeneratorType
        """
        # Accept limit=None exactly as select() does, so the two
        # methods can be called interchangeably.
        if limit is None:
            limit = 0
        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.closest() method.
        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.match() method.
        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.filter() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@ -59,21 +59,6 @@ def diagnose(data):
    if hasattr(data, 'read'):
        data = data.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    else:
        try:
            if os.path.exists(data):
                print(('"%s" looks like a filename. Reading data from the file.' % data))
                with open(data) as fp:
                    data = fp.read()
        except ValueError:
            # This can happen on some platforms when the 'filename' is
            # too long. Assume it's data and not a filename.
            pass
        print("")
    for parser in basic_parsers:
        print(("Trying to parse your markup with %s" % parser))
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@ -8,14 +8,8 @@ except ImportError as e:
 import re
 import sys
 import warnings
 try:
    import soupsieve
 except ImportError as e:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
 from .css import CSS
 from .formatter import (
    Formatter,
    HTMLFormatter,
@ -154,6 +148,11 @@ class PageElement(object):
    NavigableString, Tag, etc. are all subclasses of PageElement.
    """
    # In general, we can't tell just by looking at an element whether
    # it's contained in an XML document or an HTML document. But for
    # Tags (q.v.) we can store this information at parse time.
    known_xml = None
    def setup(self, parent=None, previous_element=None, next_element=None,
              previous_sibling=None, next_sibling=None):
        """Sets up the initial relations between this element and
@ -496,13 +495,16 @@ class PageElement(object):
    def extend(self, tags):
        """Appends the given PageElements to this one's contents.
-        :param tags: A list of PageElements.
+        :param tags: A list of PageElements. If a single Tag is
            provided instead, this PageElement's contents will be extended
            with that Tag's contents.
        """
        if isinstance(tags, Tag):
-            # Calling self.append() on another tag's contents will change
+            tags = tags.contents
-            # the list we're iterating over. Make a list that won't
+        if isinstance(tags, list):
-            # change.
+            # Moving items around the tree may change their position in
-            tags = list(tags.contents)
+            # the original list. Make a list that won't change.
            tags = list(tags)
        for tag in tags:
            self.append(tag)
@ -586,8 +588,9 @@ class PageElement(object):
        :kwargs: A dictionary of filters on attribute values.
        :return: A ResultSet containing PageElements.
        """
        _stacklevel = kwargs.pop('_stacklevel', 2)
        return self._find_all(name, attrs, string, limit, self.next_elements,
-                             **kwargs)
+                              _stacklevel=_stacklevel+1, **kwargs)
    findAllNext = find_all_next  # BS3
    def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
@ -624,8 +627,11 @@ class PageElement(object):
        :return: A ResultSet of PageElements.
        :rtype: bs4.element.ResultSet
        """
-        return self._find_all(name, attrs, string, limit,
+        _stacklevel = kwargs.pop('_stacklevel', 2)
-                              self.next_siblings, **kwargs)
+        return self._find_all(
            name, attrs, string, limit,
            self.next_siblings, _stacklevel=_stacklevel+1, **kwargs
        )
    findNextSiblings = find_next_siblings   # BS3
    fetchNextSiblings = find_next_siblings  # BS2
@ -663,8 +669,11 @@ class PageElement(object):
        :return: A ResultSet of PageElements.
        :rtype: bs4.element.ResultSet
        """
-        return self._find_all(name, attrs, string, limit, self.previous_elements,
+        _stacklevel = kwargs.pop('_stacklevel', 2)
-                           **kwargs)
+        return self._find_all(
            name, attrs, string, limit, self.previous_elements,
            _stacklevel=_stacklevel+1, **kwargs
        )
    findAllPrevious = find_all_previous  # BS3
    fetchPrevious = find_all_previous    # BS2
@ -702,8 +711,11 @@ class PageElement(object):
        :return: A ResultSet of PageElements.
        :rtype: bs4.element.ResultSet
        """
-        return self._find_all(name, attrs, string, limit,
+        _stacklevel = kwargs.pop('_stacklevel', 2)
-                              self.previous_siblings, **kwargs)
+        return self._find_all(
            name, attrs, string, limit,
            self.previous_siblings, _stacklevel=_stacklevel+1, **kwargs
        )
    findPreviousSiblings = find_previous_siblings   # BS3
    fetchPreviousSiblings = find_previous_siblings  # BS2
@ -724,7 +736,7 @@ class PageElement(object):
        # NOTE: We can't use _find_one because findParents takes a different
        # set of arguments.
        r = None
-        l = self.find_parents(name, attrs, 1, **kwargs)
+        l = self.find_parents(name, attrs, 1, _stacklevel=3, **kwargs)
        if l:
            r = l[0]
        return r
@ -744,8 +756,9 @@ class PageElement(object):
        :return: A PageElement.
        :rtype: bs4.element.Tag | bs4.element.NavigableString
        """
        _stacklevel = kwargs.pop('_stacklevel', 2)
        return self._find_all(name, attrs, None, limit, self.parents,
-                             **kwargs)
+                              _stacklevel=_stacklevel+1, **kwargs)
    findParents = find_parents   # BS3
    fetchParents = find_parents  # BS2
@ -771,19 +784,20 @@ class PageElement(object):
    def _find_one(self, method, name, attrs, string, **kwargs):
        r = None
-        l = method(name, attrs, string, 1, **kwargs)
+        l = method(name, attrs, string, 1, _stacklevel=4, **kwargs)
        if l:
            r = l[0]
        return r
    def _find_all(self, name, attrs, string, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."
        _stacklevel = kwargs.pop('_stacklevel', 3)
        if string is None and 'text' in kwargs:
            string = kwargs.pop('text')
            warnings.warn(
                "The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
-                DeprecationWarning
+                DeprecationWarning, stacklevel=_stacklevel
            )
        if isinstance(name, SoupStrainer):
@ -926,11 +940,6 @@ class NavigableString(str, PageElement):
    PREFIX = ''
    SUFFIX = ''
    # We can't tell just by looking at a string whether it's contained
    # in an XML document or an HTML document.
    known_xml = None
    def __new__(cls, value):
        """Create a new NavigableString.
@ -946,12 +955,22 @@ class NavigableString(str, PageElement):
        u.setup()
        return u
-    def __copy__(self):
+    def __deepcopy__(self, memo, recursive=False):
        """A copy of a NavigableString has the same contents and class
        as the original, but it is not connected to the parse tree.
        :param recursive: This parameter is ignored; it's only defined
           so that NavigableString.__deepcopy__ implements the same
           signature as Tag.__deepcopy__.
        """
        return type(self)(self)
    def __copy__(self):
        """A copy of a NavigableString can only be a deep copy, because
        only one PageElement can occupy a given place in a parse tree.
        """
        return self.__deepcopy__({})
    def __getnewargs__(self):
        return (str(self),)
@ -1296,22 +1315,57 @@ class Tag(PageElement):
    parserClass = _alias("parser_class")  # BS3
-    def __copy__(self):
+    def __deepcopy__(self, memo, recursive=True):
-        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+        """A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
        Its contents are a copy of the old Tag's contents.
        """
        clone = self._clone()
        if recursive:
            # Clone this tag's descendants recursively, but without
            # making any recursive function calls.
            tag_stack = [clone]
            for event, element in self._event_stream(self.descendants):
                if event is Tag.END_ELEMENT_EVENT:
                    # Stop appending incoming Tags to the Tag that was
                    # just closed.
                    tag_stack.pop()
                else:
                    descendant_clone = element.__deepcopy__(
                        memo, recursive=False
                    )
                    # Add to its parent's .contents
                    tag_stack[-1].append(descendant_clone)
                    if event is Tag.START_ELEMENT_EVENT:
                        # Add the Tag itself to the stack so that its
                        # children will be .appended to it.
                        tag_stack.append(descendant_clone)
        return clone
    def __copy__(self):
        """A copy of a Tag must always be a deep copy, because a Tag's
        children can only have one parent at a time.
        """
        return self.__deepcopy__({})
    def _clone(self):
        """Create a new Tag just like this one, but with no
        contents and unattached to any parse tree.
        This is the first step in the deepcopy process.
        """
        clone = type(self)(
            None, self.builder, self.name, self.namespace,
            self.prefix, self.attrs, is_xml=self._is_xml,
            sourceline=self.sourceline, sourcepos=self.sourcepos,
            can_be_empty_element=self.can_be_empty_element,
            cdata_list_attributes=self.cdata_list_attributes,
-            preserve_whitespace_tags=self.preserve_whitespace_tags
+            preserve_whitespace_tags=self.preserve_whitespace_tags,
            interesting_string_types=self.interesting_string_types
        )
        for attr in ('can_be_empty_element', 'hidden'):
            setattr(clone, attr, getattr(self, attr))
        for child in self.contents:
            clone.append(child.__copy__())
        return clone
    @property
@ -1558,7 +1612,7 @@ class Tag(PageElement):
                '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
                    name=tag_name
                ),
-                DeprecationWarning
+                DeprecationWarning, stacklevel=2
            )
            return self.find(tag_name)
        # We special case contents to avoid recursion.
@ -1634,106 +1688,212 @@ class Tag(PageElement):
    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal",
-        """Render a Unicode representation of this PageElement and its
+               iterator=None):
-        contents.
+        pieces = []
        :param indent_level: Each line of the rendering will be
             indented this many spaces. Used internally in
             recursive calls while pretty-printing.
        :param eventual_encoding: The tag is destined to be
            encoded into this encoding. This method is _not_
            responsible for performing that encoding. This information
            is passed in so that it can be substituted in if the
            document contains a <META> tag that mentions the document's
            encoding.
        :param formatter: A Formatter object, or a string naming one of
            the standard formatters.
        """
        # First off, turn a non-Formatter `formatter` into a Formatter
        # object. This will stop the lookup from happening over and
        # over again.
        if not isinstance(formatter, Formatter):
            formatter = self.formatter_for_name(formatter)
-        attributes = formatter.attributes(self)
+
-        attrs = []
+        if indent_level is True:
-        for key, val in attributes:
+            indent_level = 0
-            if val is None:
+
-                decoded = key
+        # The currently active tag that put us into string literal
        # mode. Until this element is closed, children will be treated
        # as string literals and not pretty-printed. String literal
        # mode is turned on immediately after this tag begins, and
        # turned off immediately before it's closed. This means there
        # will be whitespace before and after the tag itself.
        string_literal_tag = None
        for event, element in self._event_stream(iterator):
            if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
                piece = element._format_tag(
                    eventual_encoding, formatter, opening=True
                )
            elif event is Tag.END_ELEMENT_EVENT:
                piece = element._format_tag(
                    eventual_encoding, formatter, opening=False
                )
                if indent_level is not None:
                    indent_level -= 1
            else:
-                if isinstance(val, list) or isinstance(val, tuple):
+                piece = element.output_ready(formatter)
                    val = ' '.join(val)
                elif not isinstance(val, str):
                    val = str(val)
                elif (
                        isinstance(val, AttributeValueWithCharsetSubstitution)
                        and eventual_encoding is not None
                ):
                    val = val.encode(eventual_encoding)
-                text = formatter.attribute_value(val)
+            # Now we need to apply the 'prettiness' -- extra
-                decoded = (
+            # whitespace before and/or after this tag. This can get
-                    str(key) + '='
+            # complicated because certain tags, like <pre> and
-                    + formatter.quoted_attribute_value(text))
+            # <script>, can't be prettified, since adding whitespace would
-            attrs.append(decoded)
+            # change the meaning of the content.
        close = ''
        closeTag = ''
            # The default behavior is to add whitespace before and
            # after an element when string literal mode is off, and to
            # leave things as they are when string literal mode is on.
            if string_literal_tag:
                indent_before = indent_after = False
            else:
                indent_before = indent_after = True
            # The only time the behavior is more complex than that is
            # when we encounter an opening or closing tag that might
            # put us into or out of string literal mode.
            if (event is Tag.START_ELEMENT_EVENT
                and not string_literal_tag
                and not element._should_pretty_print()):
                    # We are about to enter string literal mode. Add
                    # whitespace before this tag, but not after. We
                    # will stay in string literal mode until this tag
                    # is closed.
                    indent_before = True
                    indent_after = False
                    string_literal_tag = element
            elif (event is Tag.END_ELEMENT_EVENT
                  and element is string_literal_tag):
                # We are about to exit string literal mode by closing
                # the tag that sent us into that mode. Add whitespace
                # after this tag, but not before.
                indent_before = False
                indent_after = True
                string_literal_tag = None
            # Now we know whether to add whitespace before and/or
            # after this element.
            if indent_level is not None:
                if (indent_before or indent_after):
                    if isinstance(element, NavigableString):
                        piece = piece.strip()
                    if piece:
                        piece = self._indent_string(
                            piece, indent_level, formatter,
                            indent_before, indent_after
                        )
                if event == Tag.START_ELEMENT_EVENT:
                    indent_level += 1
            pieces.append(piece)
        return "".join(pieces)
    # Names for the different events yielded by _event_stream.
    # Each one is a distinct bare object() used purely as a marker;
    # it carries no data of its own.
    # Yielded for a Tag that is not an empty element; a matching
    # END_ELEMENT_EVENT follows once the tag is closed.
    START_ELEMENT_EVENT = object()
    # Yielded when a previously started Tag is popped off the stack.
    END_ELEMENT_EVENT = object()
    # Yielded for a Tag whose is_empty_element is true; no
    # END_ELEMENT_EVENT follows for it.
    EMPTY_ELEMENT_EVENT = object()
    # Yielded for any element that is not a Tag.
    STRING_ELEMENT_EVENT = object()
    def _event_stream(self, iterator=None):
        """Yield a sequence of events that can be used to reconstruct the DOM
        for this element.
        This lets us recreate the nested structure of this element
        (e.g. when formatting it as a string) without using recursive
        method calls.
        This is similar in concept to the SAX API, but it's a simpler
        interface designed for internal use. The events are different
        from SAX and the arguments associated with the events are Tags
        and other Beautiful Soup objects.
        :param iterator: An alternate iterator to use when traversing
         the tree. When omitted (or falsy), self.self_and_descendants
         is used. Each element must be yielded after its parent.
        :yield: (event, element) 2-tuples, where event is one of
         Tag.START_ELEMENT_EVENT, Tag.END_ELEMENT_EVENT,
         Tag.EMPTY_ELEMENT_EVENT or Tag.STRING_ELEMENT_EVENT.
        """
        tag_stack = []
        iterator = iterator or self.self_and_descendants
        for c in iterator:
            # If the parent of the element we're about to yield is not
            # the tag currently on the stack, it means that the tag on
            # the stack closed before this element appeared.
            #
            # Compare by identity rather than with !=: the question is
            # whether c's parent is this very tag object, not whether
            # the two happen to compare equal as values.
            while tag_stack and c.parent is not tag_stack[-1]:
                yield Tag.END_ELEMENT_EVENT, tag_stack.pop()
            if not isinstance(c, Tag):
                yield Tag.STRING_ELEMENT_EVENT, c
            elif c.is_empty_element:
                # Empty elements never go on the stack, so no end
                # event is produced for them.
                yield Tag.EMPTY_ELEMENT_EVENT, c
            else:
                yield Tag.START_ELEMENT_EVENT, c
                # Subsequent elements whose parent is c are its
                # contents, until something with a different parent
                # shows up.
                tag_stack.append(c)
        # The iterator is exhausted; close whatever is still open,
        # innermost tag first.
        while tag_stack:
            yield Tag.END_ELEMENT_EVENT, tag_stack.pop()
    def _indent_string(self, s, indent_level, formatter,
                       indent_before, indent_after):
        """Add indentation whitespace before and/or after a string.
        :param s: The string to amend with whitespace.
        :param indent_level: The indentation level; affects how much
           whitespace goes before the string.
        :param indent_before: Whether or not to add whitespace
           before the string.
        :param indent_after: Whether or not to add whitespace
           (a newline) after the string.
        """
        space_before = ''
        if indent_before and indent_level:
            space_before = (formatter.indent * indent_level)
        space_after = ''
        if indent_after:
            space_after = "\n"
        return space_before + s + space_after
    def _format_tag(self, eventual_encoding, formatter, opening):
        # A tag starts with the < character (see below).
        # Then the / character, if this is a closing tag.
        closing_slash = ''
        if not opening:
            closing_slash = '/'
        # Then an optional namespace prefix.
        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"
-        if self.is_empty_element:
+        # Then a list of attribute values, if this is an opening tag.
-            close = formatter.void_element_close_prefix or ''
+        attribute_string = ''
-        else:
+        if opening:
-            closeTag = '</%s%s>' % (prefix, self.name)
+            attributes = formatter.attributes(self)
            attrs = []
            for key, val in attributes:
                if val is None:
                    decoded = key
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
                    elif not isinstance(val, str):
                        val = str(val)
                    elif (
                            isinstance(val, AttributeValueWithCharsetSubstitution)
                            and eventual_encoding is not None
                    ):
                        val = val.encode(eventual_encoding)
-        pretty_print = self._should_pretty_print(indent_level)
+                    text = formatter.attribute_value(val)
-        space = ''
+                    decoded = (
-        indent_space = ''
+                        str(key) + '='
-        if indent_level is not None:
+                        + formatter.quoted_attribute_value(text))
-            indent_space = (formatter.indent * (indent_level - 1))
+                attrs.append(decoded)
        if pretty_print:
            space = indent_space
            indent_contents = indent_level + 1
        else:
            indent_contents = None
        contents = self.decode_contents(
            indent_contents, eventual_encoding, formatter
        )
        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if indent_level is not None:
                # Even if this particular tag is not pretty-printed,
                # we should indent up to the start of the tag.
                s.append(indent_space)
            s.append('<%s%s%s%s>' % (
                    prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if indent_level is not None and closeTag and self.next_sibling:
                # Even if this particular tag is not pretty-printed,
                # we're now done with the tag, and we should add a
                # newline if appropriate.
                s.append("\n")
            s = ''.join(s)
        return s
-    def _should_pretty_print(self, indent_level):
+        # Then an optional closing slash (for a void element in an
        # XML document).
        void_element_closing_slash = ''
        if self.is_empty_element:
            void_element_closing_slash = formatter.void_element_close_prefix or ''
        # Put it all together.
        return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
    def _should_pretty_print(self, indent_level=1):
        """Should this tag be pretty-printed?
        Most of them should, but some (such as <pre> in HTML
@ -1784,32 +1944,8 @@ class Tag(PageElement):
            the standard Formatters.
        """
-        # First off, turn a string formatter into a Formatter object. This
+        return self.decode(indent_level, eventual_encoding, formatter,
-        # will stop the lookup from happening over and over again.
+                           iterator=self.descendants)
        if not isinstance(formatter, Formatter):
            formatter = self.formatter_for_name(formatter)
        pretty_print = (indent_level is not None)
        s = []
        for c in self:
            text = None
            if isinstance(c, NavigableString):
                text = c.output_ready(formatter)
            elif isinstance(c, Tag):
                s.append(c.decode(indent_level, eventual_encoding,
                                  formatter))
            preserve_whitespace = (
                self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
            )
            if text and indent_level and not preserve_whitespace:
                text = text.strip()
            if text:
                if pretty_print and not preserve_whitespace:
                    s.append(formatter.indent * (indent_level - 1))
                s.append(text)
                if pretty_print and not preserve_whitespace:
                    s.append("\n")
        return ''.join(s)
    def encode_contents(
        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
@ -1862,7 +1998,8 @@ class Tag(PageElement):
        :rtype: bs4.element.Tag | bs4.element.NavigableString
        """
        r = None
-        l = self.find_all(name, attrs, recursive, string, 1, **kwargs)
+        l = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3,
                          **kwargs)
        if l:
            r = l[0]
        return r
@ -1889,7 +2026,9 @@ class Tag(PageElement):
        generator = self.descendants
        if not recursive:
            generator = self.children
-        return self._find_all(name, attrs, string, limit, generator, **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
        return self._find_all(name, attrs, string, limit, generator,
                              _stacklevel=_stacklevel+1, **kwargs)
    findAll = find_all       # BS3
    findChildren = find_all  # BS2
@ -1903,6 +2042,18 @@ class Tag(PageElement):
        # return iter() to make the purpose of the method clear
        return iter(self.contents)  # XXX This seems to be untested.
    @property
    def self_and_descendants(self):
        """Iterate over this PageElement followed by its descendants.
        The element itself comes first, unless it is hidden; the rest
        of the sequence is whatever self.descendants produces, in
        that iterator's order.
        :yield: A sequence of PageElements.
        """
        if not self.hidden:
            yield self
        yield from self.descendants
    @property
    def descendants(self):
        """Iterate over all children of this PageElement in a
@ -1929,16 +2080,13 @@ class Tag(PageElement):
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.
-        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           soupsieve.select() method.
        :return: A Tag.
        :rtype: bs4.element.Tag
        """
-        value = self.select(selector, namespaces, 1, **kwargs)
+        return self.css.select_one(selector, namespaces, **kwargs)
        if value:
            return value[0]
        return None
    def select(self, selector, namespaces=None, limit=None, **kwargs):
        """Perform a CSS selection operation on the current element.
@ -1960,21 +2108,12 @@ class Tag(PageElement):
        :return: A ResultSet of Tags.
        :rtype: bs4.element.ResultSet
        """
-        if namespaces is None:
+        return self.css.select(selector, namespaces, limit, **kwargs)
            namespaces = self._namespaces
-        if limit is None:
+    @property
-            limit = 0
+    def css(self):
-        if soupsieve is None:
+        """Return an interface to the CSS selector API."""
-            raise NotImplementedError(
+        return CSS(self)
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
        # We do this because it's more consistent and because
        # ResultSet.__getattr__ has a helpful error message.
        return ResultSet(None, results)
    # Old names for backwards compatibility
    def childGenerator(self):
@ -1993,7 +2132,7 @@ class Tag(PageElement):
        """
        warnings.warn(
            'has_key is deprecated. Use has_attr(key) instead.',
-            DeprecationWarning
+            DeprecationWarning, stacklevel=2
        )
        return self.has_attr(key)
@ -2024,7 +2163,7 @@ class SoupStrainer(object):
            string = kwargs.pop('text')
            warnings.warn(
                "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
-                DeprecationWarning
+                DeprecationWarning, stacklevel=2
            )
        self.name = self._normalize_search_value(name)
--- a/lib/bs4/formatter.py
+++ b/lib/bs4/formatter.py
@ -149,14 +149,14 @@ class HTMLFormatter(Formatter):
    """A generic Formatter for HTML."""
    REGISTRY = {}
    def __init__(self, *args, **kwargs):
-        return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
+        super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
 class XMLFormatter(Formatter):
    """A generic Formatter for XML."""
    REGISTRY = {}
    def __init__(self, *args, **kwargs):
-        return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
+        super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
 # Set up aliases for the default formatters.
--- a/lib/soupsieve/init.py
+++ b/lib/soupsieve/init.py
@ -32,7 +32,7 @@ from . import css_match as cm
 from . import css_types as ct
 from .util import DEBUG, SelectorSyntaxError  # noqa: F401
 import bs4  # type: ignore[import]
-from typing import Optional, Any, Iterator, Iterable
+from typing import Any, Iterator, Iterable
 __all__ = (
    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,17 +45,14 @@ SoupSieve = cm.SoupSieve
 def compile(  # noqa: A001
    pattern: str,
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> cm.SoupSieve:
    """Compile CSS pattern."""
    ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces  # type: Optional[ct.Namespaces]
    cs = ct.CustomSelectors(custom) if custom is not None else custom  # type: Optional[ct.CustomSelectors]
    if isinstance(pattern, SoupSieve):
        if flags:
            raise ValueError("Cannot process 'flags' argument on a compiled selector list")
@ -65,7 +62,12 @@ def compile(  # noqa: A001
            raise ValueError("Cannot process 'custom' argument on a compiled selector list")
        return pattern
-    return cp._cached_css_compile(pattern, ns, cs, flags)
+    return cp._cached_css_compile(
        pattern,
        ct.Namespaces(namespaces) if namespaces is not None else namespaces,
        ct.CustomSelectors(custom) if custom is not None else custom,
        flags
    )
 def purge() -> None:
@ -77,10 +79,10 @@ def purge() -> None:
 def closest(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> 'bs4.Tag':
    """Match closest ancestor."""
@ -91,10 +93,10 @@ def closest(
 def match(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> bool:
    """Match node."""
@ -105,10 +107,10 @@ def match(
 def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> list['bs4.Tag']:
    """Filter list of nodes."""
@ -119,10 +121,10 @@ def filter(  # noqa: A001
 def select_one(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> 'bs4.Tag':
    """Select a single tag."""
@ -133,11 +135,11 @@ def select_one(
 def select(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> list['bs4.Tag']:
    """Select the specified tags."""
@ -148,11 +150,11 @@ def select(
 def iselect(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> Iterator['bs4.Tag']:
    """Iterate the specified tags."""
--- a/lib/soupsieve/meta.py
+++ b/lib/soupsieve/meta.py
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
    return Version(major, minor, micro, release, pre, post, dev)
-__version_info__ = Version(2, 5, 0, "final", post=1)
+__version_info__ = Version(2, 4, 1, "final")
 __version__ = __version_info__._get_canonical()
--- a/lib/soupsieve/css_match.py
+++ b/lib/soupsieve/css_match.py
@ -6,7 +6,7 @@ import re
 from . import css_types as ct
 import unicodedata
 import bs4  # type: ignore[import]
-from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast  # noqa: F401
+from typing import Iterator, Iterable, Any, Callable, Sequence, cast  # noqa: F401
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
    def get_children(
        self,
        el: bs4.Tag,
-        start: Optional[int] = None,
+        start: int | None = None,
        reverse: bool = False,
        tags: bool = True,
        no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
        return parent
    @staticmethod
-    def get_tag_name(el: bs4.Tag) -> Optional[str]:
+    def get_tag_name(el: bs4.Tag) -> str | None:
        """Get tag."""
-        return cast(Optional[str], el.name)
+        return cast('str | None', el.name)
    @staticmethod
-    def get_prefix_name(el: bs4.Tag) -> Optional[str]:
+    def get_prefix_name(el: bs4.Tag) -> str | None:
        """Get prefix."""
-        return cast(Optional[str], el.prefix)
+        return cast('str | None', el.prefix)
    @staticmethod
-    def get_uri(el: bs4.Tag) -> Optional[str]:
+    def get_uri(el: bs4.Tag) -> str | None:
        """Get namespace `URI`."""
-        return cast(Optional[str], el.namespace)
+        return cast('str | None', el.namespace)
    @classmethod
    def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
        return bool(ns and ns == NS_XHTML)
    @staticmethod
-    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
+    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
        """Return namespace and attribute name without the prefix."""
        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
        cls,
        el: bs4.Tag,
        name: str,
-        default: Optional[str | Sequence[str]] = None
+        default: str | Sequence[str] | None = None
-    ) -> Optional[str | Sequence[str]]:
+    ) -> str | Sequence[str] | None:
        """Get attribute by name."""
        value = default
@ -348,7 +348,7 @@ class _DocumentNav:
        return value
    @classmethod
-    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
+    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
        """Iterate attributes."""
        for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
        return 0 <= minutes <= 59
    @classmethod
-    def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
+    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """Parse the input value."""
-        parsed = None  # type: Optional[tuple[float, ...]]
+        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return value
        if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
        self,
        selectors: ct.SelectorList,
        scope: bs4.Tag,
-        namespaces: Optional[ct.Namespaces],
+        namespaces: ct.Namespaces | None,
        flags: int
    ) -> None:
        """Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
        return self.get_tag_ns(el) == NS_XHTML
-    def get_tag(self, el: bs4.Tag) -> Optional[str]:
+    def get_tag(self, el: bs4.Tag) -> str | None:
        """Get tag."""
        name = self.get_tag_name(el)
        return util.lower(name) if name is not None and not self.is_xml else name
-    def get_prefix(self, el: bs4.Tag) -> Optional[str]:
+    def get_prefix(self, el: bs4.Tag) -> str | None:
        """Get prefix."""
        prefix = self.get_prefix_name(el)
        return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
-    def find_bidi(self, el: bs4.Tag) -> Optional[int]:
+    def find_bidi(self, el: bs4.Tag) -> int | None:
        """Get directionality from element text."""
        for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
        self,
        el: bs4.Tag,
        attr: str,
-        prefix: Optional[str]
+        prefix: str | None
-    ) -> Optional[str | Sequence[str]]:
+    ) -> str | Sequence[str] | None:
        """Match attribute name and return value if it exists."""
        value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
            name not in (self.get_tag(el), '*')
        )
-    def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
+    def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
        """Match the tag."""
        match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
        """Match element if it contains text."""
        match = True
-        content = None  # type: Optional[str | Sequence[str]]
+        content = None  # type: str | Sequence[str] | None
        for contain_list in contains:
            if content is None:
                if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
        match = False
        name = cast(str, self.get_attribute_by_name(el, 'name'))
-        def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
+        def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
            """Find this input's form."""
            form = None
            parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
                    if lim < 1:
                        break
-    def closest(self) -> Optional[bs4.Tag]:
+    def closest(self) -> bs4.Tag | None:
        """Match closest ancestor."""
        current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
    pattern: str
    selectors: ct.SelectorList
-    namespaces: Optional[ct.Namespaces]
+    namespaces: ct.Namespaces | None
    custom: dict[str, str]
    flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
        self,
        pattern: str,
        selectors: ct.SelectorList,
-        namespaces: Optional[ct.Namespaces],
+        namespaces: ct.Namespaces | None,
-        custom: Optional[ct.CustomSelectors],
+        custom: ct.CustomSelectors | None,
        flags: int
    ):
        """Initialize."""
--- a/lib/soupsieve/css_parser.py
+++ b/lib/soupsieve/css_parser.py
@ -7,7 +7,7 @@ from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
 import warnings
-from typing import Optional, Match, Any, Iterator, cast
+from typing import Match, Any, Iterator, cast
 UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
 '''.format(nl=NEWLINE, ident=IDENTIFIER)
 # Attribute value comparison. `!=` is handled special as it is non-standard.
 ATTR = r'''
-(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
+(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=VALUE)
 # Selector patterns
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(
    pattern: str,
-    namespaces: Optional[ct.Namespaces],
+    namespaces: ct.Namespaces | None,
-    custom: Optional[ct.CustomSelectors],
+    custom: ct.CustomSelectors | None,
    flags: int
 ) -> cm.SoupSieve:
    """Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
    _cached_css_compile.cache_clear()
-def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
+def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """Process custom."""
    custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
        return self.name
-    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match the selector."""
        return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern
-        self.matched_name = None  # type: Optional[SelectorPattern]
+        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
    def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
        return '' if self.matched_name is None else self.matched_name.get_name()
-    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match the selector."""
        pseudo = None
@ -372,14 +372,14 @@ class _Selector:
    def __init__(self, **kwargs: Any) -> None:
        """Initialize."""
-        self.tag = kwargs.get('tag', None)  # type: Optional[ct.SelectorTag]
+        self.tag = kwargs.get('tag', None)  # type: ct.SelectorTag | None
        self.ids = kwargs.get('ids', [])  # type: list[str]
        self.classes = kwargs.get('classes', [])  # type: list[str]
        self.attributes = kwargs.get('attributes', [])  # type: list[ct.SelectorAttribute]
        self.nth = kwargs.get('nth', [])  # type: list[ct.SelectorNth]
        self.selectors = kwargs.get('selectors', [])  # type: list[ct.SelectorList]
        self.relations = kwargs.get('relations', [])  # type: list[_Selector]
-        self.rel_type = kwargs.get('rel_type', None)  # type: Optional[str]
+        self.rel_type = kwargs.get('rel_type', None)  # type: str | None
        self.contains = kwargs.get('contains', [])  # type: list[ct.SelectorContains]
        self.lang = kwargs.get('lang', [])  # type: list[ct.SelectorLang]
        self.flags = kwargs.get('flags', 0)  # type: int
@ -462,7 +462,7 @@ class CSSParser:
    def __init__(
        self,
        selector: str,
-        custom: Optional[dict[str, str | ct.SelectorList]] = None,
+        custom: dict[str, str | ct.SelectorList] | None = None,
        flags: int = 0
    ) -> None:
        """Initialize."""
@ -723,7 +723,7 @@ class CSSParser:
        if postfix == '_child':
            if m.group('of'):
                # Parse the rest of `of S`.
-                nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN | FLG_FORGIVE)
+                nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
            else:
                # Use default `*|*` for `of S`.
                nth_sel = CSS_NTH_OF_S_DEFAULT
@ -753,7 +753,7 @@ class CSSParser:
        if name == ':not':
            flags |= FLG_NOT
        elif name == ':has':
-            flags |= FLG_RELATIVE | FLG_FORGIVE
+            flags |= FLG_RELATIVE
        elif name in (':where', ':is'):
            flags |= FLG_FORGIVE
@ -777,11 +777,6 @@ class CSSParser:
        if not combinator:
            combinator = WS_COMBINATOR
        if combinator == COMMA_COMBINATOR:
-            if not has_selector:
-                # If we've not captured any selector parts, the comma is either at the beginning of the pattern
-                # or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
-                sel.no_match = True
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
            rel_type = ":" + WS_COMBINATOR
@ -1070,22 +1065,12 @@ class CSSParser:
                selectors.append(sel)
        # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
-        elif is_forgive:
+        elif is_forgive and (not selectors or not relations):
-            if is_relative:
+            # Handle normal pseudo-classes with empty slots like `:is()` etc.
-                # Handle relative selectors pseudo-classes with empty slots like `:has()`
+            sel.no_match = True
-                if selectors and selectors[-1].rel_type is None and rel_type == ': ':
+            del relations[:]
-                    sel.rel_type = rel_type
+            selectors.append(sel)
-                    sel.no_match = True
+            has_selector = True
-                    selectors[-1].relations.append(sel)
-                    has_selector = True
-            else:
-                # Handle normal pseudo-classes with empty slots
-                if not selectors or not relations:
-                    # Others like `:is()` etc.
-                    sel.no_match = True
-                    del relations[:]
-                    selectors.append(sel)
-                    has_selector = True
        if not has_selector:
            # We will always need to finish a selector when `:has()` is used as it leads with combining.
--- a/lib/soupsieve/css_types.py
+++ b/lib/soupsieve/css_types.py
@ -2,7 +2,7 @@
 from __future__ import annotations
 import copyreg
 from .pretty import pretty
-from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
+from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
 __all__ = (
    'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )
-    tag: Optional[SelectorTag]
+    tag: SelectorTag | None
    ids: tuple[str, ...]
    classes: tuple[str, ...]
    attributes: tuple[SelectorAttribute, ...]
    nth: tuple[SelectorNth, ...]
    selectors: tuple[SelectorList, ...]
    relation: SelectorList
-    rel_type: Optional[str]
+    rel_type: str | None
    contains: tuple[SelectorContains, ...]
    lang: tuple[SelectorLang, ...]
    flags: int
    def __init__(
        self,
-        tag: Optional[SelectorTag],
+        tag: SelectorTag | None,
        ids: tuple[str, ...],
        classes: tuple[str, ...],
        attributes: tuple[SelectorAttribute, ...],
        nth: tuple[SelectorNth, ...],
        selectors: tuple[SelectorList, ...],
        relation: SelectorList,
-        rel_type: Optional[str],
+        rel_type: str | None,
        contains: tuple[SelectorContains, ...],
        lang: tuple[SelectorLang, ...],
        flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
    __slots__ = ("name", "prefix", "_hash")
    name: str
-    prefix: Optional[str]
+    prefix: str | None
-    def __init__(self, name: str, prefix: Optional[str]) -> None:
+    def __init__(self, name: str, prefix: str | None) -> None:
        """Initialize."""
        super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
    attribute: str
    prefix: str
-    pattern: Optional[Pattern[str]]
+    pattern: Pattern[str] | None
-    xml_type_pattern: Optional[Pattern[str]]
+    xml_type_pattern: Pattern[str] | None
    def __init__(
        self,
        attribute: str,
        prefix: str,
-        pattern: Optional[Pattern[str]],
+        pattern: Pattern[str] | None,
-        xml_type_pattern: Optional[Pattern[str]]
+        xml_type_pattern: Pattern[str] | None
    ) -> None:
        """Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
    def __init__(
        self,
-        selectors: Optional[Iterable[Selector | SelectorNull]] = None,
+        selectors: Iterable[Selector | SelectorNull] | None = None,
        is_not: bool = False,
        is_html: bool = False
    ) -> None:
--- a/lib/soupsieve/util.py
+++ b/lib/soupsieve/util.py
@ -3,7 +3,7 @@ from __future__ import annotations
 from functools import wraps, lru_cache
 import warnings
 import re
-from typing import Callable, Any, Optional
+from typing import Callable, Any
 DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
 class SelectorSyntaxError(Exception):
    """Syntax error in a CSS selector."""
-    def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
+    def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
        """Initialize."""
        self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
    col = 1
    text = []  # type: list[str]
    line = 1
-    offset = None  # type: Optional[int]
+    offset = None  # type: int | None
    # Split pattern by newline and handle the text before the newline
    for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):