Update Beautiful Soup 4.13.0b3 (55e006b) → 4.13.4 (9752e85).

2025-12-04 16:14:36 +00:00 · 2025-06-26 18:09:22 +01:00 · 2025-06-26 18:09:22 +01:00 · 4a7ea7746f
commit 4a7ea7746f
parent 2b039695b8
11 changed files with 169 additions and 66 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -2,7 +2,7 @@

 * Update apprise 1.8.0 (81caf92) to 1.9.2 (a2a2216)
 * Update attr 23.2.0 (b393d79) to 24.3.0 (598494a)
-* Update Beautiful Soup 4.12.3 (7fb5175) to 4.13.0b3 (55e006b)
+* Update Beautiful Soup 4.12.3 (7fb5175) to 4.13.4 (9752e85)
 * Update CacheControl 0.14.0 (e2be0c2) to 0.14.3 (116113c)
 * Update certifi 2024.08.30 to 2024.12.14
 * Update chardet packages 5.1.0 (8087f00) to 5.3.0dev0 (8e8dfcd)
@ -34,6 +34,7 @@

 [develop changelog]

+* Update Beautiful Soup 4.12.3 (7fb5175) to 4.13.0b3 (55e006b)
 * Update CacheControl 0.14.0 (e2be0c2) to 0.14.2 (928422d)
 * Update Msgpack 1.0.6 (e1d3d5d) to 1.1.0 (0eeabfb)
 * Update soupsieve 2.5.0 (dc71495) to 2.6.0 (a8080d9)
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """

 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.13.0b3"
+__version__ = "4.13.4"
 __copyright__ = "Copyright (c) 2004-2025 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
--- a/lib/bs4/_deprecation.py
+++ b/lib/bs4/_deprecation.py
@ -25,7 +25,7 @@ def _deprecated_alias(old_name: str, new_name: str, version: str):
    :meta private:
    """

-    @property
+    @property # type:ignore
    def alias(self) -> Any:
        ":meta private:"
        warnings.warn(
--- a/lib/bs4/builder/__init__.py
+++ b/lib/bs4/builder/__init__.py
@ -192,17 +192,11 @@ class TreeBuilder(object):
     doesn't keep track of this information, then store_line_numbers
     is irrelevant.

-    :param attribute_dict_class: A Tag's attribute values (available
-      as tag.attrs) willl be stored in an instance of this class.
-      The default is Beautiful Soup's built-in `AttributeDict` class and
-      you will probably never need to change it.
-
    :param attribute_dict_class: The value of a multi-valued attribute
      (such as HTML's 'class') willl be stored in an instance of this
      class.  The default is Beautiful Soup's built-in
      `AttributeValueList`, which is a normal Python list, and you
      will probably never need to change it.
-
    """

    USE_DEFAULT: Any = object()  #: :meta private:
@ -266,7 +260,7 @@ class TreeBuilder(object):

    #: The textual contents of tags with these names should be
    #: instantiated with some class other than `bs4.element.NavigableString`.
-    DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = {}
+    DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = {} # type:ignore

    #: By default, tags are treated as empty-element tags if they have
    #: no contents--that is, using XML rules. HTMLTreeBuilder
@ -605,7 +599,7 @@ class HTMLTreeBuilder(TreeBuilder):
    #:
    #: TODO: Arguably <noscript> could go here but it seems
    #: qualitatively different from the other tags.
-    DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = {
+    DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = { # type:ignore
        "rt": RubyTextString,
        "rp": RubyParenthesisString,
        "style": Stylesheet,
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@ -136,7 +136,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
            # HTMLBinaryInputStream.__init__.
            extra_kwargs["override_encoding"] = self.user_specified_encoding

-        doc = parser.parse(markup, **extra_kwargs)
+        doc = parser.parse(markup, **extra_kwargs) # type:ignore

        # Set the character encoding detected by the tokenizer.
        if isinstance(markup, str):
@ -144,7 +144,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
            # charEncoding to UTF-8 if it gets Unicode input.
            doc.original_encoding = None
        else:
-            original_encoding = parser.tokenizer.stream.charEncoding[0]
+            original_encoding = parser.tokenizer.stream.charEncoding[0] # type:ignore
            # The encoding is an html5lib Encoding object. We want to
            # use a string for compatibility with other tree builders.
            original_encoding = original_encoding.name
@ -227,7 +227,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
            # This represents the point immediately after the end of the
            # tag. We don't know when the tag started, but we do know
            # where it ended -- the character just before this one.
-            sourceline, sourcepos = self.parser.tokenizer.stream.position()
+            sourceline, sourcepos = self.parser.tokenizer.stream.position() # type:ignore
            assert sourcepos is not None
            sourcepos = sourcepos - 1
        tag = self.soup.new_tag(
@ -266,7 +266,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
    def getDocument(self) -> "BeautifulSoup":
        return self.soup

-    def testSerializer(self, element: "Element") -> str:
+    def testSerializer(self, element: "Element") -> None:
        """This is only used by the html5lib unit tests. Since we
        don't currently hook into those tests, the implementation is
        left blank.
@ -337,7 +337,7 @@ class BeautifulSoupNode(treebuilder_base.Node):

    # TODO-TYPING: typeshed stubs are incorrect about this;
    # cloneNode returns a new Node, not None.
-    def cloneNode(self) -> treebuilder_base.Node:
+    def cloneNode(self) -> treebuilder_base.Node: # type:ignore
        raise NotImplementedError()


@ -564,12 +564,12 @@ class Element(BeautifulSoupNode):

    # TODO-TYPING: typeshed stubs are incorrect about this;
    # hasContent returns a boolean, not None.
-    def hasContent(self) -> bool:
+    def hasContent(self) -> bool: # type:ignore
        return len(self.element.contents) > 0

    # TODO-TYPING: typeshed stubs are incorrect about this;
    # cloneNode returns a new Node, not None.
-    def cloneNode(self) -> treebuilder_base.Node:
+    def cloneNode(self) -> treebuilder_base.Node: # type:ignore
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
        for key, value in self.attributes:
--- a/lib/bs4/builder/_lxml.py
+++ b/lib/bs4/builder/_lxml.py
@ -22,11 +22,13 @@ from typing import (
    TYPE_CHECKING,
    Union,
 )
-from typing_extensions import TypeAlias

 from io import BytesIO
 from io import StringIO
-from lxml import etree
+
+from typing_extensions import TypeAlias
+
+from lxml import etree # type:ignore
 from ..element import (
    AttributeDict,
    XMLAttributeDict,
@ -180,6 +182,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        self.soup = None
        self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
        self.active_namespace_prefixes = [dict(self.DEFAULT_NSMAPS)]
+        if self.is_xml:
+            self.processing_instruction_class = XMLProcessingInstruction
+        else:
+            self.processing_instruction_class = ProcessingInstruction
+
        if "attribute_dict_class" not in kwargs:
            kwargs["attribute_dict_class"] = XMLAttributeDict
        super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
@ -226,14 +233,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            document to Unicode and parsing it. Each strategy will be tried
            in turn.
        """
-        is_html = not self.is_xml
-        if is_html:
-            self.processing_instruction_class = ProcessingInstruction
+        if not self.is_xml:
            # We're in HTML mode, so if we're given XML, that's worth
            # noting.
            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup, stacklevel=3)
-        else:
-            self.processing_instruction_class = XMLProcessingInstruction

        if isinstance(markup, str):
            # We were given Unicode. Maybe lxml can parse Unicode on
@ -274,7 +277,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            markup,
            known_definite_encodings=known_definite_encodings,
            user_encodings=user_encodings,
-            is_html=is_html,
+            is_html=not self.is_xml,
            exclude_encodings=exclude_encodings,
        )
        for encoding in detector.encodings:
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@ -47,7 +47,7 @@ import warnings
 chardet_module: Optional[ModuleType] = None
 try:
    #  PyPI package: cchardet
-    import cchardet
+    import cchardet # type:ignore

    chardet_module = cchardet
 except ImportError:
@ -60,7 +60,7 @@ except ImportError:
    except ImportError:
        try:
            # PyPI package: charset-normalizer
-            import charset_normalizer
+            import charset_normalizer # type:ignore

            chardet_module = charset_normalizer
        except ImportError:
@ -797,9 +797,9 @@ class UnicodeDammit:
        )

        # Short-circuit if the data is in Unicode to begin with.
-        if isinstance(markup, str) or markup == b"":
-            self.markup = markup
-            self.unicode_markup = str(markup)
+        if isinstance(markup, str):
+            self.markup = markup.encode("utf8")
+            self.unicode_markup = markup
            self.original_encoding = None
            return

--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@ -52,7 +52,7 @@ def diagnose(data: "_IncomingMarkup") -> None:
    if "lxml" in basic_parsers:
        basic_parsers.append("lxml-xml")
        try:
-            from lxml import etree
+            from lxml import etree # type:ignore

            print(("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION))))
        except ImportError:
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@ -37,6 +37,7 @@ from typing import (
    TypeVar,
    Union,
    cast,
+    overload,
 )
 from typing_extensions import (
    Self,
@ -223,7 +224,7 @@ class AttributeValueList(List[str]):
    """


-class AttributeDict(dict):
+class AttributeDict(Dict[Any,Any]):
    """Superclass for the dictionary used to hold a tag's
    attributes. You can use this, but it's just a regular dict with no
    special logic.
@ -235,7 +236,7 @@ class XMLAttributeDict(AttributeDict):
    incoming values for consistency with the HTML spec.
    """

-    def __setitem__(self, key: str, value: Any):
+    def __setitem__(self, key: str, value: Any) -> None:
        """Set an attribute value, possibly modifying it to comply with
        the XML spec.

@ -273,7 +274,7 @@ class HTMLAttributeDict(AttributeDict):
    around boolean attributes that XML doesn't have.
    """

-    def __setitem__(self, key: str, value: Any):
+    def __setitem__(self, key: str, value: Any) -> None:
        """Set an attribute value, possibly modifying it to comply
        with the HTML spec,
        """
@ -389,7 +390,7 @@ class PageElement(object):
        :param previous_element: The element parsed immediately before
            this one.

-        :param next_element: The element parsed immediately before
+        :param next_element: The element parsed immediately after
            this one.

        :param previous_sibling: The most recently encountered element
@ -1231,7 +1232,7 @@ class PageElement(object):
        """
        return self._self_and(self.parents)

-    def _self_and(self, other_generator):
+    def _self_and(self, other_generator:Iterator[PageElement]) -> Iterator[PageElement]:
        """Modify a generator by yielding this element, then everything
        yielded by the other generator.
        """
@ -1317,6 +1318,14 @@ class NavigableString(str, PageElement):
    def __getnewargs__(self) -> Tuple[str]:
        return (str(self),)

+    # TODO-TYPING This should be SupportsIndex|slice but SupportsIndex
+    # is introduced in 3.8. This can be changed once 3.7 support is dropped.
+    def __getitem__(self, key: Union[int|slice]) -> str: # type:ignore
+        """Index like a plain `str`, but raise a helpful `TypeError` for string keys."""
+        if isinstance(key, str):
+            raise TypeError("string indices must be integers, not '{0}'. Are you treating a NavigableString like a Tag?".format(key.__class__.__name__))
+        return super(NavigableString, self).__getitem__(key)
+
    @property
    def string(self) -> str:
        """Convenience property defined to match `Tag.string`.
@ -2188,7 +2197,8 @@ class Tag(PageElement):
        elif isinstance(value, list):
            list_value = value
        else:
-            value = cast(str, value)
+            if not isinstance(value, str):
+                value = cast(str, value)
            list_value = self.attribute_value_list_class([value])
        return list_value

@ -2597,6 +2607,22 @@ class Tag(PageElement):
            or self.name not in self.preserve_whitespace_tags
        )

+    @overload
+    def prettify(
+        self,
+        encoding: None = None,
+        formatter: _FormatterOrName = "minimal",
+    ) -> str:
+        ...
+
+    @overload
+    def prettify(
+        self,
+        encoding: _Encoding,
+        formatter: _FormatterOrName = "minimal",
+    ) -> bytes:
+        ...
+
    def prettify(
        self,
        encoding: Optional[_Encoding] = None,
--- a/lib/bs4/filter.py
+++ b/lib/bs4/filter.py
@ -78,6 +78,21 @@ class ElementFilter(object):
        """
        self.match_function = match_function

+    @property
+    def includes_everything(self) -> bool:
+        """Does this `ElementFilter` obviously include everything? If so,
+        the filter process can be made much faster.
+
+        The `ElementFilter` might turn out to include everything even
+        if this returns `False`, but it won't include everything in an
+        obvious way.
+
+        The base `ElementFilter` implementation includes things based on
+        the match function, so includes_everything is only true if
+        there is no match function.
+        """
+        return not self.match_function
+
    @property
    def excludes_everything(self) -> bool:
        """Does this `ElementFilter` obviously exclude everything? If
@ -88,19 +103,25 @@ class ElementFilter(object):
        if this returns `False`, but it won't exclude everything in an
        obvious way.

-        The base `ElementFilter` implementation excludes *nothing*, so
-        the base implementation of `excludes_everything` always
-        returns `False`.
+        The base `ElementFilter` implementation excludes things based
+        on a match function we can't inspect, so excludes_everything
+        is always false.
        """
        return False

-    def match(self, element: PageElement) -> bool:
+    def match(self, element: PageElement, _known_rules:bool=False) -> bool:
        """Does the given PageElement match the rules set down by this
        ElementFilter?

        The base implementation delegates to the function passed in to
        the constructor.
+
+        :param _known_rules: Defined for compatibility with
+            SoupStrainer.match(). Used more for consistency than because
+            we need the performance optimization.
        """
+        if not _known_rules and self.includes_everything:
+            return True
        if not self.match_function:
            return True
        return self.match_function(element)
@ -111,13 +132,18 @@ class ElementFilter(object):
        Acts like Python's built-in `filter`, using
        `ElementFilter.match` as the filtering function.
        """
+        # If there are no rules at all, don't bother filtering. Let
+        # anything through.
+        if self.includes_everything:
+            for i in generator:
+                yield i
        while True:
            try:
                i = next(generator)
            except StopIteration:
                break
            if i:
-                if self.match(i):
+                if self.match(i, _known_rules=True):
                    yield cast("_OneElement", i)

    def find(self, generator: Iterator[PageElement]) -> _AtMostOneElement:
@ -190,6 +216,7 @@ class MatchRule(object):
    string: Optional[str]
    pattern: Optional[_RegularExpressionProtocol]
    present: Optional[bool]
+    exclude_everything: Optional[bool]
    # TODO-TYPING: All MatchRule objects also have an attribute
    # ``function``, but the type of the function depends on the
    # subclass.
@ -200,6 +227,7 @@ class MatchRule(object):
        pattern: Optional[_RegularExpressionProtocol] = None,
        function: Optional[Callable] = None,
        present: Optional[bool] = None,
+        exclude_everything: Optional[bool] = None
    ):
        if isinstance(string, bytes):
            string = string.decode("utf8")
@ -212,19 +240,20 @@ class MatchRule(object):
            self.pattern = pattern
        self.function = function
        self.present = present
+        self.exclude_everything = exclude_everything

        values = [
            x
-            for x in (self.string, self.pattern, self.function, self.present)
+            for x in (self.string, self.pattern, self.function, self.present, self.exclude_everything)
            if x is not None
        ]
        if len(values) == 0:
            raise ValueError(
-                "Either string, pattern, function or present must be provided."
+                "Either string, pattern, function, present, or exclude_everything must be provided."
            )
        if len(values) > 1:
            raise ValueError(
-                "At most one of string, pattern, function and present must be provided."
+                "At most one of string, pattern, function, present, and exclude_everything must be provided."
            )

    def _base_match(self, string: Optional[str]) -> Optional[bool]:
@ -234,6 +263,10 @@ class MatchRule(object):
        :return: True or False if we have a (positive or negative)
        match; None if we need to keep trying.
        """
+        # self.exclude_everything matches nothing.
+        if self.exclude_everything:
+            return False
+
        # self.present==True matches everything except None.
        if self.present is True:
            return string is not None
@ -357,9 +390,15 @@ class SoupStrainer(ElementFilter):
                stacklevel=2,
            )

-        self.name_rules = cast(
-            List[TagNameMatchRule], list(self._make_match_rules(name, TagNameMatchRule))
-        )
+        if name is None and not attrs and not string and not kwargs:
+            # Special case for backwards compatibility. Instantiating
+            # a SoupStrainer with no arguments whatsoever gets you one
+            # that matches all Tags, and only Tags.
+            self.name_rules = [TagNameMatchRule(present=True)]
+        else:
+                self.name_rules = cast(
+                    List[TagNameMatchRule], list(self._make_match_rules(name, TagNameMatchRule))
+                )
        self.attribute_rules = defaultdict(list)

        if not isinstance(attrs, dict):
@ -395,17 +434,35 @@ class SoupStrainer(ElementFilter):
        #: variable might have. Look at the .string_rules list instead.
        self.__string = string

+    @property
+    def includes_everything(self) -> bool:
+        """Check whether the provided rules will obviously include
+        everything. (They might include everything even if this returns `False`,
+        but not in an obvious way.)
+        """
+        return not self.name_rules and not self.string_rules and not self.attribute_rules
+
    @property
    def excludes_everything(self) -> bool:
        """Check whether the provided rules will obviously exclude
        everything. (They might exclude everything even if this returns `False`,
        but not in an obvious way.)
        """
-        return (
-            True
-            if (self.string_rules and (self.name_rules or self.attribute_rules))
-            else False
-        )
+        if (self.string_rules and (self.name_rules or self.attribute_rules)):
+            # This is self-contradictory, so the rules exclude everything.
+            return True
+
+        # If there's a rule that ended up treated as an "exclude everything"
+        # rule due to creating a logical inconsistency, then the rules
+        # exclude everything.
+        if any(x.exclude_everything for x in self.string_rules):
+            return True
+        if any(x.exclude_everything for x in self.name_rules):
+            return True
+        for ruleset in self.attribute_rules.values():
+            if any(x.exclude_everything for x in ruleset):
+                return True
+        return False

    @property
    def string(self) -> Optional[_StrainableString]:
@ -454,18 +511,24 @@ class SoupStrainer(ElementFilter):
        elif isinstance(obj, _RegularExpressionProtocol):
            yield rule_class(pattern=obj)
        elif hasattr(obj, "__iter__"):
+            if not obj:
+                # The attribute is being matched against the null set,
+                # which means it should exclude everything.
+                yield rule_class(exclude_everything=True)
            for o in obj:
                if not isinstance(o, (bytes, str)) and hasattr(o, "__iter__"):
                    # This is almost certainly the user's
                    # mistake. This list contains another list, which
                    # opens up the possibility of infinite
                    # self-reference. In the interests of avoiding
-                    # infinite recursion, we'll ignore this item
-                    # rather than looking inside.
+                    # infinite recursion, we'll treat this as an
+                    # impossible match and issue a rule that excludes
+                    # everything, rather than looking inside.
                    warnings.warn(
                        f"Ignoring nested list {o} to avoid the possibility of infinite recursion.",
                        stacklevel=5,
                    )
+                    yield rule_class(exclude_everything=True)
                    continue
                for x in cls._make_match_rules(o, rule_class):
                    yield x
@ -487,6 +550,10 @@ class SoupStrainer(ElementFilter):
        but a `SoupStrainer` that *only* contains `StringMatchRule`
        cannot match a `Tag`, only a `NavigableString`.
        """
+        # If there are no rules at all, let anything through.
+        #if self.includes_everything:
+        #    return True
+
        # String rules cannot not match a Tag on their own.
        if not self.name_rules and not self.attribute_rules:
            return False
@ -515,8 +582,12 @@ class SoupStrainer(ElementFilter):
                #     [f"{k}={v}" for k, v in sorted(tag.attrs.items())]
                # )
                # print(f"Testing <{tag.name} {attrs}>{tag.string}</{tag.name}> against {rule}")
+
+                # If the rule contains a function, the function will be called
+                # with `tag`. It will not be called a second time with
+                # `prefixed_name`.
                if rule.matches_tag(tag) or (
-                    prefixed_name is not None and rule.matches_string(prefixed_name)
+                        not rule.function and prefixed_name is not None and rule.matches_string(prefixed_name)
                ):
                    name_matches = True
                    break
@ -647,24 +718,30 @@ class SoupStrainer(ElementFilter):
                return True
        return False

-    def match(self, element: PageElement) -> bool:
+    def match(self, element: PageElement, _known_rules: bool=False) -> bool:
        """Does the given `PageElement` match the rules set down by this
        `SoupStrainer`?

        The find_* methods rely heavily on this method to find matches.

        :param element: A `PageElement`.
+        :param _known_rules: Set to true in the common case where
+           we already checked and found at least one rule in this SoupStrainer
+           that might exclude a PageElement. Without this, we need
+           to check .includes_everything every time, just to be safe.
        :return: `True` if the element matches this `SoupStrainer`'s rules; `False` otherwise.
        """
+        # If there are no rules at all, let anything through.
+        if not _known_rules and self.includes_everything:
+            return True
        if isinstance(element, Tag):
            return self.matches_tag(element)
        assert isinstance(element, NavigableString)
        if not (self.name_rules or self.attribute_rules):
            # A NavigableString can only match a SoupStrainer that
-            # does not define any name or attribute restrictions.
-            for rule in self.string_rules:
-                if rule.matches_string(element):
-                    return True
+            # does not define any name or attribute rules.
+            # Then it comes down to the string rules.
+            return self.matches_any_string_rule(element)
        return False

    @_deprecated("allow_tag_creation", "4.13.0")
--- a/lib/bs4/formatter.py
+++ b/lib/bs4/formatter.py
@ -83,7 +83,7 @@ class Formatter(EntitySubstitution):
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
-        indent: int = 1,
+        indent: Union[int,str] = 1,
    ):
        r"""Constructor.

@ -168,7 +168,7 @@ class Formatter(EntitySubstitution):
        return self.substitute(value)

    def attributes(
-        self, tag: bs4.element.Tag
+        self, tag: bs4.element.Tag # type:ignore
    ) -> Iterable[Tuple[str, Optional[_AttributeValue]]]:
        """Reorder a tag's attributes however you want.

@ -201,7 +201,7 @@ class HTMLFormatter(Formatter):
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
-        indent: int = 1,
+        indent: Union[int,str] = 1,
    ):
        super(HTMLFormatter, self).__init__(
            self.HTML,
@ -209,6 +209,7 @@ class HTMLFormatter(Formatter):
            void_element_close_prefix,
            cdata_containing_tags,
            empty_attributes_are_booleans,
+            indent=indent
        )


@ -223,7 +224,7 @@ class XMLFormatter(Formatter):
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
-        indent: int = 1,
+        indent: Union[int,str] = 1,
    ):
        super(XMLFormatter, self).__init__(
            self.XML,
@ -231,6 +232,7 @@ class XMLFormatter(Formatter):
            void_element_close_prefix,
            cdata_containing_tags,
            empty_attributes_are_booleans,
+            indent=indent,
        )