Merge branch 'feature/UpdateBSoup' into dev

This commit is contained in:
JackDandy 2024-06-06 12:00:13 +01:00
commit 5a82e05a6f
6 changed files with 21 additions and 11 deletions

View file

@ -1,5 +1,6 @@
### 3.32.0 (2024-xx-xx xx:xx:00 UTC) ### 3.32.0 (2024-xx-xx xx:xx:00 UTC)
* Update Beautiful Soup 4.12.2 (30c58a1) to 4.12.3 (7fb5175)
* Update CacheControl 0.13.1 (783a338) to 0.14.0 (e2be0c2) * Update CacheControl 0.13.1 (783a338) to 0.14.0 (e2be0c2)
* Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141) * Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141)
* Update idna library 3.4 (cab054c) to 3.7 (1d365e1) * Update idna library 3.4 (cab054c) to 3.7 (1d365e1)

View file

@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
""" """
__author__ = "Leonard Richardson (leonardr@segfault.org)" __author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.12.2" __version__ = "4.12.3"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson" __copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
# Use of this source code is governed by the MIT license. # Use of this source code is governed by the MIT license.
__license__ = "MIT" __license__ = "MIT"

View file

@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
XML_PREFIX_B = b'<?xml' XML_PREFIX_B = b'<?xml'
@classmethod @classmethod
def warn_if_markup_looks_like_xml(cls, markup): def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
"""Perform a check on some markup to see if it looks like XML """Perform a check on some markup to see if it looks like XML
that's not XHTML. If so, issue a warning. that's not XHTML. If so, issue a warning.
This is much less reliable than doing the check while parsing, This is much less reliable than doing the check while parsing,
but some of the tree builders can't do that. but some of the tree builders can't do that.
:param stacklevel: The stacklevel of the code calling this
function.
:return: True if the markup looks like non-XHTML XML, False :return: True if the markup looks like non-XHTML XML, False
otherwise. otherwise.
""" """
if isinstance(markup, bytes): if isinstance(markup, bytes):
prefix = cls.XML_PREFIX_B prefix = cls.XML_PREFIX_B
@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
and markup.startswith(prefix) and markup.startswith(prefix)
and not looks_like_html.search(markup[:500]) and not looks_like_html.search(markup[:500])
): ):
cls._warn() cls._warn(stacklevel=stacklevel+2)
return True return True
return False return False
@classmethod @classmethod
def _warn(cls): def _warn(cls, stacklevel=5):
"""Issue a warning about XML being parsed as HTML.""" """Issue a warning about XML being parsed as HTML."""
warnings.warn( warnings.warn(
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
stacklevel=stacklevel
) )
def _initialize_xml_detector(self): def _initialize_xml_detector(self):
@ -583,7 +588,7 @@ class DetectsXMLParsedAsHTML(object):
# We encountered an XML declaration and then a tag other # We encountered an XML declaration and then a tag other
# than 'html'. This is a reliable indicator that a # than 'html'. This is a reliable indicator that a
# non-XHTML document is being parsed as HTML. # non-XHTML document is being parsed as HTML.
self._warn() self._warn(stacklevel=10)
def register_treebuilders_from(module): def register_treebuilders_from(module):

View file

@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# html5lib only parses HTML, so if it's given XML that's worth # html5lib only parses HTML, so if it's given XML that's worth
# noting. # noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup) DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
yield (markup, None, None, False) yield (markup, None, None, False)

View file

@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.processing_instruction_class = ProcessingInstruction self.processing_instruction_class = ProcessingInstruction
# We're in HTML mode, so if we're given XML, that's worth # We're in HTML mode, so if we're given XML, that's worth
# noting. # noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup) DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
else: else:
self.processing_instruction_class = XMLProcessingInstruction self.processing_instruction_class = XMLProcessingInstruction

View file

@ -51,7 +51,7 @@ class Formatter(EntitySubstitution):
void_element_close_prefix='/', cdata_containing_tags=None, void_element_close_prefix='/', cdata_containing_tags=None,
empty_attributes_are_booleans=False, indent=1, empty_attributes_are_booleans=False, indent=1,
): ):
"""Constructor. r"""Constructor.
:param language: This should be Formatter.XML if you are formatting :param language: This should be Formatter.XML if you are formatting
XML markup and Formatter.HTML if you are formatting HTML markup. XML markup and Formatter.HTML if you are formatting HTML markup.
@ -76,7 +76,7 @@ class Formatter(EntitySubstitution):
negative, or "" will only insert newlines. Using a negative, or "" will only insert newlines. Using a
positive integer indent indents that many spaces per positive integer indent indents that many spaces per
level. If indent is a string (such as "\t"), that string level. If indent is a string (such as "\t"), that string
is used to indent each level. The default behavior to is used to indent each level. The default behavior is to
indent one space per level. indent one space per level.
""" """
self.language = language self.language = language