Merge branch 'feature/UpdateBSoup' into dev

This commit is contained in:
JackDandy 2024-06-06 12:00:13 +01:00
commit 5a82e05a6f
6 changed files with 21 additions and 11 deletions

View file

@ -1,5 +1,6 @@
### 3.32.0 (2024-xx-xx xx:xx:00 UTC)
* Update Beautiful Soup 4.12.2 (30c58a1) to 4.12.3 (7fb5175)
* Update CacheControl 0.13.1 (783a338) to 0.14.0 (e2be0c2)
* Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141)
* Update idna library 3.4 (cab054c) to 3.7 (1d365e1)

View file

@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
__version__ = "4.12.3"
__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

View file

@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
XML_PREFIX_B = b'<?xml'
@classmethod
def warn_if_markup_looks_like_xml(cls, markup):
def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
"""Perform a check on some markup to see if it looks like XML
that's not XHTML. If so, issue a warning.
This is much less reliable than doing the check while parsing,
but some of the tree builders can't do that.
:param stacklevel: The stacklevel of the code calling this
function.
:return: True if the markup looks like non-XHTML XML, False
otherwise.
"""
if isinstance(markup, bytes):
prefix = cls.XML_PREFIX_B
@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
and markup.startswith(prefix)
and not looks_like_html.search(markup[:500])
):
cls._warn()
cls._warn(stacklevel=stacklevel+2)
return True
return False
@classmethod
def _warn(cls):
def _warn(cls, stacklevel=5):
"""Issue a warning about XML being parsed as HTML."""
warnings.warn(
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
stacklevel=stacklevel
)
def _initialize_xml_detector(self):
@ -583,7 +588,7 @@ class DetectsXMLParsedAsHTML(object):
# We encountered an XML declaration and then a tag other
# than 'html'. This is a reliable indicator that a
# non-XHTML document is being parsed as XML.
self._warn()
self._warn(stacklevel=10)
def register_treebuilders_from(module):

View file

@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# html5lib only parses HTML, so if it's given XML that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
yield (markup, None, None, False)

View file

@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.processing_instruction_class = ProcessingInstruction
# We're in HTML mode, so if we're given XML, that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
else:
self.processing_instruction_class = XMLProcessingInstruction

View file

@ -51,7 +51,7 @@ class Formatter(EntitySubstitution):
void_element_close_prefix='/', cdata_containing_tags=None,
empty_attributes_are_booleans=False, indent=1,
):
"""Constructor.
r"""Constructor.
:param language: This should be Formatter.XML if you are formatting
XML markup and Formatter.HTML if you are formatting HTML markup.
@ -76,7 +76,7 @@ class Formatter(EntitySubstitution):
negative, or "" will only insert newlines. Using a
positive integer indent indents that many spaces per
level. If indent is a string (such as "\t"), that string
is used to indent each level. The default behavior to
is used to indent each level. The default behavior is to
indent one space per level.
"""
self.language = language