Merge branch 'feature/UpdateFeedparser' into dev

JackDandy 2023-04-27 12:37:26 +01:00
commit 0794ca330f
33 changed files with 3280 additions and 2349 deletions


@@ -2,6 +2,7 @@
* Update attr 22.2.0 (a9960de) to 22.2.0 (683d056)
* Update diskcache 5.4.0 (1cb1425) to 5.6.1 (4d30686)
* Update feedparser 6.0.10 (5fcb3ae) to 6.0.10 (6d032b8)
* Update filelock 3.9.0 (ce3e891) to 3.11.0 (d3241b9)
* Update Msgpack 1.0.4 (b5acfd5) to 1.0.5 (0516c2c)
* Update Requests library 2.28.1 (ec553c2) to 2.29.0 (87d63de)


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -27,12 +27,18 @@
from .api import parse
from .datetimes import registerDateHandler
from .exceptions import *
from .exceptions import (
CharacterEncodingOverride,
CharacterEncodingUnknown,
FeedparserError,
NonXMLContentType,
UndeclaredNamespace,
)
from .util import FeedParserDict
__author__ = 'Kurt McKee <contactme@kurtmckee.org>'
__license__ = 'BSD 2-clause'
__version__ = '6.0.10'
__author__ = "Kurt McKee <contactme@kurtmckee.org>"
__license__ = "BSD 2-clause"
__version__ = "6.0.10"
# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
@@ -46,3 +52,20 @@ RESOLVE_RELATIVE_URIS = 1
# If you want feedparser to automatically sanitize all potentially unsafe
# HTML content, set this to 1.
SANITIZE_HTML = 1
# If you want feedparser to use only a prefix of the feed to detect encodings
# (uses less memory), set this to 1.
OPTIMISTIC_ENCODING_DETECTION = 1
__all__ = (
"parse",
"registerDateHandler",
"FeedParserDict",
"FeedparserError",
"CharacterEncodingOverride",
"CharacterEncodingUnknown",
"NonXMLContentType",
"UndeclaredNamespace",
)


@@ -1,5 +1,5 @@
# The public API for feedparser
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -26,29 +26,23 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import datetime
import io
import time
from typing import Dict, List, Union
import urllib.error
import urllib.parse
import xml.sax
from typing import IO, Dict, Optional, Union
import sgmllib3k as sgmllib
from .datetimes import registerDateHandler, _parse_date
from .encodings import convert_to_utf8
from .html import BaseHTMLProcessor
from . import http
from .encodings import MissingEncoding, convert_file_to_utf8
from .html import BaseHTMLProcessor
from .mixin import XMLParserMixin
from .parsers.json import JSONParser
from .parsers.loose import LooseXMLParser
from .parsers.strict import StrictXMLParser
from .parsers.json import JSONParser
from .sanitizer import replace_doctype
from .urls import convert_to_idn, make_safe_absolute_uri
from .urls import make_safe_absolute_uri
from .util import FeedParserDict
# List of preferred XML parsers, by SAX driver name. These will be tried first,
# but if they're not installed, Python will keep searching through its own list
# of pre-installed parsers until it finds one that supports everything we need.
@@ -57,27 +51,30 @@ PREFERRED_XML_PARSERS = ["drv_libxml2"]
_XML_AVAILABLE = True
SUPPORTED_VERSIONS = {
'': 'unknown',
'rss090': 'RSS 0.90',
'rss091n': 'RSS 0.91 (Netscape)',
'rss091u': 'RSS 0.91 (Userland)',
'rss092': 'RSS 0.92',
'rss093': 'RSS 0.93',
'rss094': 'RSS 0.94',
'rss20': 'RSS 2.0',
'rss10': 'RSS 1.0',
'rss': 'RSS (unknown version)',
'atom01': 'Atom 0.1',
'atom02': 'Atom 0.2',
'atom03': 'Atom 0.3',
'atom10': 'Atom 1.0',
'atom': 'Atom (unknown version)',
'cdf': 'CDF',
'json1': 'JSON feed 1',
"": "unknown",
"rss090": "RSS 0.90",
"rss091n": "RSS 0.91 (Netscape)",
"rss091u": "RSS 0.91 (Userland)",
"rss092": "RSS 0.92",
"rss093": "RSS 0.93",
"rss094": "RSS 0.94",
"rss20": "RSS 2.0",
"rss10": "RSS 1.0",
"rss": "RSS (unknown version)",
"atom01": "Atom 0.1",
"atom02": "Atom 0.2",
"atom03": "Atom 0.3",
"atom10": "Atom 1.0",
"atom": "Atom (unknown version)",
"cdf": "CDF",
"json1": "JSON feed 1",
}
def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result):
def _open_resource(
url_file_stream_or_string,
result,
):
"""URL, filename, or string --> stream
This function lets you define parsers that take any input source
@@ -86,43 +83,44 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
to have all the basic stdio read methods (read, readline, readlines).
Just .close() the object when you're done with it.
If the etag argument is supplied, it will be used as the value of an
If-None-Match request header.
If the modified argument is supplied, it can be a tuple of 9 integers
(as returned by gmtime() in the standard Python time module) or a date
string in any format supported by feedparser. Regardless, it MUST
be in GMT (Greenwich Mean Time). It will be reformatted into an
RFC 1123-compliant date and used as the value of an If-Modified-Since
request header.
If the agent argument is supplied, it will be used as the value of a
User-Agent request header.
If the referrer argument is supplied, it will be used as the value of a
Referer[sic] request header.
If handlers is supplied, it is a list of handlers used to build a
urllib2 opener.
if request_headers is supplied it is a dictionary of HTTP request headers
that will override the values generated by FeedParser.
:return: A bytes object.
:return: A seekable, readable file object.
"""
if hasattr(url_file_stream_or_string, 'read'):
return url_file_stream_or_string.read()
# Some notes on the history of the implementation of _open_resource().
#
# parse() might need to go over the feed content twice:
# if the strict parser fails, it tries again with the loose parser.
#
# In 5.2.0, this returned an open file, to be read() by parse().
# By 6.0.8, this returned bytes directly.
#
# Since #296 (>6.0.8), this once again returns an open file
# (to reduce memory usage, see convert_file_to_utf8() for details).
# However, to accommodate parse() needing the content twice,
# the returned file is guaranteed to be seekable.
# (If the underlying resource is not seekable,
# the content is read and wrapped in an io.BytesIO/StringIO.)
if isinstance(url_file_stream_or_string, str) \
and urllib.parse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
return http.get(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
if callable(getattr(url_file_stream_or_string, "read", None)):
if callable(getattr(url_file_stream_or_string, "seekable", None)):
if url_file_stream_or_string.seekable():
return url_file_stream_or_string
return _to_in_memory_file(url_file_stream_or_string.read())
looks_like_url = isinstance(
url_file_stream_or_string, str
) and urllib.parse.urlparse(url_file_stream_or_string)[0] in (
"http",
"https",
)
if looks_like_url:
data = http.get(url_file_stream_or_string, result)
return io.BytesIO(data)
# try to open with native open function (if url_file_stream_or_string is a filename)
try:
with open(url_file_stream_or_string, 'rb') as f:
data = f.read()
except (IOError, UnicodeEncodeError, TypeError, ValueError):
return open(url_file_stream_or_string, "rb")
except (OSError, TypeError, ValueError):
# if url_file_stream_or_string is a str object that
# cannot be converted to the encoding returned by
# sys.getfilesystemencoding(), a UnicodeEncodeError
@@ -131,33 +129,32 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
# (such as an XML document encoded in UTF-32), TypeError will
# be thrown.
pass
else:
return data
# treat url_file_stream_or_string as string
if not isinstance(url_file_stream_or_string, bytes):
return url_file_stream_or_string.encode('utf-8')
return url_file_stream_or_string
# treat url_file_stream_or_string as bytes/string
return _to_in_memory_file(url_file_stream_or_string)
def _to_in_memory_file(data):
if isinstance(data, str):
return io.StringIO(data)
else:
return io.BytesIO(data)
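For illustration (not part of this commit), a minimal sketch of the seekability contract described above: a readable but non-seekable stream gets buffered so parse() can read the content twice. NonSeekableReader is a hypothetical stand-in for a socket-like response object.
import io
class NonSeekableReader:
    # hypothetical: readable, but reports itself as non-seekable
    def __init__(self, data: bytes):
        self._buffer = io.BytesIO(data)
    def read(self, size=-1):
        return self._buffer.read(size)
    def seekable(self):
        return False
source = NonSeekableReader(b"<rss/>")
# _open_resource() sees seekable() == False and buffers the content:
wrapped = _to_in_memory_file(source.read())
assert wrapped.read() == b"<rss/>"
wrapped.seek(0)  # safe: the in-memory wrapper is seekable
assert wrapped.read() == b"<rss/>"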
class LooseFeedParser(LooseXMLParser, XMLParserMixin, BaseHTMLProcessor):
pass
class StrictFeedParser(StrictXMLParser, XMLParserMixin, xml.sax.handler.ContentHandler):
pass
def parse(
url_file_stream_or_string,
etag: str = None,
modified: Union[str, datetime.datetime, time.struct_time] = None,
agent: str = None,
referrer: str = None,
handlers: List = None,
request_headers: Dict[str, str] = None,
response_headers: Dict[str, str] = None,
resolve_relative_uris: bool = None,
sanitize_html: bool = None,
response_headers: Optional[Dict[str, str]] = None,
resolve_relative_uris: Optional[bool] = None,
sanitize_html: Optional[bool] = None,
optimistic_encoding_detection: Optional[bool] = None,
) -> FeedParserDict:
"""Parse a feed from a URL, file, stream, or string.
@@ -174,20 +171,6 @@ def parse(
When a URL is not passed the feed location to use in relative URL
resolution should be passed in the ``Content-Location`` response header
(see ``response_headers`` below).
:param etag:
HTTP ``ETag`` request header.
:param modified:
HTTP ``Last-Modified`` request header.
:param agent:
HTTP ``User-Agent`` request header, which defaults to
the value of :data:`feedparser.USER_AGENT`.
:param referrer:
HTTP ``Referer`` [sic] request header.
:param handlers:
A list of handlers that will be passed to urllib2.
:param request_headers:
A mapping of HTTP header name to HTTP header value to add to the
request, overriding internally generated values.
:param response_headers:
A mapping of HTTP header name to HTTP header value. Multiple values may
be joined with a comma. If a HTTP request was made, these headers
@@ -201,20 +184,14 @@ def parse(
Should feedparser skip HTML sanitization? Only disable this if you know
what you are doing! Defaults to the value of
:data:`feedparser.SANITIZE_HTML`, which is ``True``.
:param optimistic_encoding_detection:
Should feedparser use only a prefix of the feed to detect encodings
(uses less memory, but the wrong encoding may be detected in rare cases).
Defaults to the value of
:data:`feedparser.OPTIMISTIC_ENCODING_DETECTION`, which is ``True``.
"""
# Avoid a cyclic import.
if not agent:
import feedparser
agent = feedparser.USER_AGENT
if sanitize_html is None:
import feedparser
sanitize_html = bool(feedparser.SANITIZE_HTML)
if resolve_relative_uris is None:
import feedparser
resolve_relative_uris = bool(feedparser.RESOLVE_RELATIVE_URIS)
result = FeedParserDict(
bozo=False,
entries=[],
@@ -223,50 +200,110 @@
)
try:
data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
file = _open_resource(
url_file_stream_or_string,
result,
)
except urllib.error.URLError as error:
result.update({
'bozo': True,
'bozo_exception': error,
})
result.update(
{
"bozo": True,
"bozo_exception": error,
}
)
return result
if not data:
# at this point, the file is guaranteed to be seekable;
# we read 1 byte/character to see if it's empty and return early
# (this preserves the behavior in 6.0.8)
initial_file_offset = file.tell()
if not file.read(1):
return result
file.seek(initial_file_offset)
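# The peek-and-rewind above relies on that seekability guarantee;
# as a standalone illustration (not part of this commit):
#     f = io.BytesIO(b"")
#     offset = f.tell()
#     assert not f.read(1)  # empty feed: return early
#     f.seek(offset)        # otherwise rewind before parsing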
# overwrite existing headers using response_headers
result['headers'].update(response_headers or {})
result["headers"].update(response_headers or {})
data = convert_to_utf8(result['headers'], data, result)
use_json_parser = result['content-type'] == 'application/json'
use_strict_parser = result['encoding'] and True or False
try:
_parse_file_inplace(
file,
result,
resolve_relative_uris=resolve_relative_uris,
sanitize_html=sanitize_html,
optimistic_encoding_detection=optimistic_encoding_detection,
)
finally:
if not hasattr(url_file_stream_or_string, "read"):
# the file does not come from the user, close it
file.close()
if not use_json_parser:
result['version'], data, entities = replace_doctype(data)
return result
def _parse_file_inplace(
file: Union[IO[bytes], IO[str]],
result: dict,
*,
resolve_relative_uris: Optional[bool] = None,
sanitize_html: Optional[bool] = None,
optimistic_encoding_detection: Optional[bool] = None,
) -> None:
# Avoid a cyclic import.
import feedparser
if sanitize_html is None:
sanitize_html = bool(feedparser.SANITIZE_HTML)
if resolve_relative_uris is None:
resolve_relative_uris = bool(feedparser.RESOLVE_RELATIVE_URIS)
if optimistic_encoding_detection is None:
optimistic_encoding_detection = bool(feedparser.OPTIMISTIC_ENCODING_DETECTION)
stream_factory = convert_file_to_utf8(
result["headers"], file, result, optimistic_encoding_detection
)
# We're done with file, all access must happen through stream_factory.
del file
# Some notes about the stream_factory.get_{text,binary}_file() methods:
#
# Calling them a second time will raise io.UnsupportedOperation
# if the underlying file was not seekable.
#
# Calling close() on the returned file is ignored
# (that is, the underlying file is *not* closed),
# because the SAX parser closes the file when done;
# we don't want that, since we might try again with the loose parser.
use_json_parser = False
if result["content-type"] in {"application/json", "application/feed+json"}:
use_json_parser = True
use_strict_parser = bool(result["encoding"])
result["version"], stream_factory.prefix, entities = replace_doctype(
stream_factory.prefix
)
# Ensure that baseuri is an absolute URI using an acceptable URI scheme.
contentloc = result['headers'].get('content-location', '')
href = result.get('href', '')
baseuri = make_safe_absolute_uri(href, contentloc) or make_safe_absolute_uri(contentloc) or href
contentloc = result["headers"].get("content-location", "")
href = result.get("href", "")
baseuri = (
make_safe_absolute_uri(href, contentloc)
or make_safe_absolute_uri(contentloc)
or href
)
baselang = result['headers'].get('content-language', None)
baselang = result["headers"].get("content-language", None)
if isinstance(baselang, bytes) and baselang is not None:
baselang = baselang.decode('utf-8', 'ignore')
baselang = baselang.decode("utf-8", "ignore")
if not _XML_AVAILABLE:
use_strict_parser = False
feed_parser: Union[JSONParser, StrictFeedParser, LooseFeedParser]
if use_json_parser:
result['version'] = None
feed_parser = JSONParser(baseuri, baselang, 'utf-8')
try:
feed_parser.feed(data)
except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
elif use_strict_parser:
if use_strict_parser and not use_json_parser:
# Initialize the SAX parser.
feed_parser = StrictFeedParser(baseuri, baselang, 'utf-8')
feed_parser = StrictFeedParser(baseuri, baselang, "utf-8")
feed_parser.resolve_relative_uris = resolve_relative_uris
feed_parser.sanitize_html = sanitize_html
saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
@@ -279,27 +316,62 @@ def parse(
saxparser.setContentHandler(feed_parser)
saxparser.setErrorHandler(feed_parser)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(io.BytesIO(data))
# If an encoding was detected, decode the file on the fly;
# otherwise, pass it as-is and let the SAX parser deal with it.
try:
source.setCharacterStream(stream_factory.get_text_file())
except MissingEncoding:
source.setByteStream(stream_factory.get_binary_file())
try:
saxparser.parse(source)
except xml.sax.SAXException as e:
result['bozo'] = 1
result['bozo_exception'] = feed_parser.exc or e
result["bozo"] = 1
result["bozo_exception"] = feed_parser.exc or e
use_strict_parser = False
# The loose XML parser will be tried if the JSON parser was not used,
# and if the strict XML parser was not used (or if it failed).
if not use_json_parser and not use_strict_parser:
feed_parser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
# The loose XML parser will be tried if the strict XML parser was not used
# (or if it failed to parse the feed).
if not use_strict_parser and not use_json_parser:
feed_parser = LooseFeedParser(baseuri, baselang, "utf-8", entities)
feed_parser.resolve_relative_uris = resolve_relative_uris
feed_parser.sanitize_html = sanitize_html
feed_parser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feed_parser.feeddata
result['entries'] = feed_parser.entries
result['version'] = result['version'] or feed_parser.version
# If an encoding was detected, use it; otherwise, assume utf-8 and do your best.
# Will raise io.UnsupportedOperation if the underlying file is not seekable.
data = stream_factory.get_text_file("utf-8", "replace").read()
# As of 6.0.8, LooseFeedParser.feed() can be called exactly once
# with the entire data (it does some re.sub() and str.replace() on it).
#
# SGMLParser (of which LooseFeedParser is a subclass)
# *can* be fed in a streaming fashion,
# by calling feed() repeatedly with chunks of text.
#
# When/if LooseFeedParser supports being fed chunks,
# replace the read() call above with read(size)/feed() calls in a loop.
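# An illustrative sketch of that future chunked mode (not part of this commit):
#     while True:
#         chunk = text_file.read(2**16)
#         if not chunk:
#             break
#         feed_parser.feed(chunk)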
feed_parser.feed(data)
# If parsing with the loose XML parser resulted in no information,
# flag that the JSON parser should be tried.
if not (feed_parser.entries or feed_parser.feeddata or feed_parser.version):
use_json_parser = True
if use_json_parser:
result["version"] = None
feed_parser = JSONParser(baseuri, baselang, "utf-8")
try:
feed_parser.feed(stream_factory.get_file())
except Exception as e:
result["bozo"] = 1
result["bozo_exception"] = e
result["feed"] = feed_parser.feeddata
result["entries"] = feed_parser.entries
result["version"] = result["version"] or feed_parser.version
if isinstance(feed_parser, JSONParser):
result['namespaces'] = {}
result["namespaces"] = {}
else:
result['namespaces'] = feed_parser.namespaces_in_use
return result
result["namespaces"] = feed_parser.namespaces_in_use


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -27,11 +27,12 @@
from time import struct_time
from typing import Callable, List, Optional
from .asctime import _parse_date_asctime
from .greek import _parse_date_greek
from .hungarian import _parse_date_hungarian
from .iso8601 import _parse_date_iso8601
from .korean import _parse_date_onblog, _parse_date_nate
from .korean import _parse_date_nate, _parse_date_onblog
from .perforce import _parse_date_perforce
from .rfc822 import _parse_date_rfc822
from .w3dtf import _parse_date_w3dtf


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -28,18 +28,18 @@
from .rfc822 import _parse_date_rfc822
_months = [
'jan',
'feb',
'mar',
'apr',
'may',
'jun',
'jul',
'aug',
'sep',
'oct',
'nov',
'dec',
"jan",
"feb",
"mar",
"apr",
"may",
"jun",
"jul",
"aug",
"sep",
"oct",
"nov",
"dec",
]
@@ -59,13 +59,22 @@ def _parse_date_asctime(dt):
# Insert a GMT timezone, if needed.
if len(parts) == 5:
parts.insert(4, '+0000')
parts.insert(4, "+0000")
# Exit if there are not six parts.
if len(parts) != 6:
return None
# Reassemble the parts in an RFC822-compatible order and parse them.
return _parse_date_rfc822(' '.join([
parts[0], parts[2], parts[1], parts[5], parts[3], parts[4],
]))
return _parse_date_rfc822(
" ".join(
[
parts[0],
parts[2],
parts[1],
parts[5],
parts[3],
parts[4],
]
)
)
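For illustration (not part of this commit), the reassembly above converts an asctime-style date into RFC 822 order; the sample value is hypothetical:
# _parse_date_asctime("Sun Jan  4 16:29:06 2004")
# -> "Sun 4 Jan 2004 16:29:06 +0000" is handed to _parse_date_rfc822()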


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -31,38 +31,40 @@ from .rfc822 import _parse_date_rfc822
# Unicode strings for Greek date strings
_greek_months = {
'\u0399\u03b1\u03bd': 'Jan', # c9e1ed in iso-8859-7
'\u03a6\u03b5\u03b2': 'Feb', # d6e5e2 in iso-8859-7
'\u039c\u03ac\u03ce': 'Mar', # ccdcfe in iso-8859-7
'\u039c\u03b1\u03ce': 'Mar', # cce1fe in iso-8859-7
'\u0391\u03c0\u03c1': 'Apr', # c1f0f1 in iso-8859-7
'\u039c\u03ac\u03b9': 'May', # ccdce9 in iso-8859-7
'\u039c\u03b1\u03ca': 'May', # cce1fa in iso-8859-7
'\u039c\u03b1\u03b9': 'May', # cce1e9 in iso-8859-7
'\u0399\u03bf\u03cd\u03bd': 'Jun', # c9effded in iso-8859-7
'\u0399\u03bf\u03bd': 'Jun', # c9efed in iso-8859-7
'\u0399\u03bf\u03cd\u03bb': 'Jul', # c9effdeb in iso-8859-7
'\u0399\u03bf\u03bb': 'Jul', # c9f9eb in iso-8859-7
'\u0391\u03cd\u03b3': 'Aug', # c1fde3 in iso-8859-7
'\u0391\u03c5\u03b3': 'Aug', # c1f5e3 in iso-8859-7
'\u03a3\u03b5\u03c0': 'Sep', # d3e5f0 in iso-8859-7
'\u039f\u03ba\u03c4': 'Oct', # cfeaf4 in iso-8859-7
'\u039d\u03bf\u03ad': 'Nov', # cdefdd in iso-8859-7
'\u039d\u03bf\u03b5': 'Nov', # cdefe5 in iso-8859-7
'\u0394\u03b5\u03ba': 'Dec', # c4e5ea in iso-8859-7
"\u0399\u03b1\u03bd": "Jan", # c9e1ed in iso-8859-7
"\u03a6\u03b5\u03b2": "Feb", # d6e5e2 in iso-8859-7
"\u039c\u03ac\u03ce": "Mar", # ccdcfe in iso-8859-7
"\u039c\u03b1\u03ce": "Mar", # cce1fe in iso-8859-7
"\u0391\u03c0\u03c1": "Apr", # c1f0f1 in iso-8859-7
"\u039c\u03ac\u03b9": "May", # ccdce9 in iso-8859-7
"\u039c\u03b1\u03ca": "May", # cce1fa in iso-8859-7
"\u039c\u03b1\u03b9": "May", # cce1e9 in iso-8859-7
"\u0399\u03bf\u03cd\u03bd": "Jun", # c9effded in iso-8859-7
"\u0399\u03bf\u03bd": "Jun", # c9efed in iso-8859-7
"\u0399\u03bf\u03cd\u03bb": "Jul", # c9effdeb in iso-8859-7
"\u0399\u03bf\u03bb": "Jul", # c9f9eb in iso-8859-7
"\u0391\u03cd\u03b3": "Aug", # c1fde3 in iso-8859-7
"\u0391\u03c5\u03b3": "Aug", # c1f5e3 in iso-8859-7
"\u03a3\u03b5\u03c0": "Sep", # d3e5f0 in iso-8859-7
"\u039f\u03ba\u03c4": "Oct", # cfeaf4 in iso-8859-7
"\u039d\u03bf\u03ad": "Nov", # cdefdd in iso-8859-7
"\u039d\u03bf\u03b5": "Nov", # cdefe5 in iso-8859-7
"\u0394\u03b5\u03ba": "Dec", # c4e5ea in iso-8859-7
}
_greek_wdays = {
'\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
'\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
'\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
'\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
'\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
'\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
'\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
"\u039a\u03c5\u03c1": "Sun", # caf5f1 in iso-8859-7
"\u0394\u03b5\u03c5": "Mon", # c4e5f5 in iso-8859-7
"\u03a4\u03c1\u03b9": "Tue", # d4f1e9 in iso-8859-7
"\u03a4\u03b5\u03c4": "Wed", # d4e5f4 in iso-8859-7
"\u03a0\u03b5\u03bc": "Thu", # d0e5ec in iso-8859-7
"\u03a0\u03b1\u03c1": "Fri", # d0e1f1 in iso-8859-7
"\u03a3\u03b1\u03b2": "Sat", # d3e1e2 in iso-8859-7
}
_greek_date_format_re = re.compile(r'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
_greek_date_format_re = re.compile(
r"([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)"
)
def _parse_date_greek(date_string):
@@ -72,15 +74,17 @@ def _parse_date_greek(date_string):
return
wday = _greek_wdays[m.group(1)]
month = _greek_months[m.group(3)]
rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \
{
'wday': wday,
'day': m.group(2),
'month': month,
'year': m.group(4),
'hour': m.group(5),
'minute': m.group(6),
'second': m.group(7),
'zonediff': m.group(8),
rfc822date = (
"%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(offset)s"
% {
"wday": wday,
"day": m.group(2),
"month": month,
"year": m.group(4),
"hour": m.group(5),
"minute": m.group(6),
"second": m.group(7),
"offset": m.group(8),
}
)
return _parse_date_rfc822(rfc822date)


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -31,21 +31,23 @@ from .w3dtf import _parse_date_w3dtf
# Unicode strings for Hungarian date strings
_hungarian_months = {
'janu\u00e1r': '01', # e1 in iso-8859-2
'febru\u00e1ri': '02', # e1 in iso-8859-2
'm\u00e1rcius': '03', # e1 in iso-8859-2
'\u00e1prilis': '04', # e1 in iso-8859-2
'm\u00e1ujus': '05', # e1 in iso-8859-2
'j\u00fanius': '06', # fa in iso-8859-2
'j\u00falius': '07', # fa in iso-8859-2
'augusztus': '08',
'szeptember': '09',
'okt\u00f3ber': '10', # f3 in iso-8859-2
'november': '11',
'december': '12',
"janu\u00e1r": "01", # e1 in iso-8859-2
"febru\u00e1ri": "02", # e1 in iso-8859-2
"m\u00e1rcius": "03", # e1 in iso-8859-2
"\u00e1prilis": "04", # e1 in iso-8859-2
"m\u00e1ujus": "05", # e1 in iso-8859-2
"j\u00fanius": "06", # fa in iso-8859-2
"j\u00falius": "07", # fa in iso-8859-2
"augusztus": "08",
"szeptember": "09",
"okt\u00f3ber": "10", # f3 in iso-8859-2
"november": "11",
"december": "12",
}
_hungarian_date_format_re = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))')
_hungarian_date_format_re = re.compile(
r"(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))"
)
def _parse_date_hungarian(date_string):
@@ -56,17 +58,9 @@
month = _hungarian_months[m.group(2)]
day = m.group(3)
if len(day) == 1:
day = '0' + day
day = "0" + day
hour = m.group(4)
if len(hour) == 1:
hour = '0' + hour
w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \
{
'year': m.group(1),
'month': month,
'day': day,
'hour': hour,
'minute': m.group(5),
'zonediff': m.group(6),
}
hour = "0" + hour
w3dtfdate = f"{m.group(1)}-{month}-{day}T{hour}:{m.group(5)}{m.group(6)}"
return _parse_date_w3dtf(w3dtfdate)


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -38,36 +38,36 @@ import time
# Please note the order in templates is significant because we need a
# greedy match.
_iso8601_tmpl = [
'YYYY-?MM-?DD',
'YYYY-0MM?-?DD',
'YYYY-MM',
'YYYY-?OOO',
'YY-?MM-?DD',
'YY-?OOO',
'YYYY',
'-YY-?MM',
'-OOO',
'-YY',
'--MM-?DD',
'--MM',
'---DD',
'CC',
'',
"YYYY-?MM-?DD",
"YYYY-0MM?-?DD",
"YYYY-MM",
"YYYY-?OOO",
"YY-?MM-?DD",
"YY-?OOO",
"YYYY",
"-YY-?MM",
"-OOO",
"-YY",
"--MM-?DD",
"--MM",
"---DD",
"CC",
"",
]
_iso8601_re = [
tmpl.replace(
'YYYY', r'(?P<year>\d{4})').replace(
'YY', r'(?P<year>\d\d)').replace(
'MM', r'(?P<month>[01]\d)').replace(
'DD', r'(?P<day>[0123]\d)').replace(
'OOO', r'(?P<ordinal>[0123]\d\d)').replace(
'CC', r'(?P<century>\d\d$)')
+ r'(T?(?P<hour>\d{2}):(?P<minute>\d{2})'
+ r'(:(?P<second>\d{2}))?'
+ r'(\.(?P<fracsecond>\d+))?'
+ r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
for tmpl in _iso8601_tmpl]
tmpl.replace("YYYY", r"(?P<year>\d{4})")
.replace("YY", r"(?P<year>\d\d)")
.replace("MM", r"(?P<month>[01]\d)")
.replace("DD", r"(?P<day>[0123]\d)")
.replace("OOO", r"(?P<ordinal>[0123]\d\d)")
.replace("CC", r"(?P<century>\d\d$)")
+ r"(T?(?P<hour>\d{2}):(?P<minute>\d{2})"
+ r"(:(?P<second>\d{2}))?"
+ r"(\.(?P<fracsecond>\d+))?"
+ r"(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?"
for tmpl in _iso8601_tmpl
]
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
@@ -83,21 +83,21 @@ def _parse_date_iso8601(date_string):
if m.span() == (0, 0):
return
params = m.groupdict()
ordinal = params.get('ordinal', 0)
ordinal = params.get("ordinal", 0)
if ordinal:
ordinal = int(ordinal)
else:
ordinal = 0
year = params.get('year', '--')
if not year or year == '--':
year = params.get("year", "--")
if not year or year == "--":
year = time.gmtime()[0]
elif len(year) == 2:
# ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
year = 100 * int(time.gmtime()[0] / 100) + int(year)
else:
year = int(year)
month = params.get('month', '-')
if not month or month == '-':
month = params.get("month", "-")
if not month or month == "-":
# ordinals are NOT normalized by mktime, we simulate them
# by setting month=1, day=ordinal
if ordinal:
@@ -105,13 +105,14 @@ def _parse_date_iso8601(date_string):
else:
month = time.gmtime()[1]
month = int(month)
day = params.get('day', 0)
day = params.get("day", 0)
if not day:
# see above
if ordinal:
day = ordinal
elif params.get('century', 0) or \
params.get('year', 0) or params.get('month', 0):
elif (
params.get("century", 0) or params.get("year", 0) or params.get("month", 0)
):
day = 1
else:
day = time.gmtime()[2]
@@ -119,29 +120,38 @@ def _parse_date_iso8601(date_string):
day = int(day)
# special case of the century - is the first year of the 21st century
# 2000 or 2001 ? The debate goes on...
if 'century' in params:
year = (int(params['century']) - 1) * 100 + 1
if "century" in params:
year = (int(params["century"]) - 1) * 100 + 1
# in ISO 8601 most fields are optional
for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']:
for field in ["hour", "minute", "second", "tzhour", "tzmin"]:
if not params.get(field, None):
params[field] = 0
hour = int(params.get('hour', 0))
minute = int(params.get('minute', 0))
second = int(float(params.get('second', 0)))
hour = int(params.get("hour", 0))
minute = int(params.get("minute", 0))
second = int(float(params.get("second", 0)))
# weekday is normalized by mktime(), we can ignore it
weekday = 0
daylight_savings_flag = -1
tm = [year, month, day, hour, minute, second, weekday,
ordinal, daylight_savings_flag]
tm = [
year,
month,
day,
hour,
minute,
second,
weekday,
ordinal,
daylight_savings_flag,
]
# ISO 8601 time zone adjustments
tz = params.get('tz')
if tz and tz != 'Z':
if tz[0] == '-':
tm[3] += int(params.get('tzhour', 0))
tm[4] += int(params.get('tzmin', 0))
elif tz[0] == '+':
tm[3] -= int(params.get('tzhour', 0))
tm[4] -= int(params.get('tzmin', 0))
tz = params.get("tz")
if tz and tz != "Z":
if tz[0] == "-":
tm[3] += int(params.get("tzhour", 0))
tm[4] += int(params.get("tzmin", 0))
elif tz[0] == "+":
tm[3] -= int(params.get("tzhour", 0))
tm[4] -= int(params.get("tzmin", 0))
else:
return None
# Python's time.mktime() is a wrapper around the ANSI C mktime(3c)


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -30,20 +30,21 @@ import re
from .w3dtf import _parse_date_w3dtf
# 8-bit date handling routines written by ytrewq1.
_korean_year = '\ub144' # b3e2 in euc-kr
_korean_month = '\uc6d4' # bff9 in euc-kr
_korean_day = '\uc77c' # c0cf in euc-kr
_korean_am = '\uc624\uc804' # bfc0 c0fc in euc-kr
_korean_pm = '\uc624\ud6c4' # bfc0 c8c4 in euc-kr
_korean_year = "\ub144" # b3e2 in euc-kr
_korean_month = "\uc6d4" # bff9 in euc-kr
_korean_day = "\uc77c" # c0cf in euc-kr
_korean_am = "\uc624\uc804" # bfc0 c0fc in euc-kr
_korean_pm = "\uc624\ud6c4" # bfc0 c8c4 in euc-kr
_korean_onblog_date_re = re.compile(
r'(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})'
r"(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})"
% (_korean_year, _korean_month, _korean_day)
)
_korean_nate_date_re = re.compile(
r'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})'
% (_korean_am, _korean_pm))
r"(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})"
% (_korean_am, _korean_pm)
)
def _parse_date_onblog(dateString):
@@ -51,10 +52,18 @@ def _parse_date_onblog(dateString):
m = _korean_onblog_date_re.match(dateString)
if not m:
return
w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
{'year': m.group(1), 'month': m.group(2), 'day': m.group(3),
'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),
'zonediff': '+09:00'}
w3dtfdate = (
"%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s"
% {
"year": m.group(1),
"month": m.group(2),
"day": m.group(3),
"hour": m.group(4),
"minute": m.group(5),
"second": m.group(6),
"zonediff": "+09:00",
}
)
return _parse_date_w3dtf(w3dtfdate)
@@ -69,15 +78,17 @@ def _parse_date_nate(dateString):
hour += 12
hour = str(hour)
if len(hour) == 1:
hour = '0' + hour
w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
{
'year': m.group(1),
'month': m.group(2),
'day': m.group(3),
'hour': hour,
'minute': m.group(6),
'second': m.group(7),
'zonediff': '+09:00',
hour = "0" + hour
w3dtfdate = (
"%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s"
% {
"year": m.group(1),
"month": m.group(2),
"day": m.group(3),
"hour": hour,
"minute": m.group(6),
"second": m.group(7),
"zonediff": "+09:00",
}
)
return _parse_date_w3dtf(w3dtfdate)


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -33,14 +33,31 @@ import time
def _parse_date_perforce(date_string):
"""parse a date in yyyy/mm/dd hh:mm:ss TTT format"""
# Fri, 2006/09/15 08:19:53 EDT
_my_date_pattern = re.compile(r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})')
_my_date_pattern = re.compile(
r"(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})"
)
m = _my_date_pattern.search(date_string)
if m is None:
return None
dow, year, month, day, hour, minute, second, tz = m.groups()
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
new_date_string = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
months = [
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec",
]
new_date_string = (
f"{dow}, {day} {months[int(month) - 1]} {year} {hour}:{minute}:{second} {tz}"
)
tm = email.utils.parsedate_tz(new_date_string)
if tm:
return time.gmtime(email.utils.mktime_tz(tm))


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -28,20 +28,45 @@
import datetime
timezone_names = {
'ut': 0, 'gmt': 0, 'z': 0,
'adt': -3, 'ast': -4, 'at': -4,
'edt': -4, 'est': -5, 'et': -5,
'cdt': -5, 'cst': -6, 'ct': -6,
'mdt': -6, 'mst': -7, 'mt': -7,
'pdt': -7, 'pst': -8, 'pt': -8,
'a': -1, 'n': 1,
'm': -12, 'y': 12,
'met': 1, 'mest': 2,
"ut": 0,
"gmt": 0,
"z": 0,
"adt": -3,
"ast": -4,
"at": -4,
"edt": -4,
"est": -5,
"et": -5,
"cdt": -5,
"cst": -6,
"ct": -6,
"mdt": -6,
"mst": -7,
"mt": -7,
"pdt": -7,
"pst": -8,
"pt": -8,
"a": -1,
"n": 1,
"m": -12,
"y": 12,
"met": 1,
"mest": 2,
}
day_names = {'mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'}
day_names = {"mon", "tue", "wed", "thu", "fri", "sat", "sun"}
months = {
'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
"jan": 1,
"feb": 2,
"mar": 3,
"apr": 4,
"may": 5,
"jun": 6,
"jul": 7,
"aug": 8,
"sep": 9,
"oct": 10,
"nov": 11,
"dec": 12,
}
@@ -63,7 +88,7 @@ def _parse_date_rfc822(date):
parts = date.lower().split()
if len(parts) < 5:
# Assume that the time and timezone are missing
parts.extend(('00:00:00', '0000'))
parts.extend(("00:00:00", "0000"))
# Remove the day name
if parts[0][:3] in day_names:
parts = parts[1:]
@@ -101,26 +126,26 @@ def _parse_date_rfc822(date):
year += (1900, 2000)[year < 90]
# Handle the time (default to 00:00:00).
time_parts = parts[3].split(':')
time_parts.extend(('0',) * (3 - len(time_parts)))
time_parts = parts[3].split(":")
time_parts.extend(("0",) * (3 - len(time_parts)))
try:
(hour, minute, second) = [int(i) for i in time_parts]
(hour, minute, second) = (int(i) for i in time_parts)
except ValueError:
return None
# Handle the timezone information, if any (default to +0000).
# Strip 'Etc/' from the timezone.
if parts[4].startswith('etc/'):
if parts[4].startswith("etc/"):
parts[4] = parts[4][4:]
# Normalize timezones that start with 'gmt':
# GMT-05:00 => -0500
# GMT => GMT
if parts[4].startswith('gmt'):
parts[4] = ''.join(parts[4][3:].split(':')) or 'gmt'
if parts[4].startswith("gmt"):
parts[4] = "".join(parts[4][3:].split(":")) or "gmt"
# Handle timezones like '-0500', '+0500', and 'EST'
if parts[4] and parts[4][0] in ('-', '+'):
if parts[4] and parts[4][0] in ("-", "+"):
try:
if ':' in parts[4]:
if ":" in parts[4]:
timezone_hours = int(parts[4][1:3])
timezone_minutes = int(parts[4][4:])
else:
@@ -128,7 +153,7 @@ def _parse_date_rfc822(date):
timezone_minutes = int(parts[4][3:])
except ValueError:
return None
if parts[4].startswith('-'):
if parts[4].startswith("-"):
timezone_hours *= -1
timezone_minutes *= -1
else:


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -28,14 +28,28 @@
import datetime
timezonenames = {
'ut': 0, 'gmt': 0, 'z': 0,
'adt': -3, 'ast': -4, 'at': -4,
'edt': -4, 'est': -5, 'et': -5,
'cdt': -5, 'cst': -6, 'ct': -6,
'mdt': -6, 'mst': -7, 'mt': -7,
'pdt': -7, 'pst': -8, 'pt': -8,
'a': -1, 'n': 1,
'm': -12, 'y': 12,
"ut": 0,
"gmt": 0,
"z": 0,
"adt": -3,
"ast": -4,
"at": -4,
"edt": -4,
"est": -5,
"et": -5,
"cdt": -5,
"cst": -6,
"ct": -6,
"mdt": -6,
"mst": -7,
"mt": -7,
"pdt": -7,
"pst": -8,
"pt": -8,
"a": -1,
"n": 1,
"m": -12,
"y": 12,
}
# W3 date and time format parser
# http://www.w3.org/TR/NOTE-datetime
@@ -47,57 +61,57 @@ timezonenames = {
def _parse_date_w3dtf(datestr):
if not datestr.strip():
return None
parts = datestr.lower().split('t')
parts = datestr.lower().split("t")
if len(parts) == 1:
# This may be a date only, or may be an MSSQL-style date
parts = parts[0].split()
if len(parts) == 1:
# Treat this as a date only
parts.append('00:00:00z')
parts.append("00:00:00z")
elif len(parts) > 2:
return None
date = parts[0].split('-', 2)
date = parts[0].split("-", 2)
if not date or len(date[0]) != 4:
return None
# Ensure that `date` has 3 elements. Using '1' sets the default
# month to January and the default day to the 1st of the month.
date.extend(['1'] * (3 - len(date)))
date.extend(["1"] * (3 - len(date)))
try:
year, month, day = [int(i) for i in date]
year, month, day = (int(i) for i in date)
except ValueError:
# `date` may have more than 3 elements or may contain
# non-integer strings.
return None
if parts[1].endswith('z'):
if parts[1].endswith("z"):
parts[1] = parts[1][:-1]
parts.append('z')
parts.append("z")
# Append the numeric timezone offset, if any, to parts.
# If this is an MSSQL-style date then parts[2] already contains
# the timezone information, so `append()` will not affect it.
# Add 1 to each value so that if `find()` returns -1 it will be
# treated as False.
loc = parts[1].find('-') + 1 or parts[1].find('+') + 1 or len(parts[1]) + 1
loc = parts[1].find("-") + 1 or parts[1].find("+") + 1 or len(parts[1]) + 1
loc = loc - 1
parts.append(parts[1][loc:])
parts[1] = parts[1][:loc]
time = parts[1].split(':', 2)
time = parts[1].split(":", 2)
# Ensure that time has 3 elements. Using '0' means that the
# minutes and seconds, if missing, will default to 0.
time.extend(['0'] * (3 - len(time)))
if parts[2][:1] in ('-', '+'):
time.extend(["0"] * (3 - len(time)))
if parts[2][:1] in ("-", "+"):
try:
tzhour = int(parts[2][1:3])
tzmin = int(parts[2][4:])
except ValueError:
return None
if parts[2].startswith('-'):
if parts[2].startswith("-"):
tzhour = tzhour * -1
tzmin = tzmin * -1
else:
tzhour = timezonenames.get(parts[2], 0)
tzmin = 0
try:
hour, minute, second = [int(float(i)) for i in time]
hour, minute, second = (int(float(i)) for i in time)
except ValueError:
return None
# Create the datetime object and timezone delta objects
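# An illustrative end-to-end call (hypothetical value, not part of this commit):
#     _parse_date_w3dtf("2023-04-27T12:37:26+01:00")
#     # -> a UTC time.struct_time; 12:37:26+01:00 becomes 11:37:26 UTC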


@@ -1,5 +1,5 @@
# Character encoding routines
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -26,9 +26,12 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from __future__ import annotations
import codecs
import io
import re
import typing as t
import typing
try:
try:
@@ -38,36 +41,38 @@ try:
except ImportError:
lazy_chardet_encoding = None
else:
def lazy_chardet_encoding(data):
return chardet.detect(data)['encoding'] or ''
return chardet.detect(data)["encoding"] or ""
from .exceptions import (
CharacterEncodingOverride,
CharacterEncodingUnknown,
FeedparserError,
NonXMLContentType,
)
# Each marker represents some of the characters of the opening XML
# processing instruction ('<?xm') in the specified encoding.
EBCDIC_MARKER = b'\x4C\x6F\xA7\x94'
UTF16BE_MARKER = b'\x00\x3C\x00\x3F'
UTF16LE_MARKER = b'\x3C\x00\x3F\x00'
UTF32BE_MARKER = b'\x00\x00\x00\x3C'
UTF32LE_MARKER = b'\x3C\x00\x00\x00'
EBCDIC_MARKER = b"\x4C\x6F\xA7\x94"
UTF16BE_MARKER = b"\x00\x3C\x00\x3F"
UTF16LE_MARKER = b"\x3C\x00\x3F\x00"
UTF32BE_MARKER = b"\x00\x00\x00\x3C"
UTF32LE_MARKER = b"\x3C\x00\x00\x00"
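# For instance (illustrative, not part of this commit), the markers can be
# reproduced with:
#     "<?xm".encode("cp037") == EBCDIC_MARKER     # True
#     "<?".encode("utf-16-be") == UTF16BE_MARKER  # True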
ZERO_BYTES = '\x00\x00'
ZERO_BYTES = b"\x00\x00"
# Match the opening XML declaration.
# Example: <?xml version="1.0" encoding="utf-8"?>
RE_XML_DECLARATION = re.compile(r'^<\?xml[^>]*?>')
RE_XML_DECLARATION = re.compile(r"^<\?xml[^>]*?>")
# Capture the value of the XML processing instruction's encoding attribute.
# Example: <?xml version="1.0" encoding="utf-8"?>
RE_XML_PI_ENCODING = re.compile(br'^<\?.*encoding=[\'"](.*?)[\'"].*\?>')
RE_XML_PI_ENCODING = re.compile(rb'^<\?.*encoding=[\'"](.*?)[\'"].*\?>')
def parse_content_type(line: str) -> t.Tuple[str, str]:
def parse_content_type(line: str) -> tuple[str, str]:
"""Parse an HTTP Content-Type header.
The return value will be a tuple of strings:
@@ -91,11 +96,10 @@ def parse_content_type(line: str) -> t.Tuple[str, str]:
return mime_type, charset_value
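# An illustrative call (hypothetical header value, not part of this commit):
#     parse_content_type("text/xml; charset=ISO-8859-1")
#     # -> a (mime type, charset) tuple along the lines of ("text/xml", "iso-8859-1")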
def convert_to_utf8(http_headers, data, result):
"""Detect and convert the character encoding to UTF-8.
http_headers is a dictionary
data is a raw string (not Unicode)"""
def convert_to_utf8(
http_headers: dict[str, str], data: bytes, result: dict[str, typing.Any]
) -> bytes:
"""Detect and convert the character encoding to UTF-8."""
# This is so much trickier than it sounds, it's not even funny.
# According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type
@@ -134,12 +138,10 @@ def convert_to_utf8(http_headers, data, result):
# Of course, none of this guarantees that we will be able to parse the
# feed in the declared character encoding (assuming it was declared
# correctly, which many are not). iconv_codec can help a lot;
# you should definitely install it if you can.
# http://cjkpython.i18n.org/
# correctly, which many are not).
bom_encoding = ''
xml_encoding = ''
bom_encoding = ""
xml_encoding = ""
# Look at the first few bytes of the document to guess what
# its encoding may be. We only need to decode enough of the
@@ -149,50 +151,63 @@ def convert_to_utf8(http_headers, data, result):
# http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
# Check for BOMs first.
if data[:4] == codecs.BOM_UTF32_BE:
bom_encoding = 'utf-32be'
bom_encoding = "utf-32be"
data = data[4:]
elif data[:4] == codecs.BOM_UTF32_LE:
bom_encoding = 'utf-32le'
bom_encoding = "utf-32le"
data = data[4:]
elif data[:2] == codecs.BOM_UTF16_BE and data[2:4] != ZERO_BYTES:
bom_encoding = 'utf-16be'
bom_encoding = "utf-16be"
data = data[2:]
elif data[:2] == codecs.BOM_UTF16_LE and data[2:4] != ZERO_BYTES:
bom_encoding = 'utf-16le'
bom_encoding = "utf-16le"
data = data[2:]
elif data[:3] == codecs.BOM_UTF8:
bom_encoding = 'utf-8'
bom_encoding = "utf-8"
data = data[3:]
# Check for the characters '<?xm' in several encodings.
elif data[:4] == EBCDIC_MARKER:
bom_encoding = 'cp037'
bom_encoding = "cp037"
elif data[:4] == UTF16BE_MARKER:
bom_encoding = 'utf-16be'
bom_encoding = "utf-16be"
elif data[:4] == UTF16LE_MARKER:
bom_encoding = 'utf-16le'
bom_encoding = "utf-16le"
elif data[:4] == UTF32BE_MARKER:
bom_encoding = 'utf-32be'
bom_encoding = "utf-32be"
elif data[:4] == UTF32LE_MARKER:
bom_encoding = 'utf-32le'
bom_encoding = "utf-32le"
tempdata = data
try:
if bom_encoding:
tempdata = data.decode(bom_encoding).encode('utf-8')
tempdata = data.decode(bom_encoding).encode("utf-8")
except UnicodeDecodeError:
xml_encoding_match = None
else:
xml_encoding_match = RE_XML_PI_ENCODING.match(tempdata)
if xml_encoding_match:
xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower()
xml_encoding = xml_encoding_match.groups()[0].decode("utf-8").lower()
# Normalize the xml_encoding if necessary.
if bom_encoding and (xml_encoding in (
'u16', 'utf-16', 'utf16', 'utf_16',
'u32', 'utf-32', 'utf32', 'utf_32',
'iso-10646-ucs-2', 'iso-10646-ucs-4',
'csucs4', 'csunicode', 'ucs-2', 'ucs-4'
)):
if bom_encoding and (
xml_encoding
in (
"u16",
"utf-16",
"utf16",
"utf_16",
"u32",
"utf-32",
"utf32",
"utf_32",
"iso-10646-ucs-2",
"iso-10646-ucs-4",
"csucs4",
"csunicode",
"ucs-2",
"ucs-4",
)
):
xml_encoding = bom_encoding
# Find the HTTP Content-Type and, hopefully, a character
@@ -200,115 +215,436 @@ def convert_to_utf8(http_headers, data, result):
# to choose the "correct" encoding among the BOM encoding,
# XML declaration encoding, and HTTP encoding, following the
# heuristic defined in RFC 3023.
http_content_type = http_headers.get('content-type') or ''
http_content_type = http_headers.get("content-type") or ""
http_content_type, http_encoding = parse_content_type(http_content_type)
acceptable_content_type = 0
application_content_types = ('application/xml', 'application/xml-dtd',
'application/xml-external-parsed-entity')
text_content_types = ('text/xml', 'text/xml-external-parsed-entity')
json_content_types = ('application/feed+json', 'application/json')
application_content_types = (
"application/xml",
"application/xml-dtd",
"application/xml-external-parsed-entity",
)
text_content_types = ("text/xml", "text/xml-external-parsed-entity")
json_content_types = ("application/feed+json", "application/json")
json = False
if (
http_content_type in application_content_types
or (
http_content_type.startswith('application/')
and http_content_type.endswith('+xml')
)
if http_content_type in application_content_types or (
http_content_type.startswith("application/")
and http_content_type.endswith("+xml")
):
acceptable_content_type = 1
rfc3023_encoding = http_encoding or xml_encoding or 'utf-8'
elif (
http_content_type in text_content_types
or (
http_content_type.startswith('text/')
and http_content_type.endswith('+xml')
)
rfc3023_encoding = http_encoding or xml_encoding or "utf-8"
elif http_content_type in text_content_types or (
http_content_type.startswith("text/") and http_content_type.endswith("+xml")
):
acceptable_content_type = 1
rfc3023_encoding = http_encoding or 'us-ascii'
elif (
http_content_type in json_content_types
or (
not http_content_type
and data and data.lstrip()[0] == '{'
)
rfc3023_encoding = http_encoding or "us-ascii"
elif http_content_type in json_content_types or (
not http_content_type and data and data.lstrip().startswith(b"{")
):
http_content_type = json_content_types[0]
acceptable_content_type = 1
json = True
rfc3023_encoding = http_encoding or 'utf-8' # RFC 7159, 8.1.
elif http_content_type.startswith('text/'):
rfc3023_encoding = http_encoding or 'us-ascii'
elif http_headers and 'content-type' not in http_headers:
rfc3023_encoding = xml_encoding or 'iso-8859-1'
rfc3023_encoding = http_encoding or "utf-8" # RFC 7159, 8.1.
elif http_content_type.startswith("text/"):
rfc3023_encoding = http_encoding or "us-ascii"
elif http_headers and "content-type" not in http_headers:
rfc3023_encoding = xml_encoding or "iso-8859-1"
else:
rfc3023_encoding = xml_encoding or 'utf-8'
rfc3023_encoding = xml_encoding or "utf-8"
# gb18030 is a superset of gb2312, so always replace gb2312
# with gb18030 for greater compatibility.
if rfc3023_encoding.lower() == 'gb2312':
rfc3023_encoding = 'gb18030'
if xml_encoding.lower() == 'gb2312':
xml_encoding = 'gb18030'
if rfc3023_encoding.lower() == "gb2312":
rfc3023_encoding = "gb18030"
if xml_encoding.lower() == "gb2312":
xml_encoding = "gb18030"
# there are four encodings to keep track of:
# - http_encoding is the encoding declared in the Content-Type HTTP header
# - xml_encoding is the encoding declared in the <?xml declaration
# - bom_encoding is the encoding sniffed from the first 4 bytes of the XML data
# - rfc3023_encoding is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
error = None
# - rfc3023_encoding is the actual encoding, as per RFC 3023
# and a variety of other conflicting specifications
error: FeedparserError | None = None
if http_headers and (not acceptable_content_type):
if 'content-type' in http_headers:
msg = '%s is not an accepted media type' % http_headers['content-type']
if "content-type" in http_headers:
msg = "%s is not an accepted media type" % http_headers["content-type"]
else:
msg = 'no Content-type specified'
msg = "no Content-type specified"
error = NonXMLContentType(msg)
# determine character encoding
known_encoding = 0
known_encoding = False
tried_encodings = []
# try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
for proposed_encoding in (rfc3023_encoding, xml_encoding, bom_encoding,
lazy_chardet_encoding, 'utf-8', 'windows-1252', 'iso-8859-2'):
if callable(proposed_encoding):
proposed_encoding = proposed_encoding(data)
for encoding_to_try in (
rfc3023_encoding,
xml_encoding,
bom_encoding,
lazy_chardet_encoding,
"utf-8",
"windows-1252",
"iso-8859-2",
):
if callable(encoding_to_try):
proposed_encoding = encoding_to_try(data)
else:
proposed_encoding = encoding_to_try
if not proposed_encoding:
continue
if proposed_encoding in tried_encodings:
continue
tried_encodings.append(proposed_encoding)
try:
data = data.decode(proposed_encoding)
text = data.decode(proposed_encoding)
except (UnicodeDecodeError, LookupError):
pass
else:
known_encoding = 1
continue
known_encoding = True
if not json:
# Update the encoding in the opening XML processing instruction.
new_declaration = '''<?xml version='1.0' encoding='utf-8'?>'''
if RE_XML_DECLARATION.search(data):
data = RE_XML_DECLARATION.sub(new_declaration, data)
new_declaration = """<?xml version='1.0' encoding='utf-8'?>"""
if RE_XML_DECLARATION.search(text):
text = RE_XML_DECLARATION.sub(new_declaration, text)
else:
data = new_declaration + '\n' + data
data = data.encode('utf-8')
text = new_declaration + "\n" + text
data = text.encode("utf-8")
break
# if still no luck, give up
if not known_encoding:
error = CharacterEncodingUnknown(
'document encoding unknown, I tried ' +
'%s, %s, utf-8, windows-1252, and iso-8859-2 but nothing worked' %
(rfc3023_encoding, xml_encoding))
rfc3023_encoding = ''
"document encoding unknown, I tried "
+ "%s, %s, utf-8, windows-1252, and iso-8859-2 but nothing worked"
% (rfc3023_encoding, xml_encoding)
)
rfc3023_encoding = ""
elif proposed_encoding != rfc3023_encoding:
error = CharacterEncodingOverride(
'document declared as %s, but parsed as %s' %
(rfc3023_encoding, proposed_encoding))
"document declared as %s, but parsed as %s"
% (rfc3023_encoding, proposed_encoding)
)
rfc3023_encoding = proposed_encoding
result['content-type'] = http_content_type # for selecting the parser
result['encoding'] = rfc3023_encoding
result["content-type"] = http_content_type # for selecting the parser
result["encoding"] = rfc3023_encoding
if error:
result['bozo'] = True
result['bozo_exception'] = error
result["bozo"] = True
result["bozo_exception"] = error
return data
# How much to read from a binary file in order to detect encoding.
# In initial tests, 4k was enough for ~160 mostly-English feeds;
# 64k seems like a safe margin.
CONVERT_FILE_PREFIX_LEN = 2**16
# How much to read from a text file, and use as an utf-8 bytes prefix.
# Note that no encoding detection is needed in this case.
CONVERT_FILE_STR_PREFIX_LEN = 2**13
CONVERT_FILE_TEST_CHUNK_LEN = 2**16
def convert_file_to_utf8(
http_headers, file, result, optimistic_encoding_detection=True
):
"""Like convert_to_utf8(), but for a stream.
Unlike convert_to_utf8(), do not read the entire file into memory;
instead, return a text stream that decodes it on the fly.
This should consume significantly less memory,
because it avoids (repeatedly) converting the entire file contents
from bytes to str and back.
To detect the encoding, only a prefix of the file contents is used.
In rare cases, the wrong encoding may be detected for this prefix;
use optimistic_encoding_detection=False to use the entire file contents
(equivalent to a plain convert_to_utf8() call).
Args:
http_headers (dict): The response headers.
file (IO[bytes] or IO[str]): A read()-able (binary) stream.
result (dict): The result dictionary.
optimistic_encoding_detection (bool):
If true, use only a prefix of the file content to detect encoding.
Returns:
StreamFactory: a stream factory, with the detected encoding set, if any
"""
# Currently, this wraps convert_to_utf8(), because the logic is simply
# too complicated to ensure it's re-implemented correctly for a stream.
# That said, it should be possible to change the implementation
# transparently (not sure it's worth it, though).
# If file is a text stream, we don't need to detect encoding;
# we still need a bytes prefix to run functions on for side effects:
# convert_to_utf8() to sniff / set result['content-type'], and
# replace_doctype() to extract safe_entities.
if isinstance(file.read(0), str):
prefix = file.read(CONVERT_FILE_STR_PREFIX_LEN).encode("utf-8")
prefix = convert_to_utf8(http_headers, prefix, result)
result["encoding"] = "utf-8"
return StreamFactory(prefix, file, "utf-8")
if optimistic_encoding_detection:
prefix = convert_file_prefix_to_utf8(http_headers, file, result)
factory = StreamFactory(prefix, file, result.get("encoding"))
# Before returning factory, ensure the entire file can be decoded;
# if it cannot, fall back to convert_to_utf8().
#
# Not doing this means feedparser.parse() may raise UnicodeDecodeError
# instead of setting bozo_exception to CharacterEncodingOverride,
# breaking the 6.x API.
try:
text_file = factory.get_text_file()
except MissingEncoding:
return factory
try:
# read in chunks to limit memory usage
while text_file.read(CONVERT_FILE_TEST_CHUNK_LEN):
pass
except UnicodeDecodeError:
# fall back to convert_to_utf8()
file = factory.get_binary_file()
else:
return factory
# this shouldn't increase memory usage if file is BytesIO,
# since BytesIO does copy-on-write; https://bugs.python.org/issue22003
data = convert_to_utf8(http_headers, file.read(), result)
# note that data *is* the prefix
return StreamFactory(data, io.BytesIO(b""), result.get("encoding"))
def convert_file_prefix_to_utf8(
http_headers,
file: typing.IO[bytes],
result,
*,
prefix_len: int = CONVERT_FILE_PREFIX_LEN,
read_to_ascii_len: int = 2**8,
) -> bytes:
"""Like convert_to_utf8(), but only use the prefix of a binary file.
Set result like convert_to_utf8() would.
Return the updated prefix, as bytes.
"""
# This is complicated by convert_to_utf8() detecting the wrong encoding
# if we have only part of the bytes that make a code-point:
#
# '😀'.encode('utf-8') -> utf-8
# '😀'.encode('utf-8')[:-1] -> windows-1252 + bozo
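# A concrete illustration (not part of this commit):
#     data = "😀".encode("utf-8")      # 4 bytes: f0 9f 98 80
#     data[:3].decode("utf-8")         # raises UnicodeDecodeError
#     data[:3].decode("windows-1252")  # "succeeds", but wrongly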
prefix = file.read(prefix_len - 1)
# reading up to after an ASCII byte increases
# the likelihood of being on a code point boundary
prefix += read_to_after_ascii_byte(file, read_to_ascii_len)
# call convert_to_utf8() up to 4 times,
# to make sure we eventually land on a code point boundary
candidates = []
for attempt in range(4):
byte = file.read(1)
# we're at the end of the file, and the loop already ran once
if not byte and attempt != 0:
break
prefix += byte
fake_result: typing.Any = {}
converted_prefix = convert_to_utf8(http_headers, prefix, fake_result)
# an encoding was detected successfully, keep it
if not fake_result.get("bozo"):
break
candidates.append((file.tell(), converted_prefix, fake_result))
# no encoding was detected successfully, pick the "best" one
else:
def key(candidate):
*_, result = candidate
exc = result.get("bozo_exception")
exc_score = 0
if isinstance(exc, NonXMLContentType):
exc_score = 20
elif isinstance(exc, CharacterEncodingOverride):
exc_score = 10
return (
exc_score,
# prefer utf- encodings to anything else
result.get("encoding").startswith("utf-"),
)
candidates.sort(key=key)
offset, converted_prefix, fake_result = candidates[-1]
file.seek(offset)
result.update(fake_result)
return converted_prefix
def read_to_after_ascii_byte(file: typing.IO[bytes], max_len: int) -> bytes:
offset = file.tell()
buffer = b""
for _ in range(max_len):
byte = file.read(1)
# end of file, nothing to do
if not byte:
break
buffer += byte
# we stop after an ASCII character
if byte < b"\x80":
break
# couldn't find an ASCII character, reset the file to the original offset
else:
file.seek(offset)
return b""
return buffer
class MissingEncoding(io.UnsupportedOperation):
pass
class StreamFactory:
"""Decode on the fly a binary stream that *may* have a known encoding.
If the underlying stream is seekable, it is possible to call
the get_{text,binary}_file() methods more than once.
"""
def __init__(self, prefix: bytes, file, encoding=None):
self.prefix = prefix
self.file = ResetFileWrapper(file)
self.encoding = encoding
self.should_reset = False
def get_text_file(self, fallback_encoding=None, errors="strict"):
encoding = self.encoding or fallback_encoding
if encoding is None:
raise MissingEncoding("cannot create text stream without encoding")
if isinstance(self.file.read(0), str):
file = PrefixFileWrapper(self.prefix.decode(encoding), self.file)
else:
file = PrefixFileWrapper(
self.prefix.decode("utf-8", errors),
codecs.getreader(encoding)(self.file, errors),
)
self.reset()
return file
def get_binary_file(self):
if isinstance(self.file.read(0), str):
raise io.UnsupportedOperation(
"underlying stream is text, not binary"
) from None
file = PrefixFileWrapper(self.prefix, self.file)
self.reset()
return file
def get_file(self):
try:
return self.get_text_file()
except MissingEncoding:
return self.get_binary_file()
def reset(self):
if self.should_reset:
self.file.reset()
self.should_reset = True
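# A minimal usage sketch (assumes an in-memory stream; the prefix is the
# part of the body already consumed during encoding detection):
#
#     >>> raw = "<feed>données</feed>".encode("utf-8")
#     >>> factory = StreamFactory(raw[:6], io.BytesIO(raw[6:]), "utf-8")
#     >>> factory.get_text_file().read()
#     '<feed>données</feed>'
#     >>> factory.get_binary_file().read() == raw
#     True
#
# The second call works because the wrapped BytesIO is seekable: reset()
# rewinds it to its original offset on every call after the first.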
class ResetFileWrapper:
"""Given a seekable file, allow reading its content again
(from the current position) by calling reset().
"""
def __init__(self, file):
self.file = file
try:
self.file_initial_offset = file.tell()
except OSError:
self.file_initial_offset = None
def read(self, size=-1):
return self.file.read(size)
def reset(self):
# raises io.UnsupportedOperation if the underlying stream is not seekable
self.file.seek(self.file_initial_offset)
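# A minimal sketch of the contract (io is imported at the top of this module):
#
#     >>> f = ResetFileWrapper(io.BytesIO(b"abc"))
#     >>> f.read()
#     b'abc'
#     >>> f.reset()
#     >>> f.read()
#     b'abc'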
class PrefixFileWrapper:
"""Stitch a (possibly modified) prefix and a file into a new file object.
>>> file = io.StringIO('abcdef')
>>> file.read(2)
'ab'
>>> wrapped = PrefixFileWrapper(file.read(2).upper(), file)
>>> wrapped.read()
'CDef'
"""
def __init__(self, prefix, file):
self.prefix = prefix
self.file = file
self.offset = 0
def read(self, size=-1):
buffer = self.file.read(0)
if self.offset < len(self.prefix):
if size < 0:
chunk = self.prefix[self.offset :]
else:
chunk = self.prefix[self.offset : self.offset + size]
size -= len(chunk)
buffer += chunk
self.offset += len(chunk)
while True:
chunk = self.file.read(size)
if not chunk:
break
buffer += chunk
self.offset += len(chunk)
if size <= 0:
break
size -= len(chunk)
return buffer
def close(self):
# do not touch the underlying stream
pass

View file

@ -1,5 +1,5 @@
# Exceptions used throughout feedparser
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -27,11 +27,11 @@
# POSSIBILITY OF SUCH DAMAGE.
__all__ = [
'FeedparserError',
'CharacterEncodingOverride',
'CharacterEncodingUnknown',
'NonXMLContentType',
'UndeclaredNamespace',
"FeedparserError",
"CharacterEncodingOverride",
"CharacterEncodingUnknown",
"NonXMLContentType",
"UndeclaredNamespace",
]

View file

@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -28,36 +28,49 @@
import html.entities
import re
import sgmllib3k as sgmllib
# These items must all be imported into this module due to .__code__ replacements.
from .sgml import ( # noqa: F401
attrfind,
charref,
endbracket,
entityref,
incomplete,
interesting,
sgmllib,
shorttag,
shorttagopen,
starttagopen,
tagfind,
)
_cp1252 = {
128: '\u20ac', # euro sign
130: '\u201a', # single low-9 quotation mark
131: '\u0192', # latin small letter f with hook
132: '\u201e', # double low-9 quotation mark
133: '\u2026', # horizontal ellipsis
134: '\u2020', # dagger
135: '\u2021', # double dagger
136: '\u02c6', # modifier letter circumflex accent
137: '\u2030', # per mille sign
138: '\u0160', # latin capital letter s with caron
139: '\u2039', # single left-pointing angle quotation mark
140: '\u0152', # latin capital ligature oe
142: '\u017d', # latin capital letter z with caron
145: '\u2018', # left single quotation mark
146: '\u2019', # right single quotation mark
147: '\u201c', # left double quotation mark
148: '\u201d', # right double quotation mark
149: '\u2022', # bullet
150: '\u2013', # en dash
151: '\u2014', # em dash
152: '\u02dc', # small tilde
153: '\u2122', # trade mark sign
154: '\u0161', # latin small letter s with caron
155: '\u203a', # single right-pointing angle quotation mark
156: '\u0153', # latin small ligature oe
158: '\u017e', # latin small letter z with caron
159: '\u0178', # latin capital letter y with diaeresis
128: "\u20ac", # euro sign
130: "\u201a", # single low-9 quotation mark
131: "\u0192", # latin small letter f with hook
132: "\u201e", # double low-9 quotation mark
133: "\u2026", # horizontal ellipsis
134: "\u2020", # dagger
135: "\u2021", # double dagger
136: "\u02c6", # modifier letter circumflex accent
137: "\u2030", # per mille sign
138: "\u0160", # latin capital letter s with caron
139: "\u2039", # single left-pointing angle quotation mark
140: "\u0152", # latin capital ligature oe
142: "\u017d", # latin capital letter z with caron
145: "\u2018", # left single quotation mark
146: "\u2019", # right single quotation mark
147: "\u201c", # left double quotation mark
148: "\u201d", # right double quotation mark
149: "\u2022", # bullet
150: "\u2013", # en dash
151: "\u2014", # em dash
152: "\u02dc", # small tilde
153: "\u2122", # trade mark sign
154: "\u0161", # latin small letter s with caron
155: "\u203a", # single right-pointing angle quotation mark
156: "\u0153", # latin small ligature oe
158: "\u017e", # latin small letter z with caron
159: "\u0178", # latin capital letter y with diaeresis
}
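# For instance (a standalone illustration of the table above; handle_charref()
# below emits the remapped reference in hexadecimal form):
#
#     >>> _cp1252[149]
#     '•'
#     >>> "&#%s;" % hex(ord(_cp1252[149]))[1:]
#     '&#x2022;'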
@ -65,28 +78,28 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
special = re.compile("""[<>'"]""")
bare_ampersand = re.compile(r"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)")
elements_no_end_tag = {
'area',
'base',
'basefont',
'br',
'col',
'command',
'embed',
'frame',
'hr',
'img',
'input',
'isindex',
'keygen',
'link',
'meta',
'param',
'source',
'track',
'wbr',
"area",
"base",
"basefont",
"br",
"col",
"command",
"embed",
"frame",
"hr",
"img",
"input",
"isindex",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr",
}
def __init__(self, encoding=None, _type='application/xhtml+xml'):
def __init__(self, encoding=None, _type="application/xhtml+xml"):
if encoding:
self.encoding = encoding
self._type = _type
@ -105,9 +118,9 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
tag = match.group(1)
if tag in self.elements_no_end_tag:
return '<' + tag + ' />'
return "<" + tag + " />"
else:
return '<' + tag + '></' + tag + '>'
return "<" + tag + "></" + tag + ">"
# By declaring these methods and overriding their compiled code
# with the code from sgmllib, the original code will execute in
@ -128,8 +141,8 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
def parse_starttag(self, i):
j = self.__parse_starttag(i)
if self._type == 'application/xhtml+xml':
if j > 2 and self.rawdata[j-2:j] == '/>':
if self._type == "application/xhtml+xml":
if j > 2 and self.rawdata[j - 2 : j] == "/>":
self.unknown_endtag(self.lasttag)
return j
@ -139,10 +152,10 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
:rtype: None
"""
data = re.sub(r'<!((?!DOCTYPE|--|\[))', r'&lt;!\1', data, re.IGNORECASE)
data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
data = data.replace('&#39;', "'")
data = data.replace('&#34;', '"')
data = re.sub(r"<!((?!DOCTYPE|--|\[))", r"&lt;!\1", data, re.IGNORECASE)
data = re.sub(r"<([^<>\s]+?)\s*/>", self._shorttag_replace, data)
data = data.replace("&#39;", "'")
data = data.replace("&#34;", '"')
super().feed(data)
super().close()
@ -160,8 +173,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# *attrs* into a dictionary, then convert it back to a list.
attrs_d = {k.lower(): v for k, v in attrs}
attrs = [
(k, k in ('rel', 'type') and v.lower() or v)
for k, v in attrs_d.items()
(k, k in ("rel", "type") and v.lower() or v) for k, v in attrs_d.items()
]
attrs.sort()
return attrs
@ -177,22 +189,19 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# attrs is a list of (attr, value) tuples
# e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
uattrs = []
strattrs = ''
strattrs = ""
if attrs:
for key, value in attrs:
value = value.replace('>', '&gt;')
value = value.replace('<', '&lt;')
value = value.replace('"', '&quot;')
value = value.replace(">", "&gt;")
value = value.replace("<", "&lt;")
value = value.replace('"', "&quot;")
value = self.bare_ampersand.sub("&amp;", value)
uattrs.append((key, value))
strattrs = ''.join(
' %s="%s"' % (key, value)
for key, value in uattrs
)
strattrs = "".join(f' {key}="{value}"' for key, value in uattrs)
if tag in self.elements_no_end_tag:
self.pieces.append('<%s%s />' % (tag, strattrs))
self.pieces.append(f"<{tag}{strattrs} />")
else:
self.pieces.append('<%s%s>' % (tag, strattrs))
self.pieces.append(f"<{tag}{strattrs}>")
def unknown_endtag(self, tag):
"""
@ -214,15 +223,15 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# Called for each character reference, e.g. '&#160;' will extract '160'
# Reconstruct the original character reference.
ref = ref.lower()
if ref.startswith('x'):
if ref.startswith("x"):
value = int(ref[1:], 16)
else:
value = int(ref)
if value in _cp1252:
self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
self.pieces.append("&#%s;" % hex(ord(_cp1252[value]))[1:])
else:
self.pieces.append('&#%s;' % ref)
self.pieces.append("&#%s;" % ref)
def handle_entityref(self, ref):
"""
@ -232,10 +241,10 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# Called for each entity reference, e.g. '&copy;' will extract 'copy'
# Reconstruct the original entity reference.
if ref in html.entities.name2codepoint or ref == 'apos':
self.pieces.append('&%s;' % ref)
if ref in html.entities.name2codepoint or ref == "apos":
self.pieces.append("&%s;" % ref)
else:
self.pieces.append('&amp;%s' % ref)
self.pieces.append("&amp;%s" % ref)
def handle_data(self, text):
"""
@ -256,7 +265,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# Called for HTML comments, e.g. <!-- insert Javascript code here -->
# Reconstruct the original comment.
self.pieces.append('<!--%s-->' % text)
self.pieces.append("<!--%s-->" % text)
def handle_pi(self, text):
"""
@ -266,7 +275,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# Called for each processing instruction, e.g. <?instruction>
# Reconstruct original processing instruction.
self.pieces.append('<?%s>' % text)
self.pieces.append("<?%s>" % text)
def handle_decl(self, text):
"""
@ -278,9 +287,9 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
# <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
# "http://www.w3.org/TR/html4/loose.dtd">
# Reconstruct original DOCTYPE
self.pieces.append('<!%s>' % text)
self.pieces.append("<!%s>" % text)
_new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match
_new_declname_match = re.compile(r"[a-zA-Z][-_.a-zA-Z0-9:]*\s*").match
def _scan_name(self, i, declstartpos):
"""
@ -311,7 +320,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
:rtype: str
"""
return '&#%s;' % name
return "&#%s;" % name
def convert_entityref(self, name):
"""
@ -319,7 +328,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
:rtype: str
"""
return '&%s;' % name
return "&%s;" % name
def output(self):
"""Return processed HTML as a single string.
@ -327,7 +336,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
:rtype: str
"""
return ''.join(self.pieces)
return "".join(self.pieces)
def parse_declaration(self, i):
"""
@ -339,5 +348,5 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
return sgmllib.SGMLParser.parse_declaration(self, i)
except (AssertionError, sgmllib.SGMLParseError):
# Escape the doctype declaration and continue parsing.
self.handle_data('&lt;')
return i+1
self.handle_data("&lt;")
return i + 1

View file

@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -25,203 +25,54 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import base64
import datetime
import gzip
import io
import re
import struct
import urllib.parse
import urllib.request
import zlib
from __future__ import annotations
import typing
import requests
from .datetimes import _parse_date
from .urls import convert_to_idn
# HTTP "Accept" header to send to servers when downloading feeds.
ACCEPT_HEADER: str = (
"application/atom+xml"
",application/rdf+xml"
",application/rss+xml"
",application/x-netcdf"
",application/xml"
";q=0.9,text/xml"
";q=0.2,*/*"
";q=0.1"
)
# HTTP "Accept" header to send to servers when downloading feeds. If you don't
# want to send an Accept header, set this to None.
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
class URLHandler(urllib.request.HTTPDigestAuthHandler, urllib.request.HTTPRedirectHandler, urllib.request.HTTPDefaultErrorHandler):
def http_error_default(self, req, fp, code, msg, headers):
# The default implementation just raises HTTPError.
# Forget that.
fp.status = code
return fp
def http_error_301(self, req, fp, code, msg, hdrs):
result = urllib.request.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, hdrs)
if not result:
return fp
result.status = code
result.newurl = result.geturl()
return result
# The default implementations in urllib.request.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
http_error_300 = http_error_301
http_error_302 = http_error_301
http_error_303 = http_error_301
http_error_307 = http_error_301
def http_error_401(self, req, fp, code, msg, headers):
# Check if
# - server requires digest auth, AND
# - we tried (unsuccessfully) with basic auth.
# If both conditions hold, parse authentication information
# out of the Authorization header we sent the first time
# (for the username and password) and the WWW-Authenticate
# header the server sent back (for the realm) and retry
# the request with the appropriate digest auth headers instead.
# This evil genius hack has been brought to you by Aaron Swartz.
host = urllib.parse.urlparse(req.get_full_url())[1]
if 'Authorization' not in req.headers or 'WWW-Authenticate' not in headers:
return self.http_error_default(req, fp, code, msg, headers)
auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode()).decode()
user, passw = auth.split(':')
realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
self.add_password(realm, host, user, passw)
retry = self.http_error_auth_reqed('www-authenticate', host, req, headers)
self.reset_retry_count()
return retry
def _build_urllib2_request(url, agent, accept_header, etag, modified, referrer, auth, request_headers):
request = urllib.request.Request(url)
request.add_header('User-Agent', agent)
if etag:
request.add_header('If-None-Match', etag)
if isinstance(modified, str):
modified = _parse_date(modified)
elif isinstance(modified, datetime.datetime):
modified = modified.utctimetuple()
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
# by the current locale, but RFC 2616 states that dates must be
# in English.
short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5]))
if referrer:
request.add_header('Referer', referrer)
request.add_header('Accept-encoding', 'gzip, deflate')
if auth:
request.add_header('Authorization', 'Basic %s' % auth)
if accept_header:
request.add_header('Accept', accept_header)
# use this for whatever -- cookies, special headers, etc
# [('Cookie','Something'),('x-special-header','Another Value')]
for header_name, header_value in request_headers.items():
request.add_header(header_name, header_value)
request.add_header('A-IM', 'feed') # RFC 3229 support
return request
def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, result=None):
if handlers is None:
handlers = []
elif not isinstance(handlers, list):
handlers = [handlers]
if request_headers is None:
request_headers = {}
# Deal with the feed URI scheme
if url.startswith('feed:http'):
url = url[5:]
elif url.startswith('feed:'):
url = 'http:' + url[5:]
if not agent:
def get(url: str, result: dict[str, typing.Any]) -> bytes:
from . import USER_AGENT
agent = USER_AGENT
# Test for inline user:password credentials for HTTP basic auth
auth = None
if not url.startswith('ftp:'):
url_pieces = urllib.parse.urlparse(url)
if url_pieces.username:
new_pieces = list(url_pieces)
new_pieces[1] = url_pieces.hostname
if url_pieces.port:
new_pieces[1] = f'{url_pieces.hostname}:{url_pieces.port}'
url = urllib.parse.urlunparse(new_pieces)
auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}'.encode()).decode()
# iri support
if not isinstance(url, bytes):
url = convert_to_idn(url)
# Prevent UnicodeEncodeErrors caused by Unicode characters in the path.
bits = []
for c in url:
try:
c.encode('ascii')
except UnicodeEncodeError:
bits.append(urllib.parse.quote(c))
else:
bits.append(c)
url = ''.join(bits)
try:
response = requests.get(
url,
headers={"User-Agent": agent, "Accept": ACCEPT_HEADER},
timeout=10,
)
except requests.RequestException as exception:
result["bozo"] = True
result["bozo_exception"] = exception
return b""
# try to open with urllib2 (to use optional headers)
request = _build_urllib2_request(url, agent, ACCEPT_HEADER, etag, modified, referrer, auth, request_headers)
opener = urllib.request.build_opener(*tuple(handlers + [URLHandler()]))
opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
f = opener.open(request)
data = f.read()
f.close()
# lowercase all of the HTTP headers for comparisons per RFC 2616
result['headers'] = {k.lower(): v for k, v in f.headers.items()}
# if feed is gzip-compressed, decompress it
if data and 'gzip' in result['headers'].get('content-encoding', ''):
try:
data = gzip.GzipFile(fileobj=io.BytesIO(data)).read()
except (EOFError, IOError, struct.error) as e:
# IOError can occur if the gzip header is bad.
# struct.error can occur if the data is damaged.
result['bozo'] = True
result['bozo_exception'] = e
if isinstance(e, struct.error):
# A gzip header was found but the data is corrupt.
# Ideally, we should re-request the feed without the
# 'Accept-encoding: gzip' header, but we don't.
data = None
elif data and 'deflate' in result['headers'].get('content-encoding', ''):
try:
data = zlib.decompress(data)
except zlib.error:
try:
# The data may have no headers and no checksum.
data = zlib.decompress(data, -15)
except zlib.error as e:
result['bozo'] = True
result['bozo_exception'] = e
# Lowercase the HTTP header keys for comparisons per RFC 2616.
result["headers"] = {k.lower(): v for k, v in response.headers.items()}
# save HTTP headers
if 'etag' in result['headers']:
etag = result['headers'].get('etag', '')
if isinstance(etag, bytes):
etag = etag.decode('utf-8', 'ignore')
if etag:
result['etag'] = etag
if 'last-modified' in result['headers']:
modified = result['headers'].get('last-modified', '')
if "etag" in result["headers"]:
result["etag"] = result["headers"]["etag"]
if "last-modified" in result["headers"]:
modified = result["headers"]["last-modified"]
if modified:
result['modified'] = modified
result['modified_parsed'] = _parse_date(modified)
if isinstance(f.url, bytes):
result['href'] = f.url.decode('utf-8', 'ignore')
else:
result['href'] = f.url
result['status'] = getattr(f, 'status', None) or 200
# Stop processing if the server sent HTTP 304 Not Modified.
if getattr(f, 'code', 0) == 304:
result['version'] = ''
result['debug_message'] = 'The feed has not changed since you last checked, ' + \
'so the server sent no data. This is a feature, not a bug!'
return data
result["modified"] = modified
result["modified_parsed"] = _parse_date(modified)
result["href"] = response.url
result["status"] = response.status_code
return response.content
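# A hedged usage sketch of the requests-based helper (the URL is a
# placeholder, not a real feed):
#
#     result = {}
#     data = get("https://example.com/feed.xml", result)
#
# On success, result holds "status", "href", "headers" and, when the server
# sent them, "etag" / "modified" / "modified_parsed"; on a network error,
# data is b"" and result["bozo_exception"] holds the requests exception.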

View file

@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -30,14 +30,20 @@ import binascii
import copy
import html.entities
import re
from typing import Dict
import xml.sax.saxutils
from typing import Dict
from .html import _cp1252
from .namespaces import _base, cc, dc, georss, itunes, mediarss, psc
from .sanitizer import sanitize_html, HTMLSanitizer
from .util import FeedParserDict
from .sanitizer import HTMLSanitizer, sanitize_html
from .urls import _urljoin, make_safe_absolute_uri, resolve_relative_uris
from .util import FeedParserDict
email_pattern = re.compile(
r"(([a-zA-Z0-9_.+-]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)"
r"|(([a-zA-Z0-9-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(]?))"
r"(\?subject=\S+)?"
)
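# For example (a standalone sketch; _sync_author_detail() below uses this
# pattern to split "Name (email)" author strings):
#
#     >>> email_pattern.search("Mark Pilgrim (mark@example.org)").group(0)
#     'mark@example.org'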
class XMLParserMixin(
@ -50,117 +56,118 @@ class XMLParserMixin(
psc.Namespace,
):
namespaces = {
'': '',
'http://backend.userland.com/rss': '',
'http://blogs.law.harvard.edu/tech/rss': '',
'http://purl.org/rss/1.0/': '',
'http://my.netscape.com/rdf/simple/0.9/': '',
'http://example.com/newformat#': '',
'http://example.com/necho': '',
'http://purl.org/echo/': '',
'uri/of/echo/namespace#': '',
'http://purl.org/pie/': '',
'http://purl.org/atom/ns#': '',
'http://www.w3.org/2005/Atom': '',
'http://purl.org/rss/1.0/modules/rss091#': '',
'http://webns.net/mvcb/': 'admin',
'http://purl.org/rss/1.0/modules/aggregation/': 'ag',
'http://purl.org/rss/1.0/modules/annotate/': 'annotate',
'http://media.tangent.org/rss/1.0/': 'audio',
'http://backend.userland.com/blogChannelModule': 'blogChannel',
'http://creativecommons.org/ns#license': 'cc',
'http://web.resource.org/cc/': 'cc',
'http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html': 'creativeCommons',
'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons',
'http://purl.org/rss/1.0/modules/company': 'co',
'http://purl.org/rss/1.0/modules/content/': 'content',
'http://my.theinfo.org/changed/1.0/rss/': 'cp',
'http://purl.org/dc/elements/1.1/': 'dc',
'http://purl.org/dc/terms/': 'dcterms',
'http://purl.org/rss/1.0/modules/email/': 'email',
'http://purl.org/rss/1.0/modules/event/': 'ev',
'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner',
'http://freshmeat.net/rss/fm/': 'fm',
'http://xmlns.com/foaf/0.1/': 'foaf',
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
'http://www.georss.org/georss': 'georss',
'http://www.opengis.net/gml': 'gml',
'http://postneo.com/icbm/': 'icbm',
'http://purl.org/rss/1.0/modules/image/': 'image',
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes',
'http://purl.org/rss/1.0/modules/link/': 'l',
'http://search.yahoo.com/mrss': 'media',
"": "",
"http://backend.userland.com/rss": "",
"http://blogs.law.harvard.edu/tech/rss": "",
"http://purl.org/rss/1.0/": "",
"http://my.netscape.com/rdf/simple/0.9/": "",
"http://example.com/newformat#": "",
"http://example.com/necho": "",
"http://purl.org/echo/": "",
"uri/of/echo/namespace#": "",
"http://purl.org/pie/": "",
"http://purl.org/atom/ns#": "",
"http://www.w3.org/2005/Atom": "",
"http://purl.org/rss/1.0/modules/rss091#": "",
"http://webns.net/mvcb/": "admin",
"http://purl.org/rss/1.0/modules/aggregation/": "ag",
"http://purl.org/rss/1.0/modules/annotate/": "annotate",
"http://media.tangent.org/rss/1.0/": "audio",
"http://backend.userland.com/blogChannelModule": "blogChannel",
"http://creativecommons.org/ns#license": "cc",
"http://web.resource.org/cc/": "cc",
"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": (
"creativeCommons"
),
"http://backend.userland.com/creativeCommonsRssModule": "creativeCommons",
"http://purl.org/rss/1.0/modules/company": "co",
"http://purl.org/rss/1.0/modules/content/": "content",
"http://my.theinfo.org/changed/1.0/rss/": "cp",
"http://purl.org/dc/elements/1.1/": "dc",
"http://purl.org/dc/terms/": "dcterms",
"http://purl.org/rss/1.0/modules/email/": "email",
"http://purl.org/rss/1.0/modules/event/": "ev",
"http://rssnamespace.org/feedburner/ext/1.0": "feedburner",
"http://freshmeat.net/rss/fm/": "fm",
"http://xmlns.com/foaf/0.1/": "foaf",
"http://www.w3.org/2003/01/geo/wgs84_pos#": "geo",
"http://www.georss.org/georss": "georss",
"http://www.opengis.net/gml": "gml",
"http://postneo.com/icbm/": "icbm",
"http://purl.org/rss/1.0/modules/image/": "image",
"http://www.itunes.com/DTDs/PodCast-1.0.dtd": "itunes",
"http://example.com/DTDs/PodCast-1.0.dtd": "itunes",
"http://purl.org/rss/1.0/modules/link/": "l",
"http://search.yahoo.com/mrss": "media",
# Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace
'http://search.yahoo.com/mrss/': 'media',
'http://madskills.com/public/xml/rss/module/pingback/': 'pingback',
'http://prismstandard.org/namespaces/1.2/basic/': 'prism',
'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf',
'http://www.w3.org/2000/01/rdf-schema#': 'rdfs',
'http://purl.org/rss/1.0/modules/reference/': 'ref',
'http://purl.org/rss/1.0/modules/richequiv/': 'reqv',
'http://purl.org/rss/1.0/modules/search/': 'search',
'http://purl.org/rss/1.0/modules/slash/': 'slash',
'http://schemas.xmlsoap.org/soap/envelope/': 'soap',
'http://purl.org/rss/1.0/modules/servicestatus/': 'ss',
'http://hacks.benhammersley.com/rss/streaming/': 'str',
'http://purl.org/rss/1.0/modules/subscription/': 'sub',
'http://purl.org/rss/1.0/modules/syndication/': 'sy',
'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf',
'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo',
'http://purl.org/rss/1.0/modules/threading/': 'thr',
'http://purl.org/rss/1.0/modules/textinput/': 'ti',
'http://madskills.com/public/xml/rss/module/trackback/': 'trackback',
'http://wellformedweb.org/commentAPI/': 'wfw',
'http://purl.org/rss/1.0/modules/wiki/': 'wiki',
'http://www.w3.org/1999/xhtml': 'xhtml',
'http://www.w3.org/1999/xlink': 'xlink',
'http://www.w3.org/XML/1998/namespace': 'xml',
'http://podlove.org/simple-chapters': 'psc',
"http://search.yahoo.com/mrss/": "media",
"http://madskills.com/public/xml/rss/module/pingback/": "pingback",
"http://prismstandard.org/namespaces/1.2/basic/": "prism",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
"http://www.w3.org/2000/01/rdf-schema#": "rdfs",
"http://purl.org/rss/1.0/modules/reference/": "ref",
"http://purl.org/rss/1.0/modules/richequiv/": "reqv",
"http://purl.org/rss/1.0/modules/search/": "search",
"http://purl.org/rss/1.0/modules/slash/": "slash",
"http://schemas.xmlsoap.org/soap/envelope/": "soap",
"http://purl.org/rss/1.0/modules/servicestatus/": "ss",
"http://hacks.benhammersley.com/rss/streaming/": "str",
"http://purl.org/rss/1.0/modules/subscription/": "sub",
"http://purl.org/rss/1.0/modules/syndication/": "sy",
"http://schemas.pocketsoap.com/rss/myDescModule/": "szf",
"http://purl.org/rss/1.0/modules/taxonomy/": "taxo",
"http://purl.org/rss/1.0/modules/threading/": "thr",
"http://purl.org/rss/1.0/modules/textinput/": "ti",
"http://madskills.com/public/xml/rss/module/trackback/": "trackback",
"http://wellformedweb.org/commentAPI/": "wfw",
"http://purl.org/rss/1.0/modules/wiki/": "wiki",
"http://www.w3.org/1999/xhtml": "xhtml",
"http://www.w3.org/1999/xlink": "xlink",
"http://www.w3.org/XML/1998/namespace": "xml",
"http://podlove.org/simple-chapters": "psc",
}
_matchnamespaces: Dict[str, str] = {}
can_be_relative_uri = {
'comments',
'docs',
'href',
'icon',
'id',
'link',
'logo',
'url',
'wfw_comment',
'wfw_commentrss',
"comments",
"docs",
"href",
"icon",
"id",
"link",
"logo",
"url",
"wfw_comment",
"wfw_commentrss",
}
can_contain_relative_uris = {
'content',
'copyright',
'description',
'info',
'rights',
'subtitle',
'summary',
'tagline',
'title',
"content",
"copyright",
"description",
"info",
"rights",
"subtitle",
"summary",
"tagline",
"title",
}
can_contain_dangerous_markup = {
'content',
'copyright',
'description',
'info',
'rights',
'subtitle',
'summary',
'tagline',
'title',
"content",
"copyright",
"description",
"info",
"rights",
"subtitle",
"summary",
"tagline",
"title",
}
html_types = {
'application/xhtml+xml',
'text/html',
"application/xhtml+xml",
"text/html",
}
def __init__(self):
@ -169,7 +176,7 @@ class XMLParserMixin(
self._matchnamespaces[k.lower()] = v
self.feeddata = FeedParserDict() # feed-level data
self.entries = [] # list of entry-level data
self.version = '' # feed type/version, see SUPPORTED_VERSIONS
self.version = "" # feed type/version, see SUPPORTED_VERSIONS
self.namespaces_in_use = {} # dictionary of namespaces defined by the feed
self.resolve_relative_uris = False
self.sanitize_html = False
@ -198,7 +205,7 @@ class XMLParserMixin(
self.depth = 0
self.hasContent = 0
if self.lang:
self.feeddata['language'] = self.lang.replace('_', '-')
self.feeddata["language"] = self.lang.replace("_", "-")
# A map of the following form:
# {
@ -208,7 +215,7 @@ class XMLParserMixin(
# },
# }
self.property_depth_map = {}
super(XMLParserMixin, self).__init__()
super().__init__()
def _normalize_attributes(self, kv):
raise NotImplementedError
@ -222,72 +229,80 @@ class XMLParserMixin(
# track xml:base and xml:lang
attrs_d = dict(attrs)
baseuri = attrs_d.get('xml:base', attrs_d.get('base')) or self.baseuri
baseuri = attrs_d.get("xml:base", attrs_d.get("base")) or self.baseuri
if isinstance(baseuri, bytes):
baseuri = baseuri.decode(self.encoding, 'ignore')
baseuri = baseuri.decode(self.encoding, "ignore")
# ensure that self.baseuri is always an absolute URI that
# uses a whitelisted URI scheme (e.g. not `javascript:`)
if self.baseuri:
self.baseuri = make_safe_absolute_uri(self.baseuri, baseuri) or self.baseuri
else:
self.baseuri = _urljoin(self.baseuri, baseuri)
lang = attrs_d.get('xml:lang', attrs_d.get('lang'))
if lang == '':
lang = attrs_d.get("xml:lang", attrs_d.get("lang"))
if lang == "":
# xml:lang could be explicitly set to '', we need to capture that
lang = None
elif lang is None:
# if no xml:lang is specified, use parent lang
lang = self.lang
if lang:
if tag in ('feed', 'rss', 'rdf:RDF'):
self.feeddata['language'] = lang.replace('_', '-')
if tag in ("feed", "rss", "rdf:RDF"):
self.feeddata["language"] = lang.replace("_", "-")
self.lang = lang
self.basestack.append(self.baseuri)
self.langstack.append(lang)
# track namespaces
for prefix, uri in attrs:
if prefix.startswith('xmlns:'):
if prefix.startswith("xmlns:"):
self.track_namespace(prefix[6:], uri)
elif prefix == 'xmlns':
elif prefix == "xmlns":
self.track_namespace(None, uri)
# track inline content
if self.incontent and not self.contentparams.get('type', 'xml').endswith('xml'):
if tag in ('xhtml:div', 'div'):
if self.incontent and not self.contentparams.get("type", "xml").endswith("xml"):
if tag in ("xhtml:div", "div"):
return # typepad does this 10/2007
# element declared itself as escaped markup, but it isn't really
self.contentparams['type'] = 'application/xhtml+xml'
if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
if tag.find(':') != -1:
prefix, tag = tag.split(':', 1)
namespace = self.namespaces_in_use.get(prefix, '')
if tag == 'math' and namespace == 'http://www.w3.org/1998/Math/MathML':
attrs.append(('xmlns', namespace))
if tag == 'svg' and namespace == 'http://www.w3.org/2000/svg':
attrs.append(('xmlns', namespace))
if tag == 'svg':
self.contentparams["type"] = "application/xhtml+xml"
if self.incontent and self.contentparams.get("type") == "application/xhtml+xml":
if tag.find(":") != -1:
prefix, tag = tag.split(":", 1)
namespace = self.namespaces_in_use.get(prefix, "")
if tag == "math" and namespace == "http://www.w3.org/1998/Math/MathML":
attrs.append(("xmlns", namespace))
if tag == "svg" and namespace == "http://www.w3.org/2000/svg":
attrs.append(("xmlns", namespace))
if tag == "svg":
self.svgOK += 1
return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0)
return self.handle_data(f"<{tag}{self.strattrs(attrs)}>", escape=0)
# match namespaces
if tag.find(':') != -1:
prefix, suffix = tag.split(':', 1)
if tag.find(":") != -1:
prefix, suffix = tag.split(":", 1)
else:
prefix, suffix = '', tag
prefix, suffix = "", tag
prefix = self.namespacemap.get(prefix, prefix)
if prefix:
prefix = prefix + '_'
prefix = prefix + "_"
# Special hack for better tracking of empty textinput/image elements in
# illformed feeds.
if (not prefix) and tag not in ('title', 'link', 'description', 'name'):
if (not prefix) and tag not in ("title", "link", "description", "name"):
self.intextinput = 0
if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'):
if (not prefix) and tag not in (
"title",
"link",
"description",
"url",
"href",
"width",
"height",
):
self.inimage = 0
# call special handler (if defined) or default handler
methodname = '_start_' + prefix + suffix
methodname = "_start_" + prefix + suffix
try:
method = getattr(self, methodname)
return method(attrs_d)
@ -305,18 +320,18 @@ class XMLParserMixin(
def unknown_endtag(self, tag):
# match namespaces
if tag.find(':') != -1:
prefix, suffix = tag.split(':', 1)
if tag.find(":") != -1:
prefix, suffix = tag.split(":", 1)
else:
prefix, suffix = '', tag
prefix, suffix = "", tag
prefix = self.namespacemap.get(prefix, prefix)
if prefix:
prefix = prefix + '_'
if suffix == 'svg' and self.svgOK:
prefix = prefix + "_"
if suffix == "svg" and self.svgOK:
self.svgOK -= 1
# call special handler (if defined) or default handler
methodname = '_end_' + prefix + suffix
methodname = "_end_" + prefix + suffix
try:
if self.svgOK:
raise AttributeError()
@ -326,14 +341,14 @@ class XMLParserMixin(
self.pop(prefix + suffix)
# track inline content
if self.incontent and not self.contentparams.get('type', 'xml').endswith('xml'):
if self.incontent and not self.contentparams.get("type", "xml").endswith("xml"):
# element declared itself as escaped markup, but it isn't really
if tag in ('xhtml:div', 'div'):
if tag in ("xhtml:div", "div"):
return # typepad does this 10/2007
self.contentparams['type'] = 'application/xhtml+xml'
if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
tag = tag.split(':')[-1]
self.handle_data('</%s>' % tag, escape=0)
self.contentparams["type"] = "application/xhtml+xml"
if self.incontent and self.contentparams.get("type") == "application/xhtml+xml":
tag = tag.split(":")[-1]
self.handle_data("</%s>" % tag, escape=0)
# track xml:base and xml:lang going out of scope
if self.basestack:
@ -352,33 +367,33 @@ class XMLParserMixin(
if not self.elementstack:
return
ref = ref.lower()
if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'):
text = '&#%s;' % ref
if ref in ("34", "38", "39", "60", "62", "x22", "x26", "x27", "x3c", "x3e"):
text = "&#%s;" % ref
else:
if ref[0] == 'x':
if ref[0] == "x":
c = int(ref[1:], 16)
else:
c = int(ref)
text = chr(c).encode('utf-8')
text = chr(c).encode("utf-8")
self.elementstack[-1][2].append(text)
def handle_entityref(self, ref):
# Called for each entity reference, e.g. for '&copy;', ref is 'copy'
if not self.elementstack:
return
if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
text = '&%s;' % ref
if ref in ("lt", "gt", "quot", "amp", "apos"):
text = "&%s;" % ref
elif ref in self.entities:
text = self.entities[ref]
if text.startswith('&#') and text.endswith(';'):
if text.startswith("&#") and text.endswith(";"):
return self.handle_entityref(text)
else:
try:
html.entities.name2codepoint[ref]
except KeyError:
text = '&%s;' % ref
text = "&%s;" % ref
else:
text = chr(html.entities.name2codepoint[ref]).encode('utf-8')
text = chr(html.entities.name2codepoint[ref]).encode("utf-8")
self.elementstack[-1][2].append(text)
def handle_data(self, text, escape=1):
@ -386,7 +401,7 @@ class XMLParserMixin(
# not containing any character or entity references
if not self.elementstack:
return
if escape and self.contentparams.get('type') == 'application/xhtml+xml':
if escape and self.contentparams.get("type") == "application/xhtml+xml":
text = xml.sax.saxutils.escape(text)
self.elementstack[-1][2].append(text)
@ -403,18 +418,18 @@ class XMLParserMixin(
def parse_declaration(self, i):
# Override internal declaration handler to handle CDATA blocks.
if self.rawdata[i:i+9] == '<![CDATA[':
k = self.rawdata.find(']]>', i)
if self.rawdata[i : i + 9] == "<![CDATA[":
k = self.rawdata.find("]]>", i)
if k == -1:
# CDATA block began but didn't finish
k = len(self.rawdata)
return k
self.handle_data(xml.sax.saxutils.escape(self.rawdata[i+9:k]), 0)
return k+3
self.handle_data(xml.sax.saxutils.escape(self.rawdata[i + 9 : k]), 0)
return k + 3
else:
k = self.rawdata.find('>', i)
k = self.rawdata.find(">", i)
if k >= 0:
return k+1
return k + 1
else:
# We have an incomplete CDATA block.
return k
@ -422,35 +437,35 @@ class XMLParserMixin(
@staticmethod
def map_content_type(content_type):
content_type = content_type.lower()
if content_type == 'text' or content_type == 'plain':
content_type = 'text/plain'
elif content_type == 'html':
content_type = 'text/html'
elif content_type == 'xhtml':
content_type = 'application/xhtml+xml'
if content_type == "text" or content_type == "plain":
content_type = "text/plain"
elif content_type == "html":
content_type = "text/html"
elif content_type == "xhtml":
content_type = "application/xhtml+xml"
return content_type
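# For example (map_content_type() is a static method, per the decorator above):
#
#     >>> XMLParserMixin.map_content_type("xhtml")
#     'application/xhtml+xml'
#     >>> XMLParserMixin.map_content_type("TEXT")
#     'text/plain'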
def track_namespace(self, prefix, uri):
loweruri = uri.lower()
if not self.version:
if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/'):
self.version = 'rss090'
elif loweruri == 'http://purl.org/rss/1.0/':
self.version = 'rss10'
elif loweruri == 'http://www.w3.org/2005/atom':
self.version = 'atom10'
if loweruri.find('backend.userland.com/rss') != -1:
if (prefix, loweruri) == (None, "http://my.netscape.com/rdf/simple/0.9/"):
self.version = "rss090"
elif loweruri == "http://purl.org/rss/1.0/":
self.version = "rss10"
elif loweruri == "http://www.w3.org/2005/atom":
self.version = "atom10"
if loweruri.find("backend.userland.com/rss") != -1:
# match any backend.userland.com namespace
uri = 'http://backend.userland.com/rss'
uri = "http://backend.userland.com/rss"
loweruri = uri
if loweruri in self._matchnamespaces:
self.namespacemap[prefix] = self._matchnamespaces[loweruri]
self.namespaces_in_use[self._matchnamespaces[loweruri]] = uri
else:
self.namespaces_in_use[prefix or ''] = uri
self.namespaces_in_use[prefix or ""] = uri
def resolve_uri(self, uri):
return _urljoin(self.baseuri or '', uri)
return _urljoin(self.baseuri or "", uri)
@staticmethod
def decode_entities(element, data):
@ -458,8 +473,8 @@ class XMLParserMixin(
@staticmethod
def strattrs(attrs):
return ''.join(
' %s="%s"' % (t[0], xml.sax.saxutils.escape(t[1], {'"': '&quot;'}))
return "".join(
' {}="{}"'.format(t[0], xml.sax.saxutils.escape(t[1], {'"': "&quot;"}))
for t in attrs
)
@ -475,11 +490,14 @@ class XMLParserMixin(
element, expecting_text, pieces = self.elementstack.pop()
# Ensure each piece is a str for Python 3
for (i, v) in enumerate(pieces):
for i, v in enumerate(pieces):
if isinstance(v, bytes):
pieces[i] = v.decode('utf-8')
pieces[i] = v.decode("utf-8")
if self.version == 'atom10' and self.contentparams.get('type', 'text') == 'application/xhtml+xml':
if (
self.version == "atom10"
and self.contentparams.get("type", "text") == "application/xhtml+xml"
):
# remove enclosing child element, but only if it is a <div> and
# only if all the remaining content is nested underneath it.
# This means that the divs would be retained in the following:
@ -488,76 +506,95 @@ class XMLParserMixin(
del pieces[-1]
while pieces and len(pieces) > 1 and not pieces[0].strip():
del pieces[0]
if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1] == '</div>':
if (
pieces
and (pieces[0] == "<div>" or pieces[0].startswith("<div "))
and pieces[-1] == "</div>"
):
depth = 0
for piece in pieces[:-1]:
if piece.startswith('</'):
if piece.startswith("</"):
depth -= 1
if depth == 0:
break
elif piece.startswith('<') and not piece.endswith('/>'):
elif piece.startswith("<") and not piece.endswith("/>"):
depth += 1
else:
pieces = pieces[1:-1]
output = ''.join(pieces)
output = "".join(pieces)
if strip_whitespace:
output = output.strip()
if not expecting_text:
return output
# decode base64 content
if base64 and self.contentparams.get('base64', 0):
if base64 and self.contentparams.get("base64", 0):
try:
output = base64.decodebytes(output.encode('utf8')).decode('utf8')
output = base64.decodebytes(output.encode("utf8")).decode("utf8")
except (binascii.Error, binascii.Incomplete, UnicodeDecodeError):
pass
# resolve relative URIs
if (element in self.can_be_relative_uri) and output:
# do not resolve guid elements with isPermalink="false"
if not element == 'id' or self.guidislink:
if not element == "id" or self.guidislink:
output = self.resolve_uri(output)
# decode entities within embedded markup
if not self.contentparams.get('base64', 0):
if not self.contentparams.get("base64", 0):
output = self.decode_entities(element, output)
# some feed formats require consumers to guess
# whether the content is html or plain text
if not self.version.startswith('atom') and self.contentparams.get('type') == 'text/plain':
if (
not self.version.startswith("atom")
and self.contentparams.get("type") == "text/plain"
):
if self.looks_like_html(output):
self.contentparams['type'] = 'text/html'
self.contentparams["type"] = "text/html"
# remove temporary cruft from contentparams
try:
del self.contentparams['mode']
del self.contentparams["mode"]
except KeyError:
pass
try:
del self.contentparams['base64']
del self.contentparams["base64"]
except KeyError:
pass
is_htmlish = self.map_content_type(self.contentparams.get('type', 'text/html')) in self.html_types
is_htmlish = (
self.map_content_type(self.contentparams.get("type", "text/html"))
in self.html_types
)
# resolve relative URIs within embedded markup
if is_htmlish and self.resolve_relative_uris:
if element in self.can_contain_relative_uris:
output = resolve_relative_uris(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html'))
output = resolve_relative_uris(
output,
self.baseuri,
self.encoding,
self.contentparams.get("type", "text/html"),
)
# sanitize embedded markup
if is_htmlish and self.sanitize_html:
if element in self.can_contain_dangerous_markup:
output = sanitize_html(output, self.encoding, self.contentparams.get('type', 'text/html'))
output = sanitize_html(
output, self.encoding, self.contentparams.get("type", "text/html")
)
if self.encoding and isinstance(output, bytes):
output = output.decode(self.encoding, 'ignore')
output = output.decode(self.encoding, "ignore")
# address common error where people take data that is already
# utf-8, presume that it is iso-8859-1, and re-encode it.
if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and not isinstance(output, bytes):
if self.encoding in ("utf-8", "utf-8_INVALID_PYTHON_3") and not isinstance(
output, bytes
):
try:
output = output.encode('iso-8859-1').decode('utf-8')
output = output.encode("iso-8859-1").decode("utf-8")
except (UnicodeEncodeError, UnicodeDecodeError):
pass
@ -567,65 +604,74 @@ class XMLParserMixin(
# categories/tags/keywords/whatever are handled in _end_category or
# _end_tags or _end_itunes_keywords
if element in ('category', 'tags', 'itunes_keywords'):
if element in ("category", "tags", "itunes_keywords"):
return output
if element == 'title' and -1 < self.title_depth <= self.depth:
if element == "title" and -1 < self.title_depth <= self.depth:
return output
# store output in appropriate place(s)
if self.inentry and not self.insource:
if element == 'content':
if element == "content":
self.entries[-1].setdefault(element, [])
contentparams = copy.deepcopy(self.contentparams)
contentparams['value'] = output
contentparams["value"] = output
self.entries[-1][element].append(contentparams)
elif element == 'link':
elif element == "link":
if not self.inimage:
# query variables in urls in link elements are improperly
# converted from `?a=1&b=2` to `?a=1&b;=2` as if they're
# unhandled character references. fix this special case.
output = output.replace('&amp;', '&')
output = output.replace("&amp;", "&")
output = re.sub("&([A-Za-z0-9_]+);", r"&\g<1>", output)
self.entries[-1][element] = output
if output:
self.entries[-1]['links'][-1]['href'] = output
self.entries[-1]["links"][-1]["href"] = output
else:
if element == 'description':
element = 'summary'
old_value_depth = self.property_depth_map.setdefault(self.entries[-1], {}).get(element)
if element == "description":
element = "summary"
old_value_depth = self.property_depth_map.setdefault(
self.entries[-1], {}
).get(element)
if old_value_depth is None or self.depth <= old_value_depth:
self.property_depth_map[self.entries[-1]][element] = self.depth
self.entries[-1][element] = output
if self.incontent:
contentparams = copy.deepcopy(self.contentparams)
contentparams['value'] = output
self.entries[-1][element + '_detail'] = contentparams
elif self.infeed or self.insource: # and (not self.intextinput) and (not self.inimage):
contentparams["value"] = output
self.entries[-1][element + "_detail"] = contentparams
elif (
self.infeed or self.insource
): # and (not self.intextinput) and (not self.inimage):
context = self._get_context()
if element == 'description':
element = 'subtitle'
if element == "description":
element = "subtitle"
context[element] = output
if element == 'link':
if element == "link":
# fix query variables; see above for the explanation
output = re.sub("&([A-Za-z0-9_]+);", r"&\g<1>", output)
context[element] = output
context['links'][-1]['href'] = output
context["links"][-1]["href"] = output
elif self.incontent:
contentparams = copy.deepcopy(self.contentparams)
contentparams['value'] = output
context[element + '_detail'] = contentparams
contentparams["value"] = output
context[element + "_detail"] = contentparams
return output
def push_content(self, tag, attrs_d, default_content_type, expecting_text):
self.incontent += 1
if self.lang:
self.lang = self.lang.replace('_', '-')
self.contentparams = FeedParserDict({
'type': self.map_content_type(attrs_d.get('type', default_content_type)),
'language': self.lang,
'base': self.baseuri})
self.contentparams['base64'] = self._is_base64(attrs_d, self.contentparams)
self.lang = self.lang.replace("_", "-")
self.contentparams = FeedParserDict(
{
"type": self.map_content_type(
attrs_d.get("type", default_content_type)
),
"language": self.lang,
"base": self.baseuri,
}
)
self.contentparams["base64"] = self._is_base64(attrs_d, self.contentparams)
self.push(tag, expecting_text)
def pop_content(self, tag):
@ -646,55 +692,61 @@ class XMLParserMixin(
"""
# must have a close tag or an entity reference to qualify
if not (re.search(r'</(\w+)>', s) or re.search(r'&#?\w+;', s)):
if not (re.search(r"</(\w+)>", s) or re.search(r"&#?\w+;", s)):
return False
# all tags must be in a restricted subset of valid HTML tags
if any((t for t in re.findall(r'</?(\w+)', s) if t.lower() not in HTMLSanitizer.acceptable_elements)):
if any(
t
for t in re.findall(r"</?(\w+)", s)
if t.lower() not in HTMLSanitizer.acceptable_elements
):
return False
# all entities must have been defined as valid HTML entities
if any((e for e in re.findall(r'&(\w+);', s) if e not in html.entities.entitydefs)):
if any(
e for e in re.findall(r"&(\w+);", s) if e not in html.entities.entitydefs
):
return False
return True
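# For instance (a hedged sketch; upstream defines looks_like_html() as a
# static method, so it can be called on the class itself):
#
#     >>> XMLParserMixin.looks_like_html("<p>Hello &amp; goodbye</p>")
#     True
#     >>> XMLParserMixin.looks_like_html("no markup at all")
#     False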
def _map_to_standard_prefix(self, name):
colonpos = name.find(':')
colonpos = name.find(":")
if colonpos != -1:
prefix = name[:colonpos]
suffix = name[colonpos+1:]
suffix = name[colonpos + 1 :]
prefix = self.namespacemap.get(prefix, prefix)
name = prefix + ':' + suffix
name = prefix + ":" + suffix
return name
def _get_attribute(self, attrs_d, name):
return attrs_d.get(self._map_to_standard_prefix(name))
def _is_base64(self, attrs_d, contentparams):
if attrs_d.get('mode', '') == 'base64':
if attrs_d.get("mode", "") == "base64":
return 1
if self.contentparams['type'].startswith('text/'):
if self.contentparams["type"].startswith("text/"):
return 0
if self.contentparams['type'].endswith('+xml'):
if self.contentparams["type"].endswith("+xml"):
return 0
if self.contentparams['type'].endswith('/xml'):
if self.contentparams["type"].endswith("/xml"):
return 0
return 1
@staticmethod
def _enforce_href(attrs_d):
href = attrs_d.get('url', attrs_d.get('uri', attrs_d.get('href', None)))
href = attrs_d.get("url", attrs_d.get("uri", attrs_d.get("href", None)))
if href:
try:
del attrs_d['url']
del attrs_d["url"]
except KeyError:
pass
try:
del attrs_d['uri']
del attrs_d["uri"]
except KeyError:
pass
attrs_d['href'] = href
attrs_d["href"] = href
return attrs_d
def _save(self, key, value, overwrite=False):
@ -707,37 +759,37 @@ class XMLParserMixin(
def _get_context(self):
if self.insource:
context = self.sourcedata
elif self.inimage and 'image' in self.feeddata:
context = self.feeddata['image']
elif self.inimage and "image" in self.feeddata:
context = self.feeddata["image"]
elif self.intextinput:
context = self.feeddata['textinput']
context = self.feeddata["textinput"]
elif self.inentry:
context = self.entries[-1]
else:
context = self.feeddata
return context
def _save_author(self, key, value, prefix='author'):
def _save_author(self, key, value, prefix="author"):
context = self._get_context()
context.setdefault(prefix + '_detail', FeedParserDict())
context[prefix + '_detail'][key] = value
context.setdefault(prefix + "_detail", FeedParserDict())
context[prefix + "_detail"][key] = value
self._sync_author_detail()
context.setdefault('authors', [FeedParserDict()])
context['authors'][-1][key] = value
context.setdefault("authors", [FeedParserDict()])
context["authors"][-1][key] = value
def _save_contributor(self, key, value):
context = self._get_context()
context.setdefault('contributors', [FeedParserDict()])
context['contributors'][-1][key] = value
context.setdefault("contributors", [FeedParserDict()])
context["contributors"][-1][key] = value
def _sync_author_detail(self, key='author'):
def _sync_author_detail(self, key="author"):
context = self._get_context()
detail = context.get('%ss' % key, [FeedParserDict()])[-1]
detail = context.get("%ss" % key, [FeedParserDict()])[-1]
if detail:
name = detail.get('name')
email = detail.get('email')
name = detail.get("name")
email = detail.get("email")
if name and email:
context[key] = '%s (%s)' % (name, email)
context[key] = f"{name} ({email})"
elif name:
context[key] = name
elif email:
@ -746,31 +798,31 @@ class XMLParserMixin(
author, email = context.get(key), None
if not author:
return
emailmatch = re.search(r"(([a-zA-Z0-9_.+-]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(]?))(\?subject=\S+)?", author)
emailmatch = email_pattern.search(author)
if emailmatch:
email = emailmatch.group(0)
# probably a better way to do the following, but it passes
# all the tests
author = author.replace(email, '')
author = author.replace('()', '')
author = author.replace('<>', '')
author = author.replace('&lt;&gt;', '')
author = author.replace(email, "")
author = author.replace("()", "")
author = author.replace("<>", "")
author = author.replace("&lt;&gt;", "")
author = author.strip()
if author and (author[0] == '('):
if author and (author[0] == "("):
author = author[1:]
if author and (author[-1] == ')'):
if author and (author[-1] == ")"):
author = author[:-1]
author = author.strip()
if author or email:
context.setdefault('%s_detail' % key, detail)
context.setdefault("%s_detail" % key, detail)
if author:
detail['name'] = author
detail["name"] = author
if email:
detail['email'] = email
detail["email"] = email
def _add_tag(self, term, scheme, label):
context = self._get_context()
tags = context.setdefault('tags', [])
tags = context.setdefault("tags", [])
if (not term) and (not scheme) and (not label):
return
value = FeedParserDict(term=term, scheme=scheme, label=label)
@ -781,8 +833,8 @@ class XMLParserMixin(
# This is a completely-made up element. Its semantics are determined
# only by a single feed that precipitated bug report 392 on Google Code.
# In short, this is junk code.
self.push('tags', 1)
self.push("tags", 1)
def _end_tags(self):
for term in self.pop('tags').split(','):
for term in self.pop("tags").split(","):
self._add_tag(term.strip(), None, None)

View file

@ -1,5 +1,5 @@
# Support for the Atom, RSS, RDF, and CDF feed formats
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -33,7 +33,7 @@ from ..urls import make_safe_absolute_uri
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
"""Support for the Atom, RSS, RDF, and CDF feed formats.
The feed formats all share common elements, some of which have conflicting
@ -42,452 +42,490 @@ class Namespace(object):
"""
supported_namespaces = {
'': '',
'http://backend.userland.com/rss': '',
'http://blogs.law.harvard.edu/tech/rss': '',
'http://purl.org/rss/1.0/': '',
'http://my.netscape.com/rdf/simple/0.9/': '',
'http://example.com/newformat#': '',
'http://example.com/necho': '',
'http://purl.org/echo/': '',
'uri/of/echo/namespace#': '',
'http://purl.org/pie/': '',
'http://purl.org/atom/ns#': '',
'http://www.w3.org/2005/Atom': '',
'http://purl.org/rss/1.0/modules/rss091#': '',
"": "",
"http://backend.userland.com/rss": "",
"http://blogs.law.harvard.edu/tech/rss": "",
"http://purl.org/rss/1.0/": "",
"http://my.netscape.com/rdf/simple/0.9/": "",
"http://example.com/newformat#": "",
"http://example.com/necho": "",
"http://purl.org/echo/": "",
"uri/of/echo/namespace#": "",
"http://purl.org/pie/": "",
"http://purl.org/atom/ns#": "",
"http://www.w3.org/2005/Atom": "",
"http://purl.org/rss/1.0/modules/rss091#": "",
}
def _start_rss(self, attrs_d):
versionmap = {
'0.91': 'rss091u',
'0.92': 'rss092',
'0.93': 'rss093',
'0.94': 'rss094',
"0.91": "rss091u",
"0.92": "rss092",
"0.93": "rss093",
"0.94": "rss094",
}
# If we're here then this is an RSS feed.
# If we don't have a version or have a version that starts with something
# other than RSS then there's been a mistake. Correct it.
if not self.version or not self.version.startswith('rss'):
attr_version = attrs_d.get('version', '')
if not self.version or not self.version.startswith("rss"):
attr_version = attrs_d.get("version", "")
version = versionmap.get(attr_version)
if version:
self.version = version
elif attr_version.startswith('2.'):
self.version = 'rss20'
elif attr_version.startswith("2."):
self.version = "rss20"
else:
self.version = 'rss'
self.version = "rss"
def _start_channel(self, attrs_d):
self.infeed = 1
self._cdf_common(attrs_d)
def _cdf_common(self, attrs_d):
if 'lastmod' in attrs_d:
if "lastmod" in attrs_d:
self._start_modified({})
self.elementstack[-1][-1] = attrs_d['lastmod']
self.elementstack[-1][-1] = attrs_d["lastmod"]
self._end_modified()
if 'href' in attrs_d:
if "href" in attrs_d:
self._start_link({})
self.elementstack[-1][-1] = attrs_d['href']
self.elementstack[-1][-1] = attrs_d["href"]
self._end_link()
def _start_feed(self, attrs_d):
self.infeed = 1
versionmap = {'0.1': 'atom01',
'0.2': 'atom02',
'0.3': 'atom03'}
versionmap = {"0.1": "atom01", "0.2": "atom02", "0.3": "atom03"}
if not self.version:
attr_version = attrs_d.get('version')
attr_version = attrs_d.get("version")
version = versionmap.get(attr_version)
if version:
self.version = version
else:
self.version = 'atom'
self.version = "atom"
def _end_channel(self):
self.infeed = 0
_end_feed = _end_channel
def _start_image(self, attrs_d):
context = self._get_context()
if not self.inentry:
context.setdefault('image', FeedParserDict())
context.setdefault("image", FeedParserDict())
self.inimage = 1
self.title_depth = -1
self.push('image', 0)
self.push("image", 0)
def _end_image(self):
self.pop('image')
self.pop("image")
self.inimage = 0
def _start_textinput(self, attrs_d):
context = self._get_context()
context.setdefault('textinput', FeedParserDict())
context.setdefault("textinput", FeedParserDict())
self.intextinput = 1
self.title_depth = -1
self.push('textinput', 0)
self.push("textinput", 0)
_start_textInput = _start_textinput
def _end_textinput(self):
self.pop('textinput')
self.pop("textinput")
self.intextinput = 0
_end_textInput = _end_textinput
def _start_author(self, attrs_d):
self.inauthor = 1
self.push('author', 1)
self.push("author", 1)
# Append a new FeedParserDict when expecting an author
context = self._get_context()
context.setdefault('authors', [])
context['authors'].append(FeedParserDict())
context.setdefault("authors", [])
context["authors"].append(FeedParserDict())
_start_managingeditor = _start_author
def _end_author(self):
self.pop('author')
self.pop("author")
self.inauthor = 0
self._sync_author_detail()
_end_managingeditor = _end_author
def _start_contributor(self, attrs_d):
self.incontributor = 1
context = self._get_context()
context.setdefault('contributors', [])
context['contributors'].append(FeedParserDict())
self.push('contributor', 0)
context.setdefault("contributors", [])
context["contributors"].append(FeedParserDict())
self.push("contributor", 0)
def _end_contributor(self):
self.pop('contributor')
self.pop("contributor")
self.incontributor = 0
def _start_name(self, attrs_d):
self.push('name', 0)
self.push("name", 0)
def _end_name(self):
value = self.pop('name')
value = self.pop("name")
if self.inpublisher:
self._save_author('name', value, 'publisher')
self._save_author("name", value, "publisher")
elif self.inauthor:
self._save_author('name', value)
self._save_author("name", value)
elif self.incontributor:
self._save_contributor('name', value)
self._save_contributor("name", value)
elif self.intextinput:
context = self._get_context()
context['name'] = value
context["name"] = value
def _start_width(self, attrs_d):
self.push('width', 0)
self.push("width", 0)
def _end_width(self):
value = self.pop('width')
value = self.pop("width")
try:
value = int(value)
except ValueError:
value = 0
if self.inimage:
context = self._get_context()
context['width'] = value
context["width"] = value
def _start_height(self, attrs_d):
self.push('height', 0)
self.push("height", 0)
def _end_height(self):
value = self.pop('height')
value = self.pop("height")
try:
value = int(value)
except ValueError:
value = 0
if self.inimage:
context = self._get_context()
context['height'] = value
context["height"] = value
def _start_url(self, attrs_d):
self.push('href', 1)
self.push("href", 1)
_start_homepage = _start_url
_start_uri = _start_url
def _end_url(self):
value = self.pop('href')
value = self.pop("href")
if self.inauthor:
self._save_author('href', value)
self._save_author("href", value)
elif self.incontributor:
self._save_contributor('href', value)
self._save_contributor("href", value)
_end_homepage = _end_url
_end_uri = _end_url
def _start_email(self, attrs_d):
self.push('email', 0)
self.push("email", 0)
def _end_email(self):
value = self.pop('email')
value = self.pop("email")
if self.inpublisher:
self._save_author('email', value, 'publisher')
self._save_author("email", value, "publisher")
elif self.inauthor:
self._save_author('email', value)
self._save_author("email", value)
elif self.incontributor:
self._save_contributor('email', value)
self._save_contributor("email", value)
def _start_subtitle(self, attrs_d):
self.push_content('subtitle', attrs_d, 'text/plain', 1)
self.push_content("subtitle", attrs_d, "text/plain", 1)
_start_tagline = _start_subtitle
def _end_subtitle(self):
self.pop_content('subtitle')
self.pop_content("subtitle")
_end_tagline = _end_subtitle
def _start_rights(self, attrs_d):
self.push_content('rights', attrs_d, 'text/plain', 1)
self.push_content("rights", attrs_d, "text/plain", 1)
_start_copyright = _start_rights
def _end_rights(self):
self.pop_content('rights')
self.pop_content("rights")
_end_copyright = _end_rights
def _start_item(self, attrs_d):
self.entries.append(FeedParserDict())
self.push('item', 0)
self.push("item", 0)
self.inentry = 1
self.guidislink = 0
self.title_depth = -1
id = self._get_attribute(attrs_d, 'rdf:about')
id = self._get_attribute(attrs_d, "rdf:about")
if id:
context = self._get_context()
context['id'] = id
context["id"] = id
self._cdf_common(attrs_d)
_start_entry = _start_item
def _end_item(self):
self.pop('item')
self.pop("item")
self.inentry = 0
self.hasContent = 0
_end_entry = _end_item
def _start_language(self, attrs_d):
self.push('language', 1)
self.push("language", 1)
def _end_language(self):
self.lang = self.pop('language')
self.lang = self.pop("language")
def _start_webmaster(self, attrs_d):
self.push('publisher', 1)
self.push("publisher", 1)
def _end_webmaster(self):
self.pop('publisher')
self._sync_author_detail('publisher')
self.pop("publisher")
self._sync_author_detail("publisher")
def _start_published(self, attrs_d):
self.push('published', 1)
self.push("published", 1)
_start_issued = _start_published
_start_pubdate = _start_published
def _end_published(self):
value = self.pop('published')
self._save('published_parsed', _parse_date(value), overwrite=True)
value = self.pop("published")
self._save("published_parsed", _parse_date(value), overwrite=True)
_end_issued = _end_published
_end_pubdate = _end_published
def _start_updated(self, attrs_d):
self.push('updated', 1)
self.push("updated", 1)
_start_modified = _start_updated
_start_lastbuilddate = _start_updated
def _end_updated(self):
value = self.pop('updated')
value = self.pop("updated")
parsed_value = _parse_date(value)
self._save('updated_parsed', parsed_value, overwrite=True)
self._save("updated_parsed", parsed_value, overwrite=True)
_end_modified = _end_updated
_end_lastbuilddate = _end_updated
def _start_created(self, attrs_d):
self.push('created', 1)
self.push("created", 1)
def _end_created(self):
value = self.pop('created')
self._save('created_parsed', _parse_date(value), overwrite=True)
value = self.pop("created")
self._save("created_parsed", _parse_date(value), overwrite=True)
def _start_expirationdate(self, attrs_d):
self.push('expired', 1)
self.push("expired", 1)
def _end_expirationdate(self):
self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)
self._save("expired_parsed", _parse_date(self.pop("expired")), overwrite=True)
def _start_category(self, attrs_d):
term = attrs_d.get('term')
scheme = attrs_d.get('scheme', attrs_d.get('domain'))
label = attrs_d.get('label')
term = attrs_d.get("term")
scheme = attrs_d.get("scheme", attrs_d.get("domain"))
label = attrs_d.get("label")
self._add_tag(term, scheme, label)
self.push('category', 1)
self.push("category", 1)
_start_keywords = _start_category
def _end_category(self):
value = self.pop('category')
value = self.pop("category")
if not value:
return
context = self._get_context()
tags = context['tags']
if value and len(tags) and not tags[-1]['term']:
tags[-1]['term'] = value
tags = context["tags"]
if value and len(tags) and not tags[-1]["term"]:
tags[-1]["term"] = value
else:
self._add_tag(value, None, None)
_end_keywords = _end_category
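A sketch of what the category handlers above produce (hypothetical feed string): the element text becomes the tag's term, and the RSS "domain" attribute maps to scheme:

    import feedparser

    d = feedparser.parse(
        "<rss version='2.0'><channel><item>"
        "<category domain='http://example.com/scheme'>python</category>"
        "</item></channel></rss>"
    )
    tag = d.entries[0].tags[0]
    assert tag.term == "python"
    assert tag.scheme == "http://example.com/scheme"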
def _start_cloud(self, attrs_d):
self._get_context()['cloud'] = FeedParserDict(attrs_d)
self._get_context()["cloud"] = FeedParserDict(attrs_d)
def _start_link(self, attrs_d):
attrs_d.setdefault('rel', 'alternate')
if attrs_d['rel'] == 'self':
attrs_d.setdefault('type', 'application/atom+xml')
attrs_d.setdefault("rel", "alternate")
if attrs_d["rel"] == "self":
attrs_d.setdefault("type", "application/atom+xml")
else:
attrs_d.setdefault('type', 'text/html')
attrs_d.setdefault("type", "text/html")
context = self._get_context()
attrs_d = self._enforce_href(attrs_d)
if 'href' in attrs_d:
attrs_d['href'] = self.resolve_uri(attrs_d['href'])
if "href" in attrs_d:
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
expecting_text = self.infeed or self.inentry or self.insource
context.setdefault('links', [])
context.setdefault("links", [])
if not (self.inentry and self.inimage):
context['links'].append(FeedParserDict(attrs_d))
if 'href' in attrs_d:
context["links"].append(FeedParserDict(attrs_d))
if "href" in attrs_d:
if (
attrs_d.get('rel') == 'alternate'
and self.map_content_type(attrs_d.get('type')) in self.html_types
attrs_d.get("rel") == "alternate"
and self.map_content_type(attrs_d.get("type")) in self.html_types
):
context['link'] = attrs_d['href']
context["link"] = attrs_d["href"]
else:
self.push('link', expecting_text)
self.push("link", expecting_text)
def _end_link(self):
self.pop('link')
self.pop("link")
def _start_guid(self, attrs_d):
self.guidislink = (attrs_d.get('ispermalink', 'true') == 'true')
self.push('id', 1)
self.guidislink = attrs_d.get("ispermalink", "true") == "true"
self.push("id", 1)
_start_id = _start_guid
def _end_guid(self):
value = self.pop('id')
self._save('guidislink', self.guidislink and 'link' not in self._get_context())
value = self.pop("id")
self._save("guidislink", self.guidislink and "link" not in self._get_context())
if self.guidislink:
# guid acts as link, but only if 'ispermalink' is not present or is 'true',
# and only if the item doesn't already have a link element
self._save('link', value)
self._save("link", value)
_end_id = _end_guid
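The permalink behaviour described in the comment above, sketched with a hypothetical feed string:

    import feedparser

    d = feedparser.parse(
        "<rss version='2.0'><channel><item>"
        "<guid isPermaLink='true'>https://example.com/p/1</guid>"
        "</item></channel></rss>"
    )
    # no explicit <link> element, so the permalink guid doubles as the link
    assert d.entries[0].id == d.entries[0].link == "https://example.com/p/1"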
def _start_title(self, attrs_d):
if self.svgOK:
return self.unknown_starttag('title', list(attrs_d.items()))
self.push_content('title', attrs_d, 'text/plain', self.infeed or self.inentry or self.insource)
return self.unknown_starttag("title", list(attrs_d.items()))
self.push_content(
"title", attrs_d, "text/plain", self.infeed or self.inentry or self.insource
)
def _end_title(self):
if self.svgOK:
return
value = self.pop_content('title')
value = self.pop_content("title")
if not value:
return
self.title_depth = self.depth
def _start_description(self, attrs_d):
context = self._get_context()
if 'summary' in context and not self.hasContent:
self._summaryKey = 'content'
if "summary" in context and not self.hasContent:
self._summaryKey = "content"
self._start_content(attrs_d)
else:
self.push_content('description', attrs_d, 'text/html', self.infeed or self.inentry or self.insource)
self.push_content(
"description",
attrs_d,
"text/html",
self.infeed or self.inentry or self.insource,
)
def _start_abstract(self, attrs_d):
self.push_content('description', attrs_d, 'text/plain', self.infeed or self.inentry or self.insource)
self.push_content(
"description",
attrs_d,
"text/plain",
self.infeed or self.inentry or self.insource,
)
def _end_description(self):
if self._summaryKey == 'content':
if self._summaryKey == "content":
self._end_content()
else:
self.pop_content('description')
self.pop_content("description")
self._summaryKey = None
_end_abstract = _end_description
def _start_info(self, attrs_d):
self.push_content('info', attrs_d, 'text/plain', 1)
self.push_content("info", attrs_d, "text/plain", 1)
_start_feedburner_browserfriendly = _start_info
def _end_info(self):
self.pop_content('info')
self.pop_content("info")
_end_feedburner_browserfriendly = _end_info
def _start_generator(self, attrs_d):
if attrs_d:
attrs_d = self._enforce_href(attrs_d)
if 'href' in attrs_d:
attrs_d['href'] = self.resolve_uri(attrs_d['href'])
self._get_context()['generator_detail'] = FeedParserDict(attrs_d)
self.push('generator', 1)
if "href" in attrs_d:
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
self._get_context()["generator_detail"] = FeedParserDict(attrs_d)
self.push("generator", 1)
def _end_generator(self):
value = self.pop('generator')
value = self.pop("generator")
context = self._get_context()
if 'generator_detail' in context:
context['generator_detail']['name'] = value
if "generator_detail" in context:
context["generator_detail"]["name"] = value
def _start_summary(self, attrs_d):
context = self._get_context()
if 'summary' in context and not self.hasContent:
self._summaryKey = 'content'
if "summary" in context and not self.hasContent:
self._summaryKey = "content"
self._start_content(attrs_d)
else:
self._summaryKey = 'summary'
self.push_content(self._summaryKey, attrs_d, 'text/plain', 1)
self._summaryKey = "summary"
self.push_content(self._summaryKey, attrs_d, "text/plain", 1)
def _end_summary(self):
if self._summaryKey == 'content':
if self._summaryKey == "content":
self._end_content()
else:
self.pop_content(self._summaryKey or 'summary')
self.pop_content(self._summaryKey or "summary")
self._summaryKey = None
def _start_enclosure(self, attrs_d):
attrs_d = self._enforce_href(attrs_d)
context = self._get_context()
attrs_d['rel'] = 'enclosure'
context.setdefault('links', []).append(FeedParserDict(attrs_d))
attrs_d["rel"] = "enclosure"
context.setdefault("links", []).append(FeedParserDict(attrs_d))
def _start_source(self, attrs_d):
if 'url' in attrs_d:
if "url" in attrs_d:
# This means that we're processing a source element from an RSS 2.0 feed
self.sourcedata['href'] = attrs_d['url']
self.push('source', 1)
self.sourcedata["href"] = attrs_d["url"]
self.push("source", 1)
self.insource = 1
self.title_depth = -1
def _end_source(self):
self.insource = 0
value = self.pop('source')
value = self.pop("source")
if value:
self.sourcedata['title'] = value
self._get_context()['source'] = copy.deepcopy(self.sourcedata)
self.sourcedata["title"] = value
self._get_context()["source"] = copy.deepcopy(self.sourcedata)
self.sourcedata.clear()
def _start_content(self, attrs_d):
self.hasContent = 1
self.push_content('content', attrs_d, 'text/plain', 1)
src = attrs_d.get('src')
self.push_content("content", attrs_d, "text/plain", 1)
src = attrs_d.get("src")
if src:
self.contentparams['src'] = src
self.push('content', 1)
self.contentparams["src"] = src
self.push("content", 1)
def _start_body(self, attrs_d):
self.push_content('content', attrs_d, 'application/xhtml+xml', 1)
self.push_content("content", attrs_d, "application/xhtml+xml", 1)
_start_xhtml_body = _start_body
def _start_content_encoded(self, attrs_d):
self.hasContent = 1
self.push_content('content', attrs_d, 'text/html', 1)
self.push_content("content", attrs_d, "text/html", 1)
_start_fullitem = _start_content_encoded
def _end_content(self):
copyToSummary = self.map_content_type(self.contentparams.get('type')) in ({'text/plain'} | self.html_types)
value = self.pop_content('content')
copyToSummary = self.map_content_type(self.contentparams.get("type")) in (
{"text/plain"} | self.html_types
)
value = self.pop_content("content")
if copyToSummary:
self._save('summary', value)
self._save("summary", value)
_end_body = _end_content
_end_xhtml_body = _end_content
@@ -495,12 +533,12 @@ class Namespace(object):
_end_fullitem = _end_content
def _start_newlocation(self, attrs_d):
self.push('newlocation', 1)
self.push("newlocation", 1)
def _end_newlocation(self):
url = self.pop('newlocation')
url = self.pop("newlocation")
context = self._get_context()
# don't set newlocation if the context isn't right
if context is not self.feeddata:
return
context['newlocation'] = make_safe_absolute_uri(self.baseuri, url.strip())
context["newlocation"] = make_safe_absolute_uri(self.baseuri, url.strip())


@@ -1,5 +1,5 @@
# Support for the administrative elements extension
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -29,25 +29,25 @@
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
# RDF Site Summary 1.0 Modules: Administrative
# http://web.resource.org/rss/1.0/modules/admin/
supported_namespaces = {
'http://webns.net/mvcb/': 'admin',
"http://webns.net/mvcb/": "admin",
}
def _start_admin_generatoragent(self, attrs_d):
self.push('generator', 1)
value = self._get_attribute(attrs_d, 'rdf:resource')
self.push("generator", 1)
value = self._get_attribute(attrs_d, "rdf:resource")
if value:
self.elementstack[-1][2].append(value)
self.pop('generator')
self._get_context()['generator_detail'] = FeedParserDict({'href': value})
self.pop("generator")
self._get_context()["generator_detail"] = FeedParserDict({"href": value})
def _start_admin_errorreportsto(self, attrs_d):
self.push('errorreportsto', 1)
value = self._get_attribute(attrs_d, 'rdf:resource')
self.push("errorreportsto", 1)
value = self._get_attribute(attrs_d, "rdf:resource")
if value:
self.elementstack[-1][2].append(value)
self.pop('errorreportsto')
self.pop("errorreportsto")


@@ -1,5 +1,5 @@
# Support for the Creative Commons licensing extensions
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -29,41 +29,42 @@
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
supported_namespaces = {
# RDF-based namespace
'http://creativecommons.org/ns#license': 'cc',
"http://creativecommons.org/ns#license": "cc",
# Old RDF-based namespace
'http://web.resource.org/cc/': 'cc',
"http://web.resource.org/cc/": "cc",
# RSS-based namespace
'http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html': 'creativecommons',
"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": (
"creativecommons"
),
# Old RSS-based namespace
'http://backend.userland.com/creativeCommonsRssModule': 'creativecommons',
"http://backend.userland.com/creativeCommonsRssModule": "creativecommons",
}
def _start_cc_license(self, attrs_d):
context = self._get_context()
value = self._get_attribute(attrs_d, 'rdf:resource')
value = self._get_attribute(attrs_d, "rdf:resource")
attrs_d = FeedParserDict()
attrs_d['rel'] = 'license'
attrs_d["rel"] = "license"
if value:
attrs_d['href'] = value
context.setdefault('links', []).append(attrs_d)
attrs_d["href"] = value
context.setdefault("links", []).append(attrs_d)
def _start_creativecommons_license(self, attrs_d):
self.push('license', 1)
self.push("license", 1)
_start_creativeCommons_license = _start_creativecommons_license
def _end_creativecommons_license(self):
value = self.pop('license')
value = self.pop("license")
context = self._get_context()
attrs_d = FeedParserDict()
attrs_d['rel'] = 'license'
attrs_d["rel"] = "license"
if value:
attrs_d['href'] = value
context.setdefault('links', []).append(attrs_d)
del context['license']
attrs_d["href"] = value
context.setdefault("links", []).append(attrs_d)
del context["license"]
_end_creativeCommons_license = _end_creativecommons_license


@@ -1,5 +1,5 @@
# Support for the Dublin Core metadata extensions
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -30,10 +30,10 @@ from ..datetimes import _parse_date
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
supported_namespaces = {
'http://purl.org/dc/elements/1.1/': 'dc',
'http://purl.org/dc/terms/': 'dcterms',
"http://purl.org/dc/elements/1.1/": "dc",
"http://purl.org/dc/terms/": "dcterms",
}
def _end_dc_author(self):
@@ -109,25 +109,29 @@ class Namespace(object):
self._start_updated(attrs_d)
def _start_dcterms_valid(self, attrs_d):
self.push('validity', 1)
self.push("validity", 1)
def _end_dcterms_valid(self):
for validity_detail in self.pop('validity').split(';'):
if '=' in validity_detail:
key, value = validity_detail.split('=', 1)
if key == 'start':
self._save('validity_start', value, overwrite=True)
self._save('validity_start_parsed', _parse_date(value), overwrite=True)
elif key == 'end':
self._save('validity_end', value, overwrite=True)
self._save('validity_end_parsed', _parse_date(value), overwrite=True)
for validity_detail in self.pop("validity").split(";"):
if "=" in validity_detail:
key, value = validity_detail.split("=", 1)
if key == "start":
self._save("validity_start", value, overwrite=True)
self._save(
"validity_start_parsed", _parse_date(value), overwrite=True
)
elif key == "end":
self._save("validity_end", value, overwrite=True)
self._save(
"validity_end_parsed", _parse_date(value), overwrite=True
)
def _start_dc_contributor(self, attrs_d):
self.incontributor = 1
context = self._get_context()
context.setdefault('contributors', [])
context['contributors'].append(FeedParserDict())
self.push('name', 0)
context.setdefault("contributors", [])
context["contributors"].append(FeedParserDict())
self.push("name", 0)
def _end_dc_contributor(self):
self._end_name()


@@ -1,5 +1,5 @@
# Support for the GeoRSS format
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -26,27 +26,24 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Required for Python 3.6 compatibility.
from __future__ import generator_stop
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
supported_namespaces = {
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
'http://www.georss.org/georss': 'georss',
'http://www.opengis.net/gml': 'gml',
"http://www.w3.org/2003/01/geo/wgs84_pos#": "geo",
"http://www.georss.org/georss": "georss",
"http://www.opengis.net/gml": "gml",
}
def __init__(self):
self.ingeometry = 0
super(Namespace, self).__init__()
super().__init__()
def _start_georssgeom(self, attrs_d):
self.push('geometry', 0)
self.push("geometry", 0)
context = self._get_context()
context['where'] = FeedParserDict()
context["where"] = FeedParserDict()
_start_georss_point = _start_georssgeom
_start_georss_line = _start_georssgeom
@@ -55,76 +52,77 @@ class Namespace(object):
def _save_where(self, geometry):
context = self._get_context()
context['where'].update(geometry)
context["where"].update(geometry)
def _end_georss_point(self):
geometry = _parse_georss_point(self.pop('geometry'))
geometry = _parse_georss_point(self.pop("geometry"))
if geometry:
self._save_where(geometry)
def _end_georss_line(self):
geometry = _parse_georss_line(self.pop('geometry'))
geometry = _parse_georss_line(self.pop("geometry"))
if geometry:
self._save_where(geometry)
def _end_georss_polygon(self):
this = self.pop('geometry')
this = self.pop("geometry")
geometry = _parse_georss_polygon(this)
if geometry:
self._save_where(geometry)
def _end_georss_box(self):
geometry = _parse_georss_box(self.pop('geometry'))
geometry = _parse_georss_box(self.pop("geometry"))
if geometry:
self._save_where(geometry)
def _start_where(self, attrs_d):
self.push('where', 0)
self.push("where", 0)
context = self._get_context()
context['where'] = FeedParserDict()
context["where"] = FeedParserDict()
_start_georss_where = _start_where
def _parse_srs_attrs(self, attrs_d):
srs_name = attrs_d.get('srsname')
srs_name = attrs_d.get("srsname")
try:
srs_dimension = int(attrs_d.get('srsdimension', '2'))
srs_dimension = int(attrs_d.get("srsdimension", "2"))
except ValueError:
srs_dimension = 2
context = self._get_context()
if 'where' not in context:
context['where'] = {}
context['where']['srsName'] = srs_name
context['where']['srsDimension'] = srs_dimension
if "where" not in context:
context["where"] = {}
context["where"]["srsName"] = srs_name
context["where"]["srsDimension"] = srs_dimension
def _start_gml_point(self, attrs_d):
self._parse_srs_attrs(attrs_d)
self.ingeometry = 1
self.push('geometry', 0)
self.push("geometry", 0)
def _start_gml_linestring(self, attrs_d):
self._parse_srs_attrs(attrs_d)
self.ingeometry = 'linestring'
self.push('geometry', 0)
self.ingeometry = "linestring"
self.push("geometry", 0)
def _start_gml_polygon(self, attrs_d):
self._parse_srs_attrs(attrs_d)
self.push('geometry', 0)
self.push("geometry", 0)
def _start_gml_exterior(self, attrs_d):
self.push('geometry', 0)
self.push("geometry", 0)
def _start_gml_linearring(self, attrs_d):
self.ingeometry = 'polygon'
self.push('geometry', 0)
self.ingeometry = "polygon"
self.push("geometry", 0)
def _start_gml_pos(self, attrs_d):
self.push('pos', 0)
self.push("pos", 0)
def _end_gml_pos(self):
this = self.pop('pos')
this = self.pop("pos")
context = self._get_context()
srs_name = context['where'].get('srsName')
srs_dimension = context['where'].get('srsDimension', 2)
srs_name = context["where"].get("srsName")
srs_dimension = context["where"].get("srsDimension", 2)
swap = True
if srs_name and "EPSG" in srs_name:
epsg = int(srs_name.split(":")[-1])
@@ -134,25 +132,25 @@ class Namespace(object):
self._save_where(geometry)
def _start_gml_poslist(self, attrs_d):
self.push('pos', 0)
self.push("pos", 0)
def _end_gml_poslist(self):
this = self.pop('pos')
this = self.pop("pos")
context = self._get_context()
srs_name = context['where'].get('srsName')
srs_dimension = context['where'].get('srsDimension', 2)
srs_name = context["where"].get("srsName")
srs_dimension = context["where"].get("srsDimension", 2)
swap = True
if srs_name and "EPSG" in srs_name:
epsg = int(srs_name.split(":")[-1])
swap = bool(epsg in _geogCS)
geometry = _parse_poslist(
this, self.ingeometry, swap=swap, dims=srs_dimension)
geometry = _parse_poslist(this, self.ingeometry, swap=swap, dims=srs_dimension)
if geometry:
self._save_where(geometry)
def _end_geom(self):
self.ingeometry = 0
self.pop('geometry')
self.pop("geometry")
_end_gml_point = _end_geom
_end_gml_linestring = _end_geom
_end_gml_linearring = _end_geom
@@ -160,19 +158,21 @@ class Namespace(object):
_end_gml_polygon = _end_geom
def _end_where(self):
self.pop('where')
self.pop("where")
_end_georss_where = _end_where
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
# items, or None in the case of a parsing error.
def _parse_poslist(value, geom_type, swap=True, dims=2):
if geom_type == 'linestring':
if geom_type == "linestring":
return _parse_georss_line(value, swap, dims)
elif geom_type == 'polygon':
elif geom_type == "polygon":
ring = _parse_georss_line(value, swap, dims)
return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
return {"type": "Polygon", "coordinates": (ring["coordinates"],)}
else:
return None
@@ -180,10 +180,10 @@ def _parse_poslist(value, geom_type, swap=True, dims=2):
def _gen_georss_coords(value, swap=True, dims=2):
# A generator of (lon, lat) pairs from a string of encoded GeoRSS
# coordinates. Converts to floats and swaps order.
latlons = (float(ll) for ll in value.replace(',', ' ').split())
latlons = (float(ll) for ll in value.replace(",", " ").split())
while True:
try:
t = [next(latlons), next(latlons)][::swap and -1 or 1]
t = [next(latlons), next(latlons)][:: swap and -1 or 1]
if dims == 3:
t.append(next(latlons))
yield tuple(t)
@@ -196,7 +196,7 @@ def _parse_georss_point(value, swap=True, dims=2):
# whitespace. We'll also handle comma separators.
try:
coords = list(_gen_georss_coords(value, swap, dims))
return {'type': 'Point', 'coordinates': coords[0]}
return {"type": "Point", "coordinates": coords[0]}
except (IndexError, ValueError):
return None
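A worked example of the swap behaviour, using a hypothetical input value:

    geometry = _parse_georss_point("45.256 -71.92")
    # {'type': 'Point', 'coordinates': (-71.92, 45.256)}
    # with swap=True (the default) each lat/lon pair is reordered to (lon, lat)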
@@ -207,7 +207,7 @@ def _parse_georss_line(value, swap=True, dims=2):
# whitespace. There must be at least two pairs.
try:
coords = list(_gen_georss_coords(value, swap, dims))
return {'type': 'LineString', 'coordinates': coords}
return {"type": "LineString", "coordinates": coords}
except (IndexError, ValueError):
return None
@@ -223,7 +223,7 @@ def _parse_georss_polygon(value, swap=True, dims=2):
return None
if len(ring) < 4:
return None
return {'type': 'Polygon', 'coordinates': (ring,)}
return {"type": "Polygon", "coordinates": (ring,)}
def _parse_georss_box(value, swap=True, dims=2):
@@ -233,7 +233,7 @@ def _parse_georss_box(value, swap=True, dims=2):
# first pair is the lower corner, the second is the upper corner.
try:
coords = list(_gen_georss_coords(value, swap, dims))
return {'type': 'Box', 'coordinates': tuple(coords)}
return {"type": "Box", "coordinates": tuple(coords)}
except (IndexError, ValueError):
return None
@@ -241,38 +241,443 @@ def _parse_georss_box(value, swap=True, dims=2):
# The list of EPSG codes for geographic (latitude/longitude) coordinate
# systems to support decoding of GeoRSS GML profiles.
_geogCS = [
3819, 3821, 3824, 3889, 3906, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008,
4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4018, 4019, 4020, 4021, 4022,
4023, 4024, 4025, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036,
4041, 4042, 4043, 4044, 4045, 4046, 4047, 4052, 4053, 4054, 4055, 4075, 4081,
4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132,
4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145,
4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158,
4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171,
4172, 4173, 4174, 4175, 4176, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185,
4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200,
4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213,
4214, 4215, 4216, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227,
4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240,
4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253,
4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266,
4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279,
4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4291, 4292, 4293,
4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4306, 4307,
4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4322,
4324, 4326, 4463, 4470, 4475, 4483, 4490, 4555, 4558, 4600, 4601, 4602, 4603,
4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616,
4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629,
4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642,
4643, 4644, 4645, 4646, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665,
4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678,
4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691,
4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704,
4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717,
4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730,
4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743,
4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756,
4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4801, 4802, 4803, 4804,
4805, 4806, 4807, 4808, 4809, 4810, 4811, 4813, 4814, 4815, 4816, 4817, 4818,
4819, 4820, 4821, 4823, 4824, 4901, 4902, 4903, 4904, 4979,
3819,
3821,
3824,
3889,
3906,
4001,
4002,
4003,
4004,
4005,
4006,
4007,
4008,
4009,
4010,
4011,
4012,
4013,
4014,
4015,
4016,
4018,
4019,
4020,
4021,
4022,
4023,
4024,
4025,
4027,
4028,
4029,
4030,
4031,
4032,
4033,
4034,
4035,
4036,
4041,
4042,
4043,
4044,
4045,
4046,
4047,
4052,
4053,
4054,
4055,
4075,
4081,
4120,
4121,
4122,
4123,
4124,
4125,
4126,
4127,
4128,
4129,
4130,
4131,
4132,
4133,
4134,
4135,
4136,
4137,
4138,
4139,
4140,
4141,
4142,
4143,
4144,
4145,
4146,
4147,
4148,
4149,
4150,
4151,
4152,
4153,
4154,
4155,
4156,
4157,
4158,
4159,
4160,
4161,
4162,
4163,
4164,
4165,
4166,
4167,
4168,
4169,
4170,
4171,
4172,
4173,
4174,
4175,
4176,
4178,
4179,
4180,
4181,
4182,
4183,
4184,
4185,
4188,
4189,
4190,
4191,
4192,
4193,
4194,
4195,
4196,
4197,
4198,
4199,
4200,
4201,
4202,
4203,
4204,
4205,
4206,
4207,
4208,
4209,
4210,
4211,
4212,
4213,
4214,
4215,
4216,
4218,
4219,
4220,
4221,
4222,
4223,
4224,
4225,
4226,
4227,
4228,
4229,
4230,
4231,
4232,
4233,
4234,
4235,
4236,
4237,
4238,
4239,
4240,
4241,
4242,
4243,
4244,
4245,
4246,
4247,
4248,
4249,
4250,
4251,
4252,
4253,
4254,
4255,
4256,
4257,
4258,
4259,
4260,
4261,
4262,
4263,
4264,
4265,
4266,
4267,
4268,
4269,
4270,
4271,
4272,
4273,
4274,
4275,
4276,
4277,
4278,
4279,
4280,
4281,
4282,
4283,
4284,
4285,
4286,
4287,
4288,
4289,
4291,
4292,
4293,
4294,
4295,
4296,
4297,
4298,
4299,
4300,
4301,
4302,
4303,
4304,
4306,
4307,
4308,
4309,
4310,
4311,
4312,
4313,
4314,
4315,
4316,
4317,
4318,
4319,
4322,
4324,
4326,
4463,
4470,
4475,
4483,
4490,
4555,
4558,
4600,
4601,
4602,
4603,
4604,
4605,
4606,
4607,
4608,
4609,
4610,
4611,
4612,
4613,
4614,
4615,
4616,
4617,
4618,
4619,
4620,
4621,
4622,
4623,
4624,
4625,
4626,
4627,
4628,
4629,
4630,
4631,
4632,
4633,
4634,
4635,
4636,
4637,
4638,
4639,
4640,
4641,
4642,
4643,
4644,
4645,
4646,
4657,
4658,
4659,
4660,
4661,
4662,
4663,
4664,
4665,
4666,
4667,
4668,
4669,
4670,
4671,
4672,
4673,
4674,
4675,
4676,
4677,
4678,
4679,
4680,
4681,
4682,
4683,
4684,
4685,
4686,
4687,
4688,
4689,
4690,
4691,
4692,
4693,
4694,
4695,
4696,
4697,
4698,
4699,
4700,
4701,
4702,
4703,
4704,
4705,
4706,
4707,
4708,
4709,
4710,
4711,
4712,
4713,
4714,
4715,
4716,
4717,
4718,
4719,
4720,
4721,
4722,
4723,
4724,
4725,
4726,
4727,
4728,
4729,
4730,
4731,
4732,
4733,
4734,
4735,
4736,
4737,
4738,
4739,
4740,
4741,
4742,
4743,
4744,
4745,
4746,
4747,
4748,
4749,
4750,
4751,
4752,
4753,
4754,
4755,
4756,
4757,
4758,
4759,
4760,
4761,
4762,
4763,
4764,
4765,
4801,
4802,
4803,
4804,
4805,
4806,
4807,
4808,
4809,
4810,
4811,
4813,
4814,
4815,
4816,
4817,
4818,
4819,
4820,
4821,
4823,
4824,
4901,
4902,
4903,
4904,
4979,
]


@@ -1,5 +1,5 @@
# Support for the iTunes format
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -29,13 +29,12 @@
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
supported_namespaces = {
# Canonical namespace
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
"http://www.itunes.com/DTDs/PodCast-1.0.dtd": "itunes",
# Extra namespace
'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes',
"http://example.com/DTDs/PodCast-1.0.dtd": "itunes",
}
def _start_itunes_author(self, attrs_d):
@@ -73,37 +72,42 @@ class Namespace(object):
def _start_itunes_owner(self, attrs_d):
self.inpublisher = 1
self.push('publisher', 0)
self.push("publisher", 0)
def _end_itunes_owner(self):
self.pop('publisher')
self.pop("publisher")
self.inpublisher = 0
self._sync_author_detail('publisher')
self._sync_author_detail("publisher")
def _end_itunes_keywords(self):
for term in self.pop('itunes_keywords').split(','):
for term in self.pop("itunes_keywords").split(","):
if term.strip():
self._add_tag(term.strip(), 'http://www.itunes.com/', None)
self._add_tag(term.strip(), "http://www.itunes.com/", None)
def _start_itunes_category(self, attrs_d):
self._add_tag(attrs_d.get('text'), 'http://www.itunes.com/', None)
self.push('category', 1)
self._add_tag(attrs_d.get("text"), "http://www.itunes.com/", None)
self.push("category", 1)
def _start_itunes_image(self, attrs_d):
self.push('itunes_image', 0)
if attrs_d.get('href'):
self._get_context()['image'] = FeedParserDict({'href': attrs_d.get('href')})
elif attrs_d.get('url'):
self._get_context()['image'] = FeedParserDict({'href': attrs_d.get('url')})
self.push("itunes_image", 0)
if attrs_d.get("href"):
self._get_context()["image"] = FeedParserDict({"href": attrs_d.get("href")})
elif attrs_d.get("url"):
self._get_context()["image"] = FeedParserDict({"href": attrs_d.get("url")})
_start_itunes_link = _start_itunes_image
def _end_itunes_block(self):
value = self.pop('itunes_block', 0)
self._get_context()['itunes_block'] = (value == 'yes' or value == 'Yes') and 1 or 0
value = self.pop("itunes_block", 0)
self._get_context()["itunes_block"] = (
(value == "yes" or value == "Yes") and 1 or 0
)
def _end_itunes_explicit(self):
value = self.pop('itunes_explicit', 0)
value = self.pop("itunes_explicit", 0)
# Convert 'yes' -> True, 'clean' -> False, and any other value -> None.
# False and None both evaluate as False, so the difference can be ignored
# by applications that only need to know if the content is explicit.
self._get_context()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0]
self._get_context()["itunes_explicit"] = (None, False, True)[
(value == "yes" and 2) or value == "clean" or 0
]
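Worked through, the tuple-index expression above behaves like this standalone re-statement:

    def explicit(value):
        # "yes" selects index 2, "clean" selects index True (== 1), else index 0
        return (None, False, True)[(value == "yes" and 2) or value == "clean" or 0]

    assert explicit("yes") is True
    assert explicit("clean") is False
    assert explicit("anything else") is None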


@@ -1,5 +1,5 @@
# Support for the Media RSS format
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -29,24 +29,23 @@
from ..util import FeedParserDict
class Namespace(object):
class Namespace:
supported_namespaces = {
# Canonical namespace
'http://search.yahoo.com/mrss/': 'media',
"http://search.yahoo.com/mrss/": "media",
# Old namespace (no trailing slash)
'http://search.yahoo.com/mrss': 'media',
"http://search.yahoo.com/mrss": "media",
}
def _start_media_category(self, attrs_d):
attrs_d.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
attrs_d.setdefault("scheme", "http://search.yahoo.com/mrss/category_schema")
self._start_category(attrs_d)
def _end_media_category(self):
self._end_category()
def _end_media_keywords(self):
for term in self.pop('media_keywords').split(','):
for term in self.pop("media_keywords").split(","):
if term.strip():
self._add_tag(term.strip(), None, None)
@@ -64,26 +63,26 @@ class Namespace(object):
def _start_media_rating(self, attrs_d):
context = self._get_context()
context.setdefault('media_rating', attrs_d)
self.push('rating', 1)
context.setdefault("media_rating", attrs_d)
self.push("rating", 1)
def _end_media_rating(self):
rating = self.pop('rating')
rating = self.pop("rating")
if rating is not None and rating.strip():
context = self._get_context()
context['media_rating']['content'] = rating
context["media_rating"]["content"] = rating
def _start_media_credit(self, attrs_d):
context = self._get_context()
context.setdefault('media_credit', [])
context['media_credit'].append(attrs_d)
self.push('credit', 1)
context.setdefault("media_credit", [])
context["media_credit"].append(attrs_d)
self.push("credit", 1)
def _end_media_credit(self):
credit = self.pop('credit')
credit = self.pop("credit")
if credit is not None and credit.strip():
context = self._get_context()
context['media_credit'][-1]['content'] = credit
context["media_credit"][-1]["content"] = credit
def _start_media_description(self, attrs_d):
self._start_description(attrs_d)
@@ -93,49 +92,51 @@ class Namespace(object):
def _start_media_restriction(self, attrs_d):
context = self._get_context()
context.setdefault('media_restriction', attrs_d)
self.push('restriction', 1)
context.setdefault("media_restriction", attrs_d)
self.push("restriction", 1)
def _end_media_restriction(self):
restriction = self.pop('restriction')
restriction = self.pop("restriction")
if restriction is not None and restriction.strip():
context = self._get_context()
context['media_restriction']['content'] = [cc.strip().lower() for cc in restriction.split(' ')]
context["media_restriction"]["content"] = [
cc.strip().lower() for cc in restriction.split(" ")
]
def _start_media_license(self, attrs_d):
context = self._get_context()
context.setdefault('media_license', attrs_d)
self.push('license', 1)
context.setdefault("media_license", attrs_d)
self.push("license", 1)
def _end_media_license(self):
license_ = self.pop('license')
license_ = self.pop("license")
if license_ is not None and license_.strip():
context = self._get_context()
context['media_license']['content'] = license_
context["media_license"]["content"] = license_
def _start_media_content(self, attrs_d):
context = self._get_context()
context.setdefault('media_content', [])
context['media_content'].append(attrs_d)
context.setdefault("media_content", [])
context["media_content"].append(attrs_d)
def _start_media_thumbnail(self, attrs_d):
context = self._get_context()
context.setdefault('media_thumbnail', [])
self.push('url', 1) # new
context['media_thumbnail'].append(attrs_d)
context.setdefault("media_thumbnail", [])
self.push("url", 1) # new
context["media_thumbnail"].append(attrs_d)
def _end_media_thumbnail(self):
url = self.pop('url')
url = self.pop("url")
context = self._get_context()
if url is not None and url.strip():
if 'url' not in context['media_thumbnail'][-1]:
context['media_thumbnail'][-1]['url'] = url
if "url" not in context["media_thumbnail"][-1]:
context["media_thumbnail"][-1]["url"] = url
def _start_media_player(self, attrs_d):
self.push('media_player', 0)
self._get_context()['media_player'] = FeedParserDict(attrs_d)
self.push("media_player", 0)
self._get_context()["media_player"] = FeedParserDict(attrs_d)
def _end_media_player(self):
value = self.pop('media_player')
value = self.pop("media_player")
context = self._get_context()
context['media_player']['content'] = value
context["media_player"]["content"] = value


@@ -1,5 +1,5 @@
# Support for the Podlove Simple Chapters format
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -32,36 +32,36 @@ import re
from .. import util
class Namespace(object):
class Namespace:
supported_namespaces = {
'http://podlove.org/simple-chapters': 'psc',
"http://podlove.org/simple-chapters": "psc",
}
def __init__(self):
# chapters will only be captured while psc_chapters_flag is True.
self.psc_chapters_flag = False
super(Namespace, self).__init__()
super().__init__()
def _start_psc_chapters(self, attrs_d):
context = self._get_context()
if 'psc_chapters' not in context:
if "psc_chapters" not in context:
self.psc_chapters_flag = True
attrs_d['chapters'] = []
context['psc_chapters'] = util.FeedParserDict(attrs_d)
attrs_d["chapters"] = []
context["psc_chapters"] = util.FeedParserDict(attrs_d)
def _end_psc_chapters(self):
self.psc_chapters_flag = False
def _start_psc_chapter(self, attrs_d):
if self.psc_chapters_flag:
start = self._get_attribute(attrs_d, 'start')
attrs_d['start_parsed'] = _parse_psc_chapter_start(start)
start = self._get_attribute(attrs_d, "start")
attrs_d["start_parsed"] = _parse_psc_chapter_start(start)
context = self._get_context()['psc_chapters']
context['chapters'].append(util.FeedParserDict(attrs_d))
context = self._get_context()["psc_chapters"]
context["chapters"].append(util.FeedParserDict(attrs_d))
format_ = re.compile(r'^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$')
format_ = re.compile(r"^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$")
def _parse_psc_chapter_start(start):
@@ -71,4 +71,4 @@ def _parse_psc_chapter_start(start):
_, h, m, s, _, ms = m.groups()
h, m, s, ms = (int(h or 0), int(m), int(s), int(ms or 0))
return datetime.timedelta(0, h*60*60 + m*60 + s, ms*1000)
return datetime.timedelta(0, h * 60 * 60 + m * 60 + s, ms * 1000)
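A worked example for the normal-play-time pattern above, with a hypothetical chapter start value:

    # "01:02:03.500" matches as h="01", m="02", s="03", ms="500", so the result is
    # datetime.timedelta(seconds=3723, microseconds=500000)
    assert _parse_psc_chapter_start("01:02:03.500") == datetime.timedelta(0, 3723, 500000)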


@@ -34,37 +34,37 @@ from ..util import FeedParserDict
class JSONParser:
VERSIONS = {
'https://jsonfeed.org/version/1': 'json1',
'https://jsonfeed.org/version/1.1': 'json11',
"https://jsonfeed.org/version/1": "json1",
"https://jsonfeed.org/version/1.1": "json11",
}
FEED_FIELDS = (
('title', 'title'),
('icon', 'image'),
('home_page_url', 'link'),
('description', 'description'),
("title", "title"),
("icon", "image"),
("home_page_url", "link"),
("description", "description"),
)
ITEM_FIELDS = (
('title', 'title'),
('id', 'guid'),
('url', 'link'),
('summary', 'summary'),
('external_url', 'source'),
("title", "title"),
("id", "guid"),
("url", "link"),
("summary", "summary"),
("external_url", "source"),
)
def __init__(self, baseuri=None, baselang=None, encoding=None):
self.baseuri = baseuri or ''
self.baseuri = baseuri or ""
self.lang = baselang or None
self.encoding = encoding or 'utf-8' # character encoding
self.encoding = encoding or "utf-8" # character encoding
self.version = None
self.feeddata = FeedParserDict()
self.namespacesInUse = []
self.entries = []
def feed(self, data):
data = json.loads(data)
def feed(self, file):
data = json.load(file)
v = data.get('version', '')
v = data.get("version", "")
try:
self.version = self.VERSIONS[v]
except KeyError:
@@ -73,11 +73,11 @@ class JSONParser:
for src, dst in self.FEED_FIELDS:
if src in data:
self.feeddata[dst] = data[src]
if 'author' in data:
self.parse_author(data['author'], self.feeddata)
if "author" in data:
self.parse_author(data["author"], self.feeddata)
# TODO: hubs; expired has no RSS equivalent
self.entries = [self.parse_entry(e) for e in data['items']]
self.entries = [self.parse_entry(e) for e in data["items"]]
def parse_entry(self, e):
entry = FeedParserDict()
@@ -85,49 +85,51 @@ class JSONParser:
if src in e:
entry[dst] = e[src]
if 'content_text' in e:
entry['content'] = c = FeedParserDict()
c['value'] = e['content_text']
c['type'] = 'text'
elif 'content_html' in e:
entry['content'] = c = FeedParserDict()
c['value'] = sanitize_html(e['content_html'], self.encoding, 'application/json')
c['type'] = 'html'
if "content_text" in e:
entry["content"] = c = FeedParserDict()
c["value"] = e["content_text"]
c["type"] = "text"
elif "content_html" in e:
entry["content"] = c = FeedParserDict()
c["value"] = sanitize_html(
e["content_html"], self.encoding, "application/json"
)
c["type"] = "html"
if 'date_published' in e:
entry['published'] = e['date_published']
entry['published_parsed'] = _parse_date(e['date_published'])
if 'date_updated' in e:
entry['updated'] = e['date_modified']
entry['updated_parsed'] = _parse_date(e['date_modified'])
if "date_published" in e:
entry["published"] = e["date_published"]
entry["published_parsed"] = _parse_date(e["date_published"])
if "date_updated" in e:
entry["updated"] = e["date_modified"]
entry["updated_parsed"] = _parse_date(e["date_modified"])
if 'tags' in e:
entry['category'] = e['tags']
if "tags" in e:
entry["category"] = e["tags"]
if 'author' in e:
self.parse_author(e['author'], entry)
if "author" in e:
self.parse_author(e["author"], entry)
if 'attachments' in e:
entry['enclosures'] = [self.parse_attachment(a) for a in e['attachments']]
if "attachments" in e:
entry["enclosures"] = [self.parse_attachment(a) for a in e["attachments"]]
return entry
@staticmethod
def parse_author(parent, dest):
dest['author_detail'] = detail = FeedParserDict()
if 'name' in parent:
dest['author'] = detail['name'] = parent['name']
if 'url' in parent:
if parent['url'].startswith('mailto:'):
detail['email'] = parent['url'][7:]
dest["author_detail"] = detail = FeedParserDict()
if "name" in parent:
dest["author"] = detail["name"] = parent["name"]
if "url" in parent:
if parent["url"].startswith("mailto:"):
detail["email"] = parent["url"][7:]
else:
detail['href'] = parent['url']
detail["href"] = parent["url"]
@staticmethod
def parse_attachment(attachment):
enc = FeedParserDict()
enc['href'] = attachment['url']
enc['type'] = attachment['mime_type']
if 'size_in_bytes' in attachment:
enc['length'] = attachment['size_in_bytes']
enc["href"] = attachment["url"]
enc["type"] = attachment["mime_type"]
if "size_in_bytes" in attachment:
enc["length"] = attachment["size_in_bytes"]
return enc
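End to end, the JSON parser feeds the same entry model as the XML parsers; a minimal sketch (hypothetical document, and assuming the top-level parse() routes JSON Feed input here, as it does in feedparser 6):

    import feedparser

    doc = """{
      "version": "https://jsonfeed.org/version/1.1",
      "title": "Example",
      "items": [{"id": "1", "url": "https://example.com/1", "content_text": "hi"}]
    }"""
    d = feedparser.parse(doc)
    assert d.version == "json11"
    assert d.feed.title == "Example"
    assert d.entries[0].link == "https://example.com/1"  # "url" maps to "link"
    assert d.entries[0].content["value"] == "hi"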


@@ -1,5 +1,5 @@
# The loose feed parser that interfaces with an SGML parsing library
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -26,52 +26,50 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
class LooseXMLParser:
contentparams = None
def __init__(self, baseuri=None, baselang=None, encoding=None, entities=None):
self.baseuri = baseuri or ''
self.baseuri = baseuri or ""
self.lang = baselang or None
self.encoding = encoding or 'utf-8' # character encoding
self.encoding = encoding or "utf-8" # character encoding
self.entities = entities or {}
super().__init__()
@staticmethod
def _normalize_attributes(kv):
k = kv[0].lower()
v = k in ('rel', 'type') and kv[1].lower() or kv[1]
v = k in ("rel", "type") and kv[1].lower() or kv[1]
# the sgml parser doesn't handle entities in attributes, nor
# does it pass the attribute values through as unicode, while
# strict xml parsers do -- account for this difference
v = v.replace('&amp;', '&')
v = v.replace("&amp;", "&")
return k, v
def decode_entities(self, element, data):
data = data.replace('&#60;', '&lt;')
data = data.replace('&#x3c;', '&lt;')
data = data.replace('&#x3C;', '&lt;')
data = data.replace('&#62;', '&gt;')
data = data.replace('&#x3e;', '&gt;')
data = data.replace('&#x3E;', '&gt;')
data = data.replace('&#38;', '&amp;')
data = data.replace('&#x26;', '&amp;')
data = data.replace('&#34;', '&quot;')
data = data.replace('&#x22;', '&quot;')
data = data.replace('&#39;', '&apos;')
data = data.replace('&#x27;', '&apos;')
if not self.contentparams.get('type', 'xml').endswith('xml'):
data = data.replace('&lt;', '<')
data = data.replace('&gt;', '>')
data = data.replace('&amp;', '&')
data = data.replace('&quot;', '"')
data = data.replace('&apos;', "'")
data = data.replace('&#x2f;', '/')
data = data.replace('&#x2F;', '/')
data = data.replace("&#60;", "&lt;")
data = data.replace("&#x3c;", "&lt;")
data = data.replace("&#x3C;", "&lt;")
data = data.replace("&#62;", "&gt;")
data = data.replace("&#x3e;", "&gt;")
data = data.replace("&#x3E;", "&gt;")
data = data.replace("&#38;", "&amp;")
data = data.replace("&#x26;", "&amp;")
data = data.replace("&#34;", "&quot;")
data = data.replace("&#x22;", "&quot;")
data = data.replace("&#39;", "&apos;")
data = data.replace("&#x27;", "&apos;")
if not self.contentparams.get("type", "xml").endswith("xml"):
data = data.replace("&lt;", "<")
data = data.replace("&gt;", ">")
data = data.replace("&amp;", "&")
data = data.replace("&quot;", '"')
data = data.replace("&apos;", "'")
data = data.replace("&#x2f;", "/")
data = data.replace("&#x2F;", "/")
return data
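Restating the two-stage decoding above as a sketch: numeric character references are first normalised to named entities, and only for non-XML content types are those then resolved to literal characters:

    data = "&#60;b&#62;bold&#60;/b&#62;"
    data = data.replace("&#60;", "&lt;").replace("&#62;", "&gt;")  # stage 1
    # stage 2 runs only when the contentparams "type" does not end with "xml"
    data = data.replace("&lt;", "<").replace("&gt;", ">")
    assert data == "<b>bold</b>"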
@staticmethod
def strattrs(attrs):
return ''.join(
' %s="%s"' % (n, v.replace('"', '&quot;'))
for n, v in attrs
)
return "".join(' {}="{}"'.format(n, v.replace('"', "&quot;")) for n, v in attrs)


@@ -1,5 +1,5 @@
# The strict feed parser that interfaces with an XML parsing library
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -34,15 +34,15 @@ class StrictXMLParser:
self.bozo = 0
self.exc = None
self.decls = {}
self.baseuri = baseuri or ''
self.baseuri = baseuri or ""
self.lang = baselang
self.encoding = encoding
super(StrictXMLParser, self).__init__()
super().__init__()
@staticmethod
def _normalize_attributes(kv):
k = kv[0].lower()
v = k in ('rel', 'type') and kv[1].lower() or kv[1]
v = k in ("rel", "type") and kv[1].lower() or kv[1]
return k, v
def startPrefixMapping(self, prefix, uri):
@@ -51,23 +51,29 @@ class StrictXMLParser:
# Jython uses '' instead of None; standardize on None
prefix = prefix or None
self.track_namespace(prefix, uri)
if prefix and uri == 'http://www.w3.org/1999/xlink':
self.decls['xmlns:' + prefix] = uri
if prefix and uri == "http://www.w3.org/1999/xlink":
self.decls["xmlns:" + prefix] = uri
def startElementNS(self, name, qname, attrs):
namespace, localname = name
lowernamespace = str(namespace or '').lower()
if lowernamespace.find('backend.userland.com/rss') != -1:
lowernamespace = str(namespace or "").lower()
if lowernamespace.find("backend.userland.com/rss") != -1:
# match any backend.userland.com namespace
namespace = 'http://backend.userland.com/rss'
namespace = "http://backend.userland.com/rss"
lowernamespace = namespace
if qname and qname.find(':') > 0:
givenprefix = qname.split(':')[0]
if qname and qname.find(":") > 0:
givenprefix = qname.split(":")[0]
else:
givenprefix = None
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
if givenprefix and (prefix is None or (prefix == '' and lowernamespace == '')) and givenprefix not in self.namespaces_in_use:
raise UndeclaredNamespace("'%s' is not associated with a namespace" % givenprefix)
if (
givenprefix
and (prefix is None or (prefix == "" and lowernamespace == ""))
and givenprefix not in self.namespaces_in_use
):
raise UndeclaredNamespace(
"'%s' is not associated with a namespace" % givenprefix
)
localname = str(localname).lower()
# qname implementation is horribly broken in Python 2.1 (it
@@ -78,24 +84,24 @@ class StrictXMLParser:
# at all). Thanks to MatejC for helping me test this and
# tirelessly telling me that it didn't work yet.
attrsD, self.decls = self.decls, {}
if localname == 'math' and namespace == 'http://www.w3.org/1998/Math/MathML':
attrsD['xmlns'] = namespace
if localname == 'svg' and namespace == 'http://www.w3.org/2000/svg':
attrsD['xmlns'] = namespace
if localname == "math" and namespace == "http://www.w3.org/1998/Math/MathML":
attrsD["xmlns"] = namespace
if localname == "svg" and namespace == "http://www.w3.org/2000/svg":
attrsD["xmlns"] = namespace
if prefix:
localname = prefix.lower() + ':' + localname
localname = prefix.lower() + ":" + localname
elif namespace and not qname: # Expat
for name, value in self.namespaces_in_use.items():
if name and value == namespace:
localname = name + ':' + localname
localname = name + ":" + localname
break
for (namespace, attrlocalname), attrvalue in attrs.items():
lowernamespace = (namespace or '').lower()
prefix = self._matchnamespaces.get(lowernamespace, '')
lowernamespace = (namespace or "").lower()
prefix = self._matchnamespaces.get(lowernamespace, "")
if prefix:
attrlocalname = prefix + ':' + attrlocalname
attrlocalname = prefix + ":" + attrlocalname
attrsD[str(attrlocalname).lower()] = attrvalue
for qname in attrs.getQNames():
attrsD[str(qname).lower()] = attrs.getValueByQName(qname)
@@ -107,18 +113,18 @@ class StrictXMLParser:
def endElementNS(self, name, qname):
namespace, localname = name
lowernamespace = str(namespace or '').lower()
if qname and qname.find(':') > 0:
givenprefix = qname.split(':')[0]
lowernamespace = str(namespace or "").lower()
if qname and qname.find(":") > 0:
givenprefix = qname.split(":")[0]
else:
givenprefix = ''
givenprefix = ""
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
if prefix:
localname = prefix + ':' + localname
localname = prefix + ":" + localname
elif namespace and not qname: # Expat
for name, value in self.namespaces_in_use.items():
if name and value == namespace:
localname = name + ':' + localname
localname = name + ":" + localname
break
localname = str(localname).lower()
self.unknown_endtag(localname)

lib/feedparser/py.typed (new, empty file)

(one file's diff suppressed because it is too large)

@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -27,20 +27,20 @@
import re
import sgmllib # type: ignore[import]
import sgmllib3k as sgmllib
__all__ = [
'sgmllib',
'charref',
'tagfind',
'attrfind',
'entityref',
'incomplete',
'interesting',
'shorttag',
'shorttagopen',
'starttagopen',
'endbracket',
"sgmllib",
"charref",
"tagfind",
"attrfind",
"entityref",
"incomplete",
"interesting",
"shorttag",
"shorttagopen",
"starttagopen",
"endbracket",
]
# sgmllib defines a number of module-level regular expressions that are
@@ -49,20 +49,20 @@ __all__ = [
# names, and the compiled code objects of several sgmllib.SGMLParser
# methods are copied into _BaseHTMLProcessor so that they execute in
# feedparser's scope instead of sgmllib's scope.
charref = re.compile(r'&#(\d+|[xX][0-9a-fA-F]+);')
tagfind = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*')
charref = re.compile(r"&#(\d+|[xX][0-9a-fA-F]+);")
tagfind = re.compile(r"[a-zA-Z][-_.:a-zA-Z0-9]*")
attrfind = re.compile(
r"""\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)[$]?(\s*=\s*"""
r"""('[^']*'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$()_#=~'"@]*))?"""
)
# Unfortunately, these must be copied over to prevent NameError exceptions
entityref = sgmllib.entityref
incomplete = sgmllib.incomplete
interesting = sgmllib.interesting
shorttag = sgmllib.shorttag
shorttagopen = sgmllib.shorttagopen
starttagopen = sgmllib.starttagopen
entityref = sgmllib.SGMLParser.entityref
incomplete = sgmllib.SGMLParser.incomplete
interesting = sgmllib.SGMLParser.interesting
shorttag = sgmllib.SGMLParser.shorttag
shorttagopen = sgmllib.SGMLParser.shorttagopen
starttagopen = sgmllib.SGMLParser.starttagopen
class _EndBracketRegEx:
@@ -70,12 +70,12 @@ class _EndBracketRegEx:
# Overriding the built-in sgmllib.endbracket regex allows the
# parser to find angle brackets embedded in element attributes.
self.endbracket = re.compile(
r'('
r"("
r"""[^'"<>]"""
r"""|"[^"]*"(?=>|/|\s|\w+=)"""
r"""|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])"""
r"""|.*?(?=[<>]"""
r')'
r")"
)
def search(self, target, index=0):


@@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@@ -37,103 +37,116 @@ from .html import BaseHTMLProcessor
# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!
ACCEPTABLE_URI_SCHEMES = (
'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'magnet',
'mailto', 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu',
'sftp', 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet',
'wais',
"file",
"ftp",
"gopher",
"h323",
"hdl",
"http",
"https",
"imap",
"magnet",
"mailto",
"mms",
"news",
"nntp",
"prospero",
"rsync",
"rtsp",
"rtspu",
"sftp",
"shttp",
"sip",
"sips",
"snews",
"svn",
"svn+ssh",
"telnet",
"wais",
# Additional common-but-unofficial schemes
'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs',
'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg',
"aim",
"callto",
"cvs",
"facetime",
"feed",
"git",
"gtalk",
"irc",
"ircs",
"irc6",
"itms",
"mms",
"msnim",
"skype",
"ssh",
"smb",
"svn",
"ymsg",
)
_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)')
_urifixer = re.compile("^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)")
def _urljoin(base, uri):
uri = _urifixer.sub(r'\1\3', uri)
uri = _urifixer.sub(r"\1\3", uri)
try:
uri = urllib.parse.urljoin(base, uri)
except ValueError:
uri = ''
uri = ""
return uri
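The _urifixer substitution collapses stray slashes after the scheme before joining; for example (hypothetical input):

    assert _urifixer.sub(r"\1\3", "http:////example.com/feed") == "http://example.com/feed"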
def convert_to_idn(url):
"""Convert a URL to IDN notation"""
# this function should only be called with a unicode string
# strategy: if the host cannot be encoded in ascii, then
# it'll be necessary to encode it in idn form
parts = list(urllib.parse.urlsplit(url))
try:
parts[1].encode('ascii')
except UnicodeEncodeError:
# the url needs to be converted to idn notation
host = parts[1].rsplit(':', 1)
newhost = []
port = ''
if len(host) == 2:
port = host.pop()
for h in host[0].split('.'):
newhost.append(h.encode('idna').decode('utf-8'))
parts[1] = '.'.join(newhost)
if port:
parts[1] += ':' + port
return urllib.parse.urlunsplit(parts)
else:
return url
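A usage sketch (inputs invented for illustration; assumes convert_to_idn as defined above): ASCII hosts pass through untouched, while non-ASCII hosts are IDNA-encoded label by label, preserving any port.

print(convert_to_idn("http://example.com/feed"))
# -> "http://example.com/feed" (already ASCII, returned unchanged)

print(convert_to_idn("http://b\u00fccher.example:8080/feed"))
# -> "http://xn--bcher-kva.example:8080/feed" (host re-encoded, port kept)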
def make_safe_absolute_uri(base, rel=None):
# bail if ACCEPTABLE_URI_SCHEMES is empty
if not ACCEPTABLE_URI_SCHEMES:
return _urljoin(base, rel or '')
return _urljoin(base, rel or "")
if not base:
return rel or ''
return rel or ""
if not rel:
try:
scheme = urllib.parse.urlparse(base)[0]
except ValueError:
return ''
return ""
if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
return base
return ''
return ""
uri = _urljoin(base, rel)
if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
return ''
if uri.strip().split(":", 1)[0] not in ACCEPTABLE_URI_SCHEMES:
return ""
return uri
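The effect of the scheme filter, sketched with assumed inputs (make_safe_absolute_uri as defined above): acceptable schemes resolve normally, anything else collapses to an empty string.

print(make_safe_absolute_uri("http://example.com/", "img/logo.png"))
# -> "http://example.com/img/logo.png" ("http" is an acceptable scheme)

print(make_safe_absolute_uri("http://example.com/", "javascript:alert(1)"))
# -> "" ("javascript" is not in ACCEPTABLE_URI_SCHEMES, so the URI is dropped)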
class RelativeURIResolver(BaseHTMLProcessor):
relative_uris = {
('a', 'href'),
('applet', 'codebase'),
('area', 'href'),
('audio', 'src'),
('blockquote', 'cite'),
('body', 'background'),
('del', 'cite'),
('form', 'action'),
('frame', 'longdesc'),
('frame', 'src'),
('iframe', 'longdesc'),
('iframe', 'src'),
('head', 'profile'),
('img', 'longdesc'),
('img', 'src'),
('img', 'usemap'),
('input', 'src'),
('input', 'usemap'),
('ins', 'cite'),
('link', 'href'),
('object', 'classid'),
('object', 'codebase'),
('object', 'data'),
('object', 'usemap'),
('q', 'cite'),
('script', 'src'),
('source', 'src'),
('video', 'poster'),
('video', 'src'),
("a", "href"),
("applet", "codebase"),
("area", "href"),
("audio", "src"),
("blockquote", "cite"),
("body", "background"),
("del", "cite"),
("form", "action"),
("frame", "longdesc"),
("frame", "src"),
("iframe", "longdesc"),
("iframe", "src"),
("head", "profile"),
("img", "longdesc"),
("img", "src"),
("img", "usemap"),
("input", "src"),
("input", "usemap"),
("ins", "cite"),
("link", "href"),
("object", "classid"),
("object", "codebase"),
("object", "data"),
("object", "usemap"),
("q", "cite"),
("script", "src"),
("source", "src"),
("video", "poster"),
("video", "src"),
}
def __init__(self, baseuri, encoding, _type):
@ -145,8 +158,14 @@ class RelativeURIResolver(BaseHTMLProcessor):
def unknown_starttag(self, tag, attrs):
attrs = self.normalize_attrs(attrs)
attrs = [(key, ((tag, key) in self.relative_uris) and self.resolve_uri(value) or value) for key, value in attrs]
super(RelativeURIResolver, self).unknown_starttag(tag, attrs)
attrs = [
(
key,
((tag, key) in self.relative_uris) and self.resolve_uri(value) or value,
)
for key, value in attrs
]
super().unknown_starttag(tag, attrs)
def resolve_relative_uris(html_source, base_uri, encoding, type_):
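A hypothetical call to the helper above (exact attribute quoting and spacing in the output may differ; this only sketches the intent): every (tag, attribute) pair listed in relative_uris is resolved against the base URI.

html = '<a href="/about">About</a> <img src="logo.png">'
print(resolve_relative_uris(html, "http://example.com/blog/", "utf-8", "text/html"))
# Roughly: <a href="http://example.com/about">About</a>
#          <img src="http://example.com/blog/logo.png">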

View file

@ -1,4 +1,4 @@
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -30,22 +30,22 @@ import warnings
class FeedParserDict(dict):
keymap = {
'channel': 'feed',
'items': 'entries',
'guid': 'id',
'date': 'updated',
'date_parsed': 'updated_parsed',
'description': ['summary', 'subtitle'],
'description_detail': ['summary_detail', 'subtitle_detail'],
'url': ['href'],
'modified': 'updated',
'modified_parsed': 'updated_parsed',
'issued': 'published',
'issued_parsed': 'published_parsed',
'copyright': 'rights',
'copyright_detail': 'rights_detail',
'tagline': 'subtitle',
'tagline_detail': 'subtitle_detail',
"channel": "feed",
"items": "entries",
"guid": "id",
"date": "updated",
"date_parsed": "updated_parsed",
"description": ["summary", "subtitle"],
"description_detail": ["summary_detail", "subtitle_detail"],
"url": ["href"],
"modified": "updated",
"modified_parsed": "updated_parsed",
"issued": "published",
"issued_parsed": "published_parsed",
"copyright": "rights",
"copyright_detail": "rights_detail",
"tagline": "subtitle",
"tagline_detail": "subtitle_detail",
}
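A sketch of the aliasing this keymap provides (hypothetical feed data; assumes the usual keymap lookup in __getitem__ below): legacy key names resolve transparently to their modern equivalents.

d = FeedParserDict()
d["updated"] = "2023-04-27T12:37:26Z"
print(d["modified"])  # -> "2023-04-27T12:37:26Z" ("modified" aliases "updated")
print(d["date"])      # -> same value ("date" aliases "updated" as well)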
def __getitem__(self, key, _stacklevel=2):
@ -53,28 +53,29 @@ class FeedParserDict(dict):
:return: A :class:`FeedParserDict`.
"""
if key == 'category':
if key == "category":
try:
return dict.__getitem__(self, 'tags')[0]['term']
return dict.__getitem__(self, "tags")[0]["term"]
except IndexError:
raise KeyError("object doesn't have key 'category'")
elif key == 'enclosures':
elif key == "enclosures":
return [
FeedParserDict([(name, value) for (name, value) in link.items() if name != 'rel'])
for link in dict.__getitem__(self, 'links')
if link['rel'] == 'enclosure'
FeedParserDict(
[(name, value) for (name, value) in link.items() if name != "rel"]
)
for link in dict.__getitem__(self, "links")
if link["rel"] == "enclosure"
]
elif key == 'license':
for link in dict.__getitem__(self, 'links'):
if link['rel'] == 'license' and 'href' in link:
return link['href']
elif key == 'updated':
elif key == "license":
for link in dict.__getitem__(self, "links"):
if link["rel"] == "license" and "href" in link:
return link["href"]
elif key == "updated":
# Temporarily help developers out by keeping the old
# broken behavior that was reported in issue 310.
# This fix was proposed in issue 328.
if (
not dict.__contains__(self, 'updated')
and dict.__contains__(self, 'published')
if not dict.__contains__(self, "updated") and dict.__contains__(
self, "published"
):
warnings.warn(
"To avoid breaking existing software while "
@ -85,12 +86,11 @@ class FeedParserDict(dict):
DeprecationWarning,
stacklevel=_stacklevel,
)
return dict.__getitem__(self, 'published')
return dict.__getitem__(self, 'updated')
elif key == 'updated_parsed':
if (
not dict.__contains__(self, 'updated_parsed')
and dict.__contains__(self, 'published_parsed')
return dict.__getitem__(self, "published")
return dict.__getitem__(self, "updated")
elif key == "updated_parsed":
if not dict.__contains__(self, "updated_parsed") and dict.__contains__(
self, "published_parsed"
):
warnings.warn(
"To avoid breaking existing software while "
@ -101,8 +101,8 @@ class FeedParserDict(dict):
DeprecationWarning,
stacklevel=_stacklevel,
)
return dict.__getitem__(self, 'published_parsed')
return dict.__getitem__(self, 'updated_parsed')
return dict.__getitem__(self, "published_parsed")
return dict.__getitem__(self, "updated_parsed")
else:
realkey = self.keymap.get(key, key)
if isinstance(realkey, list):
@ -114,7 +114,7 @@ class FeedParserDict(dict):
return dict.__getitem__(self, key)
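The backward-compatibility fallback above can be exercised like this (hypothetical data): looking up "updated" on an entry that only carries a published date emits a DeprecationWarning and returns the published value.

import warnings

d = FeedParserDict()
d["published"] = "2023-04-27T12:37:26Z"
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    value = d["updated"]
print(value)               # -> "2023-04-27T12:37:26Z" (falls back to published)
print(caught[0].category)  # -> <class 'DeprecationWarning'>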
def __contains__(self, key):
if key in ('updated', 'updated_parsed'):
if key in ("updated", "updated_parsed"):
# Temporarily help developers out by keeping the old
# broken behavior that was reported in issue 310.
# This fix was proposed in issue 328.