Merge branch 'feature/UpdateFeedparser' into dev

This commit is contained in:
JackDandy 2023-02-09 14:37:12 +00:00
commit b9cfd96e57
32 changed files with 391 additions and 193 deletions

View file

@ -7,6 +7,7 @@
* Update Msgpack 1.0.0 (fa7d744) to 1.0.4 (b5acfd5)
* Update certifi 2022.09.24 to 2022.12.07
* Update diskcache 5.1.0 (40ce0de) to 5.4.0 (1cb1425)
* Update feedparser 6.0.1 (98d189fa) to 6.0.10 (5fcb3ae)
* Update humanize 3.5.0 (b6b0ea5) to 4.0.0 (a1514eb)
* Update profilehooks module 1.12.0 (3ee1f60) to 1.12.1 (c3fc078)
* Update Rarfile 4.0 (55fe778) to 4.1a1 (8a72967)

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -32,7 +32,7 @@ from .util import FeedParserDict
__author__ = 'Kurt McKee <contactme@kurtmckee.org>'
__license__ = 'BSD 2-clause'
__version__ = '6.0.1'
__version__ = '6.0.10'
# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should

View file

@ -1,5 +1,5 @@
# The public API for feedparser
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -26,7 +26,11 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import datetime
import io
import time
from typing import Dict, List, Union
import urllib.error
import urllib.parse
import xml.sax
@ -34,13 +38,12 @@ import sgmllib3k as sgmllib
from .datetimes import registerDateHandler, _parse_date
from .encodings import convert_to_utf8
from .exceptions import *
from .html import _BaseHTMLProcessor
from .html import BaseHTMLProcessor
from . import http
from . import mixin
from .mixin import _FeedParserMixin
from .parsers.loose import _LooseFeedParser
from .parsers.strict import _StrictFeedParser
from .mixin import XMLParserMixin
from .parsers.loose import LooseXMLParser
from .parsers.strict import StrictXMLParser
from .parsers.json import JSONParser
from .sanitizer import replace_doctype
from .urls import convert_to_idn, make_safe_absolute_uri
from .util import FeedParserDict
@ -70,6 +73,7 @@ SUPPORTED_VERSIONS = {
'atom10': 'Atom 1.0',
'atom': 'Atom (unknown version)',
'cdf': 'CDF',
'json1': 'JSON feed 1',
}
@ -136,20 +140,25 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
return url_file_stream_or_string
LooseFeedParser = type(
'LooseFeedParser',
(_LooseFeedParser, _FeedParserMixin, _BaseHTMLProcessor, object),
{},
)
class LooseFeedParser(LooseXMLParser, XMLParserMixin, BaseHTMLProcessor):
pass
StrictFeedParser = type(
'StrictFeedParser',
(_StrictFeedParser, _FeedParserMixin, xml.sax.handler.ContentHandler, object),
{},
)
class StrictFeedParser(StrictXMLParser, XMLParserMixin, xml.sax.handler.ContentHandler):
pass
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None, resolve_relative_uris=None, sanitize_html=None):
def parse(
url_file_stream_or_string,
etag: str = None,
modified: Union[str, datetime.datetime, time.struct_time] = None,
agent: str = None,
referrer: str = None,
handlers: List = None,
request_headers: Dict[str, str] = None,
response_headers: Dict[str, str] = None,
resolve_relative_uris: bool = None,
sanitize_html: bool = None,
) -> FeedParserDict:
"""Parse a feed from a URL, file, stream, or string.
:param url_file_stream_or_string:
@ -165,45 +174,46 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
When a URL is not passed the feed location to use in relative URL
resolution should be passed in the ``Content-Location`` response header
(see ``response_headers`` below).
:param str etag: HTTP ``ETag`` request header.
:param modified: HTTP ``Last-Modified`` request header.
:type modified: :class:`str`, :class:`time.struct_time` 9-tuple, or
:class:`datetime.datetime`
:param str agent: HTTP ``User-Agent`` request header, which defaults to
:param etag:
HTTP ``ETag`` request header.
:param modified:
HTTP ``Last-Modified`` request header.
:param agent:
HTTP ``User-Agent`` request header, which defaults to
the value of :data:`feedparser.USER_AGENT`.
:param referrer: HTTP ``Referer`` [sic] request header.
:param referrer:
HTTP ``Referer`` [sic] request header.
:param handlers:
A list of handlers that will be passed to urllib2.
:param request_headers:
A mapping of HTTP header name to HTTP header value to add to the
request, overriding internally generated values.
:type request_headers: :class:`dict` mapping :class:`str` to :class:`str`
:param response_headers:
A mapping of HTTP header name to HTTP header value. Multiple values may
be joined with a comma. If an HTTP request was made, these headers
override any matching headers in the response. Otherwise this specifies
the entirety of the response headers.
:type response_headers: :class:`dict` mapping :class:`str` to :class:`str`
:param bool resolve_relative_uris:
:param resolve_relative_uris:
Should feedparser attempt to resolve relative URIs to absolute ones within
HTML content? Defaults to the value of
:data:`feedparser.RESOLVE_RELATIVE_URIS`, which is ``True``.
:param bool sanitize_html:
:param sanitize_html:
Should feedparser skip HTML sanitization? Only disable this if you know
what you are doing! Defaults to the value of
:data:`feedparser.SANITIZE_HTML`, which is ``True``.
:return: A :class:`FeedParserDict`.
"""
if not agent or sanitize_html is None or resolve_relative_uris is None:
import feedparser
# Avoid a cyclic import.
if not agent:
import feedparser
agent = feedparser.USER_AGENT
if sanitize_html is None:
sanitize_html = feedparser.SANITIZE_HTML
import feedparser
sanitize_html = bool(feedparser.SANITIZE_HTML)
if resolve_relative_uris is None:
resolve_relative_uris = feedparser.RESOLVE_RELATIVE_URIS
import feedparser
resolve_relative_uris = bool(feedparser.RESOLVE_RELATIVE_URIS)
result = FeedParserDict(
bozo=False,
@ -212,7 +222,14 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
headers={},
)
try:
data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
except urllib.error.URLError as error:
result.update({
'bozo': True,
'bozo_exception': error,
})
return result
if not data:
return result
@ -221,8 +238,10 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['headers'].update(response_headers or {})
data = convert_to_utf8(result['headers'], data, result)
use_json_parser = result['content-type'] == 'application/json'
use_strict_parser = result['encoding'] and True or False
if not use_json_parser:
result['version'], data, entities = replace_doctype(data)
# Ensure that baseuri is an absolute URI using an acceptable URI scheme.
@ -235,36 +254,52 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
baselang = baselang.decode('utf-8', 'ignore')
if not _XML_AVAILABLE:
use_strict_parser = 0
if use_strict_parser:
# initialize the SAX parser
feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
use_strict_parser = False
feed_parser: Union[JSONParser, StrictFeedParser, LooseFeedParser]
if use_json_parser:
result['version'] = None
feed_parser = JSONParser(baseuri, baselang, 'utf-8')
try:
feed_parser.feed(data)
except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
elif use_strict_parser:
# Initialize the SAX parser.
feed_parser = StrictFeedParser(baseuri, baselang, 'utf-8')
feed_parser.resolve_relative_uris = resolve_relative_uris
feed_parser.sanitize_html = sanitize_html
saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
try:
# disable downloading external doctype references, if possible
# Disable downloading external doctype references, if possible.
saxparser.setFeature(xml.sax.handler.feature_external_ges, 0)
except xml.sax.SAXNotSupportedException:
pass
saxparser.setContentHandler(feedparser)
saxparser.setErrorHandler(feedparser)
saxparser.setContentHandler(feed_parser)
saxparser.setErrorHandler(feed_parser)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(io.BytesIO(data))
try:
saxparser.parse(source)
except xml.sax.SAXException as e:
result['bozo'] = 1
result['bozo_exception'] = feedparser.exc or e
use_strict_parser = 0
if not use_strict_parser:
feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
feedparser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feedparser.feeddata
result['entries'] = feedparser.entries
result['version'] = result['version'] or feedparser.version
result['namespaces'] = feedparser.namespaces_in_use
result['bozo_exception'] = feed_parser.exc or e
use_strict_parser = False
# The loose XML parser will be tried if the JSON parser was not used,
# and if the strict XML parser was not used (or if it failed).
if not use_json_parser and not use_strict_parser:
feed_parser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
feed_parser.resolve_relative_uris = resolve_relative_uris
feed_parser.sanitize_html = sanitize_html
feed_parser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feed_parser.feeddata
result['entries'] = feed_parser.entries
result['version'] = result['version'] or feed_parser.version
if isinstance(feed_parser, JSONParser):
result['namespaces'] = {}
else:
result['namespaces'] = feed_parser.namespaces_in_use
return result

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -25,6 +25,8 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from time import struct_time
from typing import Callable, List, Optional
from .asctime import _parse_date_asctime
from .greek import _parse_date_greek
from .hungarian import _parse_date_hungarian
@ -34,7 +36,7 @@ from .perforce import _parse_date_perforce
from .rfc822 import _parse_date_rfc822
from .w3dtf import _parse_date_w3dtf
_date_handlers = []
_date_handlers: List[Callable[[str], Optional[struct_time]]] = []
def registerDateHandler(func):

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -68,15 +68,7 @@ _iso8601_re = [
+ r'(\.(?P<fracsecond>\d+))?'
+ r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
for tmpl in _iso8601_tmpl]
try:
del tmpl
except NameError:
pass
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
try:
del regex
except NameError:
pass
def _parse_date_iso8601(date_string):

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -25,7 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import email._parseaddr
import email.utils
import re
import time
@ -41,6 +41,6 @@ def _parse_date_perforce(date_string):
dow, year, month, day, hour, minute, second, tz = m.groups()
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
new_date_string = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
tm = email._parseaddr.parsedate_tz(new_date_string)
tm = email.utils.parsedate_tz(new_date_string)
if tm:
return time.gmtime(email._parseaddr.mktime_tz(tm))
return time.gmtime(email.utils.mktime_tz(tm))

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Character encoding routines
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -26,17 +26,16 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import cgi
import codecs
import re
import typing as t
try:
try:
import cchardet as chardet
import cchardet as chardet # type: ignore[import]
except ImportError:
import chardet
import chardet # type: ignore[no-redef]
except ImportError:
chardet = None
lazy_chardet_encoding = None
else:
def lazy_chardet_encoding(data):
@ -68,6 +67,30 @@ RE_XML_DECLARATION = re.compile(r'^<\?xml[^>]*?>')
RE_XML_PI_ENCODING = re.compile(br'^<\?.*encoding=[\'"](.*?)[\'"].*\?>')
def parse_content_type(line: str) -> t.Tuple[str, str]:
    """Split an HTTP Content-Type header into (MIME type, charset).

    Returns a two-tuple of strings: the MIME type, and the value of the
    "charset" parameter ("" for either part when absent).

    This is a custom replacement for Python's cgi.parse_header();
    the cgi module will be removed in Python 3.13.
    """
    parts = line.split(";")
    if not parts:  # defensive only: str.split always yields at least one chunk
        return "", ""
    mime_type = parts[0].strip()
    # Scan parameters from the end so the last "charset" wins, matching the
    # forward scan that overwrote earlier values with later ones.
    charset = next(
        (
            value.strip().strip("\"'")
            for key, _, value in (param.partition("=") for param in reversed(parts[1:]))
            if key.strip().lower() == "charset"
        ),
        "",
    )
    return mime_type, charset
def convert_to_utf8(http_headers, data, result):
"""Detect and convert the character encoding to UTF-8.
@ -156,10 +179,7 @@ def convert_to_utf8(http_headers, data, result):
try:
if bom_encoding:
tempdata = data.decode(bom_encoding).encode('utf-8')
except (UnicodeDecodeError, LookupError):
# feedparser recognizes UTF-32 encodings that aren't
# available in Python 2.4 and 2.5, so it's possible to
# encounter a LookupError during decoding.
except UnicodeDecodeError:
xml_encoding_match = None
else:
xml_encoding_match = RE_XML_PI_ENCODING.match(tempdata)
@ -181,15 +201,14 @@ def convert_to_utf8(http_headers, data, result):
# XML declaration encoding, and HTTP encoding, following the
# heuristic defined in RFC 3023.
http_content_type = http_headers.get('content-type') or ''
http_content_type, params = cgi.parse_header(http_content_type)
http_encoding = params.get('charset', '').replace("'", "")
if isinstance(http_encoding, bytes):
http_encoding = http_encoding.decode('utf-8', 'ignore')
http_content_type, http_encoding = parse_content_type(http_content_type)
acceptable_content_type = 0
application_content_types = ('application/xml', 'application/xml-dtd',
'application/xml-external-parsed-entity')
text_content_types = ('text/xml', 'text/xml-external-parsed-entity')
json_content_types = ('application/feed+json', 'application/json')
json = False
if (
http_content_type in application_content_types
or (
@ -208,6 +227,17 @@ def convert_to_utf8(http_headers, data, result):
):
acceptable_content_type = 1
rfc3023_encoding = http_encoding or 'us-ascii'
elif (
http_content_type in json_content_types
or (
not http_content_type
and data and data.lstrip()[0] == '{'
)
):
http_content_type = json_content_types[0]
acceptable_content_type = 1
json = True
rfc3023_encoding = http_encoding or 'utf-8' # RFC 7159, 8.1.
elif http_content_type.startswith('text/'):
rfc3023_encoding = http_encoding or 'us-ascii'
elif http_headers and 'content-type' not in http_headers:
@ -230,7 +260,7 @@ def convert_to_utf8(http_headers, data, result):
if http_headers and (not acceptable_content_type):
if 'content-type' in http_headers:
msg = '%s is not an XML media type' % http_headers['content-type']
msg = '%s is not an accepted media type' % http_headers['content-type']
else:
msg = 'no Content-type specified'
error = NonXMLContentType(msg)
@ -254,6 +284,7 @@ def convert_to_utf8(http_headers, data, result):
pass
else:
known_encoding = 1
if not json:
# Update the encoding in the opening XML processing instruction.
new_declaration = '''<?xml version='1.0' encoding='utf-8'?>'''
if RE_XML_DECLARATION.search(data):
@ -275,6 +306,7 @@ def convert_to_utf8(http_headers, data, result):
(rfc3023_encoding, proposed_encoding))
rfc3023_encoding = proposed_encoding
result['content-type'] = http_content_type # for selecting the parser
result['encoding'] = rfc3023_encoding
if error:
result['bozo'] = True

View file

@ -1,5 +1,5 @@
# Exceptions used throughout feedparser
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -27,7 +27,7 @@
# POSSIBILITY OF SUCH DAMAGE.
__all__ = [
'ThingsNobodyCaresAboutButMe',
'FeedparserError',
'CharacterEncodingOverride',
'CharacterEncodingUnknown',
'NonXMLContentType',
@ -35,19 +35,19 @@ __all__ = [
]
class ThingsNobodyCaresAboutButMe(Exception):
class FeedparserError(Exception):
pass
class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe):
class CharacterEncodingOverride(FeedparserError):
pass
class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe):
class CharacterEncodingUnknown(FeedparserError):
pass
class NonXMLContentType(ThingsNobodyCaresAboutButMe):
class NonXMLContentType(FeedparserError):
pass

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -61,7 +61,7 @@ _cp1252 = {
}
class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
class BaseHTMLProcessor(sgmllib.SGMLParser):
special = re.compile("""[<>'"]""")
bare_ampersand = re.compile(r"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)")
elements_no_end_tag = {
@ -91,11 +91,11 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
self.encoding = encoding
self._type = _type
self.pieces = []
super(_BaseHTMLProcessor, self).__init__()
super().__init__()
def reset(self):
self.pieces = []
super(_BaseHTMLProcessor, self).reset()
super().reset()
def _shorttag_replace(self, match):
"""
@ -118,23 +118,13 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
raise NotImplementedError
# Replace goahead with SGMLParser's goahead() code object.
try:
goahead.__code__ = sgmllib.SGMLParser.goahead.__code__
except AttributeError:
# Python 2
# noinspection PyUnresolvedReferences
goahead.func_code = sgmllib.SGMLParser.goahead.func_code
def __parse_starttag(self, i):
raise NotImplementedError
# Replace __parse_starttag with SGMLParser's parse_starttag() code object.
try:
__parse_starttag.__code__ = sgmllib.SGMLParser.parse_starttag.__code__
except AttributeError:
# Python 2
# noinspection PyUnresolvedReferences
__parse_starttag.func_code = sgmllib.SGMLParser.parse_starttag.func_code
def parse_starttag(self, i):
j = self.__parse_starttag(i)
@ -153,8 +143,8 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
data = data.replace('&#39;', "'")
data = data.replace('&#34;', '"')
super(_BaseHTMLProcessor, self).feed(data)
super(_BaseHTMLProcessor, self).close()
super().feed(data)
super().close()
@staticmethod
def normalize_attrs(attrs):
@ -315,8 +305,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
# self.updatepos(declstartpos, i)
return None, -1
@staticmethod
def convert_charref(name):
def convert_charref(self, name):
"""
:type name: str
:rtype: str
@ -324,8 +313,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
return '&#%s;' % name
@staticmethod
def convert_entityref(name):
def convert_entityref(self, name):
"""
:type name: str
:rtype: str
@ -349,7 +337,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser, object):
try:
return sgmllib.SGMLParser.parse_declaration(self, i)
except sgmllib.SGMLParseError:
except (AssertionError, sgmllib.SGMLParseError):
# Escape the doctype declaration and continue parsing.
self.handle_data('&lt;')
return i+1

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -44,7 +44,7 @@ from .urls import convert_to_idn
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
class _FeedURLHandler(urllib.request.HTTPDigestAuthHandler, urllib.request.HTTPRedirectHandler, urllib.request.HTTPDefaultErrorHandler):
class URLHandler(urllib.request.HTTPDigestAuthHandler, urllib.request.HTTPRedirectHandler, urllib.request.HTTPDefaultErrorHandler):
def http_error_default(self, req, fp, code, msg, headers):
# The default implementation just raises HTTPError.
# Forget that.
@ -53,6 +53,8 @@ class _FeedURLHandler(urllib.request.HTTPDigestAuthHandler, urllib.request.HTTPR
def http_error_301(self, req, fp, code, msg, hdrs):
result = urllib.request.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, hdrs)
if not result:
return fp
result.status = code
result.newurl = result.geturl()
return result
@ -78,7 +80,7 @@ class _FeedURLHandler(urllib.request.HTTPDigestAuthHandler, urllib.request.HTTPR
host = urllib.parse.urlparse(req.get_full_url())[1]
if 'Authorization' not in req.headers or 'WWW-Authenticate' not in headers:
return self.http_error_default(req, fp, code, msg, headers)
auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode('utf8'))
auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode()).decode()
user, passw = auth.split(':')
realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
self.add_password(realm, host, user, passw)
@ -145,15 +147,26 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None,
if url_pieces.port:
new_pieces[1] = f'{url_pieces.hostname}:{url_pieces.port}'
url = urllib.parse.urlunparse(new_pieces)
auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}').strip()
auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}'.encode()).decode()
# iri support
if not isinstance(url, bytes):
url = convert_to_idn(url)
# Prevent UnicodeEncodeErrors caused by Unicode characters in the path.
bits = []
for c in url:
try:
c.encode('ascii')
except UnicodeEncodeError:
bits.append(urllib.parse.quote(c))
else:
bits.append(c)
url = ''.join(bits)
# try to open with urllib2 (to use optional headers)
request = _build_urllib2_request(url, agent, ACCEPT_HEADER, etag, modified, referrer, auth, request_headers)
opener = urllib.request.build_opener(*tuple(handlers + [_FeedURLHandler()]))
opener = urllib.request.build_opener(*tuple(handlers + [URLHandler()]))
opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
f = opener.open(request)
data = f.read()
@ -203,7 +216,7 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None,
result['href'] = f.url.decode('utf-8', 'ignore')
else:
result['href'] = f.url
result['status'] = getattr(f, 'status', 200)
result['status'] = getattr(f, 'status', None) or 200
# Stop processing if the server sent HTTP 304 Not Modified.
if getattr(f, 'code', 0) == 304:

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -30,16 +30,17 @@ import binascii
import copy
import html.entities
import re
from typing import Dict
import xml.sax.saxutils
from .html import _cp1252
from .namespaces import _base, cc, dc, georss, itunes, mediarss, psc
from .sanitizer import _sanitize_html, _HTMLSanitizer
from .sanitizer import sanitize_html, HTMLSanitizer
from .util import FeedParserDict
from .urls import _urljoin, make_safe_absolute_uri, resolve_relative_uris
class _FeedParserMixin(
class XMLParserMixin(
_base.Namespace,
cc.Namespace,
dc.Namespace,
@ -118,7 +119,7 @@ class _FeedParserMixin(
'http://www.w3.org/XML/1998/namespace': 'xml',
'http://podlove.org/simple-chapters': 'psc',
}
_matchnamespaces = {}
_matchnamespaces: Dict[str, str] = {}
can_be_relative_uri = {
'comments',
@ -170,6 +171,8 @@ class _FeedParserMixin(
self.entries = [] # list of entry-level data
self.version = '' # feed type/version, see SUPPORTED_VERSIONS
self.namespaces_in_use = {} # dictionary of namespaces defined by the feed
self.resolve_relative_uris = False
self.sanitize_html = False
# the following are used internally to track state;
# this is really out of control and should be refactored
@ -193,6 +196,7 @@ class _FeedParserMixin(
self.svgOK = 0
self.title_depth = -1
self.depth = 0
self.hasContent = 0
if self.lang:
self.feeddata['language'] = self.lang.replace('_', '-')
@ -204,7 +208,7 @@ class _FeedParserMixin(
# },
# }
self.property_depth_map = {}
super(_FeedParserMixin, self).__init__()
super(XMLParserMixin, self).__init__()
def _normalize_attributes(self, kv):
raise NotImplementedError
@ -506,9 +510,7 @@ class _FeedParserMixin(
if base64 and self.contentparams.get('base64', 0):
try:
output = base64.decodebytes(output.encode('utf8')).decode('utf8')
except binascii.Error:
pass
except binascii.Incomplete:
except (binascii.Error, binascii.Incomplete, UnicodeDecodeError):
pass
# resolve relative URIs
@ -546,7 +548,7 @@ class _FeedParserMixin(
# sanitize embedded markup
if is_htmlish and self.sanitize_html:
if element in self.can_contain_dangerous_markup:
output = _sanitize_html(output, self.encoding, self.contentparams.get('type', 'text/html'))
output = sanitize_html(output, self.encoding, self.contentparams.get('type', 'text/html'))
if self.encoding and isinstance(output, bytes):
output = output.decode(self.encoding, 'ignore')
@ -648,7 +650,7 @@ class _FeedParserMixin(
return False
# all tags must be in a restricted subset of valid HTML tags
if any((t for t in re.findall(r'</?(\w+)', s) if t.lower() not in _HTMLSanitizer.acceptable_elements)):
if any((t for t in re.findall(r'</?(\w+)', s) if t.lower() not in HTMLSanitizer.acceptable_elements)):
return False
# all entities must have been defined as valid HTML entities
@ -744,7 +746,7 @@ class _FeedParserMixin(
author, email = context.get(key), None
if not author:
return
emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author)
emailmatch = re.search(r"(([a-zA-Z0-9_.+-]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(]?))(\?subject=\S+)?", author)
if emailmatch:
email = emailmatch.group(0)
# probably a better way to do the following, but it passes

View file

@ -1,5 +1,5 @@
# Support for the Atom, RSS, RDF, and CDF feed formats
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -259,6 +259,7 @@ class Namespace(object):
def _end_item(self):
self.pop('item')
self.inentry = 0
self.hasContent = 0
_end_entry = _end_item
def _start_language(self, attrs_d):
@ -388,7 +389,7 @@ class Namespace(object):
def _start_description(self, attrs_d):
context = self._get_context()
if 'summary' in context:
if 'summary' in context and not self.hasContent:
self._summaryKey = 'content'
self._start_content(attrs_d)
else:
@ -429,7 +430,7 @@ class Namespace(object):
def _start_summary(self, attrs_d):
context = self._get_context()
if 'summary' in context:
if 'summary' in context and not self.hasContent:
self._summaryKey = 'content'
self._start_content(attrs_d)
else:
@ -466,6 +467,7 @@ class Namespace(object):
self.sourcedata.clear()
def _start_content(self, attrs_d):
self.hasContent = 1
self.push_content('content', attrs_d, 'text/plain', 1)
src = attrs_d.get('src')
if src:
@ -477,6 +479,7 @@ class Namespace(object):
_start_xhtml_body = _start_body
def _start_content_encoded(self, attrs_d):
self.hasContent = 1
self.push_content('content', attrs_d, 'text/html', 1)
_start_fullitem = _start_content_encoded

View file

@ -1,5 +1,5 @@
# Support for the administrative elements extension
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Support for the Creative Commons licensing extensions
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Support for the Dublin Core metadata extensions
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Support for the GeoRSS format
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -91,6 +91,8 @@ class Namespace(object):
except ValueError:
srs_dimension = 2
context = self._get_context()
if 'where' not in context:
context['where'] = {}
context['where']['srsName'] = srs_name
context['where']['srsDimension'] = srs_dimension

View file

@ -1,5 +1,5 @@
# Support for the iTunes format
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Support for the Media RSS format
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -1,5 +1,5 @@
# Support for the Podlove Simple Chapters format
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#

View file

@ -0,0 +1,133 @@
# The JSON feed parser
# Copyright 2017 Beat Bolli
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import json
from ..datetimes import _parse_date
from ..sanitizer import sanitize_html
from ..util import FeedParserDict
class JSONParser:
    """Parse a JSON Feed (https://jsonfeed.org/) document into feedparser's
    common result structure (``feeddata`` for channel-level data, ``entries``
    for the items).

    Mirrors the interface of the XML parser classes: construct, call
    :meth:`feed` with the raw document, then read ``version``, ``feeddata``
    and ``entries``.
    """

    # Recognized JSON Feed version URLs -> feedparser's short version tags.
    VERSIONS = {
        'https://jsonfeed.org/version/1': 'json1',
        'https://jsonfeed.org/version/1.1': 'json11',
    }
    # (json_key, feedparser_key) pairs copied verbatim from the top-level
    # feed object into ``feeddata``.
    FEED_FIELDS = (
        ('title', 'title'),
        ('icon', 'image'),
        ('home_page_url', 'link'),
        ('description', 'description'),
    )
    # (json_key, feedparser_key) pairs copied verbatim from each item.
    ITEM_FIELDS = (
        ('title', 'title'),
        ('id', 'guid'),
        ('url', 'link'),
        ('summary', 'summary'),
        ('external_url', 'source'),
    )

    def __init__(self, baseuri=None, baselang=None, encoding=None):
        self.baseuri = baseuri or ''
        self.lang = baselang or None
        self.encoding = encoding or 'utf-8'  # character encoding
        self.version = None
        self.feeddata = FeedParserDict()
        self.namespacesInUse = []
        self.entries = []

    def feed(self, data):
        """Parse *data* (a JSON Feed document as a string).

        :raises ValueError: if the document's ``version`` is missing or
            not one of the URLs in :attr:`VERSIONS`.
        """
        data = json.loads(data)
        v = data.get('version', '')
        try:
            self.version = self.VERSIONS[v]
        except KeyError:
            raise ValueError("Unrecognized JSONFeed version '%s'" % v)
        for src, dst in self.FEED_FIELDS:
            if src in data:
                self.feeddata[dst] = data[src]
        if 'author' in data:
            self.parse_author(data['author'], self.feeddata)
        # TODO: hubs; expired has no RSS equivalent
        # 'items' is required by the spec, but tolerate its absence rather
        # than raising KeyError on a malformed (but otherwise usable) feed.
        self.entries = [self.parse_entry(e) for e in data.get('items') or []]

    def parse_entry(self, e):
        """Convert one JSON Feed item dict into a FeedParserDict entry."""
        entry = FeedParserDict()
        for src, dst in self.ITEM_FIELDS:
            if src in e:
                entry[dst] = e[src]

        # content_text takes precedence over content_html; only html
        # content is run through the sanitizer.
        if 'content_text' in e:
            entry['content'] = c = FeedParserDict()
            c['value'] = e['content_text']
            c['type'] = 'text'
        elif 'content_html' in e:
            entry['content'] = c = FeedParserDict()
            c['value'] = sanitize_html(e['content_html'], self.encoding, 'application/json')
            c['type'] = 'html'

        if 'date_published' in e:
            entry['published'] = e['date_published']
            entry['published_parsed'] = _parse_date(e['date_published'])
        # BUG FIX: the original tested ``'date_updated' in e`` — a key that
        # does not exist in the JSON Feed spec — while reading
        # ``e['date_modified']``.  Spec-compliant feeds therefore never got
        # 'updated', and a feed carrying only 'date_updated' raised KeyError.
        # Test the key that is actually read.
        if 'date_modified' in e:
            entry['updated'] = e['date_modified']
            entry['updated_parsed'] = _parse_date(e['date_modified'])

        if 'tags' in e:
            entry['category'] = e['tags']
        if 'author' in e:
            self.parse_author(e['author'], entry)
        if 'attachments' in e:
            entry['enclosures'] = [self.parse_attachment(a) for a in e['attachments']]
        return entry

    @staticmethod
    def parse_author(parent, dest):
        """Copy a JSON Feed author object from *parent* into *dest*.

        Populates ``dest['author_detail']`` (and ``dest['author']`` when a
        name is given).  A ``mailto:`` URL is stored as an email address,
        any other URL as an href.
        """
        dest['author_detail'] = detail = FeedParserDict()
        if 'name' in parent:
            dest['author'] = detail['name'] = parent['name']
        if 'url' in parent:
            if parent['url'].startswith('mailto:'):
                detail['email'] = parent['url'][7:]  # strip 'mailto:'
            else:
                detail['href'] = parent['url']

    @staticmethod
    def parse_attachment(attachment):
        """Convert a JSON Feed attachment object into an RSS-style enclosure.

        ``url`` and ``mime_type`` are required by the spec and read
        unconditionally; ``size_in_bytes`` is optional.
        """
        enc = FeedParserDict()
        enc['href'] = attachment['url']
        enc['type'] = attachment['mime_type']
        if 'size_in_bytes' in attachment:
            enc['length'] = attachment['size_in_bytes']
        return enc

View file

@ -1,5 +1,5 @@
# The loose feed parser that interfaces with an SGML parsing library
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -26,7 +26,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
class _LooseFeedParser(object):
class LooseXMLParser:
contentparams = None
def __init__(self, baseuri=None, baselang=None, encoding=None, entities=None):
@ -34,7 +34,7 @@ class _LooseFeedParser(object):
self.lang = baselang or None
self.encoding = encoding or 'utf-8' # character encoding
self.entities = entities or {}
super(_LooseFeedParser, self).__init__()
super().__init__()
@staticmethod
def _normalize_attributes(kv):

View file

@ -1,5 +1,5 @@
# The strict feed parser that interfaces with an XML parsing library
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -29,7 +29,7 @@
from ..exceptions import UndeclaredNamespace
class _StrictFeedParser(object):
class StrictXMLParser:
def __init__(self, baseuri, baselang, encoding):
self.bozo = 0
self.exc = None
@ -37,7 +37,7 @@ class _StrictFeedParser(object):
self.baseuri = baseuri or ''
self.lang = baselang
self.encoding = encoding
super(_StrictFeedParser, self).__init__()
super(StrictXMLParser, self).__init__()
@staticmethod
def _normalize_attributes(kv):

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -27,11 +27,11 @@
import re
from .html import _BaseHTMLProcessor
from .html import BaseHTMLProcessor
from .urls import make_safe_absolute_uri
class _HTMLSanitizer(_BaseHTMLProcessor):
class HTMLSanitizer(BaseHTMLProcessor):
acceptable_elements = {
'a',
'abbr',
@ -732,14 +732,14 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
}
def __init__(self, encoding=None, _type='application/xhtml+xml'):
super(_HTMLSanitizer, self).__init__(encoding, _type)
super().__init__(encoding, _type)
self.unacceptablestack = 0
self.mathmlOK = 0
self.svgOK = 0
def reset(self):
super(_HTMLSanitizer, self).reset()
super().reset()
self.unacceptablestack = 0
self.mathmlOK = 0
self.svgOK = 0
@ -805,7 +805,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
if key == 'href':
value = make_safe_absolute_uri(value)
clean_attrs.append((key, value))
super(_HTMLSanitizer, self).unknown_starttag(tag, clean_attrs)
super().unknown_starttag(tag, clean_attrs)
def unknown_endtag(self, tag):
if tag not in self.acceptable_elements:
@ -820,7 +820,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
self.svgOK -= 1
else:
return
super(_HTMLSanitizer, self).unknown_endtag(tag)
super().unknown_endtag(tag)
def handle_pi(self, text):
pass
@ -830,7 +830,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
def handle_data(self, text):
if not self.unacceptablestack:
super(_HTMLSanitizer, self).handle_data(text)
super().handle_data(text)
def sanitize_style(self, style):
# disallow urls
@ -865,7 +865,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
return ' '.join(clean)
def parse_comment(self, i, report=1):
ret = super(_HTMLSanitizer, self).parse_comment(i, report)
ret = super().parse_comment(i, report)
if ret >= 0:
return ret
# if ret == -1, this may be a malicious attempt to circumvent
@ -877,8 +877,8 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
return len(self.rawdata)
def _sanitize_html(html_source, encoding, _type):
p = _HTMLSanitizer(encoding, _type)
def sanitize_html(html_source, encoding, _type):
p = HTMLSanitizer(encoding, _type)
html_source = html_source.replace('<![CDATA[', '&lt;![CDATA[')
p.feed(html_source)
data = p.output()

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -27,7 +27,7 @@
import re
import sgmllib
import sgmllib # type: ignore[import]
__all__ = [
'sgmllib',
@ -82,7 +82,7 @@ class _EndBracketRegEx:
match = self.endbracket.match(target, index)
if match is not None:
# Returning a new object in the calling thread's context
# resolves a thread-safety.
# resolves a thread-safety issue.
return EndBracketMatch(match)
return None

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -28,7 +28,7 @@
import re
import urllib.parse
from .html import _BaseHTMLProcessor
from .html import BaseHTMLProcessor
# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
@ -103,7 +103,7 @@ def make_safe_absolute_uri(base, rel=None):
return uri
class RelativeURIResolver(_BaseHTMLProcessor):
class RelativeURIResolver(BaseHTMLProcessor):
relative_uris = {
('a', 'href'),
('applet', 'codebase'),
@ -137,7 +137,7 @@ class RelativeURIResolver(_BaseHTMLProcessor):
}
def __init__(self, baseuri, encoding, _type):
_BaseHTMLProcessor.__init__(self, encoding, _type)
BaseHTMLProcessor.__init__(self, encoding, _type)
self.baseuri = baseuri
def resolve_uri(self, uri):

View file

@ -1,4 +1,4 @@
# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2010-2022 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
@ -48,7 +48,7 @@ class FeedParserDict(dict):
'tagline_detail': 'subtitle_detail',
}
def __getitem__(self, key):
def __getitem__(self, key, _stacklevel=2):
"""
:return: A :class:`FeedParserDict`.
"""
@ -59,9 +59,8 @@ class FeedParserDict(dict):
except IndexError:
raise KeyError("object doesn't have key 'category'")
elif key == 'enclosures':
norel = lambda link: FeedParserDict([(name, value) for (name, value) in link.items() if name != 'rel'])
return [
norel(link)
FeedParserDict([(name, value) for (name, value) in link.items() if name != 'rel'])
for link in dict.__getitem__(self, 'links')
if link['rel'] == 'enclosure'
]
@ -84,6 +83,7 @@ class FeedParserDict(dict):
"exist. This fallback will be removed in a future version "
"of feedparser.",
DeprecationWarning,
stacklevel=_stacklevel,
)
return dict.__getitem__(self, 'published')
return dict.__getitem__(self, 'updated')
@ -99,6 +99,7 @@ class FeedParserDict(dict):
"`updated_parsed` doesn't exist. This fallback will be "
"removed in a future version of feedparser.",
DeprecationWarning,
stacklevel=_stacklevel,
)
return dict.__getitem__(self, 'published_parsed')
return dict.__getitem__(self, 'updated_parsed')
@ -119,7 +120,7 @@ class FeedParserDict(dict):
# This fix was proposed in issue 328.
return dict.__contains__(self, key)
try:
self.__getitem__(key)
self.__getitem__(key, _stacklevel=3)
except KeyError:
return False
else:
@ -133,7 +134,7 @@ class FeedParserDict(dict):
"""
try:
return self.__getitem__(key)
return self.__getitem__(key, _stacklevel=3)
except KeyError:
return default
@ -143,17 +144,11 @@ class FeedParserDict(dict):
key = key[0]
return dict.__setitem__(self, key, value)
def setdefault(self, k, default):
if k not in self:
self[k] = default
return default
return self[k]
def __getattr__(self, key):
# __getattribute__() is called first; this will be called
# only if an attribute was not already found
try:
return self.__getitem__(key)
return self.__getitem__(key, _stacklevel=3)
except KeyError:
raise AttributeError("object has no attribute '%s'" % key)