mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-23 19:23:37 +00:00
547 lines
17 KiB
Python
547 lines
17 KiB
Python
# Support for the Atom, RSS, RDF, and CDF feed formats
|
|
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
|
# Copyright 2002-2008 Mark Pilgrim
|
|
# All rights reserved.
|
|
#
|
|
# This file is a part of feedparser.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without modification,
|
|
# are permitted provided that the following conditions are met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import copy
|
|
|
|
from ..datetimes import _parse_date
|
|
from ..urls import make_safe_absolute_uri
|
|
from ..util import FeedParserDict
|
|
|
|
|
|
class Namespace:
|
|
"""Support for the Atom, RSS, RDF, and CDF feed formats.
|
|
|
|
The feed formats all share common elements, some of which have conflicting
|
|
interpretations. For simplicity, all of the base feed format support is
|
|
collected here.
|
|
"""
|
|
|
|
supported_namespaces = {
|
|
"": "",
|
|
"http://backend.userland.com/rss": "",
|
|
"http://blogs.law.harvard.edu/tech/rss": "",
|
|
"http://purl.org/rss/1.0/": "",
|
|
"http://my.netscape.com/rdf/simple/0.9/": "",
|
|
"http://example.com/newformat#": "",
|
|
"http://example.com/necho": "",
|
|
"http://purl.org/echo/": "",
|
|
"uri/of/echo/namespace#": "",
|
|
"http://purl.org/pie/": "",
|
|
"http://purl.org/atom/ns#": "",
|
|
"http://www.w3.org/2005/Atom": "",
|
|
"http://purl.org/rss/1.0/modules/rss091#": "",
|
|
}
|
|
|
|
def _start_rss(self, attrs_d):
|
|
versionmap = {
|
|
"0.91": "rss091u",
|
|
"0.92": "rss092",
|
|
"0.93": "rss093",
|
|
"0.94": "rss094",
|
|
}
|
|
|
|
# If we're here then this is an RSS feed.
|
|
# If we don't have a version or have a version that starts with something
|
|
# other than RSS then there's been a mistake. Correct it.
|
|
if not self.version or not self.version.startswith("rss"):
|
|
attr_version = attrs_d.get("version", "")
|
|
version = versionmap.get(attr_version)
|
|
if version:
|
|
self.version = version
|
|
elif attr_version.startswith("2."):
|
|
self.version = "rss20"
|
|
else:
|
|
self.version = "rss"
|
|
|
|
def _start_channel(self, attrs_d):
|
|
self.infeed = 1
|
|
self._cdf_common(attrs_d)
|
|
|
|
def _cdf_common(self, attrs_d):
|
|
if "lastmod" in attrs_d:
|
|
self._start_modified({})
|
|
self.elementstack[-1][-1] = attrs_d["lastmod"]
|
|
self._end_modified()
|
|
if "href" in attrs_d:
|
|
self._start_link({})
|
|
self.elementstack[-1][-1] = attrs_d["href"]
|
|
self._end_link()
|
|
|
|
def _start_feed(self, attrs_d):
|
|
self.infeed = 1
|
|
versionmap = {"0.1": "atom01", "0.2": "atom02", "0.3": "atom03"}
|
|
if not self.version:
|
|
attr_version = attrs_d.get("version")
|
|
version = versionmap.get(attr_version)
|
|
if version:
|
|
self.version = version
|
|
else:
|
|
self.version = "atom"
|
|
|
|
def _end_channel(self):
|
|
self.infeed = 0
|
|
|
|
_end_feed = _end_channel
|
|
|
|
def _start_image(self, attrs_d):
|
|
context = self._get_context()
|
|
if not self.inentry:
|
|
context.setdefault("image", FeedParserDict())
|
|
self.inimage = 1
|
|
self.title_depth = -1
|
|
self.push("image", 0)
|
|
|
|
def _end_image(self):
|
|
self.pop("image")
|
|
self.inimage = 0
|
|
|
|
def _start_textinput(self, attrs_d):
|
|
context = self._get_context()
|
|
context.setdefault("textinput", FeedParserDict())
|
|
self.intextinput = 1
|
|
self.title_depth = -1
|
|
self.push("textinput", 0)
|
|
|
|
_start_textInput = _start_textinput
|
|
|
|
def _end_textinput(self):
|
|
self.pop("textinput")
|
|
self.intextinput = 0
|
|
|
|
_end_textInput = _end_textinput
|
|
|
|
def _start_author(self, attrs_d):
|
|
self.inauthor = 1
|
|
self.push("author", 1)
|
|
# Append a new FeedParserDict when expecting an author
|
|
context = self._get_context()
|
|
context.setdefault("authors", [])
|
|
context["authors"].append(FeedParserDict())
|
|
|
|
_start_managingeditor = _start_author
|
|
|
|
def _end_author(self):
|
|
self.pop("author")
|
|
self.inauthor = 0
|
|
self._sync_author_detail()
|
|
|
|
_end_managingeditor = _end_author
|
|
|
|
def _start_contributor(self, attrs_d):
|
|
self.incontributor = 1
|
|
context = self._get_context()
|
|
context.setdefault("contributors", [])
|
|
context["contributors"].append(FeedParserDict())
|
|
self.push("contributor", 0)
|
|
|
|
def _end_contributor(self):
|
|
self.pop("contributor")
|
|
self.incontributor = 0
|
|
|
|
def _start_name(self, attrs_d):
|
|
self.push("name", 0)
|
|
|
|
def _end_name(self):
|
|
value = self.pop("name")
|
|
if self.inpublisher:
|
|
self._save_author("name", value, "publisher")
|
|
elif self.inauthor:
|
|
self._save_author("name", value)
|
|
elif self.incontributor:
|
|
self._save_contributor("name", value)
|
|
elif self.intextinput:
|
|
context = self._get_context()
|
|
context["name"] = value
|
|
|
|
def _start_width(self, attrs_d):
|
|
self.push("width", 0)
|
|
|
|
def _end_width(self):
|
|
value = self.pop("width")
|
|
try:
|
|
value = int(value)
|
|
except ValueError:
|
|
value = 0
|
|
if self.inimage:
|
|
context = self._get_context()
|
|
context["width"] = value
|
|
|
|
def _start_height(self, attrs_d):
|
|
self.push("height", 0)
|
|
|
|
def _end_height(self):
|
|
value = self.pop("height")
|
|
try:
|
|
value = int(value)
|
|
except ValueError:
|
|
value = 0
|
|
if self.inimage:
|
|
context = self._get_context()
|
|
context["height"] = value
|
|
|
|
def _start_url(self, attrs_d):
|
|
self.push("href", 1)
|
|
|
|
_start_homepage = _start_url
|
|
_start_uri = _start_url
|
|
|
|
def _end_url(self):
|
|
value = self.pop("href")
|
|
if self.inauthor:
|
|
self._save_author("href", value)
|
|
elif self.incontributor:
|
|
self._save_contributor("href", value)
|
|
|
|
_end_homepage = _end_url
|
|
_end_uri = _end_url
|
|
|
|
def _start_email(self, attrs_d):
|
|
self.push("email", 0)
|
|
|
|
def _end_email(self):
|
|
value = self.pop("email")
|
|
if self.inpublisher:
|
|
self._save_author("email", value, "publisher")
|
|
elif self.inauthor:
|
|
self._save_author("email", value)
|
|
elif self.incontributor:
|
|
self._save_contributor("email", value)
|
|
|
|
def _start_subtitle(self, attrs_d):
|
|
self.push_content("subtitle", attrs_d, "text/plain", 1)
|
|
|
|
_start_tagline = _start_subtitle
|
|
|
|
def _end_subtitle(self):
|
|
self.pop_content("subtitle")
|
|
|
|
_end_tagline = _end_subtitle
|
|
|
|
def _start_rights(self, attrs_d):
|
|
self.push_content("rights", attrs_d, "text/plain", 1)
|
|
|
|
_start_copyright = _start_rights
|
|
|
|
def _end_rights(self):
|
|
self.pop_content("rights")
|
|
|
|
_end_copyright = _end_rights
|
|
|
|
def _start_item(self, attrs_d):
|
|
self.entries.append(FeedParserDict())
|
|
self.push("item", 0)
|
|
self.inentry = 1
|
|
self.guidislink = 0
|
|
self.title_depth = -1
|
|
id = self._get_attribute(attrs_d, "rdf:about")
|
|
if id:
|
|
context = self._get_context()
|
|
context["id"] = id
|
|
self._cdf_common(attrs_d)
|
|
|
|
_start_entry = _start_item
|
|
|
|
def _end_item(self):
|
|
self.pop("item")
|
|
self.inentry = 0
|
|
self.hasContent = 0
|
|
|
|
_end_entry = _end_item
|
|
|
|
def _start_language(self, attrs_d):
|
|
self.push("language", 1)
|
|
|
|
def _end_language(self):
|
|
self.lang = self.pop("language")
|
|
|
|
def _start_webmaster(self, attrs_d):
|
|
self.push("publisher", 1)
|
|
|
|
def _end_webmaster(self):
|
|
self.pop("publisher")
|
|
self._sync_author_detail("publisher")
|
|
|
|
def _start_published(self, attrs_d):
|
|
self.push("published", 1)
|
|
|
|
_start_issued = _start_published
|
|
_start_pubdate = _start_published
|
|
|
|
def _end_published(self):
|
|
value = self.pop("published")
|
|
self._save("published_parsed", _parse_date(value), overwrite=True)
|
|
|
|
_end_issued = _end_published
|
|
_end_pubdate = _end_published
|
|
|
|
def _start_updated(self, attrs_d):
|
|
self.push("updated", 1)
|
|
|
|
_start_modified = _start_updated
|
|
_start_lastbuilddate = _start_updated
|
|
|
|
def _end_updated(self):
|
|
value = self.pop("updated")
|
|
parsed_value = _parse_date(value)
|
|
self._save("updated_parsed", parsed_value, overwrite=True)
|
|
|
|
_end_modified = _end_updated
|
|
_end_lastbuilddate = _end_updated
|
|
|
|
def _start_created(self, attrs_d):
|
|
self.push("created", 1)
|
|
|
|
def _end_created(self):
|
|
value = self.pop("created")
|
|
self._save("created_parsed", _parse_date(value), overwrite=True)
|
|
|
|
def _start_expirationdate(self, attrs_d):
|
|
self.push("expired", 1)
|
|
|
|
def _end_expirationdate(self):
|
|
self._save("expired_parsed", _parse_date(self.pop("expired")), overwrite=True)
|
|
|
|
def _start_category(self, attrs_d):
|
|
term = attrs_d.get("term")
|
|
scheme = attrs_d.get("scheme", attrs_d.get("domain"))
|
|
label = attrs_d.get("label")
|
|
self._add_tag(term, scheme, label)
|
|
self.push("category", 1)
|
|
|
|
_start_keywords = _start_category
|
|
|
|
def _end_category(self):
|
|
value = self.pop("category")
|
|
if not value:
|
|
return
|
|
context = self._get_context()
|
|
tags = context["tags"]
|
|
if value and len(tags) and not tags[-1]["term"]:
|
|
tags[-1]["term"] = value
|
|
else:
|
|
self._add_tag(value, None, None)
|
|
|
|
_end_keywords = _end_category
|
|
|
|
def _start_cloud(self, attrs_d):
|
|
self._get_context()["cloud"] = FeedParserDict(attrs_d)
|
|
|
|
def _start_link(self, attrs_d):
|
|
attrs_d.setdefault("rel", "alternate")
|
|
if attrs_d["rel"] == "self":
|
|
attrs_d.setdefault("type", "application/atom+xml")
|
|
else:
|
|
attrs_d.setdefault("type", "text/html")
|
|
context = self._get_context()
|
|
attrs_d = self._enforce_href(attrs_d)
|
|
if "href" in attrs_d:
|
|
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
|
|
if (
|
|
attrs_d.get("rel") == "alternate"
|
|
and self.map_content_type(attrs_d.get("type")) in self.html_types
|
|
):
|
|
self.isentrylink = 1
|
|
expecting_text = self.infeed or self.inentry or self.insource
|
|
context.setdefault("links", [])
|
|
if not (self.inentry and self.inimage):
|
|
context["links"].append(FeedParserDict(attrs_d))
|
|
if "href" in attrs_d:
|
|
if self.isentrylink:
|
|
context["link"] = attrs_d["href"]
|
|
else:
|
|
self.push("link", expecting_text)
|
|
|
|
def _end_link(self):
|
|
self.pop("link")
|
|
self.isentrylink = 0
|
|
|
|
def _start_guid(self, attrs_d):
|
|
self.guidislink = attrs_d.get("ispermalink", "true") == "true"
|
|
self.push("id", 1)
|
|
|
|
_start_id = _start_guid
|
|
|
|
def _end_guid(self):
|
|
value = self.pop("id")
|
|
self._save("guidislink", self.guidislink and "link" not in self._get_context())
|
|
if self.guidislink:
|
|
# guid acts as link, but only if 'ispermalink' is not present or is 'true',
|
|
# and only if the item doesn't already have a link element
|
|
self._save("link", value)
|
|
|
|
_end_id = _end_guid
|
|
|
|
def _start_title(self, attrs_d):
|
|
if self.svgOK:
|
|
return self.unknown_starttag("title", list(attrs_d.items()))
|
|
self.push_content(
|
|
"title", attrs_d, "text/plain", self.infeed or self.inentry or self.insource
|
|
)
|
|
|
|
def _end_title(self):
|
|
if self.svgOK:
|
|
return
|
|
value = self.pop_content("title")
|
|
if not value:
|
|
return
|
|
self.title_depth = self.depth
|
|
|
|
def _start_description(self, attrs_d):
|
|
context = self._get_context()
|
|
if "summary" in context and not self.hasContent:
|
|
self._summaryKey = "content"
|
|
self._start_content(attrs_d)
|
|
else:
|
|
self.push_content(
|
|
"description",
|
|
attrs_d,
|
|
"text/html",
|
|
self.infeed or self.inentry or self.insource,
|
|
)
|
|
|
|
def _start_abstract(self, attrs_d):
|
|
self.push_content(
|
|
"description",
|
|
attrs_d,
|
|
"text/plain",
|
|
self.infeed or self.inentry or self.insource,
|
|
)
|
|
|
|
def _end_description(self):
|
|
if self._summaryKey == "content":
|
|
self._end_content()
|
|
else:
|
|
self.pop_content("description")
|
|
self._summaryKey = None
|
|
|
|
_end_abstract = _end_description
|
|
|
|
def _start_info(self, attrs_d):
|
|
self.push_content("info", attrs_d, "text/plain", 1)
|
|
|
|
_start_feedburner_browserfriendly = _start_info
|
|
|
|
def _end_info(self):
|
|
self.pop_content("info")
|
|
|
|
_end_feedburner_browserfriendly = _end_info
|
|
|
|
def _start_generator(self, attrs_d):
|
|
if attrs_d:
|
|
attrs_d = self._enforce_href(attrs_d)
|
|
if "href" in attrs_d:
|
|
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
|
|
self._get_context()["generator_detail"] = FeedParserDict(attrs_d)
|
|
self.push("generator", 1)
|
|
|
|
def _end_generator(self):
|
|
value = self.pop("generator")
|
|
context = self._get_context()
|
|
if "generator_detail" in context:
|
|
context["generator_detail"]["name"] = value
|
|
|
|
def _start_summary(self, attrs_d):
|
|
context = self._get_context()
|
|
if "summary" in context and not self.hasContent:
|
|
self._summaryKey = "content"
|
|
self._start_content(attrs_d)
|
|
else:
|
|
self._summaryKey = "summary"
|
|
self.push_content(self._summaryKey, attrs_d, "text/plain", 1)
|
|
|
|
def _end_summary(self):
|
|
if self._summaryKey == "content":
|
|
self._end_content()
|
|
else:
|
|
self.pop_content(self._summaryKey or "summary")
|
|
self._summaryKey = None
|
|
|
|
def _start_enclosure(self, attrs_d):
|
|
attrs_d = self._enforce_href(attrs_d)
|
|
context = self._get_context()
|
|
attrs_d["rel"] = "enclosure"
|
|
context.setdefault("links", []).append(FeedParserDict(attrs_d))
|
|
|
|
def _start_source(self, attrs_d):
|
|
if "url" in attrs_d:
|
|
# This means that we're processing a source element from an RSS 2.0 feed
|
|
self.sourcedata["href"] = attrs_d["url"]
|
|
self.push("source", 1)
|
|
self.insource = 1
|
|
self.title_depth = -1
|
|
|
|
def _end_source(self):
|
|
self.insource = 0
|
|
value = self.pop("source")
|
|
if value:
|
|
self.sourcedata["title"] = value
|
|
self._get_context()["source"] = copy.deepcopy(self.sourcedata)
|
|
self.sourcedata.clear()
|
|
|
|
def _start_content(self, attrs_d):
|
|
self.hasContent = 1
|
|
self.push_content("content", attrs_d, "text/plain", 1)
|
|
src = attrs_d.get("src")
|
|
if src:
|
|
self.contentparams["src"] = src
|
|
self.push("content", 1)
|
|
|
|
def _start_body(self, attrs_d):
|
|
self.push_content("content", attrs_d, "application/xhtml+xml", 1)
|
|
|
|
_start_xhtml_body = _start_body
|
|
|
|
def _start_content_encoded(self, attrs_d):
|
|
self.hasContent = 1
|
|
self.push_content("content", attrs_d, "text/html", 1)
|
|
|
|
_start_fullitem = _start_content_encoded
|
|
|
|
def _end_content(self):
|
|
copyToSummary = self.map_content_type(self.contentparams.get("type")) in (
|
|
{"text/plain"} | self.html_types
|
|
)
|
|
value = self.pop_content("content")
|
|
if copyToSummary:
|
|
self._save("summary", value)
|
|
|
|
_end_body = _end_content
|
|
_end_xhtml_body = _end_content
|
|
_end_content_encoded = _end_content
|
|
_end_fullitem = _end_content
|
|
|
|
def _start_newlocation(self, attrs_d):
|
|
self.push("newlocation", 1)
|
|
|
|
def _end_newlocation(self):
|
|
url = self.pop("newlocation")
|
|
context = self._get_context()
|
|
# don't set newlocation if the context isn't right
|
|
if context is not self.feeddata:
|
|
return
|
|
context["newlocation"] = make_safe_absolute_uri(self.baseuri, url.strip())
|