mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-22 02:33:37 +00:00
Merge branch 'feature/UpdateFeedparser' into dev
This commit is contained in:
commit
918340e474
5 changed files with 14 additions and 8 deletions
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
* Update Beautiful Soup 4.11.1 (r642) to 4.12.2
|
* Update Beautiful Soup 4.11.1 (r642) to 4.12.2
|
||||||
* Update certifi 2023.05.07 to 2023.07.22
|
* Update certifi 2023.05.07 to 2023.07.22
|
||||||
|
* Update feedparser 6.0.10 (859ac57) to 6.0.10 (9865dec)
|
||||||
* Update soupsieve 2.3.2.post1 (792d566) to 2.4.1 (2e66beb)
|
* Update soupsieve 2.3.2.post1 (792d566) to 2.4.1 (2e66beb)
|
||||||
* Update Tornado Web Server 6.3.2 (e3aa6c5) to 6.3.3 (e4d6984)
|
* Update Tornado Web Server 6.3.2 (e3aa6c5) to 6.3.3 (e4d6984)
|
||||||
* Fix regex that was not using py312 notation
|
* Fix regex that was not using py312 notation
|
||||||
|
|
|
@ -335,7 +335,7 @@ def convert_to_utf8(
|
||||||
|
|
||||||
|
|
||||||
# How much to read from a binary file in order to detect encoding.
|
# How much to read from a binary file in order to detect encoding.
|
||||||
# In inital tests, 4k was enough for ~160 mostly-English feeds;
|
# In initial tests, 4k was enough for ~160 mostly-English feeds;
|
||||||
# 64k seems like a safe margin.
|
# 64k seems like a safe margin.
|
||||||
CONVERT_FILE_PREFIX_LEN = 2**16
|
CONVERT_FILE_PREFIX_LEN = 2**16
|
||||||
|
|
||||||
|
|
|
@ -152,7 +152,7 @@ class BaseHTMLProcessor(sgmllib.SGMLParser):
|
||||||
:rtype: None
|
:rtype: None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
data = re.sub(r"<!((?!DOCTYPE|--|\[))", r"&lt;!\1", data, re.IGNORECASE)
|
data = re.sub(r"<!((?!DOCTYPE|--|\[))", r"&lt;!\1", data, flags=re.IGNORECASE)
|
||||||
data = re.sub(r"<([^<>\s]+?)\s*/>", self._shorttag_replace, data)
|
data = re.sub(r"<([^<>\s]+?)\s*/>", self._shorttag_replace, data)
|
||||||
data = data.replace("&#39;", "'")
|
data = data.replace("&#39;", "'")
|
||||||
data = data.replace("&#34;", '"')
|
data = data.replace("&#34;", '"')
|
||||||
|
|
|
@ -192,6 +192,7 @@ class XMLParserMixin(
|
||||||
self.incontributor = 0
|
self.incontributor = 0
|
||||||
self.inpublisher = 0
|
self.inpublisher = 0
|
||||||
self.insource = 0
|
self.insource = 0
|
||||||
|
self.isentrylink = 0
|
||||||
|
|
||||||
self.sourcedata = FeedParserDict()
|
self.sourcedata = FeedParserDict()
|
||||||
self.contentparams = FeedParserDict()
|
self.contentparams = FeedParserDict()
|
||||||
|
@ -233,7 +234,7 @@ class XMLParserMixin(
|
||||||
if isinstance(baseuri, bytes):
|
if isinstance(baseuri, bytes):
|
||||||
baseuri = baseuri.decode(self.encoding, "ignore")
|
baseuri = baseuri.decode(self.encoding, "ignore")
|
||||||
# ensure that self.baseuri is always an absolute URI that
|
# ensure that self.baseuri is always an absolute URI that
|
||||||
# uses a whitelisted URI scheme (e.g. not `javscript:`)
|
# uses a whitelisted URI scheme (e.g. not `javascript:`)
|
||||||
if self.baseuri:
|
if self.baseuri:
|
||||||
self.baseuri = make_safe_absolute_uri(self.baseuri, baseuri) or self.baseuri
|
self.baseuri = make_safe_absolute_uri(self.baseuri, baseuri) or self.baseuri
|
||||||
else:
|
else:
|
||||||
|
@ -624,6 +625,7 @@ class XMLParserMixin(
|
||||||
# unhandled character references. fix this special case.
|
# unhandled character references. fix this special case.
|
||||||
output = output.replace("&amp;", "&")
|
output = output.replace("&amp;", "&")
|
||||||
output = re.sub("&([A-Za-z0-9_]+);", r"&\g<1>", output)
|
output = re.sub("&([A-Za-z0-9_]+);", r"&\g<1>", output)
|
||||||
|
if self.isentrylink or not self.entries[-1].get(element):
|
||||||
self.entries[-1][element] = output
|
self.entries[-1][element] = output
|
||||||
if output:
|
if output:
|
||||||
self.entries[-1]["links"][-1]["href"] = output
|
self.entries[-1]["links"][-1]["href"] = output
|
||||||
|
|
|
@ -361,21 +361,24 @@ class Namespace:
|
||||||
attrs_d = self._enforce_href(attrs_d)
|
attrs_d = self._enforce_href(attrs_d)
|
||||||
if "href" in attrs_d:
|
if "href" in attrs_d:
|
||||||
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
|
attrs_d["href"] = self.resolve_uri(attrs_d["href"])
|
||||||
|
if (
|
||||||
|
attrs_d.get("rel") == "alternate"
|
||||||
|
and self.map_content_type(attrs_d.get("type")) in self.html_types
|
||||||
|
):
|
||||||
|
self.isentrylink = 1
|
||||||
expecting_text = self.infeed or self.inentry or self.insource
|
expecting_text = self.infeed or self.inentry or self.insource
|
||||||
context.setdefault("links", [])
|
context.setdefault("links", [])
|
||||||
if not (self.inentry and self.inimage):
|
if not (self.inentry and self.inimage):
|
||||||
context["links"].append(FeedParserDict(attrs_d))
|
context["links"].append(FeedParserDict(attrs_d))
|
||||||
if "href" in attrs_d:
|
if "href" in attrs_d:
|
||||||
if (
|
if self.isentrylink:
|
||||||
attrs_d.get("rel") == "alternate"
|
|
||||||
and self.map_content_type(attrs_d.get("type")) in self.html_types
|
|
||||||
):
|
|
||||||
context["link"] = attrs_d["href"]
|
context["link"] = attrs_d["href"]
|
||||||
else:
|
else:
|
||||||
self.push("link", expecting_text)
|
self.push("link", expecting_text)
|
||||||
|
|
||||||
def _end_link(self):
|
def _end_link(self):
|
||||||
self.pop("link")
|
self.pop("link")
|
||||||
|
self.isentrylink = 0
|
||||||
|
|
||||||
def _start_guid(self, attrs_d):
|
def _start_guid(self, attrs_d):
|
||||||
self.guidislink = attrs_d.get("ispermalink", "true") == "true"
|
self.guidislink = attrs_d.get("ispermalink", "true") == "true"
|
||||||
|
|
Loading…
Reference in a new issue