SickGear/lib/feedparser/parsers/json.py

# The JSON feed parser
# Copyright 2017 Beat Bolli
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import json

from ..datetimes import _parse_date
from ..sanitizer import sanitize_html
from ..util import FeedParserDict


class JSONParser:
    """Parse a JSON Feed document into feed-level data and a list of entries.

    After :meth:`feed` has run, ``version`` holds the short version tag from
    ``VERSIONS``, ``feeddata`` holds the feed-level fields and ``entries``
    holds one ``FeedParserDict`` per element of the document's ``items``.
    """

    # Maps the document's "version" URL to the short tag stored in self.version.
    VERSIONS = {
        "https://jsonfeed.org/version/1": "json1",
        "https://jsonfeed.org/version/1.1": "json11",
    }
    # (source key in the JSON document, destination key in self.feeddata).
    FEED_FIELDS = (
        ("title", "title"),
        ("icon", "image"),
        ("home_page_url", "link"),
        ("description", "description"),
    )
    # (source key in a JSON item, destination key in the resulting entry).
    ITEM_FIELDS = (
        ("title", "title"),
        ("id", "guid"),
        ("url", "link"),
        ("summary", "summary"),
        ("external_url", "source"),
    )

    def __init__(self, baseuri=None, baselang=None, encoding=None):
        """Set up an empty parser.

        :param baseuri: stored as ``self.baseuri``; falsy values become ``""``.
        :param baselang: stored as ``self.lang``; falsy values become ``None``.
        :param encoding: stored as ``self.encoding``; falsy values become
            ``"utf-8"``. It is passed to ``sanitize_html`` for HTML content.
        """
        self.baseuri = baseuri or ""
        self.lang = baselang or None
        self.encoding = encoding or "utf-8"  # character encoding

        self.version = None
        self.feeddata = FeedParserDict()
        self.namespacesInUse = []
        self.entries = []

    def feed(self, file):
        """Load a JSON Feed from ``file`` and populate the parser's state.

        :param file: object handed to ``json.load`` (a readable file-like).
        :raises ValueError: if the document's ``version`` is not one of the
            keys of ``VERSIONS`` (a missing ``version`` is treated as ``""``).
        :raises KeyError: if the document has no ``items`` key.
        """
        data = json.load(file)

        v = data.get("version", "")
        try:
            self.version = self.VERSIONS[v]
        except KeyError:
            raise ValueError("Unrecognized JSONFeed version '%s'" % v)

        for src, dst in self.FEED_FIELDS:
            if src in data:
                self.feeddata[dst] = data[src]
        if "author" in data:
            self.parse_author(data["author"], self.feeddata)
        # TODO: hubs; expired has no RSS equivalent

        self.entries = [self.parse_entry(e) for e in data["items"]]

    def parse_entry(self, e):
        """Convert one JSON Feed item into an entry dict.

        :param e: mapping for a single element of the document's ``items``.
        :returns: a ``FeedParserDict`` holding the fields found in ``e``.
        """
        entry = FeedParserDict()
        for src, dst in self.ITEM_FIELDS:
            if src in e:
                entry[dst] = e[src]

        # Plain text wins when both content keys are present.
        if "content_text" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = e["content_text"]
            c["type"] = "text"
        elif "content_html" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = sanitize_html(
                e["content_html"], self.encoding, "application/json"
            )
            c["type"] = "html"

        if "date_published" in e:
            entry["published"] = e["date_published"]
            entry["published_parsed"] = _parse_date(e["date_published"])
        # JSON Feed names this key "date_modified". Test for the same key that
        # is read below, so a present value is never skipped and an absent one
        # never raises KeyError.
        if "date_modified" in e:
            entry["updated"] = e["date_modified"]
            entry["updated_parsed"] = _parse_date(e["date_modified"])

        if "tags" in e:
            entry["category"] = e["tags"]

        if "author" in e:
            self.parse_author(e["author"], entry)

        if "attachments" in e:
            entry["enclosures"] = [self.parse_attachment(a) for a in e["attachments"]]

        return entry

    @staticmethod
    def parse_author(parent, dest):
        """Copy author information from ``parent`` into ``dest``.

        ``dest["author_detail"]`` is always created. ``name`` is stored both
        there and as ``dest["author"]``. A ``url`` starting with ``mailto:``
        is stored as ``email`` with that prefix removed; any other ``url`` is
        stored as ``href``.

        :param parent: mapping describing the author.
        :param dest: mapping that receives ``author`` / ``author_detail``.
        """
        dest["author_detail"] = detail = FeedParserDict()
        if "name" in parent:
            dest["author"] = detail["name"] = parent["name"]
        if "url" in parent:
            if parent["url"].startswith("mailto:"):
                # 7 == len("mailto:"); keep only the address part.
                detail["email"] = parent["url"][7:]
            else:
                detail["href"] = parent["url"]

    @staticmethod
    def parse_attachment(attachment):
        """Convert one JSON Feed attachment into an enclosure dict.

        :param attachment: mapping with ``url`` and ``mime_type`` keys and an
            optional ``size_in_bytes`` key.
        :returns: a ``FeedParserDict`` with ``href``, ``type`` and, when the
            size is given, ``length``.
        :raises KeyError: if ``url`` or ``mime_type`` is missing.
        """
        enc = FeedParserDict()
        enc["href"] = attachment["url"]
        enc["type"] = attachment["mime_type"]
        if "size_in_bytes" in attachment:
            enc["length"] = attachment["size_in_bytes"]
        return enc
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`# The JSON feed parser`
			`# Copyright 2017 Beat Bolli`
			`# All rights reserved.`
			`#`
			`# This file is a part of feedparser.`
			`#`
			`# Redistribution and use in source and binary forms, with or without modification,`
			`# are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions and the following disclaimer in the documentation`
			`# and/or other materials provided with the distribution.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`

			`import json`

			`from ..datetimes import _parse_date`
			`from ..sanitizer import sanitize_html`
			`from ..util import FeedParserDict`


			`class JSONParser:`
			`VERSIONS = {`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`"https://jsonfeed.org/version/1": "json1",`
			`"https://jsonfeed.org/version/1.1": "json11",`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`}`
			`FEED_FIELDS = (`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`("title", "title"),`
			`("icon", "image"),`
			`("home_page_url", "link"),`
			`("description", "description"),`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`)`
			`ITEM_FIELDS = (`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`("title", "title"),`
			`("id", "guid"),`
			`("url", "link"),`
			`("summary", "summary"),`
			`("external_url", "source"),`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`)`

			`def __init__(self, baseuri=None, baselang=None, encoding=None):`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`self.baseuri = baseuri or ""`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`self.lang = baselang or None`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`self.encoding = encoding or "utf-8" # character encoding`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00
			`self.version = None`
			`self.feeddata = FeedParserDict()`
			`self.namespacesInUse = []`
			`self.entries = []`

Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`def feed(self, file):`
			`data = json.load(file)`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`v = data.get("version", "")`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`try:`
			`self.version = self.VERSIONS[v]`
			`except KeyError:`
			`raise ValueError("Unrecognized JSONFeed version '%s'" % v)`

			`for src, dst in self.FEED_FIELDS:`
			`if src in data:`
			`self.feeddata[dst] = data[src]`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`if "author" in data:`
			`self.parse_author(data["author"], self.feeddata)`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`# TODO: hubs; expired has no RSS equivalent`

Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`self.entries = [self.parse_entry(e) for e in data["items"]]`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00
			`def parse_entry(self, e):`
			`entry = FeedParserDict()`
			`for src, dst in self.ITEM_FIELDS:`
			`if src in e:`
			`entry[dst] = e[src]`

Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`if "content_text" in e:`
			`entry["content"] = c = FeedParserDict()`
			`c["value"] = e["content_text"]`
			`c["type"] = "text"`
			`elif "content_html" in e:`
			`entry["content"] = c = FeedParserDict()`
			`c["value"] = sanitize_html(`
			`e["content_html"], self.encoding, "application/json"`
			`)`
			`c["type"] = "html"`

			`if "date_published" in e:`
			`entry["published"] = e["date_published"]`
			`entry["published_parsed"] = _parse_date(e["date_published"])`
			`if "date_updated" in e:`
			`entry["updated"] = e["date_modified"]`
			`entry["updated_parsed"] = _parse_date(e["date_modified"])`

			`if "tags" in e:`
			`entry["category"] = e["tags"]`

			`if "author" in e:`
			`self.parse_author(e["author"], entry)`

			`if "attachments" in e:`
			`entry["enclosures"] = [self.parse_attachment(a) for a in e["attachments"]]`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00
			`return entry`

			`@staticmethod`
			`def parse_author(parent, dest):`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`dest["author_detail"] = detail = FeedParserDict()`
			`if "name" in parent:`
			`dest["author"] = detail["name"] = parent["name"]`
			`if "url" in parent:`
			`if parent["url"].startswith("mailto:"):`
			`detail["email"] = parent["url"][7:]`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`else:`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`detail["href"] = parent["url"]`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00
			`@staticmethod`
			`def parse_attachment(attachment):`
			`enc = FeedParserDict()`
Update feedparser 6.0.10 (6d032b8) → 6.0.10 (859ac57). 2023-04-13 07:04:58 +00:00			`enc["href"] = attachment["url"]`
			`enc["type"] = attachment["mime_type"]`
			`if "size_in_bytes" in attachment:`
			`enc["length"] = attachment["size_in_bytes"]`
Update feedparser 6.0.1 (98d189fa) → 6.0.10 (5fcb3ae). 2023-01-13 20:16:45 +00:00			`return enc`