mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-01 00:43:37 +00:00
499 lines
16 KiB
Python
499 lines
16 KiB
Python
# Support for the Atom, RSS, RDF, and CDF feed formats
# Copyright 2010-2015 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
|
|
from __future__ import absolute_import, unicode_literals
|
|
|
|
import copy
|
|
|
|
from ..datetimes import registerDateHandler, _parse_date
|
|
from ..urls import _makeSafeAbsoluteURI
|
|
from ..util import FeedParserDict
|
|
|
|
class Namespace(object):
    """Support for the Atom, RSS, RDF, and CDF feed formats.

    The feed formats all share common elements, some of which have conflicting
    interpretations. For simplicity, all of the base feed format support is
    collected here.

    Handlers are named ``_start_<element>`` / ``_end_<element>``.
    NOTE(review): presumably the host parser dispatches to them by element
    name -- confirm against the caller.

    The handlers rely on state and helpers that are not defined in this class
    (for example ``push``/``pop``, ``pushContent``/``popContent``,
    ``_getContext``, ``_save``, ``_addTag``, ``resolveURI`` and the ``in*``
    flags). NOTE(review): presumably supplied by the parser class this is
    mixed into -- confirm.
    """

    supported_namespaces = {
        '': '',
        'http://backend.userland.com/rss': '',
        'http://blogs.law.harvard.edu/tech/rss': '',
        'http://purl.org/rss/1.0/': '',
        'http://my.netscape.com/rdf/simple/0.9/': '',
        'http://example.com/newformat#': '',
        'http://example.com/necho': '',
        'http://purl.org/echo/': '',
        'uri/of/echo/namespace#': '',
        'http://purl.org/pie/': '',
        'http://purl.org/atom/ns#': '',
        'http://www.w3.org/2005/Atom': '',
        'http://purl.org/rss/1.0/modules/rss091#': '',
    }

    def _start_rss(self, attrsD):
        """Set ``self.version`` from the element's ``version`` attribute.

        Known 0.9x versions map to specific identifiers, any ``2.*`` version
        maps to ``'rss20'`` and everything else to the generic ``'rss'``.
        An existing version that already starts with ``'rss'`` is kept.
        """
        versionmap = {
            '0.91': 'rss091u',
            '0.92': 'rss092',
            '0.93': 'rss093',
            '0.94': 'rss094',
        }
        # If we're here then this is an RSS feed.
        # If we don't have a version or have a version that starts with
        # something other than RSS then there's been a mistake. Correct it.
        if not self.version or not self.version.startswith('rss'):
            attr_version = attrsD.get('version', '')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            elif attr_version.startswith('2.'):
                self.version = 'rss20'
            else:
                self.version = 'rss'

    def _start_channel(self, attrsD):
        """Mark the parser as inside the feed and apply CDF attributes."""
        self.infeed = 1
        self._cdf_common(attrsD)

    def _cdf_common(self, attrsD):
        """Replay ``lastmod`` / ``href`` attributes as modified / link elements.

        Each attribute value is written into the top of ``self.elementstack``
        between the matching start and end handler calls.
        """
        if 'lastmod' in attrsD:
            self._start_modified({})
            self.elementstack[-1][-1] = attrsD['lastmod']
            self._end_modified()
        if 'href' in attrsD:
            self._start_link({})
            self.elementstack[-1][-1] = attrsD['href']
            self._end_link()

    def _start_feed(self, attrsD):
        """Mark the parser as inside the feed and set the Atom version.

        The version is only assigned when ``self.version`` is still empty;
        unknown or missing ``version`` attributes yield the generic ``'atom'``.
        """
        self.infeed = 1
        versionmap = {
            '0.1': 'atom01',
            '0.2': 'atom02',
            '0.3': 'atom03',
        }
        if not self.version:
            attr_version = attrsD.get('version')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            else:
                self.version = 'atom'

    def _end_channel(self):
        """Mark the parser as no longer inside the feed."""
        self.infeed = 0
    _end_feed = _end_channel

    def _start_image(self, attrsD):
        """Enter image state; outside an entry, ensure the context has 'image'."""
        context = self._getContext()
        if not self.inentry:
            context.setdefault('image', FeedParserDict())
        self.inimage = 1
        self.title_depth = -1
        self.push('image', 0)

    def _end_image(self):
        """Pop the 'image' element and leave image state."""
        self.pop('image')
        self.inimage = 0

    def _start_textinput(self, attrsD):
        """Enter textinput state and ensure the context has 'textinput'."""
        context = self._getContext()
        context.setdefault('textinput', FeedParserDict())
        self.intextinput = 1
        self.title_depth = -1
        self.push('textinput', 0)
    _start_textInput = _start_textinput

    def _end_textinput(self):
        """Pop the 'textinput' element and leave textinput state."""
        self.pop('textinput')
        self.intextinput = 0
    _end_textInput = _end_textinput

    def _start_author(self, attrsD):
        """Enter author state and append a fresh author dict to the context."""
        self.inauthor = 1
        self.push('author', 1)
        # Append a new FeedParserDict when expecting an author
        context = self._getContext()
        context.setdefault('authors', [])
        context['authors'].append(FeedParserDict())
    _start_managingeditor = _start_author

    def _end_author(self):
        """Pop the 'author' element, leave author state and sync author detail."""
        self.pop('author')
        self.inauthor = 0
        self._sync_author_detail()
    _end_managingeditor = _end_author

    def _start_contributor(self, attrsD):
        """Enter contributor state and append a fresh contributor dict."""
        self.incontributor = 1
        context = self._getContext()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('contributor', 0)

    def _end_contributor(self):
        """Pop the 'contributor' element and leave contributor state."""
        self.pop('contributor')
        self.incontributor = 0

    def _start_name(self, attrsD):
        """Push a 'name' element."""
        self.push('name', 0)

    def _end_name(self):
        """Store the popped name on whichever person/textinput is active.

        Precedence is publisher, author, contributor, then textinput.
        """
        value = self.pop('name')
        if self.inpublisher:
            self._save_author('name', value, 'publisher')
        elif self.inauthor:
            self._save_author('name', value)
        elif self.incontributor:
            self._save_contributor('name', value)
        elif self.intextinput:
            context = self._getContext()
            context['name'] = value

    def _start_width(self, attrsD):
        """Push a 'width' element."""
        self.push('width', 0)

    def _end_width(self):
        """Store the popped width as an int on the context while in an image.

        A value that cannot be converted to an integer is stored as 0.
        """
        value = self.pop('width')
        try:
            value = int(value)
        except (TypeError, ValueError):
            # TypeError covers a non-string result from pop (e.g. None);
            # ValueError covers non-numeric text.
            value = 0
        if self.inimage:
            context = self._getContext()
            context['width'] = value

    def _start_height(self, attrsD):
        """Push a 'height' element."""
        self.push('height', 0)

    def _end_height(self):
        """Store the popped height as an int on the context while in an image.

        A value that cannot be converted to an integer is stored as 0.
        """
        value = self.pop('height')
        try:
            value = int(value)
        except (TypeError, ValueError):
            # TypeError covers a non-string result from pop (e.g. None);
            # ValueError covers non-numeric text.
            value = 0
        if self.inimage:
            context = self._getContext()
            context['height'] = value

    def _start_url(self, attrsD):
        """Push an 'href' element (url, homepage and uri all map to href)."""
        self.push('href', 1)
    _start_homepage = _start_url
    _start_uri = _start_url

    def _end_url(self):
        """Store the popped href on the active author or contributor."""
        value = self.pop('href')
        if self.inauthor:
            self._save_author('href', value)
        elif self.incontributor:
            self._save_contributor('href', value)
    _end_homepage = _end_url
    _end_uri = _end_url

    def _start_email(self, attrsD):
        """Push an 'email' element."""
        self.push('email', 0)

    def _end_email(self):
        """Store the popped email on the active publisher/author/contributor."""
        value = self.pop('email')
        if self.inpublisher:
            self._save_author('email', value, 'publisher')
        elif self.inauthor:
            self._save_author('email', value)
        elif self.incontributor:
            self._save_contributor('email', value)

    def _start_subtitle(self, attrsD):
        """Push 'subtitle' content with a default type of text/plain."""
        self.pushContent('subtitle', attrsD, 'text/plain', 1)
    _start_tagline = _start_subtitle

    def _end_subtitle(self):
        """Pop the 'subtitle' content."""
        self.popContent('subtitle')
    _end_tagline = _end_subtitle

    def _start_rights(self, attrsD):
        """Push 'rights' content with a default type of text/plain."""
        self.pushContent('rights', attrsD, 'text/plain', 1)
    _start_copyright = _start_rights

    def _end_rights(self):
        """Pop the 'rights' content."""
        self.popContent('rights')
    _end_copyright = _end_rights

    def _start_item(self, attrsD):
        """Start a new entry and reset the per-entry parser state.

        An ``rdf:about`` attribute, when present, becomes the entry id; CDF
        attributes are then applied.
        """
        self.entries.append(FeedParserDict())
        self.push('item', 0)
        self.inentry = 1
        self.guidislink = 0
        self.title_depth = -1
        about = self._getAttribute(attrsD, 'rdf:about')
        if about:
            context = self._getContext()
            context['id'] = about
        self._cdf_common(attrsD)
    _start_entry = _start_item

    def _end_item(self):
        """Pop the 'item' element and leave entry state."""
        self.pop('item')
        self.inentry = 0
    _end_entry = _end_item

    def _start_language(self, attrsD):
        """Push a 'language' element."""
        self.push('language', 1)

    def _end_language(self):
        """Store the popped language in ``self.lang``."""
        self.lang = self.pop('language')

    def _start_webmaster(self, attrsD):
        """Push a 'publisher' element for the webmaster."""
        self.push('publisher', 1)

    def _end_webmaster(self):
        """Pop the 'publisher' element and sync the publisher detail."""
        self.pop('publisher')
        self._sync_author_detail('publisher')

    def _start_published(self, attrsD):
        """Push a 'published' element."""
        self.push('published', 1)
    _start_issued = _start_published
    _start_pubdate = _start_published

    def _end_published(self):
        """Pop 'published' and save its parsed date as 'published_parsed'."""
        value = self.pop('published')
        self._save('published_parsed', _parse_date(value), overwrite=True)
    _end_issued = _end_published
    _end_pubdate = _end_published

    def _start_updated(self, attrsD):
        """Push an 'updated' element."""
        self.push('updated', 1)
    _start_modified = _start_updated
    _start_lastbuilddate = _start_updated

    def _end_updated(self):
        """Pop 'updated' and save its parsed date as 'updated_parsed'."""
        value = self.pop('updated')
        parsed_value = _parse_date(value)
        self._save('updated_parsed', parsed_value, overwrite=True)
    _end_modified = _end_updated
    _end_lastbuilddate = _end_updated

    def _start_created(self, attrsD):
        """Push a 'created' element."""
        self.push('created', 1)

    def _end_created(self):
        """Pop 'created' and save its parsed date as 'created_parsed'."""
        value = self.pop('created')
        self._save('created_parsed', _parse_date(value), overwrite=True)

    def _start_expirationdate(self, attrsD):
        """Push an 'expired' element."""
        self.push('expired', 1)

    def _end_expirationdate(self):
        """Pop 'expired' and save its parsed date as 'expired_parsed'."""
        self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)

    def _start_category(self, attrsD):
        """Add a tag from the element's attributes and push 'category'.

        The scheme comes from ``scheme``, falling back to ``domain``.
        """
        term = attrsD.get('term')
        scheme = attrsD.get('scheme', attrsD.get('domain'))
        label = attrsD.get('label')
        self._addTag(term, scheme, label)
        self.push('category', 1)
    _start_keywords = _start_category

    def _end_category(self):
        """Use the element text as a tag term.

        The text fills in the most recent tag when that tag has no term yet;
        otherwise it is added as a new tag. Empty text is ignored.
        """
        value = self.pop('category')
        if not value:
            return
        context = self._getContext()
        # NOTE(review): assumes 'tags' was already created on the context
        # (presumably by _addTag in _start_category) -- confirm.
        tags = context['tags']
        if tags and not tags[-1]['term']:
            tags[-1]['term'] = value
        else:
            self._addTag(value, None, None)
    _end_keywords = _end_category

    def _start_cloud(self, attrsD):
        """Store the element's attributes as the context's 'cloud'."""
        self._getContext()['cloud'] = FeedParserDict(attrsD)

    def _start_link(self, attrsD):
        """Record a link element on the current context.

        ``rel`` defaults to 'alternate'; ``type`` defaults to
        'application/atom+xml' for rel='self' and 'text/html' otherwise.
        Note that these defaults are written into the caller's ``attrsD``.
        The link is appended to 'links' unless inside an image within an
        entry. When there is no href, the element is pushed so that its text
        can be collected instead.
        """
        attrsD.setdefault('rel', 'alternate')
        if attrsD['rel'] == 'self':
            attrsD.setdefault('type', 'application/atom+xml')
        else:
            attrsD.setdefault('type', 'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', self.infeed or self.inentry or self.insource)

    def _end_link(self):
        """Pop the 'link' element."""
        self.pop('link')

    def _start_guid(self, attrsD):
        """Push an 'id' element and note whether the guid is a permalink.

        A missing ``ispermalink`` attribute is treated as 'true'.
        """
        self.guidislink = (attrsD.get('ispermalink', 'true') == 'true')
        self.push('id', 1)
    _start_id = _start_guid

    def _end_guid(self):
        """Pop the id and, for permalink guids, save it as the link too."""
        value = self.pop('id')
        self._save('guidislink', self.guidislink and 'link' not in self._getContext())
        if self.guidislink:
            # guid acts as link, but only if 'ispermalink' is not present or is 'true',
            # and only if the item doesn't already have a link element
            self._save('link', value)
    _end_id = _end_guid

    def _start_title(self, attrsD):
        """Push 'title' content, or pass the tag through when ``svgOK`` is set."""
        if self.svgOK:
            return self.unknown_starttag('title', list(attrsD.items()))
        self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)

    def _end_title(self):
        """Pop 'title' content and remember the depth of a non-empty title."""
        if self.svgOK:
            return
        value = self.popContent('title')
        if not value:
            return
        self.title_depth = self.depth

    def _start_description(self, attrsD):
        """Push 'description', or treat it as content if a summary exists."""
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)

    def _start_abstract(self, attrsD):
        """Push 'description' content with a default type of text/plain."""
        self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)

    def _end_description(self):
        """Close the description (or content) and reset ``_summaryKey``."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.popContent('description')
        self._summaryKey = None
    _end_abstract = _end_description

    def _start_info(self, attrsD):
        """Push 'info' content with a default type of text/plain."""
        self.pushContent('info', attrsD, 'text/plain', 1)
    _start_feedburner_browserfriendly = _start_info

    def _end_info(self):
        """Pop the 'info' content."""
        self.popContent('info')
    _end_feedburner_browserfriendly = _end_info

    def _start_generator(self, attrsD):
        """Store the generator attributes as 'generator_detail' and push it.

        Any href among the attributes is resolved with ``resolveURI`` first.
        """
        if attrsD:
            attrsD = self._itsAnHrefDamnIt(attrsD)
            if 'href' in attrsD:
                attrsD['href'] = self.resolveURI(attrsD['href'])
        self._getContext()['generator_detail'] = FeedParserDict(attrsD)
        self.push('generator', 1)

    def _end_generator(self):
        """Store the popped generator text as the detail's 'name'."""
        value = self.pop('generator')
        context = self._getContext()
        if 'generator_detail' in context:
            context['generator_detail']['name'] = value

    def _start_summary(self, attrsD):
        """Push 'summary', or treat it as content if a summary exists."""
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self._summaryKey = 'summary'
            self.pushContent(self._summaryKey, attrsD, 'text/plain', 1)

    def _end_summary(self):
        """Close the summary (or content) and reset ``_summaryKey``."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.popContent(self._summaryKey or 'summary')
        self._summaryKey = None

    def _start_enclosure(self, attrsD):
        """Append the enclosure to the context's links with rel='enclosure'."""
        attrsD = self._itsAnHrefDamnIt(attrsD)
        context = self._getContext()
        attrsD['rel'] = 'enclosure'
        context.setdefault('links', []).append(FeedParserDict(attrsD))

    def _start_source(self, attrsD):
        """Enter source state, recording a ``url`` attribute as the href."""
        if 'url' in attrsD:
            # This means that we're processing a source element from an RSS 2.0 feed
            self.sourcedata['href'] = attrsD['url']
        self.push('source', 1)
        self.insource = 1
        self.title_depth = -1

    def _end_source(self):
        """Leave source state and store a copy of the source data.

        Non-empty element text becomes the source title. ``self.sourcedata``
        is deep-copied onto the context and then cleared for reuse.
        """
        self.insource = 0
        value = self.pop('source')
        if value:
            self.sourcedata['title'] = value
        self._getContext()['source'] = copy.deepcopy(self.sourcedata)
        self.sourcedata.clear()

    def _start_content(self, attrsD):
        """Push 'content' (default text/plain), recording any ``src``."""
        self.pushContent('content', attrsD, 'text/plain', 1)
        src = attrsD.get('src')
        if src:
            self.contentparams['src'] = src
        # NOTE(review): 'content' is pushed a second time here, exactly as in
        # the original code; left unchanged -- confirm this is intended.
        self.push('content', 1)

    def _start_body(self, attrsD):
        """Push 'content' with a default type of application/xhtml+xml."""
        self.pushContent('content', attrsD, 'application/xhtml+xml', 1)
    _start_xhtml_body = _start_body

    def _start_content_encoded(self, attrsD):
        """Push 'content' with a default type of text/html."""
        self.pushContent('content', attrsD, 'text/html', 1)
    _start_fullitem = _start_content_encoded

    def _end_content(self):
        """Pop 'content' and save plain-text or HTML content as the summary."""
        # Decide before popping, while contentparams still describes this element.
        copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
        value = self.popContent('content')
        if copyToSummary:
            self._save('summary', value)

    _end_body = _end_content
    _end_xhtml_body = _end_content
    _end_content_encoded = _end_content
    _end_fullitem = _end_content

    def _start_newlocation(self, attrsD):
        """Push a 'newlocation' element."""
        self.push('newlocation', 1)

    def _end_newlocation(self):
        """Store the new location as an absolute URI on the feed-level data."""
        url = self.pop('newlocation')
        context = self._getContext()
        # don't set newlocation if the context isn't right
        if context is not self.feeddata:
            return
        context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip())