Update IMDb 5.1dev20160106 to 5.1 (r907).

This commit is contained in:
JackDandy 2017-01-27 14:25:20 +00:00
parent 25566998ca
commit 33ea1af76c
14 changed files with 1378 additions and 53 deletions

View file

@ -14,6 +14,7 @@
* Update Certifi 2015.11.20.1 (385476b) to 2017.01.23 (9f9dc30)
* Update feedparser library 5.2.0 (8c62940) to 5.2.1 (f1dd1bb)
* Update html5lib 0.99999999/1.0b9 (46dae3d) to (1a28d72)
* Update IMDb 5.1dev20160106 to 5.1 (r907)
[develop changelog]

View file

@ -6,7 +6,7 @@ a person from the IMDb database.
It can fetch data through different media (e.g.: the IMDb web pages,
a SQL database, etc.)
Copyright 2004-2015 Davide Alberani <da@erlug.linux.it>
Copyright 2004-2016 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
'available_access_systems']
__version__ = VERSION = '5.1dev20160106'
__version__ = VERSION = '5.1'
# Import compatibility module (importing it is enough).
import _compat

View file

@ -1,13 +1,13 @@
# Gettext message file for imdbpy
# Translators:
# RainDropR <rajaa@hilltx.com>, 2013
# Rajaa Jalil <rajaa@hilltx.com>, 2013
msgid ""
msgstr ""
"Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2013-11-20 11:07+0000\n"
"Last-Translator: RainDropR <rajaa@hilltx.com>\n"
"Language-Team: Arabic (http://www.transifex.com/projects/p/imdbpy/language/ar/)\n"
"PO-Revision-Date: 2016-03-28 20:40+0000\n"
"Last-Translator: Rajaa Jalil <rajaa@hilltx.com>\n"
"Language-Team: Arabic (http://www.transifex.com/davide_alberani/imdbpy/language/ar/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

View file

@ -1,13 +1,13 @@
# Gettext message file for imdbpy
# Translators:
# Niko Kovach <crashdeburn@gmail.com>, 2014
# Atanas Kovachki <crashdeburn@gmail.com>, 2014
msgid ""
msgstr ""
"Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2014-03-16 10:46+0000\n"
"Last-Translator: Niko Kovach <crashdeburn@gmail.com>\n"
"Language-Team: Bulgarian (http://www.transifex.com/projects/p/imdbpy/language/bg/)\n"
"PO-Revision-Date: 2016-03-28 20:40+0000\n"
"Last-Translator: Atanas Kovachki <crashdeburn@gmail.com>\n"
"Language-Team: Bulgarian (http://www.transifex.com/davide_alberani/imdbpy/language/bg/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

View file

@ -6,9 +6,9 @@ msgid ""
msgstr ""
"Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2014-10-21 15:24+0000\n"
"PO-Revision-Date: 2016-03-28 20:40+0000\n"
"Last-Translator: Raphael\n"
"Language-Team: German (http://www.transifex.com/projects/p/imdbpy/language/de/)\n"
"Language-Team: German (http://www.transifex.com/davide_alberani/imdbpy/language/de/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

View file

@ -1,14 +1,13 @@
# Gettext message file for imdbpy
# Translators:
# strel <strelnic@gmail.com>, 2013
# strel, 2013
msgid ""
msgstr ""
"Project-Id-Version: IMDbPY\n"
"Report-Msgid-Bugs-To: http://sourceforge.net/tracker/?group_id=105998&atid=642794\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2013-03-11 17:18+0000\n"
"Last-Translator: strel <strelnic@gmail.com>\n"
"Language-Team: Spanish (http://www.transifex.com/projects/p/imdbpy/language/es/)\n"
"PO-Revision-Date: 2016-03-28 20:40+0000\n"
"Last-Translator: strel\n"
"Language-Team: Spanish (http://www.transifex.com/davide_alberani/imdbpy/language/es/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

View file

@ -1,15 +1,15 @@
# Gettext message file for imdbpy
# Translators:
# lukophron, 2014
# Rajaa Gutknecht <rajaa@hilltx.com>, 2013
# lukophron, 2014-2016
# Rajaa Jalil <rajaa@hilltx.com>, 2013
# lkppo, 2012
msgid ""
msgstr ""
"Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2014-10-08 02:52+0000\n"
"PO-Revision-Date: 2016-03-20 05:27+0000\n"
"Last-Translator: lukophron\n"
"Language-Team: French (http://www.transifex.com/projects/p/imdbpy/language/fr/)\n"
"Language-Team: French (http://www.transifex.com/davide_alberani/imdbpy/language/fr/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
@ -38,7 +38,7 @@ msgstr "information-additionnelle"
# Default: Admissions
msgid "admissions"
msgstr "admissions"
msgstr "entrées"
# Default: Agent address
msgid "agent-address"
@ -46,15 +46,15 @@ msgstr ""
# Default: Airing
msgid "airing"
msgstr ""
msgstr "en-diffusion"
# Default: Akas
msgid "akas"
msgstr ""
msgstr "alias"
# Default: Akas from release info
msgid "akas-from-release-info"
msgstr ""
msgstr "alias-depuis-info-sortie"
# Default: All products
msgid "all-products"
@ -70,7 +70,7 @@ msgstr ""
# Default: Amazon reviews
msgid "amazon-reviews"
msgstr ""
msgstr "critiques-amazon"
# Default: Analog left
msgid "analog-left"
@ -82,7 +82,7 @@ msgstr ""
# Default: Animation department
msgid "animation-department"
msgstr ""
msgstr "département-animation"
# Default: Archive footage
msgid "archive-footage"
@ -178,7 +178,7 @@ msgstr "livre"
# Default: Books
msgid "books"
msgstr "vres"
msgstr "livres"
# Default: Bottom 100 rank
msgid "bottom-100-rank"

File diff suppressed because it is too large. Load diff

View file

@ -9,7 +9,7 @@ pages would be:
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
...and so on...
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
Copyright 2004-2016 Davide Alberani <da@erlug.linux.it>
2008 H. Turgut Uyar <uyar@tekir.org>
This program is free software; you can redistribute it and/or modify
@ -207,6 +207,11 @@ class DOMHTMLMovieParser(DOMParserBase):
multi=True,
path="./text()")),
Extractor(label='myrating',
path="//span[@id='voteuser']",
attrs=Attribute(key='myrating',
path=".//text()")),
Extractor(label='h5sections',
path="//div[@class='info']/h5/..",
attrs=[
@ -554,10 +559,10 @@ class DOMHTMLPlotParser(DOMParserBase):
# Note that IMDb recently started putting the author's email only in
# the link, which we are not collecting here.
extractors = [Extractor(label='plot',
path="//ul[@class='zebraList']//p",
path="//p[@class='plotSummary']",
attrs=Attribute(key='plot',
multi=True,
path={'plot': './text()[1]',
path={'plot': './/text()',
'author': './span/em/a/text()'},
postprocess=_process_plotsummary))]
@ -785,17 +790,20 @@ class DOMHTMLTriviaParser(DOMParserBase):
class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
kind = 'soundtrack'
preprocessors = [
('<br>', '\n')
]
class DOMHTMLSoundtrackParser(DOMParserBase):
_defGetRefs = True
preprocessors = [('<br />', '\n'), ('<br>', '\n')]
extractors = [Extractor(label='soundtrack',
path="//div[@class='list']//div",
attrs=Attribute(key='soundtrack',
multi=True,
path=".//text()",
postprocess=lambda x: x.strip()))]
def postprocess_data(self, data):
if 'alternate versions' in data:
if 'soundtrack' in data:
nd = []
for x in data['alternate versions']:
for x in data['soundtrack']:
ds = x.split('\n')
title = ds[0]
if title[0] == '"' and title[-1] == '"':
@ -1291,16 +1299,17 @@ class DOMHTMLTechParser(DOMParserBase):
result = tparser.parse(technical_html_string)
"""
kind = 'tech'
re_space = re.compile(r'\s+')
extractors = [Extractor(label='tech',
group="//h5",
group="//table//tr/td[@class='label']",
group_key="./text()",
group_key_normalize=lambda x: x.lower(),
path="./following-sibling::div[1]",
group_key_normalize=lambda x: x.lower().strip(),
path=".",
attrs=Attribute(key=None,
path=".//text()",
path="..//td[2]//text()",
postprocess=lambda x: [t.strip()
for t in x.split('\n') if t.strip()]))]
for t in x.split(':::') if t.strip()]))]
preprocessors = [
(re.compile('(<h5>.*?</h5>)', re.I), r'</div>\1<div class="_imdbpy">'),
@ -1310,12 +1319,15 @@ class DOMHTMLTechParser(DOMParserBase):
(re.compile('<p>(.*?)</p>', re.I), r'\1<br/>'),
(re.compile('(</td><td valign="top">)', re.I), r'\1::'),
(re.compile('(</tr><tr>)', re.I), r'\n\1'),
(re.compile('<span class="ghost">\|</span>', re.I), r':::'),
(re.compile('<br/?>', re.I), r':::'),
# this is for splitting individual entries
(re.compile('<br/>', re.I), r'\n'),
]
def postprocess_data(self, data):
for key in data:
data[key] = filter(lambda x: x != '|', data[key])
data[key] = [self.re_space.sub(' ', x).strip() for x in data[key]]
data[key] = filter(None, data[key])
if self.kind in ('literature', 'business', 'contacts') and data:
if 'screenplay/teleplay' in data:
@ -1907,7 +1919,7 @@ _OBJECTS = {
'goofs_parser': ((DOMHTMLGoofsParser,), None),
'alternateversions_parser': ((DOMHTMLAlternateVersionsParser,), None),
'trivia_parser': ((DOMHTMLTriviaParser,), None),
'soundtrack_parser': ((DOMHTMLSoundtrackParser,), {'kind': 'soundtrack'}),
'soundtrack_parser': ((DOMHTMLSoundtrackParser,), None),
'quotes_parser': ((DOMHTMLQuotesParser,), None),
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
'ratings_parser': ((DOMHTMLRatingsParser,), None),

View file

@ -396,7 +396,7 @@ class DOMHTMLResumeParser(DOMParserBase):
]
def postprocess_data(self, data):
for key in data.keys():
if data[key] == '':
del data[key]

View file

@ -47,7 +47,7 @@ class DOMHTMLSearchKeywordParser(DOMHTMLSearchMovieParser):
the one given."""
_BaseParser = DOMBasicKeywordParser
_notDirectHitTitle = '<title>imdb keyword'
_notDirectHitTitle = '<title>find - imdb'
_titleBuilder = lambda self, x: x
_linkPrefix = '/keyword/'
@ -56,7 +56,7 @@ class DOMHTMLSearchKeywordParser(DOMHTMLSearchMovieParser):
path="./a[1]/text()"
)]
extractors = [Extractor(label='search',
path="//td[3]/a[starts-with(@href, " \
path="//a[starts-with(@href, " \
"'/keyword/')]/..",
attrs=_attrs)]
@ -80,7 +80,7 @@ class DOMHTMLSearchMovieKeywordParser(DOMHTMLSearchMovieParser):
"new search system" is used, searching for movies with the given
keyword."""
_notDirectHitTitle = '<title>best'
_notDirectHitTitle = '<title>most'
_attrs = [Attribute(key='data',
multi=True,
@ -98,7 +98,7 @@ class DOMHTMLSearchMovieKeywordParser(DOMHTMLSearchMovieParser):
))]
extractors = [Extractor(label='search',
path="//td[3]/a[starts-with(@href, " \
path="//div[@class='lister-list']//h3//a[starts-with(@href, " \
"'/title/tt')]/..",
attrs=_attrs)]

View file

@ -118,6 +118,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
self.url = u''
def preprocess_string(self, html_string):
if self._notDirectHitTitle in html_string[:10240].lower():
if self._linkPrefix == '/title/tt':
# Only for movies.

View file

@ -404,6 +404,15 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
m = Movie(title=title, movieID=movieID, notes=notes, currentRole=role,
roleID=roleID, roleIsPerson=_parsingCharacter,
modFunct=modFunct, accessSystem=accessSystem)
if additionalNotes:
if '(TV Series)' in additionalNotes:
m['kind'] = u'tv series'
elif '(Video Game)' in additionalNotes:
m['kind'] = u'video game'
elif '(TV Movie)' in additionalNotes:
m['kind'] = u'tv movie'
elif '(TV Short)' in additionalNotes:
m['kind'] = u'tv short'
if roleNotes and len(roleNotes) == len(roleID):
for idx, role in enumerate(m.currentRole):
try:

View file

@ -431,13 +431,13 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
yi = [(yiy, yii)]
if yk == 'TV episode':
kind = u'episode'
elif yk == 'TV':
elif yk in ('TV', 'TV Movie'):
kind = u'tv movie'
elif yk == 'TV Series':
kind = u'tv series'
elif yk == 'Video':
kind = u'video movie'
elif yk == 'TV mini-series':
elif yk in ('TV mini-series', 'TV Mini-Series'):
kind = u'tv mini series'
elif yk == 'Video Game':
kind = u'video game'