Update IMDb 5.0 to 5.1dev20160106

This commit is contained in:
JackDandy 2016-01-14 08:54:24 +00:00
parent 77a631f54f
commit 5519fd7e13
11 changed files with 228 additions and 115 deletions

View file

@ -14,6 +14,7 @@
* Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97) * Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97)
* Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435) * Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435)
* Update html5lib 0.999 to 0.99999999/1.0b9 (46dae3d) * Update html5lib 0.999 to 0.99999999/1.0b9 (46dae3d)
* Update IMDb 5.0 to 5.1dev20160106
* Update PNotify library 2.0.1 to 2.1.0 * Update PNotify library 2.0.1 to 2.1.0
* Update profilehooks 1.4 to 1.8.2.dev0 (ee3f1a8) * Update profilehooks 1.4 to 1.8.2.dev0 (ee3f1a8)
* Update Requests library 2.7.0 (5d6d1bc) to 2.9.1 (a1c9b84) * Update Requests library 2.7.0 (5d6d1bc) to 2.9.1 (a1c9b84)

View file

@ -6,7 +6,7 @@ a person from the IMDb database.
It can fetch data through different media (e.g.: the IMDb web pages, It can fetch data through different media (e.g.: the IMDb web pages,
a SQL database, etc.) a SQL database, etc.)
Copyright 2004-2014 Davide Alberani <da@erlug.linux.it> Copyright 2004-2015 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company', __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
'available_access_systems'] 'available_access_systems']
__version__ = VERSION = '5.0' __version__ = VERSION = '5.1dev20160106'
# Import compatibility module (importing it is enough). # Import compatibility module (importing it is enough).
import _compat import _compat

View file

@ -29,7 +29,7 @@
[imdbpy] [imdbpy]
## Default. ## Default.
accessSystem = httpThin #accessSystem = http
## Optional (options common to every data access system): ## Optional (options common to every data access system):
# Activate adult searches (on, by default). # Activate adult searches (on, by default).
@ -69,7 +69,7 @@ accessSystem = httpThin
## Set the threshold for logging messages. ## Set the threshold for logging messages.
# Can be one of "debug", "info", "warning", "error", "critical" (default: # Can be one of "debug", "info", "warning", "error", "critical" (default:
# "warning"). # "warning").
loggingLevel = debug #loggingLevel = debug
## Path to a configuration file for the logging facility; ## Path to a configuration file for the logging facility;
# see: http://docs.python.org/library/logging.html#configuring-logging # see: http://docs.python.org/library/logging.html#configuring-logging

View file

@ -1,12 +1,13 @@
# Gettext message file for imdbpy # Gettext message file for imdbpy
# Translators: # Translators:
# Ioan, 2013 # Nils Welzk, 2013
# Raphael, 2014
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: IMDbPY\n" "Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n" "POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2013-11-20 11:07+0000\n" "PO-Revision-Date: 2014-10-21 15:24+0000\n"
"Last-Translator: Ioan\n" "Last-Translator: Raphael\n"
"Language-Team: German (http://www.transifex.com/projects/p/imdbpy/language/de/)\n" "Language-Team: German (http://www.transifex.com/projects/p/imdbpy/language/de/)\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
@ -20,11 +21,11 @@ msgstr ""
# Default: Actor # Default: Actor
msgid "actor" msgid "actor"
msgstr "" msgstr "Schauspieler"
# Default: Actress # Default: Actress
msgid "actress" msgid "actress"
msgstr "" msgstr "Schauspielerin"
# Default: Adaption # Default: Adaption
msgid "adaption" msgid "adaption"
@ -32,7 +33,7 @@ msgstr ""
# Default: Additional information # Default: Additional information
msgid "additional-information" msgid "additional-information"
msgstr "" msgstr "zusätzliche Information"
# Default: Admissions # Default: Admissions
msgid "admissions" msgid "admissions"
@ -48,7 +49,7 @@ msgstr ""
# Default: Akas # Default: Akas
msgid "akas" msgid "akas"
msgstr "" msgstr "Pseudonyme"
# Default: Akas from release info # Default: Akas from release info
msgid "akas-from-release-info" msgid "akas-from-release-info"
@ -56,7 +57,7 @@ msgstr ""
# Default: All products # Default: All products
msgid "all-products" msgid "all-products"
msgstr "" msgstr "Alle Produkte"
# Default: Alternate language version of # Default: Alternate language version of
msgid "alternate-language-version-of" msgid "alternate-language-version-of"
@ -68,7 +69,7 @@ msgstr ""
# Default: Amazon reviews # Default: Amazon reviews
msgid "amazon-reviews" msgid "amazon-reviews"
msgstr "" msgstr "Amazon Rezensionen"
# Default: Analog left # Default: Analog left
msgid "analog-left" msgid "analog-left"
@ -100,7 +101,7 @@ msgstr ""
# Default: Art director # Default: Art director
msgid "art-director" msgid "art-director"
msgstr "" msgstr "Art Director"
# Default: Article # Default: Article
msgid "article" msgid "article"
@ -112,7 +113,7 @@ msgstr ""
# Default: Aspect ratio # Default: Aspect ratio
msgid "aspect-ratio" msgid "aspect-ratio"
msgstr "" msgstr "Seitenverhältnis"
# Default: Assigner # Default: Assigner
msgid "assigner" msgid "assigner"
@ -132,7 +133,7 @@ msgstr ""
# Default: Audio quality # Default: Audio quality
msgid "audio-quality" msgid "audio-quality"
msgstr "" msgstr "Audio Qualität"
# Default: Award # Default: Award
msgid "award" msgid "award"
@ -188,7 +189,7 @@ msgstr "Kosten"
# Default: Business # Default: Business
msgid "business" msgid "business"
msgstr "" msgstr "Geschäft"
# Default: By arrangement with # Default: By arrangement with
msgid "by-arrangement-with" msgid "by-arrangement-with"
@ -220,7 +221,7 @@ msgstr ""
# Default: Cast # Default: Cast
msgid "cast" msgid "cast"
msgstr "" msgstr "Besetzung"
# Default: Casting department # Default: Casting department
msgid "casting-department" msgid "casting-department"
@ -236,23 +237,23 @@ msgstr ""
# Default: Category # Default: Category
msgid "category" msgid "category"
msgstr "" msgstr "Kategorie"
# Default: Certificate # Default: Certificate
msgid "certificate" msgid "certificate"
msgstr "" msgstr "Zertifikat"
# Default: Certificates # Default: Certificates
msgid "certificates" msgid "certificates"
msgstr "" msgstr "Zertifikate"
# Default: Certification # Default: Certification
msgid "certification" msgid "certification"
msgstr "" msgstr "Bescheinigung"
# Default: Channel # Default: Channel
msgid "channel" msgid "channel"
msgstr "" msgstr "Kanal"
# Default: Character # Default: Character
msgid "character" msgid "character"
@ -372,7 +373,7 @@ msgstr ""
# Default: Description # Default: Description
msgid "description" msgid "description"
msgstr "" msgstr "Beschreibung"
# Default: Dialogue intellegibility # Default: Dialogue intellegibility
msgid "dialogue-intellegibility" msgid "dialogue-intellegibility"
@ -396,7 +397,7 @@ msgstr ""
# Default: Distributors # Default: Distributors
msgid "distributors" msgid "distributors"
msgstr "" msgstr "Händler"
# Default: Dvd # Default: Dvd
msgid "dvd" msgid "dvd"
@ -452,7 +453,7 @@ msgstr "Episoden"
# Default: Episodes rating # Default: Episodes rating
msgid "episodes-rating" msgid "episodes-rating"
msgstr "" msgstr "Episoden Bewertung"
# Default: Essays # Default: Essays
msgid "essays" msgid "essays"
@ -464,7 +465,7 @@ msgstr ""
# Default: Faqs # Default: Faqs
msgid "faqs" msgid "faqs"
msgstr "" msgstr "FAQs"
# Default: Feature # Default: Feature
msgid "feature" msgid "feature"
@ -488,19 +489,19 @@ msgstr ""
# Default: Filmography # Default: Filmography
msgid "filmography" msgid "filmography"
msgstr "" msgstr "Filmografie"
# Default: Followed by # Default: Followed by
msgid "followed-by" msgid "followed-by"
msgstr "" msgstr "gefolgt von"
# Default: Follows # Default: Follows
msgid "follows" msgid "follows"
msgstr "" msgstr "folgt"
# Default: For # Default: For
msgid "for" msgid "for"
msgstr "" msgstr "für"
# Default: Frequency response # Default: Frequency response
msgid "frequency-response" msgid "frequency-response"
@ -508,7 +509,7 @@ msgstr ""
# Default: From # Default: From
msgid "from" msgid "from"
msgstr "" msgstr "von"
# Default: Full article link # Default: Full article link
msgid "full-article-link" msgid "full-article-link"
@ -524,7 +525,7 @@ msgstr ""
# Default: Genres # Default: Genres
msgid "genres" msgid "genres"
msgstr "" msgstr "Genres"
# Default: Goofs # Default: Goofs
msgid "goofs" msgid "goofs"
@ -540,7 +541,7 @@ msgstr ""
# Default: Headshot # Default: Headshot
msgid "headshot" msgid "headshot"
msgstr "" msgstr "Portrait"
# Default: Height # Default: Height
msgid "height" msgid "height"
@ -556,15 +557,15 @@ msgstr ""
# Default: Interview # Default: Interview
msgid "interview" msgid "interview"
msgstr "" msgstr "Interview"
# Default: Interviews # Default: Interviews
msgid "interviews" msgid "interviews"
msgstr "" msgstr "Interviews"
# Default: Introduction # Default: Introduction
msgid "introduction" msgid "introduction"
msgstr "" msgstr "Vorstellung"
# Default: Item # Default: Item
msgid "item" msgid "item"
@ -596,7 +597,7 @@ msgstr "Sprachen"
# Default: Laserdisc # Default: Laserdisc
msgid "laserdisc" msgid "laserdisc"
msgstr "" msgstr "Laserdisc"
# Default: Laserdisc title # Default: Laserdisc title
msgid "laserdisc-title" msgid "laserdisc-title"
@ -624,7 +625,7 @@ msgstr "Literatur"
# Default: Locations # Default: Locations
msgid "locations" msgid "locations"
msgstr "" msgstr "Standorte"
# Default: Long imdb canonical name # Default: Long imdb canonical name
msgid "long-imdb-canonical-name" msgid "long-imdb-canonical-name"
@ -708,11 +709,11 @@ msgstr ""
# Default: Nick names # Default: Nick names
msgid "nick-names" msgid "nick-names"
msgstr "" msgstr "Spitznamen"
# Default: Notes # Default: Notes
msgid "notes" msgid "notes"
msgstr "" msgstr "Anmerkungen"
# Default: Novel # Default: Novel
msgid "novel" msgid "novel"
@ -720,7 +721,7 @@ msgstr ""
# Default: Number # Default: Number
msgid "number" msgid "number"
msgstr "" msgstr "Zahl"
# Default: Number of chapter stops # Default: Number of chapter stops
msgid "number-of-chapter-stops" msgid "number-of-chapter-stops"
@ -800,7 +801,7 @@ msgstr ""
# Default: Plot # Default: Plot
msgid "plot" msgid "plot"
msgstr "Inhalt" msgstr "Handlung"
# Default: Plot outline # Default: Plot outline
msgid "plot-outline" msgid "plot-outline"
@ -824,7 +825,7 @@ msgstr ""
# Default: Producer # Default: Producer
msgid "producer" msgid "producer"
msgstr "" msgstr "Produzent"
# Default: Production companies # Default: Production companies
msgid "production-companies" msgid "production-companies"
@ -864,15 +865,15 @@ msgstr ""
# Default: Quote # Default: Quote
msgid "quote" msgid "quote"
msgstr "" msgstr "Zitat"
# Default: Quotes # Default: Quotes
msgid "quotes" msgid "quotes"
msgstr "" msgstr "Zitate"
# Default: Rating # Default: Rating
msgid "rating" msgid "rating"
msgstr "" msgstr "Bewertung"
# Default: Recommendations # Default: Recommendations
msgid "recommendations" msgid "recommendations"
@ -896,11 +897,11 @@ msgstr ""
# Default: Release date # Default: Release date
msgid "release-date" msgid "release-date"
msgstr "" msgstr "Veröffentlichungsdatum"
# Default: Release dates # Default: Release dates
msgid "release-dates" msgid "release-dates"
msgstr "" msgstr "Veröffentlichungstermine"
# Default: Remade as # Default: Remade as
msgid "remade-as" msgid "remade-as"
@ -908,27 +909,27 @@ msgstr ""
# Default: Remake of # Default: Remake of
msgid "remake-of" msgid "remake-of"
msgstr "" msgstr "Remake von"
# Default: Rentals # Default: Rentals
msgid "rentals" msgid "rentals"
msgstr "" msgstr "Leihgebühr"
# Default: Result # Default: Result
msgid "result" msgid "result"
msgstr "" msgstr "Ergebnis"
# Default: Review # Default: Review
msgid "review" msgid "review"
msgstr "" msgstr "Kritik"
# Default: Review author # Default: Review author
msgid "review-author" msgid "review-author"
msgstr "" msgstr "Kritik Autor"
# Default: Review kind # Default: Review kind
msgid "review-kind" msgid "review-kind"
msgstr "" msgstr "Kritik Art"
# Default: Runtime # Default: Runtime
msgid "runtime" msgid "runtime"
@ -1096,7 +1097,7 @@ msgstr ""
# Default: Soundtrack # Default: Soundtrack
msgid "soundtrack" msgid "soundtrack"
msgstr "" msgstr "Soundtrack"
# Default: Spaciality # Default: Spaciality
msgid "spaciality" msgid "spaciality"
@ -1116,43 +1117,43 @@ msgstr ""
# Default: Spin off # Default: Spin off
msgid "spin-off" msgid "spin-off"
msgstr "" msgstr "Nebenprodukt"
# Default: Spin off from # Default: Spin off from
msgid "spin-off-from" msgid "spin-off-from"
msgstr "" msgstr "Nebenprodukt von"
# Default: Spoofed in # Default: Spoofed in
msgid "spoofed-in" msgid "spoofed-in"
msgstr "" msgstr "Parodiert in"
# Default: Spoofs # Default: Spoofs
msgid "spoofs" msgid "spoofs"
msgstr "" msgstr "Parodie"
# Default: Spouse # Default: Spouse
msgid "spouse" msgid "spouse"
msgstr "" msgstr "Gattin"
# Default: Status of availablility # Default: Status of availablility
msgid "status-of-availablility" msgid "status-of-availablility"
msgstr "" msgstr "Verfügbarkeitsstatus"
# Default: Studio # Default: Studio
msgid "studio" msgid "studio"
msgstr "" msgstr "Studio"
# Default: Studios # Default: Studios
msgid "studios" msgid "studios"
msgstr "" msgstr "Studios"
# Default: Stunt performer # Default: Stunt performer
msgid "stunt-performer" msgid "stunt-performer"
msgstr "" msgstr "Stunt-Darsteller"
# Default: Stunts # Default: Stunts
msgid "stunts" msgid "stunts"
msgstr "" msgstr "Stunts"
# Default: Subtitles # Default: Subtitles
msgid "subtitles" msgid "subtitles"
@ -1160,19 +1161,19 @@ msgstr "Untertitel"
# Default: Supplement # Default: Supplement
msgid "supplement" msgid "supplement"
msgstr "" msgstr "Ergänzung"
# Default: Supplements # Default: Supplements
msgid "supplements" msgid "supplements"
msgstr "" msgstr "Ergänzungen"
# Default: Synopsis # Default: Synopsis
msgid "synopsis" msgid "synopsis"
msgstr "" msgstr "Zusammenfassung"
# Default: Taglines # Default: Taglines
msgid "taglines" msgid "taglines"
msgstr "" msgstr "Slogan"
# Default: Tech info # Default: Tech info
msgid "tech-info" msgid "tech-info"
@ -1188,7 +1189,7 @@ msgstr "Zeit"
# Default: Title # Default: Title
msgid "title" msgid "title"
msgstr "" msgstr "Titel"
# Default: Titles in this product # Default: Titles in this product
msgid "titles-in-this-product" msgid "titles-in-this-product"
@ -1200,11 +1201,11 @@ msgstr ""
# Default: Top 250 rank # Default: Top 250 rank
msgid "top-250-rank" msgid "top-250-rank"
msgstr "" msgstr "Top 250 Platzierung"
# Default: Trade mark # Default: Trade mark
msgid "trade-mark" msgid "trade-mark"
msgstr "" msgstr "Warenzeichen"
# Default: Transportation department # Default: Transportation department
msgid "transportation-department" msgid "transportation-department"
@ -1212,7 +1213,7 @@ msgstr ""
# Default: Trivia # Default: Trivia
msgid "trivia" msgid "trivia"
msgstr "" msgstr "Nichtigkeiten"
# Default: Tv # Default: Tv
msgid "tv" msgid "tv"
@ -1220,7 +1221,7 @@ msgstr "TV"
# Default: Under license from # Default: Under license from
msgid "under-license-from" msgid "under-license-from"
msgstr "" msgstr "lizenziert von"
# Default: Unknown link # Default: Unknown link
msgid "unknown-link" msgid "unknown-link"
@ -1256,19 +1257,19 @@ msgstr ""
# Default: Video quality # Default: Video quality
msgid "video-quality" msgid "video-quality"
msgstr "" msgstr "Video Qualität"
# Default: Video standard # Default: Video standard
msgid "video-standard" msgid "video-standard"
msgstr "" msgstr "Video Standard"
# Default: Visual effects # Default: Visual effects
msgid "visual-effects" msgid "visual-effects"
msgstr "" msgstr "Visuelle Effekte"
# Default: Votes # Default: Votes
msgid "votes" msgid "votes"
msgstr "" msgstr "Stimmen"
# Default: Votes distribution # Default: Votes distribution
msgid "votes-distribution" msgid "votes-distribution"
@ -1284,11 +1285,11 @@ msgstr ""
# Default: With # Default: With
msgid "with" msgid "with"
msgstr "" msgstr "mit"
# Default: Writer # Default: Writer
msgid "writer" msgid "writer"
msgstr "Schreiber" msgstr "Autor"
# Default: Written by # Default: Written by
msgid "written-by" msgid "written-by"

View file

@ -1,13 +1,14 @@
# Gettext message file for imdbpy # Gettext message file for imdbpy
# Translators: # Translators:
# RainDropR <rajaa@hilltx.com>, 2013 # lukophron, 2014
# Stéphane Aulery, 2012 # Rajaa Gutknecht <rajaa@hilltx.com>, 2013
# lkppo, 2012
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: IMDbPY\n" "Project-Id-Version: IMDbPY\n"
"POT-Creation-Date: 2010-03-18 14:35+0000\n" "POT-Creation-Date: 2010-03-18 14:35+0000\n"
"PO-Revision-Date: 2013-11-20 11:07+0000\n" "PO-Revision-Date: 2014-10-08 02:52+0000\n"
"Last-Translator: RainDropR <rajaa@hilltx.com>\n" "Last-Translator: lukophron\n"
"Language-Team: French (http://www.transifex.com/projects/p/imdbpy/language/fr/)\n" "Language-Team: French (http://www.transifex.com/projects/p/imdbpy/language/fr/)\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
@ -33,11 +34,11 @@ msgstr "adaptation"
# Default: Additional information # Default: Additional information
msgid "additional-information" msgid "additional-information"
msgstr "" msgstr "information-additionnelle"
# Default: Admissions # Default: Admissions
msgid "admissions" msgid "admissions"
msgstr "" msgstr "admissions"
# Default: Agent address # Default: Agent address
msgid "agent-address" msgid "agent-address"

View file

@ -726,6 +726,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
cont = self._retrieve(self.urls['person_main'] % personID + 'bio') cont = self._retrieve(self.urls['person_main'] % personID + 'bio')
return self.pProxy.bio_parser.parse(cont, getRefs=self._getRefs) return self.pProxy.bio_parser.parse(cont, getRefs=self._getRefs)
def get_person_resume(self, personID):
    """Fetch and parse the "resume" page of the given person.

    The page URL is the 'person_main' URL template filled with
    personID, followed by the 'resume' suffix; the retrieved content
    is handed to the resume parser together with the instance's
    getRefs setting.
    """
    resume_url = self.urls['person_main'] % personID + 'resume'
    page = self._retrieve(resume_url)
    parser = self.pProxy.resume_parser
    return parser.parse(page, getRefs=self._getRefs)
def get_person_awards(self, personID): def get_person_awards(self, personID):
cont = self._retrieve(self.urls['person_main'] % personID + 'awards') cont = self._retrieve(self.urls['person_main'] % personID + 'awards')
return self.pProxy.person_awards_parser.parse(cont) return self.pProxy.person_awards_parser.parse(cont)

View file

@ -226,7 +226,7 @@ class DOMHTMLMovieParser(DOMParserBase):
Attribute(key="countries", Attribute(key="countries",
path="./h5[starts-with(text(), " \ path="./h5[starts-with(text(), " \
"'Countr')]/../div[@class='info-content']//text()", "'Countr')]/../div[@class='info-content']//text()",
postprocess=makeSplitter('|')), postprocess=makeSplitter('|')),
Attribute(key="language", Attribute(key="language",
path="./h5[starts-with(text(), " \ path="./h5[starts-with(text(), " \
"'Language')]/..//text()", "'Language')]/..//text()",
@ -234,7 +234,7 @@ class DOMHTMLMovieParser(DOMParserBase):
Attribute(key='color info', Attribute(key='color info',
path="./h5[starts-with(text(), " \ path="./h5[starts-with(text(), " \
"'Color')]/..//text()", "'Color')]/..//text()",
postprocess=makeSplitter('Color:')), postprocess=makeSplitter('|')),
Attribute(key='sound mix', Attribute(key='sound mix',
path="./h5[starts-with(text(), " \ path="./h5[starts-with(text(), " \
"'Sound Mix')]/..//text()", "'Sound Mix')]/..//text()",
@ -462,6 +462,8 @@ class DOMHTMLMovieParser(DOMParserBase):
del data['other akas'] del data['other akas']
if nakas: if nakas:
data['akas'] = nakas data['akas'] = nakas
if 'color info' in data:
data['color info'] = [x.replace('Color:', '', 1) for x in data['color info']]
if 'runtimes' in data: if 'runtimes' in data:
data['runtimes'] = [x.replace(' min', u'') data['runtimes'] = [x.replace(' min', u'')
for x in data['runtimes']] for x in data['runtimes']]
@ -1177,7 +1179,7 @@ class DOMHTMLCriticReviewsParser(DOMParserBase):
path="//div[@class='article']/div[@class='see-more']/a", path="//div[@class='article']/div[@class='see-more']/a",
attrs=Attribute(key='metacritic url', attrs=Attribute(key='metacritic url',
path="./@href")) ] path="./@href")) ]
class DOMHTMLOfficialsitesParser(DOMParserBase): class DOMHTMLOfficialsitesParser(DOMParserBase):
"""Parser for the "official sites", "external reviews", "newsgroup """Parser for the "official sites", "external reviews", "newsgroup
reviews", "miscellaneous links", "sound clips", "video clips" and reviews", "miscellaneous links", "sound clips", "video clips" and
@ -1534,7 +1536,7 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
'').strip() '').strip()
episode_title = episode.get('title', '').strip() episode_title = episode.get('title', '').strip()
episode_plot = episode.get('plot', '') episode_plot = episode.get('plot', '')
if not (episode_nr and episode_id and episode_title): if not (episode_nr is not None and episode_id and episode_title):
continue continue
ep_obj = Movie(movieID=episode_id, title=episode_title, ep_obj = Movie(movieID=episode_id, title=episode_title,
accessSystem=self._as, modFunct=self._modFunct) accessSystem=self._as, modFunct=self._modFunct)

View file

@ -204,7 +204,7 @@ class DOMHTMLBioParser(DOMParserBase):
_birth_attrs = [Attribute(key='birth date', _birth_attrs = [Attribute(key='birth date',
path={ path={
'day': "./a[starts-with(@href, " \ 'day': "./a[starts-with(@href, " \
"'/date/')]/text()", "'/search/name?birth_monthday=')]/text()",
'year': "./a[starts-with(@href, " \ 'year': "./a[starts-with(@href, " \
"'/search/name?birth_year=')]/text()" "'/search/name?birth_year=')]/text()"
}, },
@ -215,7 +215,7 @@ class DOMHTMLBioParser(DOMParserBase):
_death_attrs = [Attribute(key='death date', _death_attrs = [Attribute(key='death date',
path={ path={
'day': "./a[starts-with(@href, " \ 'day': "./a[starts-with(@href, " \
"'/date/')]/text()", "'/search/name?death_monthday=')]/text()",
'year': "./a[starts-with(@href, " \ 'year': "./a[starts-with(@href, " \
"'/search/name?death_date=')]/text()" "'/search/name?death_date=')]/text()"
}, },
@ -326,6 +326,107 @@ class DOMHTMLBioParser(DOMParserBase):
return data return data
class DOMHTMLResumeParser(DOMParserBase):
    """Parser for the "resume" page of a given person.
    The page should be provided as a string, as taken from
    the akas.imdb.com server.  The final result will be a
    dictionary, with a key for every relevant section.

    Example:
        resumeparser = DOMHTMLResumeParser()
        result = resumeparser.parse(resume_html_string)
    """
    _defGetRefs = True

    extractors = [
        # Titled entries of a section: every <li> of a
        # 'resume_section_multi_list' yields a (title, description) tuple,
        # grouped under the normalized <h3> text of its section box.
        Extractor(label='info',
                  group="//div[@class='section_box']",
                  group_key="./h3/text()",
                  group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                  path="./ul[@class='resume_section_multi_list']//li",
                  attrs=Attribute(key=None,
                                  multi=True,
                                  path={
                                      'title': ".//b//text()",
                                      'desc': ".//text()",
                                  },
                                  postprocess=lambda x: (x.get('title'), x.get('desc').strip().replace('\n', ' ')))),
        # Plain list entries: <li> items below the '_imdbpy' <ul> opened
        # by the preprocessor; each one yields a whitespace-cleaned string.
        Extractor(label='other_info',
                  group="//div[@class='section_box']",
                  group_key="./h3/text()",
                  group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                  path="./ul[@class='_imdbpy']//li",
                  attrs=Attribute(key=None,
                                  multi=True,
                                  path=".//text()",
                                  postprocess=lambda x: x.strip().replace('\n', ' '))),
        # Credits tables: every row yields a three-item list holding the
        # text of its first three cells (None for a missing cell).
        Extractor(label='credits',
                  group="//div[@class='section_box']",
                  group_key="./h3/text()",
                  group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                  path="./table[@class='credits']//tr",
                  attrs=Attribute(key=None,
                                  multi=True,
                                  path={
                                      '0':".//td[1]//text()",
                                      '1':".//td[2]//text()",
                                      '2':".//td[3]//text()",
                                  },
                                  postprocess=lambda x: [x.get('0'),x.get('1'),x.get('2')])),
        Extractor(label='mini_info',
                  path="//div[@class='center']",
                  attrs=Attribute(key='mini_info',
                                  path=".//text()",
                                  postprocess=lambda x: x.strip().replace('\n', ' '))),
        Extractor(label='name',
                  path="//div[@class='center']/h1[@id='preview_user_name']",
                  attrs=Attribute(key='name',
                                  path=".//text()",
                                  postprocess=lambda x: x.strip().replace('\n', ' '))),
        Extractor(label='resume_bio',
                  path="//div[@id='resume_rendered_html']//p",
                  attrs=Attribute(key='resume_bio',
                                  multi=True,
                                  path=".//text()")),
    ]

    preprocessors = [
        # Put an opening <ul class="_imdbpy"> in front of every bare <ul>,
        # which is the element the 'other_info' extractor selects.
        (re.compile('(<ul>)', re.I), r'<ul class="_imdbpy">\1'),
    ]

    def postprocess_data(self, data):
        """Normalize the extracted sections and wrap them under 'resume'.

        - entries whose value is an empty string are removed;
        - 'resume_bio' fragments are joined into one stripped string;
        - 'mini_info' and 'name' are left untouched;
        - sections made of three-item rows get the None cells dropped;
        - sections made of (title, description) pairs become a dictionary
          mapping title to description;
        - anything else (e.g. lists of plain strings) is left as it is.

        Returns a dictionary {'resume': data}; data is also modified
        in place.
        """
        # Work on a snapshot of the keys: entries are deleted and replaced
        # inside the loop.
        for key in list(data.keys()):
            value = data[key]
            if value == '':
                # The entry is gone: skip the rest, which would otherwise
                # look the deleted key up again and raise KeyError.
                del data[key]
                continue
            if key in ('mini_info', 'name', 'resume_bio'):
                if key == 'resume_bio':
                    data[key] = "".join(value).strip()
                continue
            # Only non-empty sequences of rows need further processing.
            if not isinstance(value, (list, tuple)) or not value:
                continue
            first = value[0]
            # Plain strings also have a length: check the row type so
            # that a 2- or 3-character string is not mistaken for a row.
            if not isinstance(first, (list, tuple)):
                continue
            if len(first) == 3:
                data[key] = [[x for x in item if x is not None]
                             for item in value]
            elif len(first) == 2:
                section = {}
                for item in value:
                    title, desc = item[0], item[1]
                    if title is None:
                        continue
                    if ':' in title:
                        # The description text includes the title: remove
                        # it, plus the one character that follows.
                        text = desc.replace(title, '')[1:].strip()
                        if text == '':
                            continue
                        section[title.strip().replace(':', '')] = text
                    else:
                        section[title] = desc
                data[key] = section

        new_data = {}
        new_data['resume'] = data
        return new_data
class DOMHTMLOtherWorksParser(DOMParserBase): class DOMHTMLOtherWorksParser(DOMParserBase):
"""Parser for the "other works" and "agent" pages of a given person. """Parser for the "other works" and "agent" pages of a given person.
The page should be provided as a string, as taken from The page should be provided as a string, as taken from
@ -502,6 +603,7 @@ from movieParser import DOMHTMLNewsParser
_OBJECTS = { _OBJECTS = {
'maindetails_parser': ((DOMHTMLMaindetailsParser,), None), 'maindetails_parser': ((DOMHTMLMaindetailsParser,), None),
'bio_parser': ((DOMHTMLBioParser,), None), 'bio_parser': ((DOMHTMLBioParser,), None),
'resume_parser': ((DOMHTMLResumeParser,), None),
'otherworks_parser': ((DOMHTMLOtherWorksParser,), None), 'otherworks_parser': ((DOMHTMLOtherWorksParser,), None),
#'agent_parser': ((DOMHTMLOtherWorksParser,), {'kind': 'agent'}), #'agent_parser': ((DOMHTMLOtherWorksParser,), {'kind': 'agent'}),
'person_officialsites_parser': ((DOMHTMLOfficialsitesParser,), None), 'person_officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),

View file

@ -7,7 +7,7 @@ E.g.:
http://akas.imdb.com/chart/top http://akas.imdb.com/chart/top
http://akas.imdb.com/chart/bottom http://akas.imdb.com/chart/bottom
Copyright 2009 Davide Alberani <da@erlug.linux.it> Copyright 2009-2015 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -43,14 +43,15 @@ class DOMHTMLTop250Parser(DOMParserBase):
def _init(self): def _init(self):
self.extractors = [Extractor(label=self.label, self.extractors = [Extractor(label=self.label,
path="//div[@id='main']//table//tr", path="//div[@id='main']//div[1]//div//table//tbody//tr",
attrs=Attribute(key=None, attrs=Attribute(key=None,
multi=True, multi=True,
path={self.ranktext: "./td[1]//text()", path={self.ranktext: "./td[2]//text()",
'rating': "./td[2]//text()", 'rating': "./td[3]//strong//text()",
'title': "./td[3]//text()", 'title': "./td[2]//a//text()",
'movieID': "./td[3]//a/@href", 'year': "./td[2]//span//text()",
'votes': "./td[4]//text()" 'movieID': "./td[2]//a/@href",
'votes': "./td[3]//strong/@title"
}))] }))]
def postprocess_data(self, data): def postprocess_data(self, data):
@ -72,12 +73,16 @@ class DOMHTMLTop250Parser(DOMParserBase):
if theID in seenIDs: if theID in seenIDs:
continue continue
seenIDs.append(theID) seenIDs.append(theID)
minfo = analyze_title(d['title']) minfo = analyze_title(d['title']+" "+d['year'])
try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', '')) try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
except: pass except: pass
if 'votes' in d: if 'votes' in d:
try: minfo['votes'] = int(d['votes'].replace(',', '')) try:
except: pass votes = d['votes'].replace(' votes','')
votes = votes.split(' based on ')[1]
minfo['votes'] = int(votes.replace(',', ''))
except:
pass
if 'rating' in d: if 'rating' in d:
try: minfo['rating'] = float(d['rating']) try: minfo['rating'] = float(d['rating'])
except: pass except: pass

View file

@ -441,12 +441,6 @@ class DOMParserBase(object):
self._useModule = useModule self._useModule = useModule
nrMods = len(useModule) nrMods = len(useModule)
_gotError = False _gotError = False
# Force warnings.warn() to omit the source code line in the message
formatwarning_orig = warnings.formatwarning
warnings.formatwarning = lambda message, category, filename, lineno, line=None: \
formatwarning_orig(message, category, filename, lineno, line='')
for idx, mod in enumerate(useModule): for idx, mod in enumerate(useModule):
mod = mod.strip().lower() mod = mod.strip().lower()
try: try:

View file

@ -639,11 +639,14 @@ def analyze_company_name(name, stripNotes=False):
o_name = name o_name = name
name = name.strip() name = name.strip()
country = None country = None
if name.endswith(']'): if name.startswith('['):
idx = name.rfind('[') name = re.sub('[!@#$\(\)\[\]]', '', name)
if idx != -1: else:
country = name[idx:] if name.endswith(']'):
name = name[:idx].rstrip() idx = name.rfind('[')
if idx != -1:
country = name[idx:]
name = name[:idx].rstrip()
if not name: if not name:
raise IMDbParserError('invalid name: "%s"' % o_name) raise IMDbParserError('invalid name: "%s"' % o_name)
result = {'name': name} result = {'name': name}
@ -957,7 +960,7 @@ def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
crl = [crl] crl = [crl]
for cr in crl: for cr in crl:
crTag = cr.__class__.__name__.lower() crTag = cr.__class__.__name__.lower()
crValue = cr['long imdb name'] crValue = cr.get('long imdb name') or u''
crValue = _normalizeValue(crValue) crValue = _normalizeValue(crValue)
crID = cr.getID() crID = cr.getID()
if crID is not None: if crID is not None: