Update IMDb 5.0 to 5.1dev20160106

2025-01-22 01:23:43 +00:00 · 2016-01-14 08:54:24 +00:00 · 2016-01-14 08:54:24 +00:00 · 5519fd7e13
commit 5519fd7e13
parent 77a631f54f
11 changed files with 228 additions and 115 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -14,6 +14,7 @@
 * Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97)
 * Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435)
 * Update html5lib 0.999 to 0.99999999/1.0b9 (46dae3d)
 * Update IMDb 5.0 to 5.1dev20160106
 * Update PNotify library 2.0.1 to 2.1.0
 * Update profilehooks 1.4 to 1.8.2.dev0 (ee3f1a8)
 * Update Requests library 2.7.0 (5d6d1bc) to 2.9.1 (a1c9b84)
--- a/lib/imdb/init.py
+++ b/lib/imdb/init.py
@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)
-Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2015 Davide Alberani <da@erlug.linux.it>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '5.0'
+__version__ = VERSION = '5.1dev20160106'
 # Import compatibility module (importing it is enough).
 import _compat
--- a/lib/imdb/imdbpy.cfg
+++ b/lib/imdb/imdbpy.cfg
@ -29,7 +29,7 @@
 [imdbpy]
 ## Default.
-accessSystem = httpThin
+#accessSystem = http
 ## Optional (options common to every data access system):
 # Activate adult searches (on, by default).
@ -69,7 +69,7 @@ accessSystem = httpThin
 ## Set the threshold for logging messages.
 # Can be one of "debug", "info", "warning", "error", "critical" (default:
 # "warning").
-loggingLevel = debug
+#loggingLevel = debug
 ## Path to a configuration file for the logging facility;
 # see: http://docs.python.org/library/logging.html#configuring-logging
--- a/lib/imdb/locale/imdbpy-de.po
+++ b/lib/imdb/locale/imdbpy-de.po
@ -1,12 +1,13 @@
 # Gettext message file for imdbpy
 # Translators:
-# Ioan, 2013
+# Nils Welzk, 2013
 # Raphael, 2014
 msgid ""
 msgstr ""
 "Project-Id-Version: IMDbPY\n"
 "POT-Creation-Date: 2010-03-18 14:35+0000\n"
-"PO-Revision-Date: 2013-11-20 11:07+0000\n"
+"PO-Revision-Date: 2014-10-21 15:24+0000\n"
-"Last-Translator: Ioan\n"
+"Last-Translator: Raphael\n"
 "Language-Team: German (http://www.transifex.com/projects/p/imdbpy/language/de/)\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
@ -20,11 +21,11 @@ msgstr ""
 # Default: Actor
 msgid "actor"
-msgstr ""
+msgstr "Schauspieler"
 # Default: Actress
 msgid "actress"
-msgstr ""
+msgstr "Schauspielerin"
 # Default: Adaption
 msgid "adaption"
@ -32,7 +33,7 @@ msgstr ""
 # Default: Additional information
 msgid "additional-information"
-msgstr ""
+msgstr "zusätzliche Information"
 # Default: Admissions
 msgid "admissions"
@ -48,7 +49,7 @@ msgstr ""
 # Default: Akas
 msgid "akas"
-msgstr ""
+msgstr "Pseudonyme"
 # Default: Akas from release info
 msgid "akas-from-release-info"
@ -56,7 +57,7 @@ msgstr ""
 # Default: All products
 msgid "all-products"
-msgstr ""
+msgstr "Alle Produkte"
 # Default: Alternate language version of
 msgid "alternate-language-version-of"
@ -68,7 +69,7 @@ msgstr ""
 # Default: Amazon reviews
 msgid "amazon-reviews"
-msgstr ""
+msgstr "Amazon Rezensionen"
 # Default: Analog left
 msgid "analog-left"
@ -100,7 +101,7 @@ msgstr ""
 # Default: Art director
 msgid "art-director"
-msgstr ""
+msgstr "Art Director"
 # Default: Article
 msgid "article"
@ -112,7 +113,7 @@ msgstr ""
 # Default: Aspect ratio
 msgid "aspect-ratio"
-msgstr ""
+msgstr "Seitenverhältnis"
 # Default: Assigner
 msgid "assigner"
@ -132,7 +133,7 @@ msgstr ""
 # Default: Audio quality
 msgid "audio-quality"
-msgstr ""
+msgstr "Audio Qualität"
 # Default: Award
 msgid "award"
@ -188,7 +189,7 @@ msgstr "Kosten"
 # Default: Business
 msgid "business"
-msgstr ""
+msgstr "Geschäft"
 # Default: By arrangement with
 msgid "by-arrangement-with"
@ -220,7 +221,7 @@ msgstr ""
 # Default: Cast
 msgid "cast"
-msgstr ""
+msgstr "Besetzung"
 # Default: Casting department
 msgid "casting-department"
@ -236,23 +237,23 @@ msgstr ""
 # Default: Category
 msgid "category"
-msgstr ""
+msgstr "Kategorie"
 # Default: Certificate
 msgid "certificate"
-msgstr ""
+msgstr "Zertifikat"
 # Default: Certificates
 msgid "certificates"
-msgstr ""
+msgstr "Zertifikate"
 # Default: Certification
 msgid "certification"
-msgstr ""
+msgstr "Bescheinigung"
 # Default: Channel
 msgid "channel"
-msgstr ""
+msgstr "Kanal"
 # Default: Character
 msgid "character"
@ -372,7 +373,7 @@ msgstr ""
 # Default: Description
 msgid "description"
-msgstr ""
+msgstr "Beschreibung"
 # Default: Dialogue intellegibility
 msgid "dialogue-intellegibility"
@ -396,7 +397,7 @@ msgstr ""
 # Default: Distributors
 msgid "distributors"
-msgstr ""
+msgstr "Händler"
 # Default: Dvd
 msgid "dvd"
@ -452,7 +453,7 @@ msgstr "Episoden"
 # Default: Episodes rating
 msgid "episodes-rating"
-msgstr ""
+msgstr "Episoden Bewertung"
 # Default: Essays
 msgid "essays"
@ -464,7 +465,7 @@ msgstr ""
 # Default: Faqs
 msgid "faqs"
-msgstr ""
+msgstr "FAQs"
 # Default: Feature
 msgid "feature"
@ -488,19 +489,19 @@ msgstr ""
 # Default: Filmography
 msgid "filmography"
-msgstr ""
+msgstr "Filmografie"
 # Default: Followed by
 msgid "followed-by"
-msgstr ""
+msgstr "gefolgt von"
 # Default: Follows
 msgid "follows"
-msgstr ""
+msgstr "folgt"
 # Default: For
 msgid "for"
-msgstr ""
+msgstr "für"
 # Default: Frequency response
 msgid "frequency-response"
@ -508,7 +509,7 @@ msgstr ""
 # Default: From
 msgid "from"
-msgstr ""
+msgstr "von"
 # Default: Full article link
 msgid "full-article-link"
@ -524,7 +525,7 @@ msgstr ""
 # Default: Genres
 msgid "genres"
-msgstr ""
+msgstr "Genres"
 # Default: Goofs
 msgid "goofs"
@ -540,7 +541,7 @@ msgstr ""
 # Default: Headshot
 msgid "headshot"
-msgstr ""
+msgstr "Portrait"
 # Default: Height
 msgid "height"
@ -556,15 +557,15 @@ msgstr ""
 # Default: Interview
 msgid "interview"
-msgstr ""
+msgstr "Interview"
 # Default: Interviews
 msgid "interviews"
-msgstr ""
+msgstr "Interviews"
 # Default: Introduction
 msgid "introduction"
-msgstr ""
+msgstr "Vorstellung"
 # Default: Item
 msgid "item"
@ -596,7 +597,7 @@ msgstr "Sprachen"
 # Default: Laserdisc
 msgid "laserdisc"
-msgstr ""
+msgstr "Laserdisc"
 # Default: Laserdisc title
 msgid "laserdisc-title"
@ -624,7 +625,7 @@ msgstr "Literatur"
 # Default: Locations
 msgid "locations"
-msgstr ""
+msgstr "Standorte"
 # Default: Long imdb canonical name
 msgid "long-imdb-canonical-name"
@ -708,11 +709,11 @@ msgstr ""
 # Default: Nick names
 msgid "nick-names"
-msgstr ""
+msgstr "Spitznamen"
 # Default: Notes
 msgid "notes"
-msgstr ""
+msgstr "Anmerkungen"
 # Default: Novel
 msgid "novel"
@ -720,7 +721,7 @@ msgstr ""
 # Default: Number
 msgid "number"
-msgstr ""
+msgstr "Zahl"
 # Default: Number of chapter stops
 msgid "number-of-chapter-stops"
@ -800,7 +801,7 @@ msgstr ""
 # Default: Plot
 msgid "plot"
-msgstr "Inhalt"
+msgstr "Handlung"
 # Default: Plot outline
 msgid "plot-outline"
@ -824,7 +825,7 @@ msgstr ""
 # Default: Producer
 msgid "producer"
-msgstr ""
+msgstr "Produzent"
 # Default: Production companies
 msgid "production-companies"
@ -864,15 +865,15 @@ msgstr ""
 # Default: Quote
 msgid "quote"
-msgstr ""
+msgstr "Zitat"
 # Default: Quotes
 msgid "quotes"
-msgstr ""
+msgstr "Zitate"
 # Default: Rating
 msgid "rating"
-msgstr ""
+msgstr "Bewertung"
 # Default: Recommendations
 msgid "recommendations"
@ -896,11 +897,11 @@ msgstr ""
 # Default: Release date
 msgid "release-date"
-msgstr ""
+msgstr "Veröffentlichungsdatum"
 # Default: Release dates
 msgid "release-dates"
-msgstr ""
+msgstr "Veröffentlichungstermine"
 # Default: Remade as
 msgid "remade-as"
@ -908,27 +909,27 @@ msgstr ""
 # Default: Remake of
 msgid "remake-of"
-msgstr ""
+msgstr "Remake von"
 # Default: Rentals
 msgid "rentals"
-msgstr ""
+msgstr "Leihgebühr"
 # Default: Result
 msgid "result"
-msgstr ""
+msgstr "Ergebnis"
 # Default: Review
 msgid "review"
-msgstr ""
+msgstr "Kritik"
 # Default: Review author
 msgid "review-author"
-msgstr ""
+msgstr "Kritik Autor"
 # Default: Review kind
 msgid "review-kind"
-msgstr ""
+msgstr "Kritik Art"
 # Default: Runtime
 msgid "runtime"
@ -1096,7 +1097,7 @@ msgstr ""
 # Default: Soundtrack
 msgid "soundtrack"
-msgstr ""
+msgstr "Soundtrack"
 # Default: Spaciality
 msgid "spaciality"
@ -1116,43 +1117,43 @@ msgstr ""
 # Default: Spin off
 msgid "spin-off"
-msgstr ""
+msgstr "Nebenprodukt"
 # Default: Spin off from
 msgid "spin-off-from"
-msgstr ""
+msgstr "Nebenprodukt von"
 # Default: Spoofed in
 msgid "spoofed-in"
-msgstr ""
+msgstr "Parodiert in"
 # Default: Spoofs
 msgid "spoofs"
-msgstr ""
+msgstr "Parodie"
 # Default: Spouse
 msgid "spouse"
-msgstr ""
+msgstr "Gattin"
 # Default: Status of availablility
 msgid "status-of-availablility"
-msgstr ""
+msgstr "Verfügbarkeitsstatus"
 # Default: Studio
 msgid "studio"
-msgstr ""
+msgstr "Studio"
 # Default: Studios
 msgid "studios"
-msgstr ""
+msgstr "Studios"
 # Default: Stunt performer
 msgid "stunt-performer"
-msgstr ""
+msgstr "Stunt-Darsteller"
 # Default: Stunts
 msgid "stunts"
-msgstr ""
+msgstr "Stunts"
 # Default: Subtitles
 msgid "subtitles"
@ -1160,19 +1161,19 @@ msgstr "Untertitel"
 # Default: Supplement
 msgid "supplement"
-msgstr ""
+msgstr "Ergänzung"
 # Default: Supplements
 msgid "supplements"
-msgstr ""
+msgstr "Ergänzungen"
 # Default: Synopsis
 msgid "synopsis"
-msgstr ""
+msgstr "Zusammenfassung"
 # Default: Taglines
 msgid "taglines"
-msgstr ""
+msgstr "Slogan"
 # Default: Tech info
 msgid "tech-info"
@ -1188,7 +1189,7 @@ msgstr "Zeit"
 # Default: Title
 msgid "title"
-msgstr ""
+msgstr "Titel"
 # Default: Titles in this product
 msgid "titles-in-this-product"
@ -1200,11 +1201,11 @@ msgstr ""
 # Default: Top 250 rank
 msgid "top-250-rank"
-msgstr ""
+msgstr "Top 250 Platzierung"
 # Default: Trade mark
 msgid "trade-mark"
-msgstr ""
+msgstr "Warenzeichen"
 # Default: Transportation department
 msgid "transportation-department"
@ -1212,7 +1213,7 @@ msgstr ""
 # Default: Trivia
 msgid "trivia"
-msgstr ""
+msgstr "Nichtigkeiten"
 # Default: Tv
 msgid "tv"
@ -1220,7 +1221,7 @@ msgstr "TV"
 # Default: Under license from
 msgid "under-license-from"
-msgstr ""
+msgstr "lizenziert von"
 # Default: Unknown link
 msgid "unknown-link"
@ -1256,19 +1257,19 @@ msgstr ""
 # Default: Video quality
 msgid "video-quality"
-msgstr ""
+msgstr "Video Qualität"
 # Default: Video standard
 msgid "video-standard"
-msgstr ""
+msgstr "Video Standard"
 # Default: Visual effects
 msgid "visual-effects"
-msgstr ""
+msgstr "Visuelle Effekte"
 # Default: Votes
 msgid "votes"
-msgstr ""
+msgstr "Stimmen"
 # Default: Votes distribution
 msgid "votes-distribution"
@ -1284,11 +1285,11 @@ msgstr ""
 # Default: With
 msgid "with"
-msgstr ""
+msgstr "mit"
 # Default: Writer
 msgid "writer"
-msgstr "Schreiber"
+msgstr "Autor"
 # Default: Written by
 msgid "written-by"
--- a/lib/imdb/locale/imdbpy-fr.po
+++ b/lib/imdb/locale/imdbpy-fr.po
@ -1,13 +1,14 @@
 # Gettext message file for imdbpy
 # Translators:
-# RainDropR <rajaa@hilltx.com>, 2013
+# lukophron, 2014
-# Stéphane Aulery, 2012
+# Rajaa Gutknecht <rajaa@hilltx.com>, 2013
 # lkppo, 2012
 msgid ""
 msgstr ""
 "Project-Id-Version: IMDbPY\n"
 "POT-Creation-Date: 2010-03-18 14:35+0000\n"
-"PO-Revision-Date: 2013-11-20 11:07+0000\n"
+"PO-Revision-Date: 2014-10-08 02:52+0000\n"
-"Last-Translator: RainDropR <rajaa@hilltx.com>\n"
+"Last-Translator: lukophron\n"
 "Language-Team: French (http://www.transifex.com/projects/p/imdbpy/language/fr/)\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
@ -33,11 +34,11 @@ msgstr "adaptation"
 # Default: Additional information
 msgid "additional-information"
-msgstr ""
+msgstr "information-additionnelle"
 # Default: Admissions
 msgid "admissions"
-msgstr ""
+msgstr "admissions"
 # Default: Agent address
 msgid "agent-address"
--- a/lib/imdb/parser/http/init.py
+++ b/lib/imdb/parser/http/init.py
@ -726,6 +726,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
        cont = self._retrieve(self.urls['person_main'] % personID + 'bio')
        return self.pProxy.bio_parser.parse(cont, getRefs=self._getRefs)
    def get_person_resume(self, personID):
        """Retrieve the "resume" page of the given person and parse it.

        The page address is the 'person_main' URL template filled with
        personID, followed by 'resume'; the downloaded content is handed
        to the resume parser together with the current getRefs setting.
        """
        resume_url = self.urls['person_main'] % personID + 'resume'
        page = self._retrieve(resume_url)
        parser = self.pProxy.resume_parser
        return parser.parse(page, getRefs=self._getRefs)
    def get_person_awards(self, personID):
        """Retrieve the "awards" page of the given person and parse it.

        The page address is the 'person_main' URL template filled with
        personID, followed by 'awards'.
        """
        awards_url = self.urls['person_main'] % personID + 'awards'
        page = self._retrieve(awards_url)
        parser = self.pProxy.person_awards_parser
        return parser.parse(page)
--- a/lib/imdb/parser/http/movieParser.py
+++ b/lib/imdb/parser/http/movieParser.py
@ -226,7 +226,7 @@ class DOMHTMLMovieParser(DOMParserBase):
                            Attribute(key="countries",
                                path="./h5[starts-with(text(), " \
                            "'Countr')]/../div[@class='info-content']//text()",
-                            postprocess=makeSplitter('|')),
+                                postprocess=makeSplitter('|')),
                            Attribute(key="language",
                                path="./h5[starts-with(text(), " \
                                        "'Language')]/..//text()",
@ -234,7 +234,7 @@ class DOMHTMLMovieParser(DOMParserBase):
                            Attribute(key='color info',
                                path="./h5[starts-with(text(), " \
                                        "'Color')]/..//text()",
-                                postprocess=makeSplitter('Color:')),
+                                postprocess=makeSplitter('|')),
                            Attribute(key='sound mix',
                                path="./h5[starts-with(text(), " \
                                        "'Sound Mix')]/..//text()",
@ -462,6 +462,8 @@ class DOMHTMLMovieParser(DOMParserBase):
                del data['other akas']
            if nakas:
                data['akas'] = nakas
        if 'color info' in data:
            data['color info'] = [x.replace('Color:', '', 1) for x in data['color info']]
        if 'runtimes' in data:
            data['runtimes'] = [x.replace(' min', u'')
                                for x in data['runtimes']]
@ -1177,7 +1179,7 @@ class DOMHTMLCriticReviewsParser(DOMParserBase):
                path="//div[@class='article']/div[@class='see-more']/a",
                attrs=Attribute(key='metacritic url',
                                path="./@href")) ]
-    
+
 class DOMHTMLOfficialsitesParser(DOMParserBase):
    """Parser for the "official sites", "external reviews", "newsgroup
    reviews", "miscellaneous links", "sound clips", "video clips" and
@ -1534,7 +1536,7 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
                                            '').strip()
            episode_title = episode.get('title', '').strip()
            episode_plot = episode.get('plot', '')
-            if not (episode_nr and episode_id and episode_title):
+            if not (episode_nr is not None and episode_id and episode_title):
                continue
            ep_obj = Movie(movieID=episode_id, title=episode_title,
                        accessSystem=self._as, modFunct=self._modFunct)
--- a/lib/imdb/parser/http/personParser.py
+++ b/lib/imdb/parser/http/personParser.py
@ -204,7 +204,7 @@ class DOMHTMLBioParser(DOMParserBase):
    _birth_attrs = [Attribute(key='birth date',
                        path={
                            'day': "./a[starts-with(@href, " \
-                                    "'/date/')]/text()",
+                                    "'/search/name?birth_monthday=')]/text()",
                            'year': "./a[starts-with(@href, " \
                                    "'/search/name?birth_year=')]/text()"
                            },
@ -215,7 +215,7 @@ class DOMHTMLBioParser(DOMParserBase):
    _death_attrs = [Attribute(key='death date',
                        path={
                            'day': "./a[starts-with(@href, " \
-                                    "'/date/')]/text()",
+                                    "'/search/name?death_monthday=')]/text()",
                            'year': "./a[starts-with(@href, " \
                                    "'/search/name?death_date=')]/text()"
                            },
@ -326,6 +326,107 @@ class DOMHTMLBioParser(DOMParserBase):
        return data
 class DOMHTMLResumeParser(DOMParserBase):
    """Parser for the "resume" page of a given person.
    The page should be provided as a string, as taken from
    the akas.imdb.com server.  The final result will be a
    dictionary, with a key for every relevant section.
    Example:
        resumeparser = DOMHTMLResumeParser()
        result = resumeparser.parse(resume_html_string)
    """
    _defGetRefs = True
    # The first three extractors share the same group (every 'section_box'
    # div) and the same key (the lowercased <h3> text with spaces replaced
    # by underscores); they differ in the kind of child element collected.
    extractors = [
            # <li> items of a 'resume_section_multi_list': (title, desc) pairs.
            Extractor(label='info',
                        group="//div[@class='section_box']",
                        group_key="./h3/text()",
                        group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                        path="./ul[@class='resume_section_multi_list']//li",
                        attrs=Attribute(key=None,
                            multi=True,
                            path={
                                'title': ".//b//text()",
                                'desc': ".//text()",
                                },
                            postprocess=lambda x: (x.get('title'), x.get('desc').strip().replace('\n', ' ')))),
            # <li> items of lists tagged with the '_imdbpy' class (a class
            # attribute inserted by the preprocessors below): plain strings.
            Extractor(label='other_info',
                        group="//div[@class='section_box']",
                        group_key="./h3/text()",
                        group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                        path="./ul[@class='_imdbpy']//li",
                        attrs=Attribute(key=None,
                            multi=True,
                            path=".//text()",
                            postprocess=lambda x: x.strip().replace('\n', ' '))),
            # Rows of a 'credits' table: three-item lists, one per column.
            Extractor(label='credits',
                        group="//div[@class='section_box']",
                        group_key="./h3/text()",
                        group_key_normalize=lambda x: x.lower().replace(' ', '_'),
                        path="./table[@class='credits']//tr",
                        attrs=Attribute(key=None,
                            multi=True,
                            path={
                                '0':".//td[1]//text()",
                                '1':".//td[2]//text()",
                                '2':".//td[3]//text()",
                            },
                            postprocess=lambda x: [x.get('0'),x.get('1'),x.get('2')])),
            Extractor(label='mini_info',
                        path="//div[@class='center']",
                        attrs=Attribute(key='mini_info',
                            path=".//text()",
                            postprocess=lambda x: x.strip().replace('\n', ' '))),
            Extractor(label='name',
                        path="//div[@class='center']/h1[@id='preview_user_name']",
                        attrs=Attribute(key='name',
                            path=".//text()",
                            postprocess=lambda x: x.strip().replace('\n', ' '))),
            Extractor(label='resume_bio',
                        path="//div[@id='resume_rendered_html']//p",
                        attrs=Attribute(key='resume_bio',
                            multi=True,
                            path=".//text()")),
            ]
    # Prefix every <ul> tag with an extra '<ul class="_imdbpy">' tag, which
    # is what the 'other_info' extractor above selects on.
    preprocessors = [
        (re.compile('(<ul>)', re.I), r'<ul class="_imdbpy">\1'),
        ]
    def postprocess_data(self, data):
        """Normalize the extracted sections and wrap them under 'resume'.

        - entries equal to the empty string are removed;
        - 'resume_bio' fragments are joined into one stripped string;
        - 'mini_info' and 'name' are left untouched;
        - sections made of three-item lists get their None items dropped
          (the lists are modified in place);
        - sections made of (title, desc) pairs become a dictionary;
        - anything else (empty sections, lists of plain strings) is kept
          as it is.

        Returns a dictionary with the single key 'resume', whose value is
        the (modified in place) data dictionary.
        """
        # Iterate over a snapshot of the keys: entries are deleted below.
        for key in list(data.keys()):
            value = data[key]
            if value == '':
                del data[key]
                # Nothing left to process for this key.
                continue
            if key in ('mini_info', 'name', 'resume_bio'):
                if key == 'resume_bio':
                    data[key] = "".join(value).strip()
                continue
            if not value:
                # Empty section: there is no first item to inspect.
                continue
            first = value[0]
            if not isinstance(first, (list, tuple)):
                # Plain strings (e.g. from 'other_info') must not be taken
                # for pairs/rows just because they are 2 or 3 chars long.
                continue
            if len(first) == 3:
                for item in value:
                    item[:] = [x for x in item if x is not None]
                continue
            if len(first) == 2:
                pairs = {}
                for item in value:
                    title = item[0]
                    if title is None:
                        continue
                    desc = item[1]
                    if ':' in title:
                        # The description repeats the title: remove it,
                        # drop one more leading character and skip
                        # entries left with no text.
                        text = desc.replace(title, '')[1:].strip()
                        if text == '':
                            continue
                        pairs[title.strip().replace(':', '')] = text
                    else:
                        pairs[title] = desc
                data[key] = pairs
        return {'resume': data}
 class DOMHTMLOtherWorksParser(DOMParserBase):
    """Parser for the "other works" and "agent" pages of a given person.
    The page should be provided as a string, as taken from
@ -502,6 +603,7 @@ from movieParser import DOMHTMLNewsParser
 _OBJECTS = {
    'maindetails_parser': ((DOMHTMLMaindetailsParser,), None),
    'bio_parser': ((DOMHTMLBioParser,), None),
    'resume_parser': ((DOMHTMLResumeParser,), None),
    'otherworks_parser': ((DOMHTMLOtherWorksParser,), None),
    #'agent_parser': ((DOMHTMLOtherWorksParser,), {'kind': 'agent'}),
    'person_officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
--- a/lib/imdb/parser/http/topBottomParser.py
+++ b/lib/imdb/parser/http/topBottomParser.py
@ -7,7 +7,7 @@ E.g.:
    http://akas.imdb.com/chart/top
    http://akas.imdb.com/chart/bottom
-Copyright 2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2009-2015 Davide Alberani <da@erlug.linux.it>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -43,14 +43,15 @@ class DOMHTMLTop250Parser(DOMParserBase):
    def _init(self):
        self.extractors = [Extractor(label=self.label,
-                        path="//div[@id='main']//table//tr",
+                        path="//div[@id='main']//div[1]//div//table//tbody//tr",
                        attrs=Attribute(key=None,
                                multi=True,
-                                path={self.ranktext: "./td[1]//text()",
+                                path={self.ranktext: "./td[2]//text()",
-                                        'rating': "./td[2]//text()",
+                                        'rating': "./td[3]//strong//text()",
-                                        'title': "./td[3]//text()",
+                                        'title': "./td[2]//a//text()",
-                                        'movieID': "./td[3]//a/@href",
+                                        'year': "./td[2]//span//text()",
-                                        'votes': "./td[4]//text()"
+                                        'movieID': "./td[2]//a/@href",
                                        'votes': "./td[3]//strong/@title"
                                        }))]
    def postprocess_data(self, data):
@ -72,12 +73,16 @@ class DOMHTMLTop250Parser(DOMParserBase):
            if theID in seenIDs:
                continue
            seenIDs.append(theID)
-            minfo = analyze_title(d['title'])
+            minfo = analyze_title(d['title']+" "+d['year'])
            try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
            except: pass
            if 'votes' in d:
-                try: minfo['votes'] = int(d['votes'].replace(',', ''))
+                try:
-                except: pass
+                    votes = d['votes'].replace(' votes','')
                    votes = votes.split(' based on ')[1]
                    minfo['votes'] = int(votes.replace(',', ''))
                except:
                    pass
            if 'rating' in d:
                try: minfo['rating'] = float(d['rating'])
                except: pass
--- a/lib/imdb/parser/http/utils.py
+++ b/lib/imdb/parser/http/utils.py
@ -441,12 +441,6 @@ class DOMParserBase(object):
        self._useModule = useModule
        nrMods = len(useModule)
        _gotError = False
        # Force warnings.warn() to omit the source code line in the message
        formatwarning_orig = warnings.formatwarning
        warnings.formatwarning = lambda message, category, filename, lineno, line=None: \
            formatwarning_orig(message, category, filename, lineno, line='')
        for idx, mod in enumerate(useModule):
            mod = mod.strip().lower()
            try:
--- a/lib/imdb/utils.py
+++ b/lib/imdb/utils.py
@ -639,11 +639,14 @@ def analyze_company_name(name, stripNotes=False):
    o_name = name
    name = name.strip()
    country = None
-    if name.endswith(']'):
+    if name.startswith('['):
-        idx = name.rfind('[')
+        name = re.sub('[!@#$\(\)\[\]]', '', name)
-        if idx != -1:
+    else:
-            country = name[idx:]
+        if name.endswith(']'):
-            name = name[:idx].rstrip()
+            idx = name.rfind('[')
            if idx != -1:
                country = name[idx:]
                name = name[:idx].rstrip()
    if not name:
        raise IMDbParserError('invalid name: "%s"' % o_name)
    result = {'name': name}
@ -957,7 +960,7 @@ def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
            crl = [crl]
        for cr in crl:
            crTag = cr.__class__.__name__.lower()
-            crValue = cr['long imdb name']
+            crValue = cr.get('long imdb name') or u''
            crValue = _normalizeValue(crValue)
            crID = cr.getID()
            if crID is not None: