SickGear/lib/api_imdb/imdb_api.py

# encoding:utf-8
# author:Prinz23
# project:imdb_api

__author__ = 'Prinz23'
__version__ = '1.0'
__api_version__ = '1.0.0'

import logging
import re

# from .imdb_exceptions import *
from bs4_parser import BS4Parser
from exceptions_helper import ex
from lib import imdbpie
from lib.dateutil.parser import parser
# from lib.tvinfo_base.exceptions import BaseTVinfoShownotfound
from lib.tvinfo_base import (
    TVInfoCharacter, TVInfoPerson, PersonGenders, TVINFO_IMDB,
    # TVINFO_FACEBOOK, TVINFO_INSTAGRAM, TVINFO_TMDB, TVINFO_TRAKT,
    # TVINFO_TVDB, TVINFO_TVRAGE, TVINFO_TWITTER, TVINFO_WIKIPEDIA,
    TVInfoBase, TVInfoIDs, TVInfoShow)
from sg_helpers import clean_data, enforce_type, get_url, try_int
from json_helper import json_loads

from six import iteritems
from six.moves import http_client as httplib
from six.moves.urllib.parse import urlencode, urljoin, quote, unquote


# noinspection PyUnreachableCode
if False:
    from typing import Any, AnyStr, Dict, List, Optional
    from six import integer_types

# shared dateutil parser instance, _convert_person uses it to parse birth and death dates
tz_p = parser()
# module logger, a NullHandler is attached so that the logger always has a handler
log = logging.getLogger('imdb.api')
log.addHandler(logging.NullHandler())


def _get_imdb(self, url, query=None, params=None):
    """Request `url` through get_url and return the decoded JSON payload.

    This function is assigned to `imdbpie.Imdb._get` in this module, so `self` is an `imdbpie.Imdb` instance.

    :param url: address to request
    :param query: handed to `self._parse_dirty_json` if the response body is not plain JSON
    :param params: optional query parameters, these are also part of the url that gets signed
    :return: parsed response dict, or None if the payload holds a truthy `error` entry
    :raises LookupError: if the response status is 404
    :raises imdbpie.ImdbAPIError: if there is no response, or for any other non-ok response
    """
    headers = {'Accept-Language': self.locale}
    # the auth headers are created for the complete url, i.e. including the encoded query string
    full_url = '{0}?{1}'.format(url, urlencode(params)) if params else url
    headers.update(self.get_auth_headers(full_url))
    resp = get_url(url, headers=headers, params=params, return_response=True)

    # NOTE(review): get_url is assumed to be able to hand back None instead of a response object when the
    # request itself fails - confirm against sg_helpers. The identity test is deliberate because a
    # requests style response is falsy for error statuses, and those are handled right below
    if None is resp:
        raise imdbpie.ImdbAPIError('No response from {0}'.format(url))

    if not resp.ok:
        if httplib.NOT_FOUND == resp.status_code:
            raise LookupError('Resource {0} not found'.format(url))
        raise imdbpie.ImdbAPIError('{0} {1}'.format(resp.status_code, resp.text))

    resp_data = resp.content.decode('utf-8')
    try:
        resp_dict = json_loads(resp_data)
    except ValueError:
        # not plain JSON, let imdbpie extract the payload
        resp_dict = self._parse_dirty_json(data=resp_data, query=query)

    if resp_dict.get('error'):
        return None
    return resp_dict


# monkeypatch: replace the `_get` method of imdbpie's Imdb class with _get_imdb, which sends requests via get_url
imdbpie.Imdb._get = _get_imdb


class IMDbIndexer(TVInfoBase):
    # supported_id_searches = [TVINFO_IMDB]
    supported_person_id_searches = [TVINFO_IMDB]
    supported_id_searches = [TVINFO_IMDB]

    # noinspection PyUnusedLocal
    # noinspection PyDefaultArgument
    def __init__(self, *args, **kwargs):
        """Create the indexer, all arguments are handed unchanged to the TVInfoBase initialiser."""
        super(IMDbIndexer, self).__init__(*args, **kwargs)

    def search(self, series):
        # type: (AnyStr) -> List
        """Look up titles on IMDb that match a series name.

        The cached value is returned if `cache_search` is enabled in config and the cache holds an entry.
        Otherwise IMDb is queried and the outcome, an empty list if the query failed, is written to the cache.

        :param series: series name to search for
        :return: result list
        """
        cache_name_key = 's-title-%s' % series
        # is_none presumably flags that the cached value itself is None - confirm in TVInfoBase
        is_none, shows = self._get_cache_entry(cache_name_key)
        if self.config.get('cache_search') and (None is not shows or is_none):
            return shows

        result = []
        try:
            result = imdbpie.Imdb().search_for_title(series)
        except (BaseException, Exception):
            pass
        self._set_cache_entry(cache_name_key, result, expire=self.search_cache_expire)
        return result

    def _search_show(self, name=None, ids=None, **kwargs):
        # type: (AnyStr, Dict[integer_types, integer_types], Optional[Any]) -> List[TVInfoShow]
        """Search IMDb for shows by id and/or by name.

        Records that cannot be converted are skipped and logged, so that one malformed record
        does not discard the remaining results.

        :param name: series name, or list of series names, to search for
        :param ids: dict of {tvinfo source: id}, only TVINFO_IMDB ids are looked up
        :param kwargs: not used by this implementation
        :return: list of found shows without duplicate ids, in order of first occurrence
        """
        def _make_result_dict(s):
            """Create a TVInfoShow from one raw IMDb record"""
            # the id is read from `id`, falling back to `imdb_id`
            imdb_id = try_int(re.search(r'tt(\d+)', s.get('id') or s.get('imdb_id')).group(1), None)
            ti_show = TVInfoShow()
            ti_show.seriesname = clean_data(s['title'])
            ti_show.id = imdb_id
            # `or {}` also covers keys that are present but hold None
            ti_show.firstaired = (s.get('releaseDetails') or {}).get('date') or s.get('year')
            ti_show.genre_list = s.get('genres')
            outline = ((s.get('plot') or {}).get('outline') or {}).get('text')
            ti_show.overview = enforce_type(clean_data(outline), str, '')
            ti_show.poster = s.get('image') and s['image'].get('url')
            ti_show.ids = TVInfoIDs(imdb=imdb_id)
            return ti_show

        def _convert(items):
            """Convert raw IMDb records to TVInfoShow objects, skipping and logging those that fail"""
            converted = []
            for item in items:
                try:
                    converted.append(_make_result_dict(item))
                except Exception as e:
                    log.debug('Error converting show search result: %s' % ex(e))
            return converted

        results = []
        if ids:
            for t, p in iteritems(ids):
                if t not in self.supported_id_searches or TVINFO_IMDB != t:
                    continue
                cache_id_key = 's-id-%s-%s' % (TVINFO_IMDB, p)
                is_none, show = self._get_cache_entry(cache_id_key)
                if not self.config.get('cache_search') or (None is show and not is_none):
                    try:
                        show = imdbpie.Imdb().get_title_auxiliary('tt%07d' % p)
                    except (BaseException, Exception):
                        # a failed lookup is not cached
                        continue
                    self._set_cache_entry(cache_id_key, show, expire=self.search_cache_expire)
                if show:
                    results.extend(_convert([show]))
        if name:
            for n in name if isinstance(name, list) else [name]:
                try:
                    shows = self.search(n)
                except (BaseException, Exception) as e:
                    log.debug('Error searching for show: %s' % ex(e))
                    continue
                # search may return None from its cache
                results.extend(_convert(shows or []))

        # drop duplicate ids, the first occurrence wins
        seen = set()
        unique = []
        for r in results:
            if r.id not in seen:
                seen.add(r.id)
                unique.append(r)
        return unique

    @staticmethod
    def _convert_person(person_obj, filmography=None, bio=None):
        """Build a TVInfoPerson from IMDb person data.

        A dict that carries an `imdb_id` key (as passed by `_search_person` for name search hits) only
        yields id and name. Any other input is read as a full record with `id` and `base` entries,
        optionally enriched with series credits and a biography.

        :param person_obj: person data
        :param filmography: optional dict, its `filmography` entry lists the credits. Only titles of type
            `tvSeries` or `tvMiniSeries` are turned into characters
        :param bio: optional biography text
        :return: the created TVInfoPerson
        """
        if isinstance(person_obj, dict) and 'imdb_id' in person_obj:
            search_id = try_int(re.search(r'(\d+)', person_obj['imdb_id']).group(1))
            return TVInfoPerson(p_id=search_id, name=person_obj['name'], ids=TVInfoIDs(ids={TVINFO_IMDB: search_id}))

        def _date_of(key):
            # any failure, e.g. a missing key or an unparsable value, gives None
            try:
                return person_obj['base'][key] and tz_p.parse(person_obj['base'][key]).date()
            except (BaseException, Exception):
                return None

        characters = []
        credit_list = (filmography and filmography['filmography']) or []
        for credit in credit_list:
            if credit['titleType'] in ('tvSeries', 'tvMiniSeries'):
                # a credit without character names still creates one placeholder character
                for role in credit.get('characters') or ['unknown name']:
                    ti_show = TVInfoShow()
                    ti_show.id = try_int(re.search(r'(\d+)', credit.get('id')).group(1))
                    ti_show.ids.imdb = ti_show.id
                    ti_show.seriesname = credit.get('title')
                    ti_show.firstaired = credit.get('year')
                    characters.append(TVInfoCharacter(
                        name=role, ti_show=ti_show,
                        start_year=credit.get('startYear'), end_year=credit.get('endYear')))

        birthdate = _date_of('birthDate')
        deathdate = _date_of('deathDate')
        imdb_id = try_int(re.search(r'(\d+)', person_obj['id']).group(1))
        person_ids = TVInfoIDs(ids={TVINFO_IMDB: imdb_id})
        base = person_obj['base']
        return TVInfoPerson(
            p_id=imdb_id, ids=person_ids, characters=characters,
            name=base.get('name'), real_name=base.get('realName'),
            nicknames=set(base.get('nicknames') or []),
            akas=set(base.get('akas') or []),
            bio=bio, gender=PersonGenders.imdb_map.get(base.get('gender'), PersonGenders.unknown),
            image=base.get('image', {}).get('url'),
            birthdate=birthdate, birthplace=base.get('birthPlace'),
            deathdate=deathdate, deathplace=base.get('deathPlace'),
            height=base.get('heightCentimeters')
        )

    def _search_person(self, name=None, ids=None):
        # type: (AnyStr, Dict[integer_types, integer_types]) -> List[TVInfoPerson]
        """
        find persons by id and/or by name
        :param name: name to search for
        :param ids: dict of ids to search
        :return: list of found persons, those found by id come first
        """
        ids = ids or {}
        results = []
        for tv_src in self.supported_person_id_searches:
            if tv_src not in ids or TVINFO_IMDB != tv_src:
                continue
            try:
                person = self.get_person(ids[tv_src])
            except (BaseException, Exception):
                person = None
            if person:
                results.append(person)

        if not name:
            return results

        cache_name_key = 'p-name-%s' % name
        is_none, found = self._get_cache_entry(cache_name_key)
        if None is found and not is_none:
            try:
                found = imdbpie.Imdb().search_for_name(name)
            except (BaseException, Exception):
                found = None
            # a failed search is cached as None
            self._set_cache_entry(cache_name_key, found)

        for candidate in found or []:
            for known in results:
                if candidate['imdb_id'] == 'nm%07d' % known.id:
                    # already in the results
                    break
            else:
                results.append(self._convert_person(candidate))
        return results

    @staticmethod
    def _get_bio(p_id):
        # type: (integer_types) -> Optional[AnyStr]
        """Scrape the mini biography of a person from the IMDb website.

        This is best effort, every failure results in None.

        :param p_id: numeric IMDb person id, i.e. without the `nm` prefix
        :return: biography text, or None if the page or its bio element is not available
        """
        try:
            bio = get_url('https://www.imdb.com/name/nm%07d/bio' % p_id, headers={'Accept-Language': 'en'})
            if not bio:
                return
            # assumes BS4Parser yields a Beautiful Soup 4 object - confirm against bs4_parser
            with BS4Parser(bio) as bio_item:
                bv = bio_item.find('div', attrs={'data-testid': re.compile('mini_bio$')}, recursive=True)
                if None is bv:
                    # the page has no mini bio element
                    return
                # current bs4 method names are used, the camelCase aliases are deprecated
                for a in bv.find_all('a'):
                    # keep the link text, drop the link markup
                    a.unwrap()
                for b in bv.find_all('br'):
                    b.replace_with('\n')
                return bv.get_text().strip()
        except (BaseException, Exception):
            return

    def get_person(self, p_id, get_show_credits=False, get_images=False, **kwargs):
        # type: (integer_types, bool, bool, Any) -> Optional[TVInfoPerson]
        """Get a person from IMDb, with biography and optionally with series credits.

        Main data, biography and filmography are cached separately; a failed fetch is cached as None.

        :param p_id: numeric IMDb person id, i.e. without the `nm` prefix
        :param get_show_credits: True to also fetch the filmography, used to create the characters of the person
        :param get_images: not used by this implementation
        :param kwargs: not used by this implementation
        :return: the person, or None if `p_id` is falsy or no person data is available
        """
        if not p_id:
            return
        cache_main_key, cache_bio_key, cache_credits_key = 'p-main-%s' % p_id, 'p-bio-%s' % p_id, 'p-credits-%s' % p_id
        is_none, p = self._get_cache_entry(cache_main_key)
        if None is p and not is_none:
            try:
                p = imdbpie.Imdb().get_name(imdb_id='nm%07d' % p_id)
            except (BaseException, Exception):
                p = None
            self._set_cache_entry(cache_main_key, p)
        if not p:
            # without main data no person is returned, so don't scrape the bio or request the filmography
            return

        is_none, bio = self._get_cache_entry(cache_bio_key)
        if None is bio and not is_none:
            bio = self._get_bio(p_id)
            self._set_cache_entry(cache_bio_key, bio)

        fg = None
        if get_show_credits:
            is_none, fg = self._get_cache_entry(cache_credits_key)
            if None is fg and not is_none:
                try:
                    fg = imdbpie.Imdb().get_name_filmography(imdb_id='nm%07d' % p_id)
                except (BaseException, Exception):
                    fg = None
                self._set_cache_entry(cache_credits_key, fg)

        return self._convert_person(p, filmography=fg, bio=bio)