SickGear/lib/imdbpie/imdbpie.py

358 lines
13 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import re
import json
from datetime import date
import tempfile
import logging
from trans import trans
import requests
from six import text_type
from six.moves import http_client as httplib
from six.moves.urllib.parse import urlencode, urljoin, quote
from .constants import BASE_URI, SEARCH_BASE_URI
from .auth import Auth
from .exceptions import ImdbAPIError
logger = logging.getLogger(__name__)
# Lookup table: client method name -> API path template.
# Every template takes a single ``imdb_id`` format field. ``Imdb.__getattr__``
# consults this table to build the corresponding client methods on demand.
_SIMPLE_GET_ENDPOINTS = {
    # name (person) resources
    'get_name_images': '/name/{imdb_id}/images',
    'get_name_videos': '/name/{imdb_id}/videos',
    # title resources
    'get_title_metacritic_reviews': '/title/{imdb_id}/metacritic',
    'get_title_user_reviews': '/title/{imdb_id}/userreviews',
    'get_title_videos': '/title/{imdb_id}/videos',
    'get_title_images': '/title/{imdb_id}/images',
    'get_title_companies': '/title/{imdb_id}/companies',
    'get_title_technical': '/title/{imdb_id}/technical',
    'get_title_trivia': '/title/{imdb_id}/trivia',
    'get_title_goofs': '/title/{imdb_id}/goofs',
    'get_title_soundtracks': '/title/{imdb_id}/soundtracks',
    'get_title_news': '/title/{imdb_id}/news',
    'get_title_plot': '/title/{imdb_id}/plot',
    'get_title_plot_synopsis': '/title/{imdb_id}/plotsynopsis',
    'get_title_plot_taglines': '/title/{imdb_id}/taglines',
    'get_title_versions': '/title/{imdb_id}/versions',
    'get_title_releases': '/title/{imdb_id}/releases',
    'get_title_quotes': '/title/{imdb_id}/quotes',
    'get_title_connections': '/title/{imdb_id}/connections',
    'get_title_genres': '/title/{imdb_id}/genres',
    'get_title_similarities': '/title/{imdb_id}/similarities',
    'get_title_awards': '/title/{imdb_id}/awards',
    'get_title_ratings': '/title/{imdb_id}/ratings',
    'get_title_credits': '/title/{imdb_id}/fullcredits',
    # remaining name (person) resources
    'get_name': '/name/{imdb_id}/fulldetails',
    'get_name_filmography': '/name/{imdb_id}/filmography',
}
# IMDb API client built on top of ``Auth``.
# NOTE(review): ``_get`` calls ``self.get_auth_headers``, which is not defined
# in this class; presumably ``Auth`` (imported from ``.auth``) provides it.
class Imdb(Auth):
def __init__(self, locale=None, exclude_episodes=False, session=None, cachedir=None):
    """
    Store the client configuration on the instance.

    :param locale: Locale string such as ``en_US``; ``en_US`` is used when
        no (or an empty) value is given. The upper-cased part after the
        last underscore is stored as ``self.region``.
    :param exclude_episodes: Flag stored as ``self.exclude_episodes``.
    :param session: Object used for HTTP requests; a new
        ``requests.Session`` is created when none is given.
    :param cachedir: Directory stored as ``self._cachedir``; the system
        temporary directory is used when none is given.
    """
    self.locale = locale if locale else 'en_US'
    # rpartition yields the whole string when there is no underscore
    self.region = self.locale.rpartition('_')[2].upper()
    self.exclude_episodes = exclude_episodes
    self.session = session if session else requests.Session()
    self._cachedir = cachedir if cachedir else tempfile.gettempdir()
def __getattr__(self, name):
    """
    Provide the client methods listed in ``_SIMPLE_GET_ENDPOINTS``.

    :param name: Attribute name being looked up.
    :returns: A callable built by ``_simple_get_method`` for ``name``.
    :raises AttributeError: If ``name`` is not a key of
        ``_SIMPLE_GET_ENDPOINTS``.
    """
    if name not in _SIMPLE_GET_ENDPOINTS:
        raise AttributeError(
            '{obj!r} object has no attribute {name!r}'.format(
                obj=self.__class__.__name__,
                name=name,
            )
        )
    endpoint_path = _SIMPLE_GET_ENDPOINTS[name]
    return self._simple_get_method(method=name, path=endpoint_path)
def get_title(self, imdb_id):
    """
    Return the ``/title/<imdb_id>/auxiliary`` resource.

    :param imdb_id: The imdb id of the title.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id``.
    :raises LookupError: If the id is a redirection id, the resource is
        not found, or the title is an episode while ``exclude_episodes``
        is ``True``.
    """
    logger.info('called get_title %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    self._redirection_title_check(imdb_id)
    resource_path = '/title/{0}/auxiliary'.format(imdb_id)
    try:
        resource = self._get_resource(resource_path)
    except LookupError:
        self._title_not_found()
    # The title type is only inspected when episodes are excluded.
    if self.exclude_episodes is not True:
        return resource
    if resource['base']['titleType'] == 'tvEpisode':
        raise LookupError(
            'Title not found. Title was an episode and '
            '"exclude_episodes" is set to true'
        )
    return resource
def get_title_auxiliary(self, imdb_id):
    """
    Return the rendered ``title-auxiliary-v31`` template for a title.

    :param imdb_id: The imdb id of the title.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id``.
    :raises LookupError: If the id is a redirection id, the resource is
        not found, or the title is an episode while ``exclude_episodes``
        is ``True``.
    """
    logger.info('called get_title_auxiliary %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    self._redirection_title_check(imdb_id)
    path = '/template/imdb-ios-writable/title-auxiliary-v31.jstl/render'
    request_url = urljoin(BASE_URI, path)
    request_params = {
        'inlineBannerAdWeblabOn': 'false',
        'minwidth': '320',
        'osVersion': '11.3.0',
        'region': self.region,
        'tconst': imdb_id,
        'today': date.today().strftime('%Y-%m-%d'),
    }
    try:
        resource = self._get(url=request_url, params=request_params)
    except LookupError:
        self._title_not_found()
    # The title type is only inspected when episodes are excluded.
    if self.exclude_episodes is not True:
        return resource
    if resource['titleType'].lower() == 'tvepisode':
        raise LookupError(
            'Title not found. Title was an episode and '
            '"exclude_episodes" is set to true'
        )
    return resource
def _simple_get_method(self, method, path):
    """
    Build the client method for one ``_SIMPLE_GET_ENDPOINTS`` entry.

    :param method: Method name, used only in the log message.
    :param path: API path template with an ``imdb_id`` format field.
    :returns: A function taking ``imdb_id`` that validates it, runs the
        redirection check and returns the resource at the formatted path.
    """
    def get(imdb_id):
        logger.info('called %s %s', method, imdb_id)
        self.validate_imdb_id(imdb_id)
        self._redirection_title_check(imdb_id)
        resource_path = path.format(imdb_id=imdb_id)
        return self._get_resource(resource_path)

    return get
def title_exists(self, imdb_id):
    """
    Check the imdb.com title page for ``imdb_id`` without following redirects.

    :param imdb_id: The imdb id of the title.
    :returns: ``True`` for status 200; ``False`` for status 404 or 301.
        For any other status ``response.raise_for_status()`` is called,
        and ``None`` is returned if that call does not raise.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id``.
    """
    self.validate_imdb_id(imdb_id)
    response = self.session.get(
        'https://www.imdb.com/title/{0}/'.format(imdb_id),
        allow_redirects=False,
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    status = response.status_code
    if status == httplib.OK:
        return True
    # A permanent redirect means the id is a redirection id, not a title.
    if status == httplib.NOT_FOUND or status == httplib.MOVED_PERMANENTLY:
        return False
    response.raise_for_status()
def _suggest_search(self, query):
    """
    Query the suggestion endpoint under ``SEARCH_BASE_URI``.

    :param query: Search text.
    :returns: Whatever ``_get`` returns for the suggestion url.
    :raises ValueError: If the query has no alphanumeric character.
    """
    # ``trans`` turns national characters into similar sounding latin ones
    latin_query = trans(query)
    encoded_query = quote(latin_query)
    suggest_path = '/suggests/{0}/{1}.json'.format(
        self._query_first_alpha_num(latin_query), encoded_query
    )
    return self._get(
        url=urljoin(SEARCH_BASE_URI, suggest_path), query=encoded_query
    )
def search_for_name(self, name):
    """
    Search for people by name.

    :param name: Search text; each run of non-word characters is replaced
        by ``+`` and leading/trailing ``+`` are stripped before searching.
    :returns: A list of dicts with the keys ``name`` and ``imdb_id``, one
        per suggestion whose id starts with ``nm``.
    """
    logger.info('called search_for_name %s', name)
    cleaned_name = re.sub(r'\W+', '+', name).strip('+')
    suggestions = self._suggest_search(cleaned_name).get('d', ())
    # ids not starting with "nm" are non-person results and are skipped
    return [
        {'name': entry['l'], 'imdb_id': entry['id']}
        for entry in suggestions
        if entry['id'].startswith('nm')
    ]
def search_for_title(self, title):
    """
    Search for titles by name.

    :param title: Search text; each run of non-word characters is replaced
        by ``+`` and leading/trailing ``+`` are stripped before searching.
    :returns: A list of dicts with the keys ``title``, ``year`` (text, or
        ``None`` when the suggestion has no truthy ``y``), ``imdb_id`` and
        ``type``, one per suggestion whose id starts with ``tt``.
    """
    logger.info('called search_for_title %s', title)
    cleaned_title = re.sub(r'\W+', '+', title).strip('+')
    suggestions = self._suggest_search(cleaned_title).get('d', ())
    # ids not starting with "tt" are non-title results and are skipped
    return [
        {
            'title': entry['l'],
            'year': text_type(entry['y']) if entry.get('y') else None,
            'imdb_id': entry['id'],
            'type': entry.get('q'),
        }
        for entry in suggestions
        if entry['id'].startswith('tt')
    ]
def get_popular_titles(self):
    """Return the ``/chart/titlemeter`` resource."""
    chart_path = '/chart/titlemeter'
    return self._get_resource(chart_path)
def get_popular_shows(self):
    """Return the ``/chart/tvmeter`` resource."""
    chart_path = '/chart/tvmeter'
    return self._get_resource(chart_path)
def get_popular_movies(self):
    """Return the ``/chart/moviemeter`` resource."""
    chart_path = '/chart/moviemeter'
    return self._get_resource(chart_path)
def get_title_episodes(self, imdb_id):
    """
    Return the ``/title/<imdb_id>/episodes`` resource.

    :param imdb_id: The imdb id of the title.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id`` or if
        ``exclude_episodes`` is truthy on this client.
    """
    logger.info('called get_title_episodes %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    if self.exclude_episodes:
        raise ValueError('exclude_episodes is current set to true')
    episodes_path = '/title/{0}/episodes'.format(imdb_id)
    return self._get_resource(episodes_path)
def get_title_episodes_detailed(
    self, imdb_id, season, limit=500, region=None, offset=0
):
    """
    Request detailed information for one season of a tv series.

    :param imdb_id: The imdb id including the TT prefix.
    :param season: The season to request, counted from one.
    :param limit: Limit the amount of episodes returned for the season;
        sent as the ``end`` request parameter.
    :param region: Two capital letter region code in ISO 3166-1 alpha-2;
        only sent when truthy.
    :param offset: Offset episode results by this value; sent as the
        ``start`` request parameter.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id`` or
        ``season`` is smaller than one.
    """
    logger.info('called get_title_episodes_detailed %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    if season < 1:
        raise ValueError('season must be greater than zero')
    zero_based_season = season - 1  # api seasons are zero indexed
    params = {
        'end': limit,
        'start': offset,
        'season': zero_based_season,
        'tconst': imdb_id,
    }
    if region:
        params['region'] = region
    render_path = '/template/imdb-ios-writable/tv-episodes-v2.jstl/render'
    return self._get(urljoin(BASE_URI, render_path), params=params)
def get_title_top_crew(self, imdb_id):
    """
    Request detailed information about a title's top crew
    (ie: directors, writers, etc.).

    :param imdb_id: The imdb id including the TT prefix.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id``.
    """
    logger.info('called get_title_top_crew %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    render_path = '/template/imdb-android-writable/7.3.top-crew.jstl/render'
    return self._get(
        urljoin(BASE_URI, render_path),
        params={'tconst': imdb_id},
    )
@staticmethod
def _parse_dirty_json(data, query=None):
    """
    Decode the JSON wrapped in an ``imdb$<callback>(<json>)`` response.

    :param data: Response text starting with the ``imdb$`` wrapper.
    :param query: Callback suffix (the quoted search query). When given,
        only that exact callback is accepted; otherwise any callback name
        is accepted.
    :returns: The decoded JSON payload.
    :raises ValueError: If ``data`` does not have the expected wrapper or
        the wrapped payload is not valid JSON.
    """
    if query is None:
        # The callback name ends at the first "(". A greedy ".+" here
        # would run on to the LAST "(" in the data and cut into payloads
        # that contain parentheses, e.g. "Film (2010)".
        wrapper_pattern = r'imdb\$[^(]+\((.+)\)'
    else:
        # No need to unquote as the json is containing quoted query.
        # Escape it so regex metacharacters in the query match literally.
        wrapper_pattern = r'imdb\${0}\((.+)\)'.format(re.escape(query))
    matched = re.match(wrapper_pattern, data, re.IGNORECASE)
    if matched is None:
        raise ValueError(
            'response is not in the expected imdb$callback(json) format'
        )
    return json.loads(matched.group(1))
@staticmethod
def validate_imdb_id(imdb_id):
    """
    Check that ``imdb_id`` looks like an imdb id.

    An id is two ASCII letters followed by at least seven digits and
    nothing else. The id is interpolated into request paths by the client
    methods, so trailing characters (``tt0111161/../x``) are rejected
    rather than ignored.

    :param imdb_id: The id to check, e.g. ``tt0111161``.
    :returns: ``None`` when the id is valid.
    :raises ValueError: If ``imdb_id`` is not text or does not have the
        expected form.
    """
    # ``\Z`` anchors at the very end (``$`` would tolerate a trailing
    # newline). Seven or more digits keeps the longer ids that the
    # previous, unanchored pattern also let through.
    id_pattern = r'[a-zA-Z]{2}[0-9]{7,}\Z'
    try:
        matched = re.match(id_pattern, imdb_id)
    except TypeError:
        # not a string at all (None, int, ...)
        matched = None
    if matched is None:
        raise ValueError('invalid imdb id')
@staticmethod
def _is_redirection_result(response):
    """
    Tell whether ``response`` is that of a redirection.

    Redirection results have no information of use.

    :param response: Mapping with a ``data`` mapping inside.
    :returns: ``True`` when ``data`` has a truthy ``tconst`` that differs
        from ``data['news']['channel']`` (a missing ``news`` or
        ``channel`` counts as different); ``False`` otherwise.
    """
    payload = response['data']
    tconst = payload.get('tconst')
    if not tconst:
        return False
    news_channel = payload.get('news', {}).get('channel')
    return bool(tconst != news_channel)
def _get_resource(self, path):
    """
    Fetch ``path`` relative to ``BASE_URI`` and return its ``resource`` entry.

    :param path: API path to join onto ``BASE_URI``.
    :returns: The value stored under ``resource`` in the ``_get`` result.
    """
    response = self._get(url=urljoin(BASE_URI, path))
    return response['resource']
def _get(self, url, query=None, params=None):
    """
    Perform an authenticated GET request and decode the response.

    :param url: Url to request.
    :param query: Passed on to ``_parse_dirty_json`` when the body is not
        plain JSON.
    :param params: Optional query string parameters.
    :returns: The decoded response, or ``None`` when it has a truthy
        ``error`` entry.
    :raises LookupError: If the response status is 404.
    :raises ImdbAPIError: For any other response that is not ``ok``.
    """
    headers = {'Accept-Language': self.locale}
    # Auth headers are generated for the url including its query string.
    signed_url = url
    if params:
        signed_url = '{0}?{1}'.format(url, urlencode(params))
    headers.update(self.get_auth_headers(signed_url))

    resp = self.session.get(url, headers=headers, params=params)
    if not resp.ok:
        if resp.status_code == httplib.NOT_FOUND:
            raise LookupError('Resource {0} not found'.format(url))
        raise ImdbAPIError('{0} {1}'.format(resp.status_code, resp.text))

    body = resp.content.decode('utf-8')
    try:
        decoded = json.loads(body)
    except ValueError:
        # not plain JSON: fall back to the wrapped ("dirty") format
        decoded = self._parse_dirty_json(data=body, query=query)
    return None if decoded.get('error') else decoded
def _redirection_title_check(self, imdb_id):
    """
    Report ``imdb_id`` as not found when it is a redirection id.

    :param imdb_id: The imdb id of the title.
    :raises LookupError: Through ``_title_not_found`` when
        ``is_redirection_title`` returns a truthy value.
    """
    if not self.is_redirection_title(imdb_id):
        return
    reason = '{0} is a redirection imdb id'.format(imdb_id)
    self._title_not_found(msg=reason)
def is_redirection_title(self, imdb_id):
    """
    Tell whether the imdb.com title page for ``imdb_id`` answers with a 301.

    The page is requested without following redirects.

    :param imdb_id: The imdb id of the title.
    :returns: ``True`` when the status is 301 (moved permanently), else
        ``False``.
    :raises ValueError: If ``validate_imdb_id`` rejects ``imdb_id``.
    """
    self.validate_imdb_id(imdb_id)
    response = self.session.get(
        'https://www.imdb.com/title/{0}/'.format(imdb_id),
        allow_redirects=False,
    )
    if response.status_code != httplib.MOVED_PERMANENTLY:
        return False
    return True
def _query_first_alpha_num(self, query):
    """
    Return the first alphanumeric character of the lower-cased ``query``.

    :param query: Search text.
    :raises ValueError: If ``query`` has no alphanumeric character.
    """
    first_match = next(
        (char for char in query.lower() if char.isalnum()), None
    )
    if first_match is None:
        raise ValueError(
            'invalid query, does not contain any alphanumeric characters'
        )
    return first_match
def _title_not_found(self, msg=''):
    """
    Raise the ``LookupError`` used for titles that cannot be returned.

    :param msg: Optional detail; when truthy it is appended to the base
        message after a single space.
    :raises LookupError: Always.
    """
    detail = ' {0}'.format(msg) if msg else msg
    raise LookupError('Title not found.{0}'.format(detail))