Update imdbpie 5.6.4 (f695e87) → 5.6.5 (f8ed7a0).

This commit is contained in:
JackDandy 2024-06-07 13:27:06 +01:00
parent 13a7cd8169
commit 909fd3d24e
5 changed files with 124 additions and 92 deletions

View file

@ -7,6 +7,7 @@
* Update feedparser 6.0.10 (9865dec) to 6.0.11 (efcb89b) * Update feedparser 6.0.10 (9865dec) to 6.0.11 (efcb89b)
* Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141) * Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141)
* Update idna library 3.4 (cab054c) to 3.7 (1d365e1) * Update idna library 3.4 (cab054c) to 3.7 (1d365e1)
* Update imdbpie 5.6.4 (f695e87) to 5.6.5 (f8ed7a0)
* Update Requests library 2.31.0 (8812812) to 2.32.3 (0e322af) * Update Requests library 2.31.0 (8812812) to 2.32.3 (0e322af)
* Update Tornado Web Server 6.4 (b3f2a4b) to 6.4.1 (2a0e1d1) * Update Tornado Web Server 6.4 (b3f2a4b) to 6.4.1 (2a0e1d1)
* Update urllib3 2.0.7 (56f01e0) to 2.2.1 (54d6edf) * Update urllib3 2.0.7 (56f01e0) to 2.2.1 (54d6edf)

View file

@ -50,7 +50,7 @@ class ZuluHmacAuthV3HTTPHandler(object):
them into a string, separated by newlines. them into a string, separated by newlines.
""" """
vals = sorted(['%s:%s' % (n.lower().strip(), vals = sorted(['%s:%s' % (n.lower().strip(),
headers_to_sign[n].strip()) for n in headers_to_sign]) headers_to_sign[n].strip()) for n in headers_to_sign])
return '\n'.join(vals) return '\n'.join(vals)
def headers_to_sign(self, http_request): def headers_to_sign(self, http_request):
@ -90,14 +90,16 @@ class ZuluHmacAuthV3HTTPHandler(object):
headers_to_sign = self.headers_to_sign(http_request) headers_to_sign = self.headers_to_sign(http_request)
canonical_qs = self.canonical_query_string(http_request) canonical_qs = self.canonical_query_string(http_request)
canonical_headers = self.canonical_headers(headers_to_sign) canonical_headers = self.canonical_headers(headers_to_sign)
string_to_sign = '\n'.join(( string_to_sign = '\n'.join(
http_request.method, (
http_request.path, http_request.method,
canonical_qs, http_request.path,
canonical_headers, canonical_qs,
'', canonical_headers,
http_request.body '',
)) http_request.body,
)
)
return string_to_sign, headers_to_sign return string_to_sign, headers_to_sign
def add_auth(self, req): def add_auth(self, req):
@ -319,9 +321,15 @@ class Auth(object):
key: val[0] for key, val in parse_qs(parsed_url.query).items() key: val[0] for key, val in parse_qs(parsed_url.query).items()
} }
request = HTTPRequest( request = HTTPRequest(
method='GET', protocol='https', host=HOST, method='GET',
port=443, path=parsed_url.path, auth_path=None, params=params, protocol='https',
headers={'User-Agent': USER_AGENT}, body='' host=HOST,
port=443,
path=parsed_url.path,
auth_path=None,
params=params,
headers={'User-Agent': USER_AGENT},
body='',
) )
handler.add_auth(req=request) handler.add_auth(req=request)
headers = request.headers headers = request.headers

View file

@ -7,15 +7,20 @@ from dateutil.parser import parse
from .imdbpie import Imdb from .imdbpie import Imdb
from .objects import ( from .objects import (
Title, TitleEpisodes, Name, TitleName, Image, TitleRelease, Title,
TitleSearchResult, NameSearchResult, TitleEpisodes,
Name,
TitleName,
Image,
TitleRelease,
TitleSearchResult,
NameSearchResult,
) )
REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})') REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
class ImdbFacade(object): class ImdbFacade(object):
def __init__(self, client=None): def __init__(self, client=None):
self._client = client or Imdb() self._client = client or Imdb()
@ -36,8 +41,11 @@ class ImdbFacade(object):
season = None season = None
episode = None episode = None
return Title( return Title(
season=season, episode=episode, episodes=episodes, season=season,
runtime=runtime, **title_data episode=episode,
episodes=episodes,
runtime=runtime,
**title_data
) )
def get_name(self, imdb_id): def get_name(self, imdb_id):
@ -68,8 +76,13 @@ class ImdbFacade(object):
self._parse_id(f['id']) for f in filmography_data['filmography'] self._parse_id(f['id']) for f in filmography_data['filmography']
) )
return Name( return Name(
name=name, imdb_id=imdb_id, date_of_birth=date_of_birth, name=name,
gender=gender, birth_place=birth_place, bios=bios, image=image, imdb_id=imdb_id,
date_of_birth=date_of_birth,
gender=gender,
birth_place=birth_place,
bios=bios,
image=image,
filmography=filmography, filmography=filmography,
) )
@ -77,7 +90,8 @@ class ImdbFacade(object):
results = [] results = []
for result in self._client.search_for_name(query): for result in self._client.search_for_name(query):
result = NameSearchResult( result = NameSearchResult(
imdb_id=result['imdb_id'], name=result['name'], imdb_id=result['imdb_id'],
name=result['name'],
) )
results.append(result) results.append(result)
return tuple(results) return tuple(results)
@ -90,8 +104,10 @@ class ImdbFacade(object):
else: else:
year = None year = None
result = TitleSearchResult( result = TitleSearchResult(
imdb_id=result['imdb_id'], title=result['title'], imdb_id=result['imdb_id'],
type=result['type'], year=year, title=result['title'],
type=result['type'],
year=year,
) )
results.append(result) results.append(result)
return tuple(results) return tuple(results)
@ -102,8 +118,9 @@ class ImdbFacade(object):
name=i['name'], name=i['name'],
job=i.get('job'), job=i.get('job'),
category=i.get('category'), category=i.get('category'),
imdb_id=self._parse_id(i['id']) imdb_id=self._parse_id(i['id']),
) for i in top_crew_data['writers'] )
for i in top_crew_data['writers']
) )
def _get_stars(self, principals_data): def _get_stars(self, principals_data):
@ -113,8 +130,9 @@ class ImdbFacade(object):
job=i.get('job'), job=i.get('job'),
characters=tuple(i.get('characters', ())), characters=tuple(i.get('characters', ())),
category=i.get('category'), category=i.get('category'),
imdb_id=self._parse_id(i['id']) imdb_id=self._parse_id(i['id']),
) for i in principals_data )
for i in principals_data
) )
def _get_creators(self, top_crew_data): def _get_creators(self, top_crew_data):
@ -123,8 +141,9 @@ class ImdbFacade(object):
name=i['name'], name=i['name'],
job=i.get('job'), job=i.get('job'),
category=i.get('category'), category=i.get('category'),
imdb_id=self._parse_id(i['id']) imdb_id=self._parse_id(i['id']),
) for i in top_crew_data['writers'] )
for i in top_crew_data['writers']
if i.get('job') == 'creator' if i.get('job') == 'creator'
) )
@ -134,20 +153,23 @@ class ImdbFacade(object):
name=i['name'], name=i['name'],
job=i.get('job'), job=i.get('job'),
category=i.get('category'), category=i.get('category'),
imdb_id=self._parse_id(i['id']) imdb_id=self._parse_id(i['id']),
) for i in top_crew_data['directors'] )
for i in top_crew_data['directors']
) )
def _get_credits(self, credits_data): def _get_credits(self, credits_data):
credits = [] credits = []
for category in credits_data.get('credits', ()): for category in credits_data.get('credits', ()):
for item in credits_data['credits'][category]: for item in credits_data['credits'][category]:
credits.append(TitleName( credits.append(
name=item['name'], TitleName(
category=item.get('category'), name=item['name'],
job=item.get('job'), category=item.get('category'),
imdb_id=self._parse_id(item['id']) job=item.get('job'),
)) imdb_id=self._parse_id(item['id']),
)
)
return tuple(credits) return tuple(credits)
def _parse_id(self, string): def _parse_id(self, string):
@ -212,22 +234,25 @@ class ImdbFacade(object):
) )
except KeyError: except KeyError:
image = None image = None
return dict( return (
imdb_id=imdb_id, dict(
title=title, imdb_id=imdb_id,
year=year, title=title,
rating=rating, year=year,
type=type_, rating=rating,
release_date=release_date, type=type_,
releases=releases, release_date=release_date,
plot_outline=plot_outline, releases=releases,
rating_count=rating_count, plot_outline=plot_outline,
writers=writers, rating_count=rating_count,
directors=directors, writers=writers,
creators=creators, directors=directors,
genres=genres, creators=creators,
credits=credits, genres=genres,
certification=certification, credits=credits,
image=image, certification=certification,
stars=stars, image=image,
), title_aux_data stars=stars,
),
title_aux_data,
)

View file

@ -11,7 +11,7 @@ from trans import trans
import requests import requests
from six import text_type from six import text_type
from six.moves import http_client as httplib from six.moves import http_client as httplib
from six.moves.urllib.parse import urlencode, urljoin, quote, unquote from six.moves.urllib.parse import urlencode, urljoin, quote
from .constants import BASE_URI, SEARCH_BASE_URI from .constants import BASE_URI, SEARCH_BASE_URI
from .auth import Auth from .auth import Auth
@ -52,7 +52,6 @@ _SIMPLE_GET_ENDPOINTS = {
class Imdb(Auth): class Imdb(Auth):
def __init__(self, locale=None, exclude_episodes=False, session=None, cachedir=None): def __init__(self, locale=None, exclude_episodes=False, session=None, cachedir=None):
self.locale = locale or 'en_US' self.locale = locale or 'en_US'
self.region = self.locale.split('_')[-1].upper() self.region = self.locale.split('_')[-1].upper()
@ -87,8 +86,8 @@ class Imdb(Auth):
self._title_not_found() self._title_not_found()
if ( if (
self.exclude_episodes is True and self.exclude_episodes is True
resource['base']['titleType'] == 'tvEpisode' and resource['base']['titleType'] == 'tvEpisode'
): ):
raise LookupError( raise LookupError(
'Title not found. Title was an episode and ' 'Title not found. Title was an episode and '
@ -111,14 +110,14 @@ class Imdb(Auth):
'region': self.region, 'region': self.region,
'tconst': imdb_id, 'tconst': imdb_id,
'today': date.today().strftime('%Y-%m-%d'), 'today': date.today().strftime('%Y-%m-%d'),
} },
) )
except LookupError: except LookupError:
self._title_not_found() self._title_not_found()
if ( if (
self.exclude_episodes is True and self.exclude_episodes is True
resource['titleType'].lower() == 'tvepisode' and resource['titleType'].lower() == 'tvepisode'
): ):
raise LookupError( raise LookupError(
'Title not found. Title was an episode and ' 'Title not found. Title was an episode and '
@ -128,19 +127,24 @@ class Imdb(Auth):
def _simple_get_method(self, method, path): def _simple_get_method(self, method, path):
"""Return client method generated from ``_SIMPLE_GET_ENDPOINTS``.""" """Return client method generated from ``_SIMPLE_GET_ENDPOINTS``."""
def get(imdb_id): def get(imdb_id):
logger.info('called %s %s', method, imdb_id) logger.info('called %s %s', method, imdb_id)
self.validate_imdb_id(imdb_id) self.validate_imdb_id(imdb_id)
self._redirection_title_check(imdb_id) self._redirection_title_check(imdb_id)
return self._get_resource(path.format(imdb_id=imdb_id)) return self._get_resource(path.format(imdb_id=imdb_id))
return get return get
def title_exists(self, imdb_id): def title_exists(self, imdb_id):
self.validate_imdb_id(imdb_id) self.validate_imdb_id(imdb_id)
page_url = 'https://www.imdb.com/title/{0}/'.format(imdb_id) page_url = 'https://www.imdb.com/title/{0}/'.format(imdb_id)
response = self.session.get(page_url, allow_redirects=False) response = self.session.get(
page_url,
allow_redirects=False,
headers={'User-Agent': 'Mozilla/5.0'},
)
if response.status_code == httplib.OK: if response.status_code == httplib.OK:
return True return True
elif response.status_code == httplib.NOT_FOUND: elif response.status_code == httplib.NOT_FOUND:
@ -165,7 +169,7 @@ class Imdb(Auth):
def search_for_name(self, name): def search_for_name(self, name):
logger.info('called search_for_name %s', name) logger.info('called search_for_name %s', name)
name = re.sub(r'\W+', '_', name, flags=re.UNICODE).strip('_') name = re.sub(r'\W+', '+', name).strip('+')
search_results = self._suggest_search(name) search_results = self._suggest_search(name)
results = [] results = []
for result in search_results.get('d', ()): for result in search_results.get('d', ()):
@ -181,7 +185,7 @@ class Imdb(Auth):
def search_for_title(self, title): def search_for_title(self, title):
logger.info('called search_for_title %s', title) logger.info('called search_for_title %s', title)
title = re.sub(r'\W+', '_', title, flags=re.UNICODE).strip('_') title = re.sub(r'\W+', '+', title).strip('+')
search_results = self._suggest_search(title) search_results = self._suggest_search(title)
results = [] results = []
for result in search_results.get('d', ()): for result in search_results.get('d', ()):
@ -238,9 +242,13 @@ class Imdb(Auth):
if region: if region:
params.update({'region': region}) params.update({'region': region})
return self._get(urljoin( return self._get(
BASE_URI, '/template/imdb-ios-writable/tv-episodes-v2.jstl/render' urljoin(
), params=params) BASE_URI,
'/template/imdb-ios-writable/tv-episodes-v2.jstl/render',
),
params=params,
)
def get_title_top_crew(self, imdb_id): def get_title_top_crew(self, imdb_id):
""" """
@ -252,24 +260,21 @@ class Imdb(Auth):
logger.info('called get_title_top_crew %s', imdb_id) logger.info('called get_title_top_crew %s', imdb_id)
self.validate_imdb_id(imdb_id) self.validate_imdb_id(imdb_id)
params = {'tconst': imdb_id} params = {'tconst': imdb_id}
return self._get(urljoin( return self._get(
BASE_URI, urljoin(
'/template/imdb-android-writable/7.3.top-crew.jstl/render' BASE_URI,
), params=params) '/template/imdb-android-writable/7.3.top-crew.jstl/render',
),
params=params,
)
@staticmethod @staticmethod
def _parse_dirty_json(data, query=None): def _parse_dirty_json(data, query=None):
if query is None: if query is None:
match_json_within_dirty_json = r'imdb\$.+\({1}(.+)\){1}' match_json_within_dirty_json = r'imdb\$.+\({1}(.+)\){1}'
else: else:
query_match = ''.join( # No need to unquote as the json is containing quoted query
char if char.isalnum() else '[{0}]'.format(char) match_json_within_dirty_json = r'imdb\${}\((.+)\)'.format(query)
for char in unquote(query)
)
query_match = query_match.replace('[ ]', '.+')
match_json_within_dirty_json = (
r'imdb\${}\((.+)\)'.format(query_match)
)
data_clean = re.match( data_clean = re.match(
match_json_within_dirty_json, data, re.IGNORECASE match_json_within_dirty_json, data, re.IGNORECASE
).groups()[0] ).groups()[0]
@ -290,9 +295,8 @@ class Imdb(Auth):
Redirection results have no information of use. Redirection results have no information of use.
""" """
imdb_id = response['data'].get('tconst') imdb_id = response['data'].get('tconst')
if ( if imdb_id and imdb_id != response['data'].get('news', {}).get(
imdb_id and 'channel'
imdb_id != response['data'].get('news', {}).get('channel')
): ):
return True return True
return False return False
@ -309,7 +313,6 @@ class Imdb(Auth):
full_url = url full_url = url
headers.update(self.get_auth_headers(full_url)) headers.update(self.get_auth_headers(full_url))
resp = self.session.get(url, headers=headers, params=params) resp = self.session.get(url, headers=headers, params=params)
if not resp.ok: if not resp.ok:
if resp.status_code == httplib.NOT_FOUND: if resp.status_code == httplib.NOT_FOUND:
raise LookupError('Resource {0} not found'.format(url)) raise LookupError('Resource {0} not found'.format(url))
@ -320,9 +323,7 @@ class Imdb(Auth):
try: try:
resp_dict = json.loads(resp_data) resp_dict = json.loads(resp_data)
except ValueError: except ValueError:
resp_dict = self._parse_dirty_json( resp_dict = self._parse_dirty_json(data=resp_data, query=query)
data=resp_data, query=query
)
if resp_dict.get('error'): if resp_dict.get('error'):
return None return None

View file

@ -9,12 +9,9 @@ class Image(object):
class TitleEpisodes(object): class TitleEpisodes(object):
def __init__(self, facade, imdb_id): def __init__(self, facade, imdb_id):
self._facade = facade self._facade = facade
episodes = self._facade._client.get_title_episodes( episodes = self._facade._client.get_title_episodes(imdb_id=imdb_id)
imdb_id=imdb_id
)
self._episode_imdb_ids = [] self._episode_imdb_ids = []
for season in episodes['seasons']: for season in episodes['seasons']:
for episode in season['episodes']: for episode in season['episodes']: