mirror of
https://github.com/SickGear/SickGear.git
synced 2024-11-24 13:55:16 +00:00
458 lines
15 KiB
Python
458 lines
15 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# enzyme - Video metadata parser
|
||
|
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
|
||
|
# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
|
||
|
# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
|
||
|
#
|
||
|
# This file is part of enzyme.
|
||
|
#
|
||
|
# enzyme is free software; you can redistribute it and/or modify it under
|
||
|
# the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation; either version 3 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
#
|
||
|
# enzyme is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with enzyme. If not, see <http://www.gnu.org/licenses/>.
|
||
|
import re
|
||
|
import logging
|
||
|
|
||
|
from . import fourcc
|
||
|
from . import language
|
||
|
from .strutils import str_to_unicode, unicode_to_str
|
||
|
|
||
|
from _23 import decode_str
|
||
|
from six import PY2, string_types, text_type
|
||
|
|
||
|
# Attribute names that Media skips when normalising strings and that it
# prints as a size placeholder instead of their raw value.
UNPRINTABLE_KEYS = ['thumbnail', 'url', 'codec_private']

# Attribute names shared by every Media object.
MEDIACORE = [
    'title', 'caption', 'comment', 'size', 'type', 'subtype', 'timestamp',
    'keywords', 'country', 'language', 'langcode', 'url', 'artist',
    'mime', 'datetime', 'tags', 'hash',
]

# Additional attribute names for audio streams.
AUDIOCORE = [
    'channels', 'samplerate', 'length', 'encoder', 'codec', 'format',
    'samplebits', 'bitrate', 'fourcc', 'trackno', 'id', 'userdate',
    'enabled', 'default', 'codec_private',
]

# Additional attribute names for music (on top of the audio ones).
MUSICCORE = ['trackof', 'album', 'genre', 'discs', 'thumbnail']

# Additional attribute names for video streams.
VIDEOCORE = [
    'length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format',
    'samplebits', 'width', 'height', 'fps', 'aspect', 'trackno',
    'fourcc', 'id', 'enabled', 'default', 'codec_private',
]

# Additional attribute names for audio/video containers.
AVCORE = [
    'length', 'encoder', 'trackno', 'trackof', 'copyright', 'product',
    'genre', 'writer', 'producer', 'studio', 'rating', 'actors', 'thumbnail',
    'delay', 'image', 'video', 'audio', 'subtitles', 'chapters', 'software',
    'summary', 'synopsis', 'season', 'episode', 'series',
]

# module level logger
log = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
class Media(object):
    """
    Media is the base class of all media metadata containers. It defines
    the basic structures that handle metadata. Media and its derivatives
    share a common set of metadata attributes listed in ``_keys``.
    Specific derivatives add further keys to the core set defined here.
    """
    media = None
    _keys = MEDIACORE
    table_mapping = {}

    def __init__(self, _hash=None):
        """
        Create an empty Media object, or rebuild one from a dict.

        :param _hash: optional dict of attribute name -> value. Values that
            are non-empty lists of dicts are turned into lists of Media.
        """
        # Always work on a per-instance copy of the key list. Without this
        # the dict branch below appended unknown keys (via _set) to the
        # list shared by the class and by every other instance.
        self._keys = self._keys[:]
        self.tables = {}

        if _hash is not None:
            # create Media based on dict
            for key, value in _hash.items():
                if isinstance(value, list) and value and isinstance(value[0], dict):
                    value = [Media(x) for x in value]
                self._set(key, value)
            return

        # Tags, unlike tables, are more well-defined dicts whose values are
        # either Tag objects, other dicts (for nested tags), or lists of either
        # (for multiple instances of the tag, e.g. actor). Where possible,
        # parsers should transform tag names to conform to the Official
        # Matroska tags defined at http://www.matroska.org/technical/specs/tagging/index.html
        # All tag names will be lower-cased.
        self.tags = Tags()
        for key in set(self._keys) - set(['media', 'tags']):
            setattr(self, key, None)

    #
    # unicode and string conversion for debugging
    #
    # TODO: Fix that mess
    def __unicode__(self):
        """
        Return a multi-line text dump of the attributes, the tags and the
        list attributes (e.g. tracks) of this object.
        """
        result = u''

        # print normal attributes
        lists = []
        for key in self._keys:
            value = getattr(self, key, None)
            if value is None or key == 'url':
                continue
            if isinstance(value, list):
                if not value:
                    continue
                elif isinstance(value[0], string_types):
                    # Just a list of strings (keywords?), so don't treat it specially.
                    value = u', '.join(value)
                else:
                    # Lists of other objects are printed after the tags.
                    lists.append((key, value))
                    continue
            elif isinstance(value, dict):
                # Tables or tags treated separately.
                continue
            if key in UNPRINTABLE_KEYS:
                value = '<unprintable data, size=%d>' % len(value)
            result += u'| %10s: %s\n' % (decode_str(key), decode_str(value))

        # print tags (recursively, to support nested tags).
        def print_tags(tags, suffix, show_label):
            result = ''
            for n, (name, tag) in enumerate(tags.items()):
                result += u'| %12s%s%s = ' % (u'tags: ' if n == 0 and show_label else '', suffix, name)
                if isinstance(tag, list):
                    # TODO: doesn't support lists/dicts within lists.
                    result += u'%s\n' % ', '.join(subtag.value for subtag in tag)
                else:
                    result += u'%s\n' % (tag.value or '')
                if isinstance(tag, dict):
                    result += print_tags(tag, ' ', False)
            return result

        # An object built from a dict may have no (or a None) tags attribute.
        tags = getattr(self, 'tags', None)
        if tags:
            result += print_tags(tags, '', True)

        # print lists
        for key, l in lists:
            for n, item in enumerate(l):
                label = '+-- ' + key.rstrip('s').capitalize()
                if key not in ['tracks', 'subtitles', 'chapters']:
                    label += ' Track'
                result += u'%s #%d\n' % (label, n + 1)
                result += '| ' + re.sub(r'\n(.)', r'\n| \1', decode_str(item))

        # print tables
        # FIXME: WTH?
        # if log.level >= 10:
        #     for name, table in self.tables.items():
        #         result += '+-- Table %s\n' % str(name)
        #         for key, value in table.items():
        #             try:
        #                 value = unicode(value)
        #                 if len(value) > 50:
        #                     value = u'<unprintable data, size=%d>' % len(value)
        #             except (UnicodeDecodeError, TypeError):
        #                 try:
        #                     value = u'<unprintable data, size=%d>' % len(value)
        #                 except AttributeError:
        #                     value = u'<unprintable data>'
        #             result += u'| | %s: %s\n' % (unicode(key), value)
        return result

    def __str__(self):
        # NOTE(review): this hands the object itself to decode_str(); whether
        # that yields a string depends on the _23 helper - confirm.
        return decode_str(self)

    def __repr__(self):
        if hasattr(self, 'url'):
            return '<%s %s>' % (str(self.__class__)[8:-2], self.url)
        else:
            return '<%s>' % (str(self.__class__)[8:-2])

    #
    # internal functions
    #
    def _appendtable(self, name, hashmap):
        """
        Append a table of additional metadata to the object.
        If such a table already exists, the items of the given table are
        added to the existing one.
        """
        if name not in self.tables:
            self.tables[name] = hashmap
        else:
            # Append to the already existing table
            for k in hashmap.keys():
                self.tables[name][k] = hashmap[k]

    def _set(self, key, value):
        """
        Set key to value and add the key to the internal keys list if
        missing. Nothing happens when both the new and the current value
        are None.
        """
        if value is None and getattr(self, key, None) is None:
            return
        if isinstance(value, str):
            value = str_to_unicode(value)
        setattr(self, key, value)
        if key not in self._keys:
            self._keys.append(key)

    def _set_url(self, url):
        """
        Set the URL of the source
        """
        self.url = url

    def _finalize(self):
        """
        Correct some data based on specific rules
        """
        # make sure all strings are unicode
        for key in self._keys:
            if key in UNPRINTABLE_KEYS:
                continue
            # Objects built from a dict may lack attributes for some keys.
            value = getattr(self, key, None)
            if value is None:
                continue
            if key == 'image':
                if PY2 and isinstance(value, text_type):
                    setattr(self, key, unicode_to_str(value))
                continue
            if isinstance(value, str):
                # Keep the converted value so the clean-up below is applied
                # to it, matching the table handling further down.
                value = str_to_unicode(value)
                setattr(self, key, value)
            if isinstance(value, text_type):
                setattr(self, key, value.strip().replace(u'\0', u''))
            if isinstance(value, list) and value and isinstance(value[0], Media):
                for submenu in value:
                    submenu._finalize()

        # copy needed tags from tables
        for name, table in self.tables.items():
            mapping = self.table_mapping.get(name, {})
            for tag, attr in mapping.items():
                if self.get(attr):
                    # An already set attribute wins over the table value.
                    continue
                value = table.get(tag, None)
                if value is not None:
                    if not isinstance(value, string_types):
                        value = str_to_unicode(str(value))
                    elif isinstance(value, str):
                        value = str_to_unicode(value)
                    value = value.strip().rstrip().replace(u'\0', u'')
                    setattr(self, attr, value)

        if 'fourcc' in self._keys and 'codec' in self._keys and self.get('codec') is not None:
            # Codec may be a fourcc, in which case we resolve it to its actual
            # name and set the fourcc attribute.
            self.fourcc, self.codec = fourcc.resolve(self.codec)
        if 'language' in self._keys:
            self.langcode, self.language = language.resolve(self.get('language'))

    #
    # data access
    #
    def __contains__(self, key):
        """
        Test if the object has an attribute named key
        """
        return hasattr(self, key)

    def get(self, attr, default=None):
        """
        Returns the given attribute. If the attribute is not set by
        the parser return 'default'.
        """
        return getattr(self, attr, default)

    def __getitem__(self, attr):
        """
        Get the value of the given attribute (None if it does not exist)
        """
        return getattr(self, attr, None)

    def __setitem__(self, key, value):
        """
        Set the value of 'key' to 'value'
        """
        setattr(self, key, value)

    def has_key(self, key):
        """
        Check if the object has an attribute 'key'
        """
        return hasattr(self, key)

    def convert(self):
        """
        Convert Media to dict. Lists of Media objects are converted too.
        """
        result = {}
        for k in self._keys:
            value = getattr(self, k, None)
            if isinstance(value, list) and value and isinstance(value[0], Media):
                value = [x.convert() for x in value]
            result[k] = value
        return result

    def keys(self):
        """
        Return all keys for the attributes set by the parser.
        """
        return self._keys
|
||
|
|
||
|
|
||
|
class Collection(Media):
    """
    Collection of Digital Media like CD, DVD, Directory, Playlist
    """
    # core keys plus the collection specific ones
    _keys = Media._keys + ['id', 'tracks']

    def __init__(self):
        """
        Create an empty collection with no tracks.
        """
        Media.__init__(self)
        # replace the None default set by the base class with an empty list
        self.tracks = []
|
||
|
|
||
|
|
||
|
class Tag(object):
    """
    A single tag, stored as a value inside a Tags object.

    Tag values are strings (for binary data), unicode objects, or datetime
    objects for tags that represent dates or times.
    """

    def __init__(self, value=None, langcode='und', binary=False):
        """
        :param value: the value of the tag
        :param langcode: language code, passed through language.resolve()
            by the langcode property
        :param binary: flag marking the value as binary data
        """
        super(Tag, self).__init__()
        self.value = value
        self.langcode = langcode
        self.binary = binary

    def __unicode__(self):
        return decode_str(self.value)

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        # binary values are only described by their size
        if self.binary:
            return '<Binary Tag object: size=%d>' % len(self.value)
        return '<Tag object: %r>' % (self.value,)

    @property
    def langcode(self):
        """
        The language code as returned by language.resolve().
        """
        return self._langcode

    @langcode.setter
    def langcode(self, code):
        # resolving also refreshes the ``language`` attribute
        resolved = language.resolve(code)
        self._langcode, self.language = resolved
|
||
|
|
||
|
|
||
|
class Tags(dict, Tag):
    """
    A dictionary containing Tag objects. Values can be other Tags objects
    (for nested tags), lists, or Tag objects.

    A Tags object is more or less a dictionary but it also contains a value.
    This is necessary in order to represent this kind of tag specification
    (e.g. for Matroska)::

        <Simple>
          <Name>LAW_RATING</Name>
          <String>PG</String>
          <Simple>
            <Name>COUNTRY</Name>
            <String>US</String>
          </Simple>
        </Simple>

    The attribute LAW_RATING has a value (PG), but it also has a child tag
    COUNTRY that specifies the country code the rating belongs to.
    """

    def __init__(self, value=None, langcode='und', binary=False):
        """
        :param value: the value of this tag node itself
        :param langcode: language code, passed through language.resolve()
            by the inherited langcode property
        :param binary: flag marking the value as binary data
        """
        # dict comes first in the MRO, so this only initialises the (empty)
        # dict part; the Tag attributes are therefore set by hand below.
        super(Tags, self).__init__()
        self.value = value
        self.langcode = langcode
        # Honour the argument; it used to be ignored and forced to False.
        self.binary = binary
|
||
|
|
||
|
|
||
|
class AudioStream(Media):
    """
    An audio track of a multiplexed container.
    """
    # core keys followed by the audio specific ones
    _keys = Media._keys + AUDIOCORE
|
||
|
|
||
|
|
||
|
class Music(AudioStream):
    """
    Digital Music.
    """
    _keys = AudioStream._keys + MUSICCORE

    def _finalize(self):
        """
        Correct some data based on specific rules.

        After the base class clean-up, a track number below 10 is rewritten
        as a zero padded string when ``trackof`` is set.
        """
        AudioStream._finalize(self)
        if self.trackof:
            try:
                # XXX Why is this needed anyway?
                if int(self.trackno) < 10:
                    self.trackno = u'0%s' % int(self.trackno)
            except (AttributeError, TypeError, ValueError):
                # TypeError covers a trackno of None (int(None)), which
                # happens when trackof is known but trackno is not.
                pass
|
||
|
|
||
|
|
||
|
class VideoStream(Media):
    """
    A video track of a multiplexed container.
    """
    # core keys followed by the video specific ones
    _keys = Media._keys + VIDEOCORE
|
||
|
|
||
|
|
||
|
class Chapter(Media):
    """
    A chapter of a multiplexed container.
    """
    # chapters do not use the core keys of Media
    _keys = ['enabled', 'name', 'pos', 'id']

    def __init__(self, name=None, pos=0):
        """
        :param name: value stored as the ``name`` attribute
        :param pos: value stored as the ``pos`` attribute
        """
        Media.__init__(self)
        # overwrite the None defaults set by the base class
        self.name = name
        self.pos = pos
        self.enabled = True
|
||
|
|
||
|
|
||
|
class Subtitle(Media):
    """
    A subtitle track of a multiplexed container.
    """
    # subtitles do not use the core keys of Media
    _keys = [
        'enabled', 'default', 'langcode', 'language', 'trackno', 'title',
        'id', 'codec',
    ]

    def __init__(self, language=None):
        """
        :param language: value stored as the ``language`` attribute
        """
        Media.__init__(self)
        self.language = language
|
||
|
|
||
|
|
||
|
class AVContainer(Media):
    """
    Container for audio and video streams. This is the container type for
    all media that contain more than one stream.
    """
    _keys = Media._keys + AVCORE

    def __init__(self):
        """
        Create a container with empty stream, subtitle and chapter lists.
        """
        Media.__init__(self)
        self.audio = []
        self.video = []
        self.subtitles = []
        self.chapters = []

    def _finalize(self):
        """
        Correct some data based on specific rules
        """
        Media._finalize(self)

        # Only derive a length when the container has none and its first
        # video track provides one.
        if self.length or not len(self.video) or not self.video[0].length:
            return

        # Length not specified for container, so use the largest length
        # of its tracks as container length.
        longest = 0
        for stream in self.video + self.audio:
            if stream.length:
                longest = max(longest, stream.length)
        self.length = longest
|