SickGear/lib/enzyme/core.py

451 lines
15 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# enzyme - Video metadata parser
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
#
# This file is part of enzyme.
#
# enzyme is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# enzyme is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with enzyme. If not, see <http://www.gnu.org/licenses/>.
import re
import logging
import fourcc
import language
from strutils import str_to_unicode, unicode_to_str
UNPRINTABLE_KEYS = ['thumbnail', 'url', 'codec_private']
MEDIACORE = ['title', 'caption', 'comment', 'size', 'type', 'subtype', 'timestamp',
'keywords', 'country', 'language', 'langcode', 'url', 'artist',
'mime', 'datetime', 'tags', 'hash']
AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'format',
'samplebits', 'bitrate', 'fourcc', 'trackno', 'id', 'userdate',
'enabled', 'default', 'codec_private']
MUSICCORE = ['trackof', 'album', 'genre', 'discs', 'thumbnail']
VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format',
'samplebits', 'width', 'height', 'fps', 'aspect', 'trackno',
'fourcc', 'id', 'enabled', 'default', 'codec_private']
AVCORE = ['length', 'encoder', 'trackno', 'trackof', 'copyright', 'product',
'genre', 'writer', 'producer', 'studio', 'rating', 'actors', 'thumbnail',
'delay', 'image', 'video', 'audio', 'subtitles', 'chapters', 'software',
'summary', 'synopsis', 'season', 'episode', 'series']
# get logging object
log = logging.getLogger(__name__)
class Media(object):
"""
Media is the base class to all Media Metadata Containers. It defines
the basic structures that handle metadata. Media and its derivates
contain a common set of metadata attributes that is listed in keys.
Specific derivates contain additional keys to the dublin core set that is
defined in Media.
"""
media = None
_keys = MEDIACORE
table_mapping = {}
def __init__(self, hash=None):
if hash is not None:
# create Media based on dict
for key, value in hash.items():
if isinstance(value, list) and value and isinstance(value[0], dict):
value = [Media(x) for x in value]
self._set(key, value)
return
self._keys = self._keys[:]
self.tables = {}
# Tags, unlike tables, are more well-defined dicts whose values are
# either Tag objects, other dicts (for nested tags), or lists of either
# (for multiple instances of the tag, e.g. actor). Where possible,
# parsers should transform tag names to conform to the Official
# Matroska tags defined at http://www.matroska.org/technical/specs/tagging/index.html
# All tag names will be lower-cased.
self.tags = Tags()
for key in set(self._keys) - set(['media', 'tags']):
setattr(self, key, None)
#
# unicode and string convertion for debugging
#
#TODO: Fix that mess
def __unicode__(self):
result = u''
# print normal attributes
lists = []
for key in self._keys:
value = getattr(self, key, None)
if value == None or key == 'url':
continue
if isinstance(value, list):
if not value:
continue
elif isinstance(value[0], basestring):
# Just a list of strings (keywords?), so don't treat it specially.
value = u', '.join(value)
else:
lists.append((key, value))
continue
elif isinstance(value, dict):
# Tables or tags treated separately.
continue
if key in UNPRINTABLE_KEYS:
value = '<unprintable data, size=%d>' % len(value)
result += u'| %10s: %s\n' % (unicode(key), unicode(value))
# print tags (recursively, to support nested tags).
def print_tags(tags, suffix, show_label):
result = ''
for n, (name, tag) in enumerate(tags.items()):
result += u'| %12s%s%s = ' % (u'tags: ' if n == 0 and show_label else '', suffix, name)
if isinstance(tag, list):
# TODO: doesn't support lists/dicts within lists.
result += u'%s\n' % ', '.join(subtag.value for subtag in tag)
else:
result += u'%s\n' % (tag.value or '')
if isinstance(tag, dict):
result += print_tags(tag, ' ', False)
return result
result += print_tags(self.tags, '', True)
# print lists
for key, l in lists:
for n, item in enumerate(l):
label = '+-- ' + key.rstrip('s').capitalize()
if key not in ['tracks', 'subtitles', 'chapters']:
label += ' Track'
result += u'%s #%d\n' % (label, n + 1)
result += '| ' + re.sub(r'\n(.)', r'\n| \1', unicode(item))
# print tables
#FIXME: WTH?
# if log.level >= 10:
# for name, table in self.tables.items():
# result += '+-- Table %s\n' % str(name)
# for key, value in table.items():
# try:
# value = unicode(value)
# if len(value) > 50:
# value = u'<unprintable data, size=%d>' % len(value)
# except (UnicodeDecodeError, TypeError):
# try:
# value = u'<unprintable data, size=%d>' % len(value)
# except AttributeError:
# value = u'<unprintable data>'
# result += u'| | %s: %s\n' % (unicode(key), value)
return result
def __str__(self):
return unicode(self).encode()
def __repr__(self):
if hasattr(self, 'url'):
return '<%s %s>' % (str(self.__class__)[8:-2], self.url)
else:
return '<%s>' % (str(self.__class__)[8:-2])
#
# internal functions
#
def _appendtable(self, name, hashmap):
"""
Appends a tables of additional metadata to the Object.
If such a table already exists, the given tables items are
added to the existing one.
"""
if name not in self.tables:
self.tables[name] = hashmap
else:
# Append to the already existing table
for k in hashmap.keys():
self.tables[name][k] = hashmap[k]
def _set(self, key, value):
"""
Set key to value and add the key to the internal keys list if
missing.
"""
if value is None and getattr(self, key, None) is None:
return
if isinstance(value, str):
value = str_to_unicode(value)
setattr(self, key, value)
if not key in self._keys:
self._keys.append(key)
def _set_url(self, url):
"""
Set the URL of the source
"""
self.url = url
def _finalize(self):
"""
Correct same data based on specific rules
"""
# make sure all strings are unicode
for key in self._keys:
if key in UNPRINTABLE_KEYS:
continue
value = getattr(self, key)
if value is None:
continue
if key == 'image':
if isinstance(value, unicode):
setattr(self, key, unicode_to_str(value))
continue
if isinstance(value, str):
setattr(self, key, str_to_unicode(value))
if isinstance(value, unicode):
setattr(self, key, value.strip().rstrip().replace(u'\0', u''))
if isinstance(value, list) and value and isinstance(value[0], Media):
for submenu in value:
submenu._finalize()
# copy needed tags from tables
for name, table in self.tables.items():
mapping = self.table_mapping.get(name, {})
for tag, attr in mapping.items():
if self.get(attr):
continue
value = table.get(tag, None)
if value is not None:
if not isinstance(value, (str, unicode)):
value = str_to_unicode(str(value))
elif isinstance(value, str):
value = str_to_unicode(value)
value = value.strip().rstrip().replace(u'\0', u'')
setattr(self, attr, value)
if 'fourcc' in self._keys and 'codec' in self._keys and self.codec is not None:
# Codec may be a fourcc, in which case we resolve it to its actual
# name and set the fourcc attribute.
self.fourcc, self.codec = fourcc.resolve(self.codec)
if 'language' in self._keys:
self.langcode, self.language = language.resolve(self.language)
#
# data access
#
def __contains__(self, key):
"""
Test if key exists in the dict
"""
return hasattr(self, key)
def get(self, attr, default=None):
"""
Returns the given attribute. If the attribute is not set by
the parser return 'default'.
"""
return getattr(self, attr, default)
def __getitem__(self, attr):
"""
Get the value of the given attribute
"""
return getattr(self, attr, None)
def __setitem__(self, key, value):
"""
Set the value of 'key' to 'value'
"""
setattr(self, key, value)
def has_key(self, key):
"""
Check if the object has an attribute 'key'
"""
return hasattr(self, key)
def convert(self):
"""
Convert Media to dict.
"""
result = {}
for k in self._keys:
value = getattr(self, k, None)
if isinstance(value, list) and value and isinstance(value[0], Media):
value = [x.convert() for x in value]
result[k] = value
return result
def keys(self):
"""
Return all keys for the attributes set by the parser.
"""
return self._keys
class Collection(Media):
"""
Collection of Digial Media like CD, DVD, Directory, Playlist
"""
_keys = Media._keys + ['id', 'tracks']
def __init__(self):
Media.__init__(self)
self.tracks = []
class Tag(object):
"""
An individual tag, which will be a value stored in a Tags object.
Tag values are strings (for binary data), unicode objects, or datetime
objects for tags that represent dates or times.
"""
def __init__(self, value=None, langcode='und', binary=False):
super(Tag, self).__init__()
self.value = value
self.langcode = langcode
self.binary = binary
def __unicode__(self):
return unicode(self.value)
def __str__(self):
return str(self.value)
def __repr__(self):
if not self.binary:
return '<Tag object: %s>' % repr(self.value)
else:
return '<Binary Tag object: size=%d>' % len(self.value)
@property
def langcode(self):
return self._langcode
@langcode.setter
def langcode(self, code):
self._langcode, self.language = language.resolve(code)
class Tags(dict, Tag):
"""
A dictionary containing Tag objects. Values can be other Tags objects
(for nested tags), lists, or Tag objects.
A Tags object is more or less a dictionary but it also contains a value.
This is necessary in order to represent this kind of tag specification
(e.g. for Matroska)::
<Simple>
<Name>LAW_RATING</Name>
<String>PG</String>
<Simple>
<Name>COUNTRY</Name>
<String>US</String>
</Simple>
</Simple>
The attribute RATING has a value (PG), but it also has a child tag
COUNTRY that specifies the country code the rating belongs to.
"""
def __init__(self, value=None, langcode='und', binary=False):
super(Tags, self).__init__()
self.value = value
self.langcode = langcode
self.binary = False
class AudioStream(Media):
"""
Audio Tracks in a Multiplexed Container.
"""
_keys = Media._keys + AUDIOCORE
class Music(AudioStream):
"""
Digital Music.
"""
_keys = AudioStream._keys + MUSICCORE
def _finalize(self):
"""
Correct same data based on specific rules
"""
AudioStream._finalize(self)
if self.trackof:
try:
# XXX Why is this needed anyway?
if int(self.trackno) < 10:
self.trackno = u'0%s' % int(self.trackno)
except (AttributeError, ValueError):
pass
class VideoStream(Media):
"""
Video Tracks in a Multiplexed Container.
"""
_keys = Media._keys + VIDEOCORE
class Chapter(Media):
"""
Chapter in a Multiplexed Container.
"""
_keys = ['enabled', 'name', 'pos', 'id']
def __init__(self, name=None, pos=0):
Media.__init__(self)
self.name = name
self.pos = pos
self.enabled = True
class Subtitle(Media):
"""
Subtitle Tracks in a Multiplexed Container.
"""
_keys = ['enabled', 'default', 'langcode', 'language', 'trackno', 'title',
'id', 'codec']
def __init__(self, language=None):
Media.__init__(self)
self.language = language
class AVContainer(Media):
"""
Container for Audio and Video streams. This is the Container Type for
all media, that contain more than one stream.
"""
_keys = Media._keys + AVCORE
def __init__(self):
Media.__init__(self)
self.audio = []
self.video = []
self.subtitles = []
self.chapters = []
def _finalize(self):
"""
Correct same data based on specific rules
"""
Media._finalize(self)
if not self.length and len(self.video) and self.video[0].length:
self.length = 0
# Length not specified for container, so use the largest length
# of its tracks as container length.
for track in self.video + self.audio:
if track.length:
self.length = max(self.length, track.length)