SickGear/lib/hachoir/metadata/metadata.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong wrong fields order in parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

367 lines
11 KiB
Python

# -*- coding: utf-8 -*-
from hachoir.core.endian import endian_name
from hachoir.core.tools import makePrintable, makeUnicode
from hachoir.core.dict import Dict
from hachoir.core.i18n import _
from hachoir.core.log import Logger
from hachoir.metadata.metadata_item import (
MIN_PRIORITY, MAX_PRIORITY, QUALITY_NORMAL)
from hachoir.metadata.register import registerAllItems
extractors = {}
class Metadata(Logger):
header = u"Metadata"
def __init__(self, parent, quality=QUALITY_NORMAL):
assert isinstance(self.header, unicode)
# Limit to 0.0 .. 1.0
if parent:
quality = parent.quality
else:
quality = min(max(0.0, quality), 1.0)
object.__init__(self)
object.__setattr__(self, "_Metadata__data", {})
object.__setattr__(self, "quality", quality)
header = self.__class__.header
object.__setattr__(self, "_Metadata__header", header)
registerAllItems(self)
def _logger(self):
pass
def __setattr__(self, key, value):
"""
Add a new value to data with name 'key'. Skip duplicates.
"""
# Invalid key?
if key not in self.__data:
raise KeyError(_("%s has no metadata '%s'") %
(self.__class__.__name__, key))
# Skip duplicates
self.__data[key].add(value)
def setHeader(self, text):
object.__setattr__(self, "header", text)
def getItems(self, key):
try:
return self.__data[key]
except LookupError:
raise ValueError("Metadata has no value '%s'" % key)
def getItem(self, key, index):
try:
return self.getItems(key)[index]
except (LookupError, ValueError):
return None
def has(self, key):
return 1 <= len(self.getItems(key))
def get(self, key, default=None, index=0):
"""
Read first value of tag with name 'key'.
>>> from datetime import timedelta
>>> a = RootMetadata()
>>> a.duration = timedelta(seconds=2300)
>>> a.get('duration')
datetime.timedelta(0, 2300)
>>> a.get('author', u'Anonymous')
u'Anonymous'
"""
item = self.getItem(key, index)
if item is None:
if default is None:
raise ValueError(
"Metadata has no value '%s' (index %s)" % (key, index))
else:
return default
return item.value
def getValues(self, key):
try:
data = self.__data[key]
except LookupError:
raise ValueError("Metadata has no value '%s'" % key)
return [item.value for item in data]
def getText(self, key, default=None, index=0):
"""
Read first value, as unicode string, of tag with name 'key'.
>>> from datetime import timedelta
>>> a = RootMetadata()
>>> a.duration = timedelta(seconds=2300)
>>> a.getText('duration')
u'38 min 20 sec'
>>> a.getText('titre', u'Unknown')
u'Unknown'
"""
item = self.getItem(key, index)
if item is not None:
return item.text
else:
return default
def register(self, data):
assert data.key not in self.__data
data.metadata = self
self.__data[data.key] = data
def __iter__(self):
return self.__data.itervalues()
def __str__(self):
r"""
Create a multi-line ASCII string (end of line is "\n") which
represents all datas.
>>> a = RootMetadata()
>>> a.author = "haypo"
>>> a.copyright = unicode("© Hachoir", "UTF-8")
>>> print a
Metadata:
- Author: haypo
- Copyright: \xa9 Hachoir
@see __unicode__() and exportPlaintext()
"""
text = self.exportPlaintext()
return "\n".join(makePrintable(line, "ASCII") for line in text)
def __unicode__(self):
r"""
Create a multi-line Unicode string (end of line is "\n") which
represents all datas.
>>> a = RootMetadata()
>>> a.copyright = unicode("© Hachoir", "UTF-8")
>>> print repr(unicode(a))
u'Metadata:\n- Copyright: \xa9 Hachoir'
@see __str__() and exportPlaintext()
"""
return "\n".join(self.exportPlaintext())
def exportPlaintext(self, priority=None, human=True, line_prefix=u"- ", title=None):
r"""
Convert metadata to multi-line Unicode string and skip datas
with priority lower than specified priority.
Default priority is Metadata.MAX_PRIORITY. If human flag is True, data
key are translated to better human name (eg. "bit_rate" becomes
"Bit rate") which may be translated using gettext.
If priority is too small, metadata are empty and so None is returned.
>>> print RootMetadata().exportPlaintext()
None
>>> meta = RootMetadata()
>>> meta.copyright = unicode("© Hachoir", "UTF-8")
>>> print repr(meta.exportPlaintext())
[u'Metadata:', u'- Copyright: \xa9 Hachoir']
@see __str__() and __unicode__()
"""
if priority is not None:
priority = max(priority, MIN_PRIORITY)
priority = min(priority, MAX_PRIORITY)
else:
priority = MAX_PRIORITY
if not title:
title = self.header
text = ["%s:" % title]
for data in sorted(self):
if priority < data.priority:
break
if not data.values:
continue
if human:
title = data.description
else:
title = data.key
for item in data.values:
if human:
value = item.text
else:
value = makeUnicode(item.value)
text.append("%s%s: %s" % (line_prefix, title, value))
if 1 < len(text):
return text
else:
return None
def exportDictionary(self, priority=None, human=True, title=None):
r"""
Convert metadata to python Dictionary and skip datas
with priority lower than specified priority.
Default priority is Metadata.MAX_PRIORITY. If human flag is True, data
key are translated to better human name (eg. "bit_rate" becomes
"Bit rate") which may be translated using gettext.
If priority is too small, metadata are empty and so None is returned.
"""
if priority is not None:
priority = max(priority, MIN_PRIORITY)
priority = min(priority, MAX_PRIORITY)
else:
priority = MAX_PRIORITY
if not title:
title = self.header
text = {}
text[title] = {}
for data in sorted(self):
if priority < data.priority:
break
if not data.values:
continue
if human:
field = data.description
else:
field = data.key
text[title][field] = {}
for item in data.values:
if human:
value = item.text
else:
value = makeUnicode(item.value)
text[title][field] = value
return text
def __nonzero__(self):
return any(item for item in self.__data.itervalues())
class RootMetadata(Metadata):
def __init__(self, quality=QUALITY_NORMAL):
Metadata.__init__(self, None, quality)
class MultipleMetadata(RootMetadata):
header = _("Common")
def __init__(self, quality=QUALITY_NORMAL):
RootMetadata.__init__(self, quality)
object.__setattr__(self, "_MultipleMetadata__groups", Dict())
object.__setattr__(self, "_MultipleMetadata__key_counter", {})
def __contains__(self, key):
return key in self.__groups
def __getitem__(self, key):
return self.__groups[key]
def iterGroups(self):
return self.__groups.itervalues()
def __nonzero__(self):
if RootMetadata.__nonzero__(self):
return True
return any(bool(group) for group in self.__groups)
def addGroup(self, key, metadata, header=None):
"""
Add a new group (metadata of a sub-document).
Returns False if the group is skipped, True if it has been added.
"""
if not metadata:
self.warning("Skip empty group %s" % key)
return False
if key.endswith("[]"):
key = key[:-2]
if key in self.__key_counter:
self.__key_counter[key] += 1
else:
self.__key_counter[key] = 1
key += "[%u]" % self.__key_counter[key]
if header:
metadata.setHeader(header)
self.__groups.append(key, metadata)
return True
def exportPlaintext(self, priority=None, human=True, line_prefix=u"- "):
common = Metadata.exportPlaintext(self, priority, human, line_prefix)
if common:
text = common
else:
text = []
for key, metadata in self.__groups.iteritems():
if not human:
title = key
else:
title = None
value = metadata.exportPlaintext(
priority, human, line_prefix, title=title)
if value:
text.extend(value)
if len(text):
return text
else:
return None
def exportDictionary(self, priority=None, human=True):
common = Metadata.exportDictionary(self, priority, human)
if common:
text = common
else:
text = {}
for key, metadata in self.__groups.items():
if not human:
title = key
else:
title = None
value = metadata.exportDictionary(priority, human, title=title)
if value:
text.update(value)
return text
def registerExtractor(parser, extractor):
assert parser not in extractors
assert issubclass(extractor, RootMetadata)
extractors[parser] = extractor
def extractMetadata(parser, quality=QUALITY_NORMAL, **kwargs):
"""
Create a Metadata class from a parser. Returns None if no metadata
extractor does exist for the parser class.
"""
try:
extractor = extractors[parser.__class__]
except KeyError:
return None
metadata = extractor(quality)
meta_extract_error = True
try:
if 'scan_index' in kwargs:
metadata.extract(parser, scan_index=kwargs['scan_index'])
else:
metadata.extract(parser)
meta_extract_error = False
except Exception as err:
error("Error during metadata extraction: %s" % unicode(err))
except Exception as err:
error("Error during metadata extraction: %s" % unicode(err))
if meta_extract_error:
try:
parser.stream._input.close()
except:
pass
return None
if metadata:
metadata.mime_type = parser.mime_type
metadata.endian = endian_name[parser.endian]
return metadata