SickGear/lib/hachoir/metadata/misc.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong field order when parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

265 lines
8.6 KiB
Python

from hachoir.metadata.metadata import RootMetadata, registerExtractor
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.container import SwfFile
from hachoir.parser.misc import TorrentFile, TrueTypeFontFile, PcfFile
from hachoir.field import isString
from hachoir.core.error import warning
from hachoir.parser import guessParser
from hachoir.metadata.setter import normalizeString
class TorrentMetadata(RootMetadata):
    """Metadata collected from the fields of a parsed torrent."""

    # Root-level field name -> metadata attribute that receives its value
    KEY_TO_ATTR = {
        u"announce": "url",
        u"comment": "comment",
        u"creation_date": "creation_date",
    }
    # Field name found under the "info" entry -> metadata attribute
    INFO_TO_ATTR = {
        u"length": "file_size",
        u"name": "filename",
    }

    def extract(self, torrent):
        """Pass every field of ``torrent[0]`` to :meth:`processRoot`."""
        root = torrent[0]
        for entry in root:
            self.processRoot(entry)

    @fault_tolerant
    def processRoot(self, field):
        """Handle one root-level field.

        A field named in ``KEY_TO_ATTR`` is copied to the mapped attribute.
        An "info" field that contains "value" has each of its children
        forwarded to :meth:`processInfo`. Anything else is ignored.
        """
        name = field.name
        if name in self.KEY_TO_ATTR:
            setattr(self, self.KEY_TO_ATTR[name], field.value)
            return
        if name != "info" or "value" not in field:
            return
        for child in field["value"]:
            self.processInfo(child)

    @fault_tolerant
    def processInfo(self, field):
        """Handle one field found under the "info" entry.

        A field named in ``INFO_TO_ATTR`` is copied to the mapped attribute;
        "piece_length" is reported as a comment using its display form.
        """
        name = field.name
        if name in self.INFO_TO_ATTR:
            setattr(self, self.INFO_TO_ATTR[name], field.value)
        elif name == "piece_length":
            self.comment = "Piece length: %s" % field.display
class TTF_Metadata(RootMetadata):
    """Metadata collected from the "header" and "names" parts of a font."""

    # nameID value -> metadata attribute that receives the string
    NAMEID_TO_ATTR = {
        0: "copyright",   # Copyright notice
        3: "title",       # Unique font identifier
        5: "version",     # Version string
        8: "author",      # Manufacturer name
        11: "url",        # URL Vendor
        14: "copyright",  # License info URL
    }

    def extract(self, ttf):
        """Run the header and names extractors for the parts that exist."""
        handlers = (
            ("header", self.extractHeader),
            ("names", self.extractNames),
        )
        for part, handler in handlers:
            if part in ttf:
                handler(ttf[part])

    @fault_tolerant
    def extractHeader(self, header):
        """Copy the dates from the header and add two comments.

        Each value is read and stored before the next one is read, so
        attributes set before a failing read remain set.
        """
        self.creation_date = header["created"].value
        self.last_modification = header["modified"].value
        lowest = header["lowest"].value
        self.comment = u"Smallest readable size in pixels: %s pixels" % lowest
        direction = header["font_dir"].display
        self.comment = u"Font direction: %s" % direction

    @fault_tolerant
    def extractNames(self, names):
        """Store the strings whose nameID appears in ``NAMEID_TO_ATTR``.

        For each "header" entry the string field is looked up by address;
        entries whose field is missing or is not a string are skipped.
        """
        base = names["offset"].value
        for record in names.array("header"):
            name_id = record["nameID"].value
            # Offset is multiplied by 8 before the lookup
            # (presumably bytes -> bits; confirm against getFieldByAddress).
            address = (base + record["offset"].value) * 8
            field = names.getFieldByAddress(address)
            if not (field and isString(field)):
                continue
            value = field.value
            if name_id not in self.NAMEID_TO_ATTR:
                continue
            attr = self.NAMEID_TO_ATTR[name_id]
            prefix = u"Version "
            if attr == "version" and value.startswith(prefix):
                # "Version 1.2" => "1.2"
                value = value[len(prefix):]
            setattr(self, attr, value)
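# deprecated: OLE2_Metadata is kept below as commented-out reference code only;
# its registerExtractor() call at the end of this file is commented out too.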
# class OLE2_Metadata(RootMetadata):
# SUMMARY_ID_TO_ATTR = {
# 2: "title", # Title
# 3: "title", # Subject
# 4: "author",
# 6: "comment",
# 8: "author", # Last saved by
# 12: "creation_date",
# 13: "last_modification",
# 14: "nb_page",
# 18: "producer",
# }
# IGNORE_SUMMARY = set((
# 1, # Code page
# ))
#
# DOC_SUMMARY_ID_TO_ATTR = {
# 3: "title", # Subject
# 14: "author", # Manager
# }
# IGNORE_DOC_SUMMARY = set((
# 1, # Code page
# ))
#
# def extract(self, ole2):
# self._extract(ole2)
#
# def _extract(self, fieldset):
# try:
# fieldset._feedAll()
# except StopIteration:
# pass
# if "root[0]" in fieldset:
# self._extract(self.getFragment(fieldset["root[0]"]))
# doc_summary = self.getField(fieldset, "doc_summary[0]")
# if doc_summary:
# self.useSummary(doc_summary, True)
# word_doc = self.getField(fieldset, "word_doc[0]")
# if word_doc:
# self.useWordDocument(word_doc)
# summary = self.getField(fieldset, "summary[0]")
# if summary:
# self.useSummary(summary, False)
#
# def getFragment(self, frag):
# stream = frag.getSubIStream()
# ministream = guessParser(stream)
# if not ministream:
# warning("Unable to create the OLE2 mini stream parser!")
# return frag
# return ministream
#
# def getField(self, fieldset, name):
# # _feedAll() is needed to make sure that we get all fragments
# # eg. summary[0], summary[1], ..., summary[n]
# try:
# fieldset._feedAll()
# except StopIteration:
# pass
# if name not in fieldset:
# return None
# field = fieldset[name]
# return self.getFragment(field)
#
# @fault_tolerant
# def useSummary(self, summary, is_doc_summary):
# if "os" in summary:
# self.os = summary["os"].display
# if "section[0]" not in summary:
# return
# summary = summary["section[0]"]
# for property in summary.array("property_index"):
# self.useProperty(summary, property, is_doc_summary)
#
# @fault_tolerant
# def useWordDocument(self, doc):
# self.comment = "Encrypted: %s" % doc["FIB/fEncrypted"].value
#
# @fault_tolerant
# def useProperty(self, summary, property, is_doc_summary):
# field = summary.getFieldByAddress(property["offset"].value * 8)
# if not field \
# or "value" not in field:
# return
# field = field["value"]
# if not field.hasValue():
# return
#
# # Get value
# value = field.value
# if isinstance(value, (str, unicode)):
# value = normalizeString(value)
# if not value:
# return
#
# # Get property identifier
# prop_id = property["id"].value
# if is_doc_summary:
# id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR
# ignore = self.IGNORE_DOC_SUMMARY
# else:
# id_to_attr = self.SUMMARY_ID_TO_ATTR
# ignore = self.IGNORE_SUMMARY
# if prop_id in ignore:
# return
#
# # Get Hachoir metadata key
# try:
# key = id_to_attr[prop_id]
# use_prefix = False
# except LookupError:
# key = "comment"
# use_prefix = True
# if use_prefix:
# prefix = property["id"].display
# if (prefix in ("TotalEditingTime", "LastPrinted")) \
# and (not field):
# # Ignore null time delta
# return
# value = "%s: %s" % (prefix, value)
# else:
# if (key == "last_modification") and (not field):
# # Ignore null timestamp
# return
# setattr(self, key, value)
#
class PcfMetadata(RootMetadata):
    """Metadata collected from the "properties" part of a PCF font."""

    # Property name -> metadata attribute that receives its value
    PROP_TO_KEY = {
        'CHARSET_REGISTRY': 'charset',
        'COPYRIGHT': 'copyright',
        'WEIGHT_NAME': 'font_weight',
        'FOUNDRY': 'author',
        'FONT': 'title',
        '_XMBDFED_INFO': 'producer',
    }

    def extract(self, pcf):
        """Process the "properties" field set when the parser has one."""
        if "properties" not in pcf:
            return
        self.useProperties(pcf["properties"])

    def useProperties(self, properties):
        """Store every property whose name is listed in ``PROP_TO_KEY``.

        Entries with a missing or empty value, or a missing name field, are
        skipped silently; names that are not listed are reported through
        ``warning()`` and skipped.
        """
        anchor = properties["total_str_length"]
        # Name/value offsets are applied from the end of "total_str_length"
        # (offset * 8 -- presumably bytes -> bits; confirm against the parser).
        strings_start = anchor.address + anchor.size
        for entry in properties.array("property"):
            # Locate the value first, then the name
            value_field = properties.getFieldByAddress(
                strings_start + entry["value_offset"].value * 8)
            if not value_field:
                continue
            value = value_field.value
            if not value:
                continue
            name_field = properties.getFieldByAddress(
                strings_start + entry["name_offset"].value * 8)
            if not name_field:
                continue
            name = name_field.value
            if name in self.PROP_TO_KEY:
                setattr(self, self.PROP_TO_KEY[name], value)
            else:
                warning("Skip %s=%r" % (name, value))
class SwfMetadata(RootMetadata):
    """Metadata collected from the header fields of an SWF file."""

    def extract(self, swf):
        """Copy size, version, frame rate and frame count from ``swf``."""
        # Both dimensions are stored as read, in twips
        dimensions = (
            ("height", "rect/ymax"),
            ("width", "rect/xmax"),
        )
        for attr, path in dimensions:
            setattr(self, attr, swf[path].value)
        version = swf["version"].value
        self.format_version = "flash version %s" % version
        self.frame_rate = swf["frame_rate"].value
        frame_count = swf["frame_count"].value
        self.comment = "Frame count: %s" % frame_count
# Register each metadata class defined in this module together with the
# parser class (imported from hachoir.parser.*) it is written for.
registerExtractor(TorrentFile, TorrentMetadata)
registerExtractor(TrueTypeFontFile, TTF_Metadata)
# Left disabled: OLE2_Metadata is commented out in this module.
# registerExtractor(OLE2_File, OLE2_Metadata)
registerExtractor(PcfFile, PcfMetadata)
registerExtractor(SwfFile, SwfMetadata)