SickGear/lib/hachoir/metadata/video.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong wrong fields order in parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

417 lines
15 KiB
Python

from hachoir.field import MissingField
from hachoir.metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir.metadata.metadata_item import QUALITY_GOOD
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.video import AsfFile, FlvFile
from hachoir.parser.video.asf import Descriptor as ASF_Descriptor
from hachoir.parser.container import MkvFile, MP4File
from hachoir.parser.container.mkv import dateToDatetime
from hachoir.core.i18n import _
from hachoir.core.tools import makeUnicode, makePrintable, timedelta2seconds
from datetime import timedelta
class MkvMetadata(MultipleMetadata):
tag_key = {
"TITLE": "title",
"URL": "url",
"COPYRIGHT": "copyright",
# TODO: use maybe another name?
# Its value may be different than (...)/Info/DateUTC/date
"DATE_RECORDED": "creation_date",
# TODO: Extract subtitle metadata
"SUBTITLE": "subtitle_author",
}
def extract(self, mkv):
for segment in mkv.array("Segment"):
self.processSegment(segment)
def processSegment(self, segment):
for field in segment:
if field.name.startswith("Info["):
self.processInfo(field)
elif field.name.startswith("Tags["):
for tag in field.array("Tag"):
self.processTag(tag)
elif field.name.startswith("Tracks["):
self.processTracks(field)
elif field.name.startswith("Cluster["):
if self.quality < QUALITY_GOOD:
return
def processTracks(self, tracks):
for entry in tracks.array("TrackEntry"):
self.processTrack(entry)
def processTrack(self, track):
if "TrackType/enum" not in track:
return
if track["TrackType/enum"].display == "video":
self.processVideo(track)
elif track["TrackType/enum"].display == "audio":
self.processAudio(track)
elif track["TrackType/enum"].display == "subtitle":
self.processSubtitle(track)
def trackCommon(self, track, meta):
if "Name/unicode" in track:
meta.title = track["Name/unicode"].value
if "Language/string" in track:
meta.language = track["Language/string"].value
else:
meta.language = "eng"
def processVideo(self, track):
video = Metadata(self)
self.trackCommon(track, video)
try:
video.compression = track["CodecID/string"].value
if "Video" in track:
video.width = track["Video/PixelWidth/unsigned"].value
video.height = track["Video/PixelHeight/unsigned"].value
except MissingField:
pass
self.addGroup("video[]", video, "Video stream")
def getDouble(self, field, parent):
float_key = '%s/float' % parent
if float_key in field:
return field[float_key].value
double_key = '%s/double' % parent
if double_key in field:
return field[double_key].value
return None
def processAudio(self, track):
audio = Metadata(self)
self.trackCommon(track, audio)
if "Audio" in track:
frequency = self.getDouble(track, "Audio/SamplingFrequency")
if frequency is not None:
audio.sample_rate = frequency
if "Audio/Channels/unsigned" in track:
audio.nb_channel = track["Audio/Channels/unsigned"].value
if "Audio/BitDepth/unsigned" in track:
audio.bits_per_sample = track["Audio/BitDepth/unsigned"].value
if "CodecID/string" in track:
audio.compression = track["CodecID/string"].value
self.addGroup("audio[]", audio, "Audio stream")
def processSubtitle(self, track):
sub = Metadata(self)
self.trackCommon(track, sub)
try:
sub.compression = track["CodecID/string"].value
except MissingField:
pass
self.addGroup("subtitle[]", sub, "Subtitle")
def processTag(self, tag):
for field in tag.array("SimpleTag"):
self.processSimpleTag(field)
def processSimpleTag(self, tag):
if "TagName/unicode" not in tag \
or "TagString/unicode" not in tag:
return
name = tag["TagName/unicode"].value
if name not in self.tag_key:
return
key = self.tag_key[name]
value = tag["TagString/unicode"].value
setattr(self, key, value)
def processInfo(self, info):
if "TimecodeScale/unsigned" in info:
duration = self.getDouble(info, "Duration")
if duration is not None:
try:
seconds = duration * info["TimecodeScale/unsigned"].value * 1e-9
self.duration = timedelta(seconds=seconds)
except OverflowError:
# Catch OverflowError for timedelta (long int too large
# to be converted to an int)
pass
if "DateUTC/date" in info:
try:
self.creation_date = dateToDatetime(info["DateUTC/date"].value)
except OverflowError:
pass
if "WritingApp/unicode" in info:
self.producer = info["WritingApp/unicode"].value
if "MuxingApp/unicode" in info:
self.producer = info["MuxingApp/unicode"].value
if "Title/unicode" in info:
self.title = info["Title/unicode"].value
class FlvMetadata(MultipleMetadata):
def extract(self, flv):
if "video[0]" in flv:
meta = Metadata(self)
self.extractVideo(flv["video[0]"], meta)
self.addGroup("video", meta, "Video stream")
if "audio[0]" in flv:
meta = Metadata(self)
self.extractAudio(flv["audio[0]"], meta)
self.addGroup("audio", meta, "Audio stream")
# TODO: Computer duration
# One technic: use last video/audio chunk and use timestamp
# But this is very slow
self.format_version = flv.description
if "metadata/entry[1]" in flv:
self.extractAMF(flv["metadata/entry[1]"])
if self.has('duration'):
self.bit_rate = flv.size / timedelta2seconds(self.get('duration'))
@fault_tolerant
def extractAudio(self, audio, meta):
if audio["codec"].display == "MP3" and "music_data" in audio:
meta.compression = audio["music_data"].description
else:
meta.compression = audio["codec"].display
meta.sample_rate = audio.getSampleRate()
if audio["is_16bit"].value:
meta.bits_per_sample = 16
else:
meta.bits_per_sample = 8
if audio["is_stereo"].value:
meta.nb_channel = 2
else:
meta.nb_channel = 1
@fault_tolerant
def extractVideo(self, video, meta):
meta.compression = video["codec"].display
def extractAMF(self, amf):
for entry in amf.array("item"):
self.useAmfEntry(entry)
@fault_tolerant
def useAmfEntry(self, entry):
key = entry["key"].value
if key == "duration":
self.duration = timedelta(seconds=entry["value"].value)
elif key == "creator":
self.producer = entry["value"].value
elif key == "audiosamplerate":
self.sample_rate = entry["value"].value
elif key == "framerate":
self.frame_rate = entry["value"].value
elif key == "metadatacreator":
self.producer = entry["value"].value
elif key == "metadatadate":
self.creation_date = entry.value
elif key == "width":
self.width = int(entry["value"].value)
elif key == "height":
self.height = int(entry["value"].value)
class MP4Metadata(RootMetadata):
def extract(self, mov):
for atom in mov:
if "movie" in atom:
self.processMovie(atom["movie"])
@fault_tolerant
def processMovieHeader(self, hdr):
self.creation_date = hdr["creation_date"].value
self.last_modification = hdr["lastmod_date"].value
self.duration = timedelta(seconds=float(hdr["duration"].value) / hdr["time_scale"].value)
self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value * 100)
self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value) * 100)
@fault_tolerant
def processTrackHeader(self, hdr):
width = int(hdr["frame_size_width"].value)
height = int(hdr["frame_size_height"].value)
if width and height:
self.width = width
self.height = height
def processTrack(self, atom):
for field in atom:
if "track_hdr" in field:
self.processTrackHeader(field["track_hdr"])
def processMovie(self, atom):
for field in atom:
if "track" in field:
self.processTrack(field["track"])
if "movie_hdr" in field:
self.processMovieHeader(field["movie_hdr"])
class AsfMetadata(MultipleMetadata):
EXT_DESC_TO_ATTR = {
"Encoder": "producer",
"ToolName": "producer",
"AlbumTitle": "album",
"Track": "track_number",
"TrackNumber": "track_total",
"Year": "creation_date",
"AlbumArtist": "author",
}
SKIP_EXT_DESC = set((
# Useless informations
"WMFSDKNeeded", "WMFSDKVersion",
"Buffer Average", "VBR Peak", "EncodingTime",
"MediaPrimaryClassID", "UniqueFileIdentifier",
))
def extract(self, asf):
if "header/content" in asf:
self.processHeader(asf["header/content"])
def processHeader(self, header):
compression = []
is_vbr = None
if "ext_desc/content" in header:
# Extract all data from ext_desc
data = {}
for desc in header.array("ext_desc/content/descriptor"):
self.useExtDescItem(desc, data)
# Have ToolName and ToolVersion? If yes, group them to producer key
if "ToolName" in data and "ToolVersion" in data:
self.producer = "%s (version %s)" % (data["ToolName"], data["ToolVersion"])
del data["ToolName"]
del data["ToolVersion"]
# "IsVBR" key
if "IsVBR" in data:
is_vbr = (data["IsVBR"] == 1)
del data["IsVBR"]
# Store data
for key, value in data.iteritems():
if key in self.EXT_DESC_TO_ATTR:
key = self.EXT_DESC_TO_ATTR[key]
else:
if isinstance(key, str):
key = makePrintable(key, "ISO-8859-1", to_unicode=True)
value = "%s=%s" % (key, value)
key = "comment"
setattr(self, key, value)
if "file_prop/content" in header:
self.useFileProp(header["file_prop/content"], is_vbr)
if "codec_list/content" in header:
for codec in header.array("codec_list/content/codec"):
if "name" in codec:
text = codec["name"].value
if "desc" in codec and codec["desc"].value:
text = "%s (%s)" % (text, codec["desc"].value)
compression.append(text)
audio_index = 1
video_index = 1
for index, stream_prop in enumerate(header.array("stream_prop")):
if "content/audio_header" in stream_prop:
meta = Metadata(self)
self.streamProperty(header, index, meta)
self.streamAudioHeader(stream_prop["content/audio_header"], meta)
if self.addGroup("audio[%u]" % audio_index, meta, "Audio stream #%u" % audio_index):
audio_index += 1
elif "content/video_header" in stream_prop:
meta = Metadata(self)
self.streamProperty(header, index, meta)
self.streamVideoHeader(stream_prop["content/video_header"], meta)
if self.addGroup("video[%u]" % video_index, meta, "Video stream #%u" % video_index):
video_index += 1
if "metadata/content" in header:
info = header["metadata/content"]
try:
self.title = info["title"].value
self.author = info["author"].value
self.copyright = info["copyright"].value
except MissingField:
pass
@fault_tolerant
def streamAudioHeader(self, audio, meta):
if not meta.has("compression"):
meta.compression = audio["twocc"].display
meta.nb_channel = audio["channels"].value
meta.sample_rate = audio["sample_rate"].value
meta.bits_per_sample = audio["bits_per_sample"].value
@fault_tolerant
def streamVideoHeader(self, video, meta):
meta.width = video["width"].value
meta.height = video["height"].value
if "bmp_info" in video:
bmp_info = video["bmp_info"]
if not meta.has("compression"):
meta.compression = bmp_info["codec"].display
meta.bits_per_pixel = bmp_info["bpp"].value
@fault_tolerant
def useExtDescItem(self, desc, data):
if desc["type"].value == ASF_Descriptor.TYPE_BYTE_ARRAY:
# Skip binary data
return
key = desc["name"].value
if "/" in key:
# Replace "WM/ToolName" with "ToolName"
key = key.split("/", 1)[1]
if key in self.SKIP_EXT_DESC:
# Skip some keys
return
value = desc["value"].value
if not value:
return
value = makeUnicode(value)
data[key] = value
@fault_tolerant
def useFileProp(self, prop, is_vbr):
self.creation_date = prop["creation_date"].value
self.duration = prop["play_duration"].value
if prop["seekable"].value:
self.comment = u"Is seekable"
value = prop["max_bitrate"].value
text = prop["max_bitrate"].display
if is_vbr is True:
text = "VBR (%s max)" % text
elif is_vbr is False:
text = "%s (CBR)" % text
else:
text = "%s (max)" % text
self.bit_rate = (value, text)
def streamProperty(self, header, index, meta):
key = "bit_rates/content/bit_rate[%u]/avg_bitrate" % index
if key in header:
meta.bit_rate = header[key].value
# TODO: Use codec list
# It doesn't work when the video uses /header/content/bitrate_mutex
# since the codec list are shared between streams but... how is it
# shared?
# key = "codec_list/content/codec[%u]" % index
# if key in header:
# codec = header[key]
# if "name" in codec:
# text = codec["name"].value
# if "desc" in codec and codec["desc"].value:
# meta.compression = "%s (%s)" % (text, codec["desc"].value)
# else:
# meta.compression = text
registerExtractor(AsfFile, AsfMetadata)
registerExtractor(FlvFile, FlvMetadata)
registerExtractor(MkvFile, MkvMetadata)
registerExtractor(MP4File, MP4Metadata)