mirror of
synced 2025-03-03 11:25:03 +00:00
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014). Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*. Remove metadata/qt*. PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5). Basic Support for XMP Packets. tga: improvements to adhere more closely to the spec. pdf: slightly improved parsing. rar: fix TypeError on unknown block types. Add MacRoman win32 codepage. tiff/exif: support SubIFDs and tiled images. Add method to export metadata in dictionary. mpeg_video: don't attempt to parse Stream past length. mpeg_video: parse ESCR correctly, add SCR value. Change centralise CustomFragments. field: don't set parser class if class is None, to enable autodetect. field: add value/display for CustomFragment. parser: inline warning to enable tracebacks in debug mode. Fix empty bytestrings in makePrintable. Fix contentSize in jpeg.py to account for image_data blocks. Fix the ELF parser. Enhance the AR archive parser. elf parser: fix wrong field order in parsing little endian section flags. elf parser: add s390 as a machine type. Flesh out mp4 parser. PORT: Version 2.0a1 (inline with 3.0a1). Major refactoring and PEP8. Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams. metadata: get comment from ZIP. Support for InputIOStream.read(0). Fix sizeGe when size is None. Remove unused new_seekable_field_set file. Remove parser Mapsforge .map. Remove parser Parallel Realities Starfighter .pak files. sevenzip: fix for newer archives. java: update access flags and modifiers for Java 1.7 and update description text for most recent Java. Support ustar prefix field in tar archives. Remove file_system* parsers. Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*. Remove program parsers macho, nds, prc. Support non-8bit Character subclasses. Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS. Support for creation date in tiff. Change don't hardcode errno constant. PORT: 1.9.1 Internal Only: The following are legacy references to upstream commit messages. Relevant changes up to b0a115f8. Use integer division. Replace HACHOIR_ERRORS with Exception. Fix metadata.Data: make it sortable. Import fixes from e7de492. PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad). Replace hachoir.core.field with hachoir.field. Replace hachoir.core.stream with hachoir.stream. Remove the compatibility module for PY1.5 to PY2.5. metadata: support TIFF picture. metadata: fix string normalization. metadata: fix datetime regex. Fix hachoir bug #57. FileFromInputStream: fix comparison between None and an int. InputIOStream: open the file in binary mode.
417 lines
15 KiB
417 lines
15 KiB
from hachoir.field import MissingField
from hachoir.metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir.metadata.metadata_item import QUALITY_GOOD
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.video import AsfFile, FlvFile
from hachoir.parser.video.asf import Descriptor as ASF_Descriptor
from hachoir.parser.container import MkvFile, MP4File
from hachoir.parser.container.mkv import dateToDatetime
from hachoir.core.i18n import _
from hachoir.core.tools import makeUnicode, makePrintable, timedelta2seconds
from datetime import timedelta
# Metadata extractor for Matroska files (registered for MkvFile at module end).
# NOTE(review): indentation and some lines (e.g. the closing brace of tag_key)
# are missing from this listing — restore them from the original file.
class MkvMetadata(MultipleMetadata):
# Maps a Matroska tag name to the metadata attribute its value is stored
# under (looked up by processSimpleTag).
tag_key = {
"TITLE": "title",
"URL": "url",
"COPYRIGHT": "copyright",
# TODO: use maybe another name?
# Its value may be different than (...)/Info/DateUTC/date
"DATE_RECORDED": "creation_date",
# TODO: Extract subtitle metadata
"SUBTITLE": "subtitle_author",
# Entry point: iterate over every "Segment" field of the parsed MKV file.
# NOTE(review): the loop body is not visible in this listing (presumably a
# call to processSegment — confirm against the original file).
def extract(self, mkv):
for segment in mkv.array("Segment"):
# Dispatch each child field of a segment on its name prefix
# ("Info[", "Tags[", "Tracks[", "Cluster[").
# NOTE(review): the branch bodies are not visible in this listing; only the
# conditions remain, including the QUALITY_GOOD comparison that follows the
# "Cluster[" test.
def processSegment(self, segment):
for field in segment:
if field.name.startswith("Info["):
elif field.name.startswith("Tags["):
for tag in field.array("Tag"):
elif field.name.startswith("Tracks["):
elif field.name.startswith("Cluster["):
if self.quality < QUALITY_GOOD:
# Iterate over every "TrackEntry" of a Tracks field.
# NOTE(review): the loop body is not visible in this listing.
def processTracks(self, tracks):
for entry in tracks.array("TrackEntry"):
# Branch on the display value of "TrackType/enum": "video", "audio" or
# "subtitle". Tracks without a "TrackType/enum" field are tested for first.
# NOTE(review): the statements under each test are not visible in this
# listing (presumably an early return, then processVideo / processAudio /
# processSubtitle — confirm against the original file).
def processTrack(self, track):
if "TrackType/enum" not in track:
if track["TrackType/enum"].display == "video":
elif track["TrackType/enum"].display == "audio":
elif track["TrackType/enum"].display == "subtitle":
# Copy the properties shared by all track types into `meta`:
# title from "Name/unicode" and language from "Language/string".
# NOTE(review): as listed, "eng" is assigned right after the track's own
# language; an `else:` line presumably preceded that assignment (default
# used when the track has no Language field) — confirm against the original.
def trackCommon(self, track, meta):
if "Name/unicode" in track:
meta.title = track["Name/unicode"].value
if "Language/string" in track:
meta.language = track["Language/string"].value
meta.language = "eng"
# Build a Metadata object for one video track: common fields, codec id and,
# when a "Video" sub-field exists, pixel width and height. The result is
# added as a "video[]" group labelled "Video stream".
# NOTE(review): `except MissingField:` has no visible `try:` and no handler
# body — those lines are missing from this listing.
def processVideo(self, track):
video = Metadata(self)
self.trackCommon(track, video)
video.compression = track["CodecID/string"].value
if "Video" in track:
video.width = track["Video/PixelWidth/unsigned"].value
video.height = track["Video/PixelHeight/unsigned"].value
except MissingField:
self.addGroup("video[]", video, "Video stream")
def getDouble(self, field, parent):
    """
    Read a floating point value stored below `parent` inside `field`.

    The "<parent>/float" entry is tried first, then "<parent>/double".
    Return the `.value` of the first entry found, or None when `field`
    contains neither.
    """
    for template in ('%s/float', '%s/double'):
        path = template % parent
        if path in field:
            return field[path].value
    return None
def processAudio(self, track):
    """
    Describe one audio track and register it as an "audio[]" group.

    Common track fields are filled in by trackCommon(). Sampling frequency,
    channel count and bit depth are read from the "Audio" sub-fields when
    they exist, and the codec id from "CodecID/string".
    """
    stream = Metadata(self)
    self.trackCommon(track, stream)
    # NOTE(review): the nesting below "Audio" was reconstructed from a
    # listing without indentation — confirm against the original file.
    if "Audio" in track:
        rate = self.getDouble(track, "Audio/SamplingFrequency")
        if rate is not None:
            stream.sample_rate = rate
        optional_fields = (
            ("Audio/Channels/unsigned", "nb_channel"),
            ("Audio/BitDepth/unsigned", "bits_per_sample"),
        )
        for path, attribute in optional_fields:
            if path in track:
                setattr(stream, attribute, track[path].value)
    codec_path = "CodecID/string"
    if codec_path in track:
        stream.compression = track[codec_path].value
    self.addGroup("audio[]", stream, "Audio stream")
# Build a Metadata object for one subtitle track (common fields plus codec
# id) and add it as a "subtitle[]" group labelled "Subtitle".
# NOTE(review): `except MissingField:` has no visible `try:` and no handler
# body — those lines are missing from this listing.
def processSubtitle(self, track):
sub = Metadata(self)
self.trackCommon(track, sub)
sub.compression = track["CodecID/string"].value
except MissingField:
self.addGroup("subtitle[]", sub, "Subtitle")
# Iterate over every "SimpleTag" of a Tag field.
# NOTE(review): the loop body is not visible in this listing.
def processTag(self, tag):
for field in tag.array("SimpleTag"):
# Store one SimpleTag on this metadata object: the tag name is looked up in
# tag_key to find the attribute name, which is then set to the tag's string
# value.
# NOTE(review): the statements guarded by the two `if` tests (presumably
# early returns for incomplete or unknown tags) are not visible in this
# listing.
def processSimpleTag(self, tag):
if "TagName/unicode" not in tag \
or "TagString/unicode" not in tag:
name = tag["TagName/unicode"].value
if name not in self.tag_key:
key = self.tag_key[name]
value = tag["TagString/unicode"].value
setattr(self, key, value)
# Read file-level properties from an Info field: duration, creation date,
# producer and title.
# NOTE(review): both `except OverflowError:` lines have no visible `try:` or
# handler body — those lines are missing from this listing.
def processInfo(self, info):
if "TimecodeScale/unsigned" in info:
duration = self.getDouble(info, "Duration")
if duration is not None:
# Duration * TimecodeScale * 1e-9 is passed to timedelta as seconds.
seconds = duration * info["TimecodeScale/unsigned"].value * 1e-9
self.duration = timedelta(seconds=seconds)
except OverflowError:
# Catch OverflowError for timedelta (long int too large
# to be converted to an int)
if "DateUTC/date" in info:
self.creation_date = dateToDatetime(info["DateUTC/date"].value)
except OverflowError:
# Both the writing and the muxing application are assigned to `producer`.
if "WritingApp/unicode" in info:
self.producer = info["WritingApp/unicode"].value
if "MuxingApp/unicode" in info:
self.producer = info["MuxingApp/unicode"].value
if "Title/unicode" in info:
self.title = info["Title/unicode"].value
# Metadata extractor for FLV files (registered for FlvFile at module end).
class FlvMetadata(MultipleMetadata):
# Describe the first video and first audio chunk as "video" / "audio"
# groups, record the container description as format version, then derive a
# bit rate when a duration is known.
def extract(self, flv):
if "video[0]" in flv:
meta = Metadata(self)
self.extractVideo(flv["video[0]"], meta)
self.addGroup("video", meta, "Video stream")
if "audio[0]" in flv:
meta = Metadata(self)
self.extractAudio(flv["audio[0]"], meta)
self.addGroup("audio", meta, "Audio stream")
# TODO: Compute duration
# One technique: use last video/audio chunk and use timestamp
# But this is very slow
self.format_version = flv.description
# NOTE(review): the statement guarded by the next test is not visible in
# this listing (presumably a call to extractAMF — confirm).
if "metadata/entry[1]" in flv:
if self.has('duration'):
# flv.size divided by the duration expressed in seconds.
self.bit_rate = flv.size / timedelta2seconds(self.get('duration'))
# Fill `meta` from an FLV audio chunk: compression, sample rate, bits per
# sample (16 or 8) and channel count (2 or 1).
# NOTE(review): as listed, each first assignment is immediately overwritten
# by the next line; `else:` lines presumably separated the pairs (MP3
# description vs. codec name, 16 vs. 8 bits, 2 vs. 1 channels) — confirm
# against the original file.
def extractAudio(self, audio, meta):
if audio["codec"].display == "MP3" and "music_data" in audio:
meta.compression = audio["music_data"].description
meta.compression = audio["codec"].display
meta.sample_rate = audio.getSampleRate()
if audio["is_16bit"].value:
meta.bits_per_sample = 16
meta.bits_per_sample = 8
if audio["is_stereo"].value:
meta.nb_channel = 2
meta.nb_channel = 1
def extractVideo(self, video, meta):
    """Store the display name of the chunk's "codec" field as `meta.compression`."""
    codec = video["codec"]
    meta.compression = codec.display
# Iterate over every "item" of an AMF metadata field.
# NOTE(review): the loop body is not visible in this listing (presumably a
# call to useAmfEntry — confirm against the original file).
def extractAMF(self, amf):
for entry in amf.array("item"):
def useAmfEntry(self, entry):
    """
    Copy one AMF key/value entry onto this metadata object.

    Recognised keys: "duration" (stored as a timedelta built from seconds),
    "creator" and "metadatacreator" (producer), "audiosamplerate"
    (sample_rate), "framerate" (frame_rate), "metadatadate" (creation_date),
    "width" and "height" (converted with int()). Any other key is ignored.
    """
    name = entry["key"].value

    if name == "metadatadate":
        # NOTE(review): this reads entry.value while every other branch
        # reads entry["value"].value — confirm that this is intended.
        self.creation_date = entry.value
        return

    if name == "duration":
        self.duration = timedelta(seconds=entry["value"].value)
        return

    if name in ("width", "height"):
        setattr(self, name, int(entry["value"].value))
        return

    # Keys whose value is stored unchanged under another attribute name.
    direct = {
        "creator": "producer",
        "audiosamplerate": "sample_rate",
        "framerate": "frame_rate",
        "metadatacreator": "producer",
    }
    attribute = direct.get(name)
    if attribute is not None:
        setattr(self, attribute, entry["value"].value)
# Metadata extractor for MP4 files (registered for MP4File at module end).
class MP4Metadata(RootMetadata):
# Entry point: look for a "movie" field inside each top-level atom.
# NOTE(review): the statement under the test is not visible in this listing
# (presumably a call to processMovie — confirm against the original file).
def extract(self, mov):
for atom in mov:
if "movie" in atom:
def processMovieHeader(self, hdr):
    """
    Copy the movie header properties onto this metadata object.

    Sets creation and last modification dates, the duration (header
    duration divided by its time scale, passed to timedelta as seconds) and
    two comments giving play speed and user volume as percentages.
    """
    self.creation_date = hdr["creation_date"].value
    self.last_modification = hdr["lastmod_date"].value

    ticks = float(hdr["duration"].value)
    self.duration = timedelta(seconds=ticks / hdr["time_scale"].value)

    # Each assignment to `comment` is kept, in the original order.
    speed_text = _("Play speed: %.1f%%")
    self.comment = speed_text % (hdr["play_speed"].value * 100)

    volume_text = _("User volume: %.1f%%")
    self.comment = volume_text % (float(hdr["volume"].value) * 100)
def processTrackHeader(self, hdr):
    """
    Record the frame size of a track header as width and height.

    Both values are converted with int(); nothing is stored when either
    of them is zero.
    """
    frame_width = int(hdr["frame_size_width"].value)
    frame_height = int(hdr["frame_size_height"].value)
    if not frame_width or not frame_height:
        return
    self.width = frame_width
    self.height = frame_height
# Look for a "track_hdr" field inside each child of a track atom.
# NOTE(review): the statement under the test is not visible in this listing
# (presumably a call to processTrackHeader — confirm).
def processTrack(self, atom):
for field in atom:
if "track_hdr" in field:
# Look for "track" and "movie_hdr" fields inside each child of a movie atom.
# NOTE(review): the statements under both tests are not visible in this
# listing (presumably calls to processTrack and processMovieHeader — confirm).
def processMovie(self, atom):
for field in atom:
if "track" in field:
if "movie_hdr" in field:
# Metadata extractor for ASF files (registered for AsfFile at module end).
# NOTE(review): the lines opening and closing the two literals below are
# missing from this listing. The methods of this class read
# self.EXT_DESC_TO_ATTR (a key -> attribute mapping) and self.SKIP_EXT_DESC
# (a collection of ignored keys), which presumably are these two literals —
# confirm against the original file.
class AsfMetadata(MultipleMetadata):
"Encoder": "producer",
"ToolName": "producer",
"AlbumTitle": "album",
# NOTE(review): "TrackNumber" is mapped to track_total — confirm this is
# intended.
"Track": "track_number",
"TrackNumber": "track_total",
"Year": "creation_date",
"AlbumArtist": "author",
# Useless information
"WMFSDKNeeded", "WMFSDKVersion",
"Buffer Average", "VBR Peak", "EncodingTime",
"MediaPrimaryClassID", "UniqueFileIdentifier",
# Entry point: check whether the file has a "header/content" field.
# NOTE(review): the statement under the test is not visible in this listing
# (presumably a call to processHeader — confirm against the original file).
def extract(self, asf):
if "header/content" in asf:
# Extract metadata from the ASF header: extended descriptors, file
# properties, codec list, per-stream audio/video groups and the
# title/author/copyright block.
# NOTE(review): several lines are missing from this listing — the `else:`
# before the "comment" fallback, whatever consumes `text` and `compression`
# in the codec loop, and the `try:` / handler body around the final
# `except MissingField:`. Restore them from the original file before editing.
def processHeader(self, header):
compression = []
is_vbr = None
if "ext_desc/content" in header:
# Extract all data from ext_desc
data = {}
for desc in header.array("ext_desc/content/descriptor"):
self.useExtDescItem(desc, data)
# Have ToolName and ToolVersion? If yes, group them to producer key
if "ToolName" in data and "ToolVersion" in data:
self.producer = "%s (version %s)" % (data["ToolName"], data["ToolVersion"])
del data["ToolName"]
del data["ToolVersion"]
# "IsVBR" key
if "IsVBR" in data:
is_vbr = (data["IsVBR"] == 1)
del data["IsVBR"]
# Store data
# NOTE(review): dict.iteritems() exists only on Python 2; on Python 3 this
# call raises AttributeError.
for key, value in data.iteritems():
if key in self.EXT_DESC_TO_ATTR:
key = self.EXT_DESC_TO_ATTR[key]
if isinstance(key, str):
key = makePrintable(key, "ISO-8859-1", to_unicode=True)
# The pair is stored as a "key=value" comment.
value = "%s=%s" % (key, value)
key = "comment"
setattr(self, key, value)
if "file_prop/content" in header:
self.useFileProp(header["file_prop/content"], is_vbr)
if "codec_list/content" in header:
for codec in header.array("codec_list/content/codec"):
if "name" in codec:
text = codec["name"].value
if "desc" in codec and codec["desc"].value:
text = "%s (%s)" % (text, codec["desc"].value)
# Group numbering starts at 1 and only advances when addGroup() returns a
# true value.
audio_index = 1
video_index = 1
for index, stream_prop in enumerate(header.array("stream_prop")):
if "content/audio_header" in stream_prop:
meta = Metadata(self)
self.streamProperty(header, index, meta)
self.streamAudioHeader(stream_prop["content/audio_header"], meta)
if self.addGroup("audio[%u]" % audio_index, meta, "Audio stream #%u" % audio_index):
audio_index += 1
elif "content/video_header" in stream_prop:
meta = Metadata(self)
self.streamProperty(header, index, meta)
self.streamVideoHeader(stream_prop["content/video_header"], meta)
if self.addGroup("video[%u]" % video_index, meta, "Video stream #%u" % video_index):
video_index += 1
if "metadata/content" in header:
info = header["metadata/content"]
self.title = info["title"].value
self.author = info["author"].value
self.copyright = info["copyright"].value
except MissingField:
def streamAudioHeader(self, audio, meta):
    """
    Fill `meta` from an ASF audio stream header.

    The compression name comes from the display value of "twocc" and is
    only set when `meta` has no compression yet. Channel count, sample rate
    and bits per sample are always copied.
    """
    if not meta.has("compression"):
        meta.compression = audio["twocc"].display
    copied = (
        ("nb_channel", "channels"),
        ("sample_rate", "sample_rate"),
        ("bits_per_sample", "bits_per_sample"),
    )
    for attribute, field_name in copied:
        setattr(meta, attribute, audio[field_name].value)
def streamVideoHeader(self, video, meta):
    """
    Fill `meta` from an ASF video stream header.

    Width and height are always copied. When the header has a "bmp_info"
    field, its "bpp" value is stored as bits per pixel and the display value
    of its "codec" is used as compression, unless `meta` already has one.
    """
    meta.width = video["width"].value
    meta.height = video["height"].value
    if "bmp_info" not in video:
        return
    # NOTE(review): nesting reconstructed from a listing without indentation
    # (bits_per_pixel is set whenever "bmp_info" exists) — confirm.
    bitmap = video["bmp_info"]
    if not meta.has("compression"):
        meta.compression = bitmap["codec"].display
    meta.bits_per_pixel = bitmap["bpp"].value
# Store one extended-content descriptor in the `data` dict, keyed by its
# name with any prefix up to the first "/" removed.
# NOTE(review): the statements guarded by the three skip tests (byte-array
# type, key in SKIP_EXT_DESC, empty value) are not visible in this listing
# (presumably early returns — confirm against the original file).
def useExtDescItem(self, desc, data):
if desc["type"].value == ASF_Descriptor.TYPE_BYTE_ARRAY:
# Skip binary data
key = desc["name"].value
if "/" in key:
# Replace "WM/ToolName" with "ToolName"
key = key.split("/", 1)[1]
if key in self.SKIP_EXT_DESC:
# Skip some keys
value = desc["value"].value
if not value:
value = makeUnicode(value)
data[key] = value
# Copy file properties onto this metadata object: creation date, play
# duration, a "seekable" comment and the maximum bit rate.
# `is_vbr` is True, False or None (unknown) and selects the bit rate label.
# NOTE(review): as listed, the "%s (max)" label overwrites the VBR/CBR
# labels; an `else:` line presumably preceded it — confirm against the
# original file.
def useFileProp(self, prop, is_vbr):
self.creation_date = prop["creation_date"].value
self.duration = prop["play_duration"].value
if prop["seekable"].value:
self.comment = u"Is seekable"
value = prop["max_bitrate"].value
text = prop["max_bitrate"].display
if is_vbr is True:
text = "VBR (%s max)" % text
elif is_vbr is False:
text = "%s (CBR)" % text
text = "%s (max)" % text
# The bit rate is stored as a (value, display text) pair.
self.bit_rate = (value, text)
def streamProperty(self, header, index, meta):
    """
    Set `meta.bit_rate` from the average bit rate entry number `index`.

    Nothing is stored when `header` has no
    "bit_rates/content/bit_rate[<index>]/avg_bitrate" field.
    """
    path = "bit_rates/content/bit_rate[%u]/avg_bitrate" % index
    if path in header:
        bit_rate_field = header[path]
        meta.bit_rate = bit_rate_field.value

    # TODO: Use codec list
    # It doesn't work when the video uses /header/content/bitrate_mutex
    # since the codec list are shared between streams but... how is it
    # shared?
    # key = "codec_list/content/codec[%u]" % index
    # if key in header:
    #     codec = header[key]
    #     if "name" in codec:
    #         text = codec["name"].value
    #         if "desc" in codec and codec["desc"].value:
    #             meta.compression = "%s (%s)" % (text, codec["desc"].value)
    #         else:
    #             meta.compression = text
# Register each metadata extractor class for the parser class it handles.
registerExtractor(AsfFile, AsfMetadata)
registerExtractor(FlvFile, FlvMetadata)
registerExtractor(MkvFile, MkvMetadata)
registerExtractor(MP4File, MP4Metadata)