mirror of
synced 2025-03-03 03:15:02 +00:00
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014). Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/* Remove metadata/qt* PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5). Basic Support for XMP Packets. tga: improvements to adhere more closely to the spec. pdf: slightly improved parsing. rar: fix TypeError on unknown block types. Add MacRoman win32 codepage. tiff/exif: support SubIFDs and tiled images. Add method to export metadata in dictionary. mpeg_video: don't attempt to parse Stream past length. mpeg_video: parse ESCR correctly, add SCR value. Change centralise CustomFragments. field: don't set parser class if class is None, to enable autodetect. field: add value/display for CustomFragment. parser: inline warning to enable tracebacks in debug mode. Fix empty bytestrings in makePrintable. Fix contentSize in jpeg.py to account for image_data blocks. Fix the ELF parser. Enhance the AR archive parser. elf parser: fix wrong field order in parsing little endian section flags. elf parser: add s390 as a machine type. Flesh out mp4 parser. PORT: Version 2.0a1 (inline with 3.0a1). Major refactoring and PEP8. Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams. metadata: get comment from ZIP. Support for InputIOStream.read(0). Fix sizeGe when size is None. Remove unused new_seekable_field_set file. Remove parser Mapsforge .map. Remove parser Parallel Realities Starfighter .pak files. sevenzip: fix for newer archives. java: update access flags and modifiers for Java 1.7 and update description text for most recent Java. Support ustar prefix field in tar archives. Remove file_system* parsers. Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*. Remove program parsers macho, nds, prc. Support non-8bit Character subclasses. Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS. Support for creation date in tiff. Change don't hardcode errno constant. PORT: 1.9.1 Internal Only: The following are legacy reference to upstream commit messages. Relevant changes up to b0a115f8. Use integer division. Replace HACHOIR_ERRORS with Exception. Fix metadata.Data: make it sortable. Import fixes from e7de492. PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad). Replace hachoir.core.field with hachoir.field Replace hachoir.core.stream with hachoir.stream Remove the compatibility module for PY1.5 to PY2.5. metadata: support TIFF picture. metadata: fix string normalization. metadata: fix datetime regex Fix hachoir bug #57. FileFromInputStream: fix comparison between None and an int. InputIOStream: open the file in binary mode.
203 lines
8 KiB
203 lines
8 KiB
"""Extract metadata from RIFF file format: AVI video and WAV sound."""
from hachoir.metadata.metadata import Metadata, MultipleMetadata, registerExtractor
from hachoir.metadata.safe import fault_tolerant, getValue
from hachoir.parser.container.riff import RiffFile
from hachoir.parser.video.fourcc import UNCOMPRESSED_AUDIO
from hachoir.core.tools import humanFilesize, makeUnicode, timedelta2seconds
from hachoir.core.i18n import _
from hachoir.metadata.audio import computeComprRate as computeAudioComprRate
from datetime import timedelta
class RiffMetadata(MultipleMetadata):
    """Metadata extractor for RIFF containers.

    Handles the three form types tested in :meth:`extract`: ``"WAVE"``,
    ``"AVI "`` and ``"ACON"``. Textual INFO chunks are mapped onto
    metadata attributes through :attr:`TAG_TO_KEY`.

    NOTE(review): helpers that index parser fields directly are wrapped with
    ``fault_tolerant`` (imported from ``hachoir.metadata.safe``); it is
    expected to report an exception as a warning instead of aborting the whole
    extraction -- confirm against that module.
    """

    # RIFF INFO chunk tag -> name of the metadata attribute it populates.
    TAG_TO_KEY = {
        "INAM": "title",
        "IART": "artist",
        "ICMT": "comment",
        "ICOP": "copyright",
        "IENG": "author",  # (engineer)
        "ISFT": "producer",
        "ICRD": "creation_date",
        "IDIT": "creation_date",
    }

    def extract(self, riff, **kwargs):
        """Dispatch on the RIFF form type and extract the matching metadata.

        :param riff: parsed RIFF field set; ``riff["type"].value`` selects
            the branch.
        :param kwargs: optional ``scan_index`` flag, forwarded to
            :meth:`extractAVI`.
        """
        riff_type = riff["type"].value
        if riff_type == "WAVE":
            self.extractWAVE(riff)
            size = getValue(riff, "audio_data/size")
            if size:
                # Inside a method this name resolves to the module-level
                # import (hachoir.metadata.audio.computeComprRate), not to
                # the method of the same name defined on this class.
                computeAudioComprRate(self, size * 8)
        elif riff_type == "AVI ":
            if "headers" in riff:
                if 'scan_index' in kwargs:
                    self.extractAVI(riff["headers"], scan_index=kwargs['scan_index'])
                else:
                    self.extractAVI(riff["headers"])
                self.extractInfo(riff["headers"])
        elif riff_type == "ACON":
            self.extractAnim(riff)
        if "info" in riff:
            self.extractInfo(riff["info"])

    def processChunk(self, chunk):
        """Copy the text of one INFO chunk to the attribute named in TAG_TO_KEY.

        Chunks without a ``text`` field are ignored. Chunks whose tag is not
        in :attr:`TAG_TO_KEY` are reported through ``self.warning`` and
        skipped.
        """
        if "text" not in chunk:
            return
        value = chunk["text"].value
        tag = chunk["tag"].value
        key = self.TAG_TO_KEY.get(tag)
        if key is None:
            self.warning("Skip RIFF metadata %s: %s" % (tag, value))
            return
        setattr(self, key, value)

    @fault_tolerant
    def extractWAVE(self, wav):
        """Fill in channel count, sample format, codec, duration and bit rate
        from the ``format`` chunk of a WAVE file."""
        fmt = wav["format"]

        # Number of channel, bits/sample, sample rate
        self.nb_channel = fmt["nb_channel"].value
        self.bits_per_sample = fmt["bit_per_sample"].value
        self.sample_rate = fmt["sample_per_sec"].value

        self.compression = fmt["codec"].display
        if "nb_sample/nb_sample" in wav \
                and 0 < fmt["sample_per_sec"].value:
            self.duration = timedelta(seconds=float(
                wav["nb_sample/nb_sample"].value) / fmt["sample_per_sec"].value)
        if fmt["codec"].value in UNCOMPRESSED_AUDIO:
            # Codec with fixed bit rate
            self.bit_rate = (fmt["nb_channel"].value
                             * fmt["bit_per_sample"].value
                             * fmt["sample_per_sec"].value)
            # Fall back to "data size / bit rate" when no sample count gave
            # a duration above.
            if not self.has("duration") \
                    and "audio_data/size" in wav \
                    and self.has("bit_rate"):
                duration = float(wav["audio_data/size"].value) * \
                    8 / self.get('bit_rate')
                self.duration = timedelta(seconds=duration)

    def extractInfo(self, fieldset):
        """Walk *fieldset* and pass every tagged chunk to :meth:`processChunk`.

        Chunks tagged ``"LIST"`` are descended into recursively; fields that
        are not field sets are skipped.
        """
        for field in fieldset:
            if not field.is_field_set:
                continue
            if "tag" in field:
                if field["tag"].value == "LIST":
                    self.extractInfo(field)
                else:
                    self.processChunk(field)

    @fault_tolerant
    def extractAVIVideo(self, header, meta):
        """Describe one AVI video stream in *meta* from its stream header.

        Also sets ``self.duration`` when a positive frame rate is available.
        """
        meta.compression = "%s (fourcc:\"%s\")" \
            % (header["fourcc"].display, makeUnicode(header["fourcc"].value))
        if header["rate"].value and header["scale"].value:
            fps = float(header["rate"].value) / header["scale"].value
            meta.frame_rate = fps
            if 0 < fps:
                self.duration = meta.duration = timedelta(
                    seconds=float(header["length"].value) / fps)

        if "../stream_fmt/width" in header:
            stream_fmt = header["../stream_fmt"]
            meta.width = stream_fmt["width"].value
            meta.height = stream_fmt["height"].value
            meta.bits_per_pixel = stream_fmt["depth"].value
        else:
            # No stream format available: derive the size from the
            # left/top/right/bottom fields of the stream header.
            meta.width = header["right"].value - header["left"].value
            meta.height = header["bottom"].value - header["top"].value

    @fault_tolerant
    def extractAVIAudio(self, format, meta):
        """Describe one AVI audio stream in *meta* from its stream format.

        ``format["bit_rate"]`` is multiplied by 8 before being stored
        (presumably bytes/s -> bits/s; confirm against the parser).
        """
        meta.nb_channel = format["channel"].value
        meta.sample_rate = format["sample_rate"].value
        meta.bit_rate = format["bit_rate"].value * 8
        if format["bits_per_sample"].value:
            meta.bits_per_sample = format["bits_per_sample"].value
        if "../stream_hdr" in format:
            header = format["../stream_hdr"]
            if header["rate"].value and header["scale"].value:
                frame_rate = float(
                    header["rate"].value) / header["scale"].value
                meta.duration = timedelta(seconds=float(
                    header["length"].value) / frame_rate)
            if header["fourcc"].value != "":
                meta.compression = "%s (fourcc:\"%s\")" \
                    % (format["codec"].display, header["fourcc"].value)
        if not meta.has("compression"):
            meta.compression = format["codec"].display

        self.computeAudioComprRate(meta)

    @fault_tolerant
    def computeAudioComprRate(self, meta):
        """Set ``meta.compr_rate`` to (channels * sample rate * bits per
        sample) divided by the stream's ``bit_rate``.

        Nothing is set when either quantity is missing or zero;
        ``bits_per_sample`` defaults to 16.
        """
        stream_rate = meta.get('bit_rate', 0)
        if not stream_rate:
            return
        raw_rate = meta.get('nb_channel') * meta.get('sample_rate') * \
            meta.get('bits_per_sample', default=16)
        if not raw_rate:
            return
        meta.compr_rate = float(raw_rate) / stream_rate

    @fault_tolerant
    def useAviHeader(self, header):
        """Take frame rate, fallback duration and frame size from the main
        AVI header."""
        microsec = header["microsec_per_frame"].value
        if microsec:
            self.frame_rate = 1000000.0 / microsec
            total_frame = getValue(header, "total_frame")
            # Only used when no stream header already provided a duration.
            if total_frame and not self.has("duration"):
                self.duration = timedelta(microseconds=total_frame * microsec)
        self.width = header["width"].value
        self.height = header["height"].value

    def extractAVI(self, headers, **kwargs):
        """Extract per-stream and global metadata from the AVI header list.

        :param headers: field set holding the ``stream`` entries and,
            optionally, ``avi_hdr``.
        :param kwargs: ``scan_index`` (default ``True``) -- when true and an
            ``/index`` field exists, its size is reported in ``comment``.
        """
        audio_index = 1
        for stream in headers.array("stream"):
            if "stream_hdr/stream_type" not in stream:
                continue
            stream_type = stream["stream_hdr/stream_type"].value
            if stream_type == "vids":
                if "stream_hdr" in stream:
                    meta = Metadata(self)
                    self.extractAVIVideo(stream["stream_hdr"], meta)
                    self.addGroup("video", meta, "Video stream")
            elif stream_type == "auds":
                if "stream_fmt" in stream:
                    meta = Metadata(self)
                    self.extractAVIAudio(stream["stream_fmt"], meta)
                    self.addGroup("audio[%u]" % audio_index, meta, "Audio stream")
                    audio_index += 1
        if "avi_hdr" in headers:
            self.useAviHeader(headers["avi_hdr"])

        # Compute global bit rate
        if self.has("duration") and "/movie/size" in headers:
            seconds = timedelta2seconds(self.get('duration'))
            # A zero-length duration would otherwise divide by zero.
            if seconds:
                self.bit_rate = float(headers["/movie/size"].value) * 8 / seconds

        # Video has index?
        # dict.get avoids the KeyError that eager tuple indexing raised when
        # the caller did not pass scan_index.
        scan_index = kwargs.get('scan_index', True)
        if scan_index and "/index" in headers:
            self.comment = _("Has audio/video index (%s)") \
                % humanFilesize(headers["/index"].size // 8)

    def extractAnim(self, riff):
        """Estimate the frame rate of an animation (``ACON``) file.

        Uses the per-frame ``anim_rate/rate`` entries when present (at most
        the first 100 are summed), otherwise the header's ``jiffie_rate``.
        Rates are divided by 60.0, i.e. presumably expressed in 1/60 s units.
        """
        if "anim_rate/rate[0]" in riff:
            count = 0
            total = 0
            for rate in riff.array("anim_rate/rate"):
                count += 1
                if 100 < count:
                    break
                total += rate.value / 60.0
            if count and total:
                self.frame_rate = count / total
        if not self.has("frame_rate") and "anim_hdr/jiffie_rate" in riff:
            jiffie_rate = riff["anim_hdr/jiffie_rate"].value
            # A zero rate in the header would otherwise divide by zero.
            if jiffie_rate:
                self.frame_rate = 60.0 / jiffie_rate
# Register RiffMetadata as the metadata extractor associated with the
# RiffFile parser class.
registerExtractor(RiffFile, RiffMetadata)