Merge branch 'feature/UpdateHachoir' into develop

JackDandy 2018-03-28 00:43:29 +01:00
commit 7c3c9c6095
228 changed files with 8933 additions and 13786 deletions


@ -5,6 +5,7 @@
* Update chardet packages 3.0.4 (9b8c5c2) to 4.0.0 (b3d867a)
* Update dateutil library 2.6.1 (2f3a160) to 2.7.2 (ff03c0f)
* Update feedparser library 5.2.1 (f1dd1bb) to 5.2.1 (5646f4c) - Uses the faster cchardet if installed
* Change to backport Hachoir's PY3 codebase (upstream no longer supports PY2) and so avoid the need for system-dependent external binaries like mediainfo
[develop changelog]

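With Hachoir's PY3 code backported, media files can be probed in pure Python rather than by shelling out to a mediainfo binary. A minimal sketch of the intended use, assuming the parser and metadata subpackages ship with this port as in upstream hachoir:

from hachoir.parser import createParser
from hachoir.metadata import extractMetadata

parser = createParser(u"/path/to/video.mkv")  # guesses a parser from the file content
if parser:
    metadata = extractMetadata(parser)
    if metadata:
        print(metadata.exportPlaintext())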
lib/hachoir/__init__.py (new file, 2 lines)

@ -0,0 +1,2 @@
# noinspection PyPep8Naming
from hachoir.version import VERSION as __version__


@ -3,11 +3,11 @@ Utilities to convert integers and binary strings to binary (number), binary
string, number, hexadecimal, etc.
"""
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir_core.compatibility import reversed
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from itertools import chain, repeat
from struct import calcsize, unpack, error as struct_error
def swap16(value):
"""
Swap byte between big and little endian of a 16 bits integer.
@ -17,6 +17,7 @@ def swap16(value):
"""
return (value & 0xFF) << 8 | (value >> 8)
def swap32(value):
"""
Swap byte between big and little endian of a 32 bits integer.
@ -24,25 +25,28 @@ def swap32(value):
>>> "%x" % swap32(0x12345678)
'78563412'
"""
value = long(value)
return ((value & 0x000000FFL) << 24) \
| ((value & 0x0000FF00L) << 8) \
| ((value & 0x00FF0000L) >> 8) \
| ((value & 0xFF000000L) >> 24)
value = int(value)
return (((value & 0x000000FF) << 24)
| ((value & 0x0000FF00) << 8)
| ((value & 0x00FF0000) >> 8)
| ((value & 0xFF000000) >> 24))
def arrswapmid(data):
r"""
Convert an array of characters from middle-endian to big-endian and vice-versa.
Convert an array of characters from middle-endian to big-endian and
vice-versa.
>>> arrswapmid("badcfehg")
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
"""
assert len(data)%2 == 0
ret = ['']*len(data)
assert len(data) % 2 == 0
ret = [''] * len(data)
ret[1::2] = data[0::2]
ret[0::2] = data[1::2]
return ret
def strswapmid(data):
r"""
Convert raw data from middle-endian to big-endian and vice-versa.
@ -52,6 +56,7 @@ def strswapmid(data):
"""
return ''.join(arrswapmid(data))
def bin2long(text, endian):
"""
Convert a binary number written in a string into an integer.
@ -65,8 +70,8 @@ def bin2long(text, endian):
3
"""
assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
bits = [ (ord(character)-ord("0")) \
for character in text if character in "01" ]
bits = [(ord(character) - ord("0"))
for character in text if character in "01"]
if endian is not BIG_ENDIAN:
bits = bits[::-1]
size = len(bits)
@ -77,6 +82,7 @@ def bin2long(text, endian):
value += bit
return value
def str2hex(value, prefix="", glue=u"", format="%02X"):
r"""
Convert a binary string to hexadecimal (base 16).
@ -100,6 +106,7 @@ def str2hex(value, prefix="", glue=u"", format="%02X"):
text.append(format % ord(character))
return glue.join(text)
def countBits(value):
"""
Count number of bits needed to store a (positive) integer number.
@ -117,7 +124,7 @@ def countBits(value):
count = 1
bits = 1
while (1 << bits) <= value:
count += bits
count += bits
value >>= bits
bits <<= 1
while 2 <= value:
@ -126,10 +133,11 @@ def countBits(value):
else:
bits -= 1
while (1 << bits) <= value:
count += bits
count += bits
value >>= bits
return count
def byte2bin(number, classic_mode=True):
"""
Convert a byte (integer in 0..255 range) to a binary string.
@ -143,7 +151,7 @@ def byte2bin(number, classic_mode=True):
text = ""
for i in range(0, 8):
if classic_mode:
mask = 1 << (7-i)
mask = 1 << (7 - i)
else:
mask = 1 << i
if (number & mask) == mask:
@ -152,6 +160,7 @@ def byte2bin(number, classic_mode=True):
text += "0"
return text
def long2raw(value, endian, size=None):
r"""
Convert a number (positive and not null) to a raw string.
@ -169,7 +178,7 @@ def long2raw(value, endian, size=None):
text = []
while (value != 0 or text == ""):
byte = value % 256
text.append( chr(byte) )
text.append(chr(byte))
value >>= 8
if size:
need = max(size - len(text), 0)
@ -187,6 +196,7 @@ def long2raw(value, endian, size=None):
text = arrswapmid(text)
return "".join(text)
def long2bin(size, value, endian, classic_mode=False):
"""
Convert a number into bits (in a string):
@ -227,6 +237,7 @@ def long2bin(size, value, endian, classic_mode=False):
text = text[8:]
return result
def str2bin(value, classic_mode=True):
r"""
Convert binary string to binary numbers.
@ -245,13 +256,14 @@ def str2bin(value, classic_mode=True):
text += byte2bin(byte, classic_mode)
return text
def _createStructFormat():
"""
Create a dictionary mapping (endian, size_byte) => struct format, used
by str2long() to convert raw data to a positive integer.
"""
format = {
BIG_ENDIAN: {},
BIG_ENDIAN: {},
LITTLE_ENDIAN: {},
}
for struct_format in "BHILQ":
@ -262,8 +274,11 @@ def _createStructFormat():
except struct_error:
pass
return format
_struct_format = _createStructFormat()
def str2long(data, endian):
r"""
Convert a raw data (type 'str') into a long integer.
@ -285,7 +300,7 @@ def str2long(data, endian):
>>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True
"""
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
try:
return unpack(_struct_format[endian][len(data)], data)[0]
except KeyError:

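A quick sanity check of the byte-order helpers above (the swap32 and str2long values match the doctests; swap16 follows the same pattern):

from hachoir.core.bits import swap16, swap32, str2long
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN

assert swap16(0x1234) == 0x3412            # swap the two bytes of a 16-bit value
assert swap32(0x12345678) == 0x78563412    # reverse the four bytes of a 32-bit value
assert str2long("\x01\x02", BIG_ENDIAN) == 0x0102
assert str2long("\x01\x02", LITTLE_ENDIAN) == 0x0201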

@ -8,8 +8,7 @@ import os
max_string_length = 40 # Max. length in characters of GenericString.display
max_byte_length = 14 # Max. length in bytes of RawBytes.display
max_bit_length = 256 # Max. length in bits of RawBits.display
unicode_stdout = True # Replace stdout and stderr with Unicode compatible objects
# Disable it for readline or ipython
unicode_stdout = True # Replace stdout and stderr with Unicode compatible objects, disable for readline or ipython
# Global options
debug = False # Display a lot of information useful for debugging
@ -24,6 +23,5 @@ else:
use_i18n = True
# Parser global options
autofix = True # Enable Autofix? see hachoir_core.field.GenericFieldSet
autofix = True # Enable Autofix? see hachoir.field.GenericFieldSet
check_padding_pattern = True # Check padding fields pattern?


@ -2,32 +2,35 @@
Dictionary classes which preserve value order.
"""
from hachoir_core.error import HachoirError
from hachoir_core.i18n import _
from hachoir.core.i18n import _
class UniqKeyError(HachoirError):
class UniqKeyError(Exception):
"""
Error raised when a value is set but the key already exists in the
dictionary.
"""
pass
class Dict(object):
"""
This class works like a classic Python dict() but with one important
difference: __iter__() iterates over the dictionary _values_ (not the
keys, as Python's dict does).
"""
def __init__(self, values=None):
self._index = {} # key => index
self._key_list = [] # index => key
self._value_list = [] # index => value
self._index = {} # key => index
self._key_list = [] # index => key
self._value_list = [] # index => value
if values:
for key, value in values:
self.append(key,value)
self.append(key, value)
def _getValues(self):
return self._value_list
values = property(_getValues)
def index(self, key):
@ -139,7 +142,7 @@ class Dict(object):
index += len(self._value_list)
if not (0 <= index < len(self._value_list)):
raise IndexError(_("list assignment index out of range (%s/%s)")
% (index, len(self._value_list)))
% (index, len(self._value_list)))
del self._value_list[index]
del self._key_list[index]
@ -168,7 +171,7 @@ class Dict(object):
_index = index
if index < 0:
index += len(self._value_list)
if not(0 <= index <= len(self._value_list)):
if not (0 <= index <= len(self._value_list)):
raise IndexError(_("Insert error: index '%s' is invalid") % _index)
for item_key, item_index in self._index.iteritems():
if item_index >= index:
@ -178,6 +181,5 @@ class Dict(object):
self._value_list.insert(index, value)
def __repr__(self):
items = ( "%r: %r" % (key, value) for key, value in self.iteritems() )
items = ("%r: %r" % (key, value) for key, value in self.iteritems())
return "{%s}" % ", ".join(items)

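A short illustration of the value-oriented iteration described in the docstring (keys and values are hypothetical):

from hachoir.core.dict import Dict, UniqKeyError

d = Dict([("width", 640), ("height", 480)])
assert list(d) == [640, 480]    # __iter__() yields values, not keys
try:
    d.append("width", 1024)     # key already exists
except UniqKeyError:
    pass                        # duplicate keys are rejected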

@ -2,7 +2,7 @@
Constant values about endian.
"""
from hachoir_core.i18n import _
from hachoir.core.i18n import _
BIG_ENDIAN = "ABCD"
LITTLE_ENDIAN = "DCBA"

lib/hachoir/core/error.py (new file, 30 lines)

@ -0,0 +1,30 @@
"""
Functions to display an error (error, warning or information) message.
"""
from hachoir.core.log import log
import sys
import traceback
def getBacktrace(empty="Empty backtrace."):
"""
Try to get backtrace as string.
Returns "Error while trying to get backtrace" on failure.
"""
try:
info = sys.exc_info()
trace = traceback.format_exception(*info)
sys.exc_clear()
if trace[0] != "None\n":
return "".join(trace)
except Exception:
# No i18n here (imagine if i18n function calls error...)
return "Error while trying to get backtrace"
return empty
info = log.info
warning = log.warning
error = log.error

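getBacktrace() is meant to be called from inside an exception handler; anywhere else it returns the `empty` placeholder. For example:

from hachoir.core.error import getBacktrace, warning

try:
    1 / 0
except ZeroDivisionError:
    warning("Computation failed:\n" + getBacktrace())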

@ -14,13 +14,14 @@ WARNING: Loading this module indirectly calls initLocale() which sets
settings.
"""
import hachoir_core.config as config
import hachoir_core
import hachoir.core.config as config
import hachoir.core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
def _getTerminalCharset():
"""
Function used by getTerminalCharset() to get terminal charset.
@ -50,6 +51,7 @@ def _getTerminalCharset():
# (4) Otherwise, returns "ASCII"
return "ASCII"
def getTerminalCharset():
"""
Guess terminal charset using different tests:
@ -66,6 +68,7 @@ def getTerminalCharset():
getTerminalCharset.value = _getTerminalCharset()
return getTerminalCharset.value
class UnicodeStdout(object):
def __init__(self, old_device, charset):
self.device = old_device
@ -83,6 +86,7 @@ class UnicodeStdout(object):
for text in lines:
self.write(text)
def initLocale():
# Only initialize locale once
if initLocale.is_done:
@ -104,17 +108,22 @@ def initLocale():
sys.stdout = UnicodeStdout(sys.stdout, charset)
sys.stderr = UnicodeStdout(sys.stderr, charset)
return charset
initLocale.is_done = False
def _dummy_gettext(text):
return unicode(text)
def _dummy_ngettext(singular, plural, count):
if 1 < abs(count) or not count:
return unicode(plural)
else:
return unicode(singular)
def _initGettext():
charset = initLocale()
@ -133,7 +142,7 @@ def _initGettext():
return (_dummy_gettext, _dummy_ngettext)
# Gettext variables
package = hachoir_core.PACKAGE
package = 'hachoir'
locale_dir = path.join(path.dirname(__file__), "..", "locale")
# Initialize gettext module
@ -150,6 +159,7 @@ def _initGettext():
unicode(ngettext(singular, plural, count), charset)
return (unicode_gettext, unicode_ngettext)
UTF_BOMS = (
(BOM_UTF8, "UTF-8"),
(BOM_UTF16_LE, "UTF-16-LE"),
@ -165,6 +175,7 @@ CHARSET_CHARACTERS = (
(set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)
def guessBytesCharset(bytes, default=None):
r"""
>>> guessBytesCharset("abc")
@ -202,13 +213,13 @@ def guessBytesCharset(bytes, default=None):
pass
# Create a set of non-ASCII characters
non_ascii_set = set( byte for byte in bytes if ord(byte) >= 128 )
non_ascii_set = set(byte for byte in bytes if ord(byte) >= 128)
for characters, charset in CHARSET_CHARACTERS:
if characters.issuperset(non_ascii_set):
return charset
return default
# Initialize _(), gettext() and ngettext() functions
gettext, ngettext = _initGettext()
_ = gettext

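guessBytesCharset() honours a BOM first and only then falls back to the character-set heuristics above, e.g.:

from codecs import BOM_UTF8
from hachoir.core.i18n import guessBytesCharset

assert guessBytesCharset("abc") == "ASCII"
assert guessBytesCharset(BOM_UTF8 + "abc") == "UTF-8"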

@ -555,4 +555,3 @@ for line in _ISO639:
for key in line[1].split("/"):
ISO639_2[key] = line[0]
del _ISO639


@ -1,4 +1,5 @@
from hachoir_core.iso639 import ISO639_2
from hachoir.core.iso639 import ISO639_2
class Language:
def __init__(self, code):
@ -13,11 +14,10 @@ class Language:
return cmp(self.code, other.code)
def __unicode__(self):
return ISO639_2[self.code]
return ISO639_2[self.code]
def __str__(self):
return self.__unicode__()
return self.__unicode__()
def __repr__(self):
return "<Language '%s', code=%r>" % (unicode(self), self.code)


@ -1,11 +1,15 @@
import os, sys, time
import hachoir_core.config as config
from hachoir_core.i18n import _
import os
import sys
import time
import hachoir.core.config as config
from hachoir.core.i18n import _
class Log:
LOG_INFO = 0
LOG_WARN = 1
LOG_ERROR = 2
LOG_INFO = 0
LOG_WARN = 1
LOG_ERROR = 2
level_name = {
LOG_WARN: "[warn]",
@ -18,7 +22,8 @@ class Log:
self.__file = None
self.use_print = True
self.use_buffer = False
self.on_new_message = None # Prototype: def func(level, prefix, text, context)
# Prototype: def func(level, prefix, text, context)
self.on_new_message = None
def shutdown(self):
if self.__file:
@ -46,12 +51,10 @@ class Log:
else:
self.__file = codecs.open(filename, "w", "utf-8")
self._writeIntoFile(_("Starting Hachoir"))
except IOError, err:
if err.errno == 2:
self.__file = None
self.info(_("[Log] setFilename(%s) fails: no such file") % filename)
else:
raise
except FileNotFoundError:
self.__file = None
self.info("[Log] setFilename(%s) fails: no such file"
% filename)
def _writeIntoFile(self, message):
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
@ -72,10 +75,10 @@ class Log:
"""
if level < self.LOG_ERROR and config.quiet or \
level <= self.LOG_INFO and not config.verbose:
level <= self.LOG_INFO and not config.verbose:
return
if config.debug:
from hachoir_core.error import getBacktrace
from hachoir.core.error import getBacktrace
backtrace = getBacktrace(None)
if backtrace:
text += "\n\n" + backtrace
@ -108,7 +111,7 @@ class Log:
# Use callback (if used)
if self.on_new_message:
self.on_new_message (level, prefix, _text, ctxt)
self.on_new_message(level, prefix, _text, ctxt)
def info(self, text):
"""
@ -131,14 +134,19 @@ class Log:
"""
self.newMessage(Log.LOG_ERROR, text)
log = Log()
class Logger(object):
def _logger(self):
return "<%s>" % self.__class__.__name__
def info(self, text):
log.newMessage(Log.LOG_INFO, text, self)
def warning(self, text):
log.newMessage(Log.LOG_WARN, text, self)
def error(self, text):
log.newMessage(Log.LOG_ERROR, text, self)

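Any class can mix in Logger to gain info()/warning()/error() methods that route through the module-level log with the instance as context; a small sketch:

from hachoir.core.log import Logger

class DemoReader(Logger):
    def _logger(self):
        return "<DemoReader>"   # prefix shown in log output

DemoReader().warning("unexpected chunk size")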

@ -1,15 +1,17 @@
from hachoir_core.tools import humanDurationNanosec
from hachoir_core.i18n import _
from hachoir.core.tools import humanDurationNanosec
from hachoir.core.i18n import _
from math import floor
from time import time
class BenchmarkError(Exception):
"""
Error during benchmark, use str(err) to format it as string.
"""
def __init__(self, message):
Exception.__init__(self,
"Benchmark internal error: %s" % message)
Exception.__init__(self, "Benchmark internal error: %s" % message)
class BenchmarkStat:
"""
@ -24,6 +26,7 @@ class BenchmarkStat:
- __len__(): get number of elements
- __nonzero__(): isn't empty?
"""
def __init__(self):
self._values = []
@ -53,9 +56,10 @@ class BenchmarkStat:
def getSum(self):
return self._sum
class Benchmark:
def __init__(self, max_time=5.0,
min_count=5, max_count=None, progress_time=1.0):
min_count=5, max_count=None, progress_time=1.0):
"""
Constructor:
- max_time: Maximum wanted duration of the whole benchmark
@ -97,8 +101,7 @@ class Benchmark:
average = stat.getSum() / len(stat)
values = (stat.getMin(), average, stat.getMax(), stat.getSum())
values = tuple(self.formatTime(value) for value in values)
print _("Benchmark: best=%s average=%s worst=%s total=%s") \
% values
print _("Benchmark: best=%s average=%s worst=%s total=%s") % values
def _runOnce(self, func, args, kw):
before = time()
@ -140,7 +143,7 @@ class Benchmark:
estimate = diff * count
if self.verbose:
print _("Run benchmark: %s calls (estimate: %s)") \
% (count, self.formatTime(estimate))
% (count, self.formatTime(estimate))
display_progress = self.verbose and (1.0 <= estimate)
total_count = 1
@ -148,14 +151,14 @@ class Benchmark:
# Run benchmark and display each result
if display_progress:
print _("Result %s/%s: %s (best: %s)") % \
(total_count, count,
self.formatTime(diff), self.formatTime(best))
(total_count, count,
self.formatTime(diff), self.formatTime(best))
part = count - total_count
# Will takes more than one second?
average = total_time / total_count
if self.progress_time < part * average:
part = max( int(self.progress_time / average), 1)
part = max(int(self.progress_time / average), 1)
for index in xrange(part):
diff = self._runOnce(func, args, kw)
stat.append(diff)
@ -164,8 +167,8 @@ class Benchmark:
total_count += part
if display_progress:
print _("Result %s/%s: %s (best: %s)") % \
(count, count,
self.formatTime(diff), self.formatTime(best))
(count, count,
self.formatTime(diff), self.formatTime(best))
return stat
def validateStat(self, stat):
@ -207,4 +210,3 @@ class Benchmark:
# Validate and display stats
self.validateStat(stat)
self.displayStat(stat)

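Typical driver usage might look as follows; run() is assumed here to be the entry point that repeatedly calls the target and then validates and displays the stats, as the final hunk suggests:

from hachoir.core.benchmark import Benchmark

bench = Benchmark(max_time=2.0, min_count=5)
bench.run(sorted, range(10000, 0, -1))   # assumed signature: run(func, *args, **kw)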

@ -1,37 +1,42 @@
from optparse import OptionGroup
from hachoir_core.log import log
from hachoir_core.i18n import _, getTerminalCharset
from hachoir_core.tools import makePrintable
import hachoir_core.config as config
from hachoir.core.log import log
from hachoir.core.i18n import _, getTerminalCharset
from hachoir.core.tools import makePrintable
import hachoir.core.config as config
def getHachoirOptions(parser):
"""
Create an option group (type optparse.OptionGroup) of Hachoir
library options.
"""
def setLogFilename(*args):
log.setFilename(args[2])
common = OptionGroup(parser, _("Hachoir library"), \
"Configure Hachoir library")
common = OptionGroup(parser, _("Hachoir library"),
"Configure Hachoir library")
common.add_option("--verbose", help=_("Verbose mode"),
default=False, action="store_true")
default=False, action="store_true")
common.add_option("--log", help=_("Write log in a file"),
type="string", action="callback", callback=setLogFilename)
type="string", action="callback",
callback=setLogFilename)
common.add_option("--quiet", help=_("Quiet mode (don't display warning)"),
default=False, action="store_true")
default=False, action="store_true")
common.add_option("--debug", help=_("Debug mode"),
default=False, action="store_true")
default=False, action="store_true")
return common
def configureHachoir(option):
# Configure Hachoir using "option" (value from optparse)
if option.quiet:
config.quiet = True
config.quiet = True
if option.verbose:
config.verbose = True
config.verbose = True
if option.debug:
config.debug = True
config.debug = True
def unicodeFilename(filename, charset=None):
if not charset:
@ -40,4 +45,3 @@ def unicodeFilename(filename, charset=None):
return unicode(filename, charset)
except UnicodeDecodeError:
return makePrintable(filename, charset, to_unicode=True)

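These helpers plug into an optparse-based tool in the usual way:

from optparse import OptionParser
from hachoir.core.cmd_line import getHachoirOptions, configureHachoir

parser = OptionParser(usage="%prog [options] filename")
parser.add_option_group(getHachoirOptions(parser))
options, arguments = parser.parse_args()
configureHachoir(options)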

@ -1,8 +1,9 @@
import gc
#---- Default implementation when resource is missing ----------------------
# ---- Default implementation when resource is missing ----------------------
PAGE_SIZE = 4096
def getMemoryLimit():
"""
Get current memory limit in bytes.
@ -11,6 +12,7 @@ def getMemoryLimit():
"""
return None
def setMemoryLimit(max_mem):
"""
Set memory limit in bytes.
@ -20,6 +22,7 @@ def setMemoryLimit(max_mem):
"""
return False
def getMemorySize():
"""
Read current process memory size: size of available virtual memory.
@ -33,19 +36,22 @@ def getMemorySize():
return None
return int(statm[0]) * PAGE_SIZE
def clearCaches():
"""
Try to clear all caches: call gc.collect() (Python garbage collector).
"""
gc.collect()
#import re; re.purge()
# import re; re.purge()
try:
#---- 'resource' implementation ---------------------------------------------
# ---- 'resource' implementation ---------------------------------------------
from resource import getpagesize, getrlimit, setrlimit, RLIMIT_AS
PAGE_SIZE = getpagesize()
def getMemoryLimit():
try:
limit = getrlimit(RLIMIT_AS)[0]
@ -55,6 +61,7 @@ try:
except ValueError:
return None
def setMemoryLimit(max_mem):
if max_mem is None:
max_mem = -1
@ -66,6 +73,7 @@ try:
except ImportError:
pass
def limitedMemory(limit, func, *args, **kw):
"""
Limit memory grow when calling func(*args, **kw):
@ -96,4 +104,3 @@ def limitedMemory(limit, func, *args, **kw):
# After calling the function: clear all caches
clearCaches()

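limitedMemory() caps how much additional memory the wrapped call may allocate, degrading to a plain call when the resource module is unavailable. A sketch:

from hachoir.core.memory import limitedMemory

def build_blob():
    return "x" * (50 * 1024 * 1024)   # tries to allocate ~50 MB

try:
    data = limitedMemory(16 * 1024 * 1024, build_blob)
except MemoryError:
    data = None   # the call exceeded its ~16 MB growth budget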

@ -2,7 +2,9 @@ from hotshot import Profile
from hotshot.stats import load as loadStats
from os import unlink
def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25, sort_by=('cumulative', 'calls')):
def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25,
sort_by=('cumulative', 'calls')):
profile_filename = "/tmp/profiler"
prof = Profile(profile_filename)
try:
@ -28,4 +30,3 @@ def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25, sort_by=('c
return result
finally:
unlink(profile_filename)


@ -6,24 +6,28 @@ from math import ceil
IMPLEMENTATION = None
class Timeout(RuntimeError):
"""
Timeout error, inherits from RuntimeError
"""
pass
def signalHandler(signum, frame):
"""
Signal handler to catch timeout signal: raise Timeout exception.
"""
raise Timeout("Timeout exceed!")
def limitedTime(second, func, *args, **kw):
"""
Call func(*args, **kw) with a timeout of second seconds.
"""
return func(*args, **kw)
def fixTimeout(second):
"""
Fix timeout value: convert to integer with a minimum of 1 second
@ -33,6 +37,7 @@ def fixTimeout(second):
assert isinstance(second, (int, long))
return max(second, 1)
if not IMPLEMENTATION:
try:
from signal import signal, alarm, SIGALRM
@ -48,6 +53,7 @@ if not IMPLEMENTATION:
alarm(0)
signal(SIGALRM, old_alarm)
IMPLEMENTATION = "signal.alarm()"
except ImportError:
pass
@ -57,6 +63,7 @@ if not IMPLEMENTATION:
from signal import signal, SIGXCPU
from resource import getrlimit, setrlimit, RLIMIT_CPU
# resource.setrlimit(RLIMIT_CPU) implementation
# "Bug": timeout is 'CPU' time so sleep() are not part of the timeout
def limitedTime(second, func, *args, **kw):
@ -70,7 +77,7 @@ if not IMPLEMENTATION:
setrlimit(RLIMIT_CPU, current)
signal(SIGXCPU, old_alarm)
IMPLEMENTATION = "resource.setrlimit(RLIMIT_CPU)"
except ImportError:
pass

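Whichever implementation is selected, the calling pattern is the same; on platforms with neither signal.alarm() nor RLIMIT_CPU, the fallback simply runs the function with no limit:

from hachoir.core.timeout import limitedTime, Timeout

def busy():
    while True:
        pass

try:
    limitedTime(2, busy)   # interrupted after ~2 seconds where supported
except Timeout:
    print("busy() took too long")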

@ -2,12 +2,13 @@
Utilities used to convert a field to a classic human representation of data.
"""
from hachoir_core.tools import (
from hachoir.core.tools import (
humanDuration, humanFilesize, alignValue,
durationWin64 as doDurationWin64,
deprecated)
from types import FunctionType, MethodType
from hachoir_core.field import Field
from hachoir.field import Field
def textHandler(field, handler):
assert isinstance(handler, (FunctionType, MethodType))
@ -15,12 +16,14 @@ def textHandler(field, handler):
field.createDisplay = lambda: handler(field)
return field
def displayHandler(field, handler):
assert isinstance(handler, (FunctionType, MethodType))
assert issubclass(field.__class__, Field)
field.createDisplay = lambda: handler(field.value)
return field
@deprecated("Use TimedeltaWin64 field type")
def durationWin64(field):
"""
@ -37,12 +40,14 @@ def durationWin64(field):
delta = doDurationWin64(field.value)
return humanDuration(delta)
def filesizeHandler(field):
"""
Format field value using humanFilesize()
"""
return displayHandler(field, humanFilesize)
def hexadecimal(field):
"""
Convert an integer to hexadecimal in lower case. Returns unicode string.
@ -57,4 +62,3 @@ def hexadecimal(field):
padding = alignValue(size, 4) // 4
pattern = u"0x%%0%ux" % padding
return pattern % field.value

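Handlers are applied when a field is yielded from createFields(); the field names below are hypothetical:

from hachoir.field import UInt32
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal

# inside some FieldSet.createFields():
#     yield textHandler(UInt32(self, "crc32"), hexadecimal)   # display as 0x...
#     yield filesizeHandler(UInt32(self, "data_size"))        # display as e.g. "1.0 MB"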

@ -4,12 +4,13 @@
Various utilities.
"""
from hachoir_core.i18n import _, ngettext
from hachoir.core.i18n import _, ngettext
import re
import stat
from datetime import datetime, timedelta, MAXYEAR
from warnings import warn
def deprecated(comment=None):
"""
This is a decorator which can be used to mark functions
@ -26,6 +27,7 @@ def deprecated(comment=None):
Code from: http://code.activestate.com/recipes/391367/
"""
def _deprecated(func):
def newFunc(*args, **kwargs):
message = "Call to deprecated function %s" % func.__name__
@ -33,12 +35,15 @@ def deprecated(comment=None):
message += ": " + comment
warn(message, category=DeprecationWarning, stacklevel=2)
return func(*args, **kwargs)
newFunc.__name__ = func.__name__
newFunc.__doc__ = func.__doc__
newFunc.__dict__.update(func.__dict__)
return newFunc
return _deprecated
def paddingSize(value, align):
"""
Compute size of a padding field.
@ -57,6 +62,7 @@ def paddingSize(value, align):
else:
return 0
def alignValue(value, align):
"""
Align a value to next 'align' multiple.
@ -76,6 +82,7 @@ def alignValue(value, align):
else:
return value
def timedelta2seconds(delta):
"""
Convert a datetime.timedelta() object to a number of seconds
@ -87,7 +94,8 @@ def timedelta2seconds(delta):
60.25
"""
return delta.microseconds / 1000000.0 \
+ delta.seconds + delta.days * 60*60*24
+ delta.seconds + delta.days * 60 * 60 * 24
def humanDurationNanosec(nsec):
"""
@ -105,14 +113,15 @@ def humanDurationNanosec(nsec):
# Micro seconds
usec, nsec = divmod(nsec, 1000)
if usec < 1000:
return u"%.2f usec" % (usec+float(nsec)/1000)
return u"%.2f usec" % (usec + float(nsec) / 1000)
# Milli seconds
msec, usec = divmod(usec, 1000)
if msec < 1000:
return u"%.2f ms" % (msec + float(usec)/1000)
return u"%.2f ms" % (msec + float(usec) / 1000)
return humanDuration(msec)
def humanDuration(delta):
"""
Convert a duration in milliseconds to a human natural representation.
@ -128,12 +137,12 @@ def humanDuration(delta):
u'1 hour 46 min 42 sec'
"""
if not isinstance(delta, timedelta):
delta = timedelta(microseconds=delta*1000)
delta = timedelta(microseconds=delta * 1000)
# Milliseconds
text = []
if 1000 <= delta.microseconds:
text.append(u"%u ms" % (delta.microseconds//1000))
text.append(u"%u ms" % (delta.microseconds // 1000))
# Seconds
minutes, seconds = divmod(delta.seconds, 60)
@ -157,6 +166,7 @@ def humanDuration(delta):
return u"0 ms"
return u" ".join(reversed(text))
def humanFilesize(size):
"""
Convert a file size in bytes to a human natural representation.
@ -181,6 +191,7 @@ def humanFilesize(size):
return "%.1f %s" % (size, unit)
return "%u %s" % (size, unit)
def humanBitSize(size):
"""
Convert a size in bits to a human classic representation.
@ -205,6 +216,7 @@ def humanBitSize(size):
return "%.1f %s" % (size, unit)
return u"%u %s" % (size, unit)
def humanBitRate(size):
"""
Convert a bit rate to human classic representation. It uses humanBitSize()
@ -217,6 +229,7 @@ def humanBitRate(size):
"""
return "".join((humanBitSize(size), "/sec"))
def humanFrequency(hertz):
"""
Convert a frequency in hertz to human classic representation.
@ -239,18 +252,20 @@ def humanFrequency(hertz):
return u"%.1f %s" % (hertz, unit)
return u"%s %s" % (hertz, unit)
regex_control_code = re.compile(r"([\x00-\x1f\x7f])")
controlchars = tuple({
# Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character)
# Same reason to not use octal syntax ("\1")
ord("\n"): r"\n",
ord("\r"): r"\r",
ord("\t"): r"\t",
ord("\a"): r"\a",
ord("\b"): r"\b",
}.get(code, '\\x%02x' % code)
for code in xrange(128)
)
# Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character)
# Same reason to not use octal syntax ("\1")
ord("\n"): r"\n",
ord("\r"): r"\r",
ord("\t"): r"\t",
ord("\a"): r"\a",
ord("\b"): r"\b",
}.get(code, '\\x%02x' % code)
for code in xrange(128)
)
def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
r"""
@ -309,6 +324,8 @@ def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
data = ''.join((quote, data, quote))
elif quote:
data = "(empty)"
else:
data = ""
data = data.encode(charset, "backslashreplace")
if smart:
# Replace \x00\x01 by \0\1
@ -317,6 +334,7 @@ def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
data = unicode(data, charset)
return data
def makeUnicode(text):
r"""
Convert text to printable Unicode string. For byte string (type 'str'),
@ -343,6 +361,7 @@ def makeUnicode(text):
text = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", text)
return text
def binarySearch(seq, cmp_func):
"""
Search a value in a sequence using binary search. Returns index of the
@ -376,20 +395,40 @@ def binarySearch(seq, cmp_func):
return index
return None
def lowerBound(seq, cmp_func):
f = 0
l = len(seq)
while l > 0:
h = l >> 1
seqlen = len(seq)
while seqlen > 0:
h = seqlen >> 1
m = f + h
if cmp_func(seq[m]):
f = m
f += 1
l -= h + 1
seqlen -= h + 1
else:
l = h
seqlen = h
return f
def _ftypelet(mode):
if stat.S_ISREG(mode) or not stat.S_IFMT(mode):
return '-'
if stat.S_ISBLK(mode):
return 'b'
if stat.S_ISCHR(mode):
return 'c'
if stat.S_ISDIR(mode):
return 'd'
if stat.S_ISFIFO(mode):
return 'p'
if stat.S_ISLNK(mode):
return 'l'
if stat.S_ISSOCK(mode):
return 's'
return '?'
def humanUnixAttributes(mode):
"""
Convert a Unix file attributes (or "file mode") to an unicode string.
@ -403,18 +442,7 @@ def humanUnixAttributes(mode):
u'-rwxr-sr-x (2755)'
"""
def ftypelet(mode):
if stat.S_ISREG (mode) or not stat.S_IFMT(mode):
return '-'
if stat.S_ISBLK (mode): return 'b'
if stat.S_ISCHR (mode): return 'c'
if stat.S_ISDIR (mode): return 'd'
if stat.S_ISFIFO(mode): return 'p'
if stat.S_ISLNK (mode): return 'l'
if stat.S_ISSOCK(mode): return 's'
return '?'
chars = [ ftypelet(mode), 'r', 'w', 'x', 'r', 'w', 'x', 'r', 'w', 'x' ]
chars = [_ftypelet(mode), 'r', 'w', 'x', 'r', 'w', 'x', 'r', 'w', 'x']
for i in xrange(1, 10):
if not mode & 1 << 9 - i:
chars[i] = '-'
@ -435,6 +463,7 @@ def humanUnixAttributes(mode):
chars[9] = 't'
return u"%s (%o)" % (''.join(chars), mode)
def createDict(data, index):
"""
Create a new dictionary from a dictionary of key=>values:
@ -446,11 +475,13 @@ def createDict(data, index):
>>> createDict(data, 2)
{10: 'a', 20: 'b'}
"""
return dict( (key,values[index]) for key, values in data.iteritems() )
return dict((key, values[index]) for key, values in data.iteritems())
# Start of UNIX timestamp (Epoch): 1st January 1970 at 00:00
UNIX_TIMESTAMP_T0 = datetime(1970, 1, 1)
def timestampUNIX(value):
"""
Convert a UNIX (32-bit) timestamp to a datetime object. Timestamp value
@ -470,13 +501,15 @@ def timestampUNIX(value):
"""
if not isinstance(value, (float, int, long)):
raise TypeError("timestampUNIX(): an integer or float is required")
if not(0 <= value <= 2147483647):
if not (0 <= value <= 2147483647):
raise ValueError("timestampUNIX(): value have to be in 0..2147483647")
return UNIX_TIMESTAMP_T0 + timedelta(seconds=value)
# Start of Macintosh timestamp: 1st January 1904 at 00:00
MAC_TIMESTAMP_T0 = datetime(1904, 1, 1)
def timestampMac32(value):
"""
Convert a Mac (32-bit) timestamp to a string. The format is the number
@ -489,10 +522,11 @@ def timestampMac32(value):
"""
if not isinstance(value, (float, int, long)):
raise TypeError("an integer or float is required")
if not(0 <= value <= 4294967295):
if not (0 <= value <= 4294967295):
return _("invalid Mac timestamp (%s)") % value
return MAC_TIMESTAMP_T0 + timedelta(seconds=value)
def durationWin64(value):
"""
Convert Windows 64-bit duration to string. The timestamp format is
@ -507,11 +541,13 @@ def durationWin64(value):
raise TypeError("an integer or float is required")
if value < 0:
raise ValueError("value have to be a positive or nul integer")
return timedelta(microseconds=value/10)
return timedelta(microseconds=value / 10)
# Start of 64-bit Windows timestamp: 1st January 1600 at 00:00
WIN64_TIMESTAMP_T0 = datetime(1601, 1, 1, 0, 0, 0)
def timestampWin64(value):
"""
Convert Windows 64-bit timestamp to string. The timestamp format is
@ -527,11 +563,14 @@ def timestampWin64(value):
try:
return WIN64_TIMESTAMP_T0 + durationWin64(value)
except OverflowError:
raise ValueError(_("date newer than year %s (value=%s)") % (MAXYEAR, value))
raise ValueError(_("date newer than year %s (value=%s)")
% (MAXYEAR, value))
# Start of 60-bit UUID timestamp: 15 October 1582 at 00:00
UUID60_TIMESTAMP_T0 = datetime(1582, 10, 15, 0, 0, 0)
def timestampUUID60(value):
"""
Convert UUID 60-bit timestamp to string. The timestamp format is
@ -548,10 +587,11 @@ def timestampUUID60(value):
if value < 0:
raise ValueError("value have to be a positive or nul integer")
try:
return UUID60_TIMESTAMP_T0 + timedelta(microseconds=value/10)
return UUID60_TIMESTAMP_T0 + timedelta(microseconds=value / 10)
except OverflowError:
raise ValueError(_("timestampUUID60() overflow (value=%s)") % value)
def humanDatetime(value, strip_microsecond=True):
"""
Convert a timestamp to Unicode string: use ISO format with space separator.
@ -569,8 +609,10 @@ def humanDatetime(value, strip_microsecond=True):
text = text.split(".")[0]
return text
NEWLINES_REGEX = re.compile("\n+")
def normalizeNewline(text):
r"""
Replace Windows and Mac newlines with Unix newlines.
@ -586,4 +628,3 @@ def normalizeNewline(text):
text = text.replace("\r\n", "\n")
text = text.replace("\r", "\n")
return NEWLINES_REGEX.sub("\n", text)

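A few of the human-readable formatters exercised directly; the humanDuration value restates a doctest shown above, the others follow from the implementation and the documented "%s (%o)" attribute format:

from hachoir.core.tools import humanDuration, humanFilesize, humanUnixAttributes

assert humanFilesize(1024 * 1024) == u"1.0 MB"
assert humanDuration(6402309) == u"1 hour 46 min 42 sec"
assert humanUnixAttributes(0o644) == u"-rw-r--r-- (644)"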

@ -0,0 +1,59 @@
# Field classes
from hachoir.field.field import Field, FieldError, MissingField, joinPath
from hachoir.field.bit_field import Bit, Bits, RawBits
from hachoir.field.byte_field import Bytes, RawBytes
from hachoir.field.sub_file import SubFile, CompressedField
from hachoir.field.character import Character
from hachoir.field.integer import (
Int8, Int16, Int24, Int32, Int64,
UInt8, UInt16, UInt24, UInt32, UInt64,
GenericInteger)
from hachoir.field.enum import Enum
from hachoir.field.string_field import (GenericString,
String, CString, UnixLine,
PascalString8, PascalString16, PascalString32)
from hachoir.field.padding import (PaddingBits, PaddingBytes,
NullBits, NullBytes)
# Functions
from hachoir.field.helper import (isString, isInteger,
createPaddingField, createNullField, createRawField,
writeIntoFile, createOrphanField)
# FieldSet classes
from hachoir.field.fake_array import FakeArray
from hachoir.field.basic_field_set import (BasicFieldSet,
ParserError, MatchError)
from hachoir.field.generic_field_set import GenericFieldSet
from hachoir.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet
from hachoir.field.field_set import FieldSet
from hachoir.field.static_field_set import StaticFieldSet
from hachoir.field.parser import Parser
from hachoir.field.vector import GenericVector, UserVector
# Complex types
from hachoir.field.float import Float32, Float64, Float80
from hachoir.field.timestamp import (GenericTimestamp,
TimestampUnix32, TimestampUnix64, TimestampMac32, TimestampUUID60, TimestampWin64,
DateTimeMSDOS32, TimeDateMSDOS32, TimedeltaWin64)
# Special Field classes
from hachoir.field.link import Link, Fragment
from hachoir.field.fragment import FragmentGroup, CustomFragment
available_types = (
Bit, Bits, RawBits,
Bytes, RawBytes,
SubFile,
Character,
Int8, Int16, Int24, Int32, Int64,
UInt8, UInt16, UInt24, UInt32, UInt64,
String, CString, UnixLine,
PascalString8, PascalString16, PascalString32,
Float32, Float64,
PaddingBits, PaddingBytes,
NullBits, NullBytes,
TimestampUnix32, TimestampMac32, TimestampWin64,
DateTimeMSDOS32, TimeDateMSDOS32,
# GenericInteger, GenericString,
)


@ -1,7 +1,8 @@
from hachoir_core.field import Field, FieldError
from hachoir_core.stream import InputStream
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir_core.event_handler import EventHandler
from hachoir.field import Field, FieldError
from hachoir.stream import InputStream
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir.core.event_handler import EventHandler
class ParserError(FieldError):
"""
@ -11,6 +12,7 @@ class ParserError(FieldError):
"""
pass
class MatchError(FieldError):
"""
Error raised by a field set when the stream content doesn't
@ -20,6 +22,7 @@ class MatchError(FieldError):
"""
pass
class BasicFieldSet(Field):
_event_handler = None
is_field_set = True
@ -132,16 +135,21 @@ class BasicFieldSet(Field):
def createFields(self):
raise NotImplementedError()
def __iter__(self):
raise NotImplementedError()
def __len__(self):
raise NotImplementedError()
def getField(self, key, const=True):
raise NotImplementedError()
def nextFieldAddress(self):
raise NotImplementedError()
def getFieldIndex(self, field):
raise NotImplementedError()
def readMoreFields(self, number):
raise NotImplementedError()


@ -5,9 +5,10 @@ Bit sized classes:
- RawBits: unknown content with a size in bits.
"""
from hachoir_core.field import Field
from hachoir_core.i18n import _
from hachoir_core import config
from hachoir.field import Field
from hachoir.core.i18n import _
from hachoir.core import config
class RawBits(Field):
"""
@ -33,9 +34,11 @@ class RawBits(Field):
return unicode(self.value)
else:
return _("<%s size=%u>" %
(self.__class__.__name__, self._size))
(self.__class__.__name__, self._size))
createRawDisplay = createDisplay
class Bits(RawBits):
"""
Positive integer with a size in bits
@ -45,6 +48,7 @@ class Bits(RawBits):
"""
pass
class Bit(RawBits):
"""
Single bit: value can be False or True, and size is exactly one bit.
@ -61,8 +65,7 @@ class Bit(RawBits):
def createValue(self):
return 1 == self._parent.stream.readBits(
self.absolute_address, 1, self._parent.endian)
self.absolute_address, 1, self._parent.endian)
def createRawDisplay(self):
return unicode(int(self.value))


@ -3,12 +3,15 @@ Very basic field: raw content with a size in byte. Use this class for
unknown content.
"""
from hachoir_core.field import Field, FieldError
from hachoir_core.tools import makePrintable
from hachoir_core.bits import str2hex
from hachoir_core import config
import types
from hachoir.field import Field, FieldError
from hachoir.core.tools import makePrintable
from hachoir.core.bits import str2hex
from hachoir.core import config
MAX_LENGTH = (2 ** 64)
MAX_LENGTH = (2**64)
class RawBytes(Field):
"""
@ -16,25 +19,25 @@ class RawBytes(Field):
@see: L{Bytes}
"""
static_size = staticmethod(lambda *args, **kw: args[1]*8)
static_size = staticmethod(lambda *args, **kw: args[1] * 8)
def __init__(self, parent, name, length, description="Raw data"):
assert issubclass(parent.__class__, Field)
if not(0 < length <= MAX_LENGTH):
if not (0 < length <= MAX_LENGTH):
raise FieldError("Invalid RawBytes length (%s)!" % length)
Field.__init__(self, parent, name, length*8, description)
Field.__init__(self, parent, name, length * 8, description)
self._display = None
def _createDisplay(self, human):
max_bytes = config.max_byte_length
if type(self._getValue) is type(lambda: None):
display = self.value[:max_bytes]
if isinstance(self._getValue, types.FunctionType):
display = makePrintable(self.value[:max_bytes], "ASCII")
else:
if self._display is None:
address = self.absolute_address
length = min(self._size / 8, max_bytes)
length = min(self._size // 8, max_bytes)
self._display = self._parent.stream.readBytes(address, length)
display = self._display
display = makePrintable(self._display, "ASCII")
truncated = (8 * len(display) < self._size)
if human:
if truncated:
@ -61,7 +64,8 @@ class RawBytes(Field):
if self._display:
self._display = None
return self._parent.stream.readBytes(
self.absolute_address, self._size / 8)
self.absolute_address, self._size // 8)
class Bytes(RawBytes):
"""
@ -70,4 +74,3 @@ class Bytes(RawBytes):
@see: L{RawBytes}
"""
pass


@ -2,9 +2,9 @@
Character field class: a 8-bit character
"""
from hachoir_core.field import Bits
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.tools import makePrintable
from hachoir.field import Bits
from hachoir.core.tools import makePrintable
class Character(Bits):
"""
@ -13,11 +13,11 @@ class Character(Bits):
static_size = 8
def __init__(self, parent, name, description=None):
Bits.__init__(self, parent, name, 8, description=description)
Bits.__init__(self, parent, name, self.static_size, description=description)
def createValue(self):
return chr(self._parent.stream.readBits(
self.absolute_address, 8, BIG_ENDIAN))
self.absolute_address, self.static_size, self.parent.endian))
def createRawDisplay(self):
return unicode(Bits.createValue(self))


@ -1,5 +1,6 @@
import itertools
from hachoir_core.field import MissingField
from hachoir.field import MissingField
class FakeArray:
"""
@ -13,11 +14,12 @@ class FakeArray:
And to get array size using len(fieldset.array("item")).
"""
def __init__(self, fieldset, name):
pos = name.rfind("/")
if pos != -1:
self.fieldset = fieldset[name[:pos]]
self.name = name[pos+1:]
self.name = name[pos + 1:]
else:
self.fieldset = fieldset
self.name = name
@ -27,19 +29,19 @@ class FakeArray:
self._max_index = -1
def __nonzero__(self):
"Is the array empty or not?"
"""Is the array empty or not?"""
if self._cache:
return True
else:
return (0 in self)
return 0 in self
def __len__(self):
"Number of fields in the array"
total = self._max_index+1
"""Number of fields in the array"""
total = self._max_index + 1
if not self._known_size:
for index in itertools.count(total):
try:
field = self[index]
_ = self[index]
total += 1
except MissingField:
break
@ -47,7 +49,7 @@ class FakeArray:
def __contains__(self, index):
try:
field = self[index]
_ = self[index]
return True
except MissingField:
return False
@ -77,5 +79,4 @@ class FakeArray:
try:
yield self[index]
except MissingField:
raise StopIteration()
break


@ -2,28 +2,27 @@
Parent of all (field) classes in Hachoir: Field.
"""
from hachoir_core.compatibility import reversed
from hachoir_core.stream import InputFieldStream
from hachoir_core.error import HachoirError, HACHOIR_ERRORS
from hachoir_core.log import Logger
from hachoir_core.i18n import _
from hachoir_core.tools import makePrintable
from hachoir.stream import InputFieldStream
from hachoir.core.log import Logger
from hachoir.core.i18n import _
from hachoir.core.tools import makePrintable
from weakref import ref as weakref_ref
class FieldError(HachoirError):
class FieldError(Exception):
"""
Error raised by a L{Field}.
@see: L{HachoirError}
"""
pass
def joinPath(path, name):
if path != "/":
return "/".join((path, name))
else:
return "/%s" % name
class MissingField(KeyError, FieldError):
def __init__(self, field, key):
KeyError.__init__(self)
@ -36,6 +35,7 @@ class MissingField(KeyError, FieldError):
def __unicode__(self):
return u'Can\'t get field "%s" from %s' % (self.key, self.field.path)
class Field(Logger):
# static size can have two differents value: None (no static size), an
# integer (number of bits), or a function which returns an integer.
@ -82,6 +82,7 @@ class Field(Logger):
def createDescription(self):
return ""
def _getDescription(self):
if self._description is None:
try:
@ -89,51 +90,60 @@ class Field(Logger):
if isinstance(self._description, str):
self._description = makePrintable(
self._description, "ISO-8859-1", to_unicode=True)
except HACHOIR_ERRORS, err:
except Exception as err:
self.error("Error getting description: " + unicode(err))
self._description = ""
return self._description
description = property(_getDescription,
doc="Description of the field (string)")
doc="Description of the field (string)")
def __str__(self):
return self.display
def __unicode__(self):
return self.display
def __repr__(self):
return "<%s path=%r, address=%s, size=%s>" % (
self.__class__.__name__, self.path, self._address, self._size)
def hasValue(self):
return self._getValue() is not None
def createValue(self):
raise NotImplementedError()
def _getValue(self):
try:
value = self.createValue()
except HACHOIR_ERRORS, err:
except Exception as err:
self.error(_("Unable to create value: %s") % unicode(err))
value = None
self._getValue = lambda: value
return value
value = property(lambda self: self._getValue(), doc="Value of field")
def _getParent(self):
return self._parent
parent = property(_getParent, doc="Parent of this field")
def createDisplay(self):
return unicode(self.value)
def _getDisplay(self):
if not hasattr(self, "_Field__display"):
try:
self.__display = self.createDisplay()
except HACHOIR_ERRORS, err:
except Exception as err:
self.error("Unable to create display: %s" % err)
self.__display = u""
return self.__display
display = property(lambda self: self._getDisplay(),
doc="Short (unicode) string which represents field content")
doc="Short (unicode) string which represents field content")
def createRawDisplay(self):
value = self.value
@ -141,26 +151,30 @@ class Field(Logger):
return makePrintable(value, "ASCII", to_unicode=True)
else:
return unicode(value)
def _getRawDisplay(self):
if not hasattr(self, "_Field__raw_display"):
try:
self.__raw_display = self.createRawDisplay()
except HACHOIR_ERRORS, err:
except Exception as err:
self.error("Unable to create raw display: %s" % err)
self.__raw_display = u""
return self.__raw_display
raw_display = property(lambda self: self._getRawDisplay(),
doc="(Unicode) string which represents raw field content")
doc="(Unicode) string which represents raw field content")
def _getName(self):
return self._name
name = property(_getName,
doc="Field name (unique in its parent field set list)")
doc="Field name (unique in its parent field set list)")
def _getIndex(self):
if not self._parent:
return None
return self._parent.getFieldIndex(self)
index = property(_getIndex)
def _getPath(self):
@ -173,13 +187,15 @@ class Field(Logger):
field = field._parent
names[-1] = ''
return '/'.join(reversed(names))
path = property(_getPath,
doc="Full path of the field starting at root field")
doc="Full path of the field starting at root field")
def _getAddress(self):
return self._address
address = property(_getAddress,
doc="Relative address in bit to parent address")
doc="Relative address in bit to parent address")
def _getAbsoluteAddress(self):
address = self._address
@ -188,11 +204,13 @@ class Field(Logger):
address += current._address
current = current._parent
return address
absolute_address = property(_getAbsoluteAddress,
doc="Absolute address (from stream beginning) in bit")
doc="Absolute address (from stream beginning) in bit")
def _getSize(self):
return self._size
size = property(_getSize, doc="Content size in bit")
def _getField(self, name, const):
@ -237,6 +255,7 @@ class Field(Logger):
def _createInputStream(self, **args):
assert self._parent
return InputFieldStream(self, **args)
def getSubIStream(self):
if hasattr(self, "_sub_istream"):
stream = self._sub_istream()
@ -246,6 +265,7 @@ class Field(Logger):
stream = self._createInputStream()
self._sub_istream = weakref_ref(stream)
return stream
def setSubIStream(self, createInputStream):
cis = self._createInputStream
self._createInputStream = lambda **args: createInputStream(cis, **args)
@ -259,4 +279,3 @@ class Field(Logger):
def getFieldType(self):
return self.__class__.__name__


@ -1,7 +1,7 @@
from hachoir_core.field import BasicFieldSet, GenericFieldSet
from hachoir.field import BasicFieldSet, GenericFieldSet
class FieldSet(GenericFieldSet):
def __init__(self, parent, name, *args, **kw):
assert issubclass(parent.__class__, BasicFieldSet)
GenericFieldSet.__init__(self, parent, name, parent.stream, *args, **kw)


@ -1,5 +1,5 @@
from hachoir_core.field import Bit, Bits, FieldSet
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN
from hachoir.field import Bit, Bits, FieldSet
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN
import struct
# Make sure that we use right struct types
@ -8,6 +8,7 @@ assert struct.calcsize("d") == 8
assert struct.unpack("<d", "\x1f\x85\xebQ\xb8\x1e\t@")[0] == 3.14
assert struct.unpack(">d", "\xc0\0\0\0\0\0\0\0")[0] == -2.0
class FloatMantissa(Bits):
def createValue(self):
value = Bits.createValue(self)
@ -16,10 +17,11 @@ class FloatMantissa(Bits):
def createRawDisplay(self):
return unicode(Bits.createValue(self))
class FloatExponent(Bits):
def __init__(self, parent, name, size):
Bits.__init__(self, parent, name, size)
self.bias = 2 ** (size-1) - 1
self.bias = 2 ** (size - 1) - 1
def createValue(self):
return Bits.createValue(self) - self.bias
@ -27,6 +29,7 @@ class FloatExponent(Bits):
def createRawDisplay(self):
return unicode(self.value + self.bias)
def floatFactory(name, format, mantissa_bits, exponent_bits, doc):
size = 1 + mantissa_bits + exponent_bits
@ -39,9 +42,9 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc):
FieldSet.__init__(self, parent, name, description, size)
if format:
if self._parent.endian == BIG_ENDIAN:
self.struct_format = ">"+format
self.struct_format = ">" + format
else:
self.struct_format = "<"+format
self.struct_format = "<" + format
else:
self.struct_format = None
@ -55,12 +58,12 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc):
"""
if self.struct_format:
raw = self._parent.stream.readBytes(
self.absolute_address, self._size//8)
self.absolute_address, self._size // 8)
try:
return struct.unpack(self.struct_format, raw)[0]
except struct.error, err:
raise ValueError("[%s] conversion error: %s" %
(self.__class__.__name__, err))
(self.__class__.__name__, err))
else:
try:
value = self["mantissa"].value * (2.0 ** float(self["exponent"].value))
@ -70,14 +73,14 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc):
return value
except OverflowError:
raise ValueError("[%s] floating point overflow" %
self.__class__.__name__)
self.__class__.__name__)
def createFields(self):
yield Bit(self, "negative")
yield FloatExponent(self, "exponent", exponent_bits)
if 64 <= mantissa_bits:
yield Bit(self, "one")
yield FloatMantissa(self, "mantissa", mantissa_bits-1)
yield FloatMantissa(self, "mantissa", mantissa_bits - 1)
else:
yield FloatMantissa(self, "mantissa", mantissa_bits)
@ -85,15 +88,15 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc):
cls.__name__ = name
return cls
# 32-bit float (standard: IEEE 754/854)
Float32 = floatFactory("Float32", "f", 23, 8,
"Floating point number: format IEEE 754 int 32 bit")
"Floating point number: format IEEE 754 int 32 bit")
# 64-bit float (standard: IEEE 754/854)
Float64 = floatFactory("Float64", "d", 52, 11,
"Floating point number: format IEEE 754 in 64 bit")
"Floating point number: format IEEE 754 in 64 bit")
# 80-bit float (standard: IEEE 754/854)
Float80 = floatFactory("Float80", None, 64, 15,
"Floating point number: format IEEE 754 in 80 bit")
"Floating point number: format IEEE 754 in 80 bit")


@ -0,0 +1,49 @@
from hachoir.field import FieldSet, RawBytes
from hachoir.stream import StringInputStream
class FragmentGroup:
def __init__(self, parser):
self.items = []
self.parser = parser
self.args = {}
def add(self, item):
self.items.append(item)
def createInputStream(self):
# FIXME: Use lazy stream creation
data = []
for item in self.items:
data.append(item["rawdata"].value)
data = "".join(data)
tags = {"args": self.args}
if self.parser is not None:
tags["class"] = self.parser
tags = iter(tags.items())
return StringInputStream(data, "<fragment group>", tags=tags)
class CustomFragment(FieldSet):
def __init__(self, parent, name, size, parser, description=None, group=None):
FieldSet.__init__(self, parent, name, description, size=size)
if not group:
group = FragmentGroup(parser)
self.field_size = size
self.group = group
self.group.add(self)
def createFields(self):
yield RawBytes(self, "rawdata", self.field_size // 8)
def _createInputStream(self, **args):
return self.group.createInputStream()
def createValue(self):
return self["rawdata"].value
def createDisplay(self):
return self["rawdata"].display

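A container parser typically chains fragments through a shared group so the nested parser sees one contiguous stream; roughly (names hypothetical):

# inside a container's createFields(), reassembling a split payload:
#     part = CustomFragment(self, "chunk[]", chunk_bits, PayloadParser,
#                           group=(previous.group if previous else None))
#     previous = part
#     yield part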

@ -1,9 +1,9 @@
from hachoir_core.field import (MissingField, BasicFieldSet, Field, ParserError,
createRawField, createNullField, createPaddingField, FakeArray)
from hachoir_core.dict import Dict, UniqKeyError
from hachoir_core.error import HACHOIR_ERRORS
from hachoir_core.tools import lowerBound, makeUnicode
import hachoir_core.config as config
from hachoir.field import (MissingField, BasicFieldSet, Field, ParserError,
createRawField, createNullField, createPaddingField, FakeArray)
from hachoir.core.dict import Dict, UniqKeyError
from hachoir.core.tools import lowerBound, makeUnicode
import hachoir.core.config as config
class GenericFieldSet(BasicFieldSet):
"""
@ -93,7 +93,7 @@ class GenericFieldSet(BasicFieldSet):
def __str__(self):
return '<%s path=%s, current_size=%s, current length=%s>' % \
(self.__class__.__name__, self.path, self._current_size, len(self._fields))
(self.__class__.__name__, self.path, self._current_size, len(self._fields))
def __len__(self):
"""
@ -106,17 +106,20 @@ class GenericFieldSet(BasicFieldSet):
def _getCurrentLength(self):
return len(self._fields)
current_length = property(_getCurrentLength)
def _getSize(self):
if self._size is None:
self._feedAll()
return self._size
size = property(_getSize, doc="Size in bits, may create all fields to get size")
def _getCurrentSize(self):
assert not(self.done)
assert not (self.done)
return self._current_size
current_size = property(_getCurrentSize)
eof = property(lambda self: self._checkSize(self._current_size + 1, True) < 0)
@ -147,7 +150,7 @@ class GenericFieldSet(BasicFieldSet):
"""
if not issubclass(field.__class__, Field):
raise ParserError("Field type (%s) is not a subclass of 'Field'!"
% field.__class__.__name__)
% field.__class__.__name__)
assert isinstance(field._name, str)
if field._name.endswith("[]"):
self.setUniqueFieldName(field)
@ -157,15 +160,15 @@ class GenericFieldSet(BasicFieldSet):
# required for the msoffice parser
if field._address != self._current_size:
self.warning("Fix address of %s to %s (was %s)" %
(field.path, self._current_size, field._address))
(field.path, self._current_size, field._address))
field._address = self._current_size
ask_stop = False
# Compute field size and check that there is enough place for it
self.__is_feeding = True
try:
field_size = field.size
except HACHOIR_ERRORS, err:
_ = field.size
except Exception as err:
if field.is_field_set and field.current_length and field.eof:
self.warning("Error when getting size of '%s': %s" % (field.name, err))
field._stopFeeding()
@ -221,7 +224,7 @@ class GenericFieldSet(BasicFieldSet):
if key < 0:
raise KeyError("Key must be positive!")
if not const:
self.readFirstFields(key+1)
self.readFirstFields(key + 1)
if len(self._fields.values) <= key:
raise MissingField(self, key)
return self._fields.values[key]
@ -272,7 +275,7 @@ class GenericFieldSet(BasicFieldSet):
# If last field is too big, delete it
while self._size < self._current_size:
field = self._deleteField(len(self._fields)-1)
field = self._deleteField(len(self._fields) - 1)
message.append("delete field %s" % field.path)
assert self._current_size <= self._size
@ -318,9 +321,9 @@ class GenericFieldSet(BasicFieldSet):
Return the field if it was found, None else
"""
if self.__is_feeding \
or (self._field_generator and self._field_generator.gi_running):
or (self._field_generator and self._field_generator.gi_running):
self.warning("Unable to get %s (and generator is already running)"
% field_name)
% field_name)
return None
try:
while True:
@ -328,11 +331,11 @@ class GenericFieldSet(BasicFieldSet):
self._addField(field)
if field.name == field_name:
return field
except HACHOIR_ERRORS, err:
if self._fixFeedError(err) is False:
raise
except StopIteration:
self._stopFeeding()
except Exception as err:
if self._fixFeedError(err) is False:
raise
return None
def readMoreFields(self, number):
@ -346,12 +349,12 @@ class GenericFieldSet(BasicFieldSet):
oldlen = len(self._fields)
try:
for index in xrange(number):
self._addField( self._field_generator.next() )
except HACHOIR_ERRORS, err:
if self._fixFeedError(err) is False:
raise
self._addField(self._field_generator.next())
except StopIteration:
self._stopFeeding()
except Exception as err:
if self._fixFeedError(err) is False:
raise
return len(self._fields) - oldlen
def _feedAll(self):
@ -361,11 +364,11 @@ class GenericFieldSet(BasicFieldSet):
while True:
field = self._field_generator.next()
self._addField(field)
except HACHOIR_ERRORS, err:
if self._fixFeedError(err) is False:
raise
except StopIteration:
self._stopFeeding()
except Exception as err:
if self._fixFeedError(err) is False:
raise
def __iter__(self):
"""
@ -378,11 +381,18 @@ class GenericFieldSet(BasicFieldSet):
if done == len(self._fields):
if self._field_generator is None:
break
self._addField( self._field_generator.next() )
self._addField(self._field_generator.next())
for field in self._fields.values[done:]:
yield field
done += 1
except HACHOIR_ERRORS, err:
except StopIteration:
field = self._stopFeeding()
if isinstance(field, Field):
yield field
elif hasattr(field, '__iter__'):
for f in field:
yield f
except Exception as err:
field = self._fixFeedError(err)
if isinstance(field, Field):
yield field
@ -391,23 +401,17 @@ class GenericFieldSet(BasicFieldSet):
yield f
elif field is False:
raise
except StopIteration:
field = self._stopFeeding()
if isinstance(field, Field):
yield field
elif hasattr(field, '__iter__'):
for f in field:
yield f
def _isDone(self):
return (self._field_generator is None)
done = property(_isDone, doc="Boolean to know if parsing is done or not")
#
# FieldSet_SeekUtility
#
def seekBit(self, address, name="padding[]",
description=None, relative=True, null=False):
description=None, relative=True, null=False):
"""
Create a field to seek to specified address,
or None if it's not needed.
@ -444,11 +448,11 @@ class GenericFieldSet(BasicFieldSet):
raise ParserError("Unable to replace %s: field doesn't exist!" % name)
assert 1 <= len(new_fields)
old_field = self[name]
total_size = sum( (field.size for field in new_fields) )
total_size = sum((field.size for field in new_fields))
if old_field.size != total_size:
raise ParserError("Unable to replace %s: "
"new field(s) hasn't same size (%u bits instead of %u bits)!"
% (name, total_size, old_field.size))
"new field(s) hasn't same size (%u bits instead of %u bits)!"
% (name, total_size, old_field.size))
field = new_fields[0]
if field._name.endswith("[]"):
self.setUniqueFieldName(field)
@ -460,7 +464,7 @@ class GenericFieldSet(BasicFieldSet):
self._fields.replace(name, field.name, field)
self.raiseEvent("field-replaced", old_field, field)
if 1 < len(new_fields):
index = self._fields.index(new_fields[0].name)+1
index = self._fields.index(new_fields[0].name) + 1
address = field.address + field.size
for field in new_fields[1:]:
if field._name.endswith("[]"):
@ -493,7 +497,7 @@ class GenericFieldSet(BasicFieldSet):
"""
# Check size
total_size = sum( field.size for field in new_fields )
total_size = sum(field.size for field in new_fields)
if old_field.size < total_size:
raise ParserError( \
"Unable to write fields at address %s " \
@ -529,4 +533,3 @@ class GenericFieldSet(BasicFieldSet):
def getFieldIndex(self, field):
return self._fields.index(field._name)
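The rewritten handlers above capture the PY2-to-PY3 pattern used throughout this port: `except SomeError, err:` becomes `except ... as err:`, `generator.next()` becomes `next(generator)`, and `StopIteration` gets its own branch because PEP 479 turns a StopIteration escaping a generator into RuntimeError. A minimal standalone sketch of that shape (names like `drain` and `on_error` are illustrative, not hachoir API):

    def drain(field_generator, on_error=lambda err: None, on_done=lambda: None):
        """Collect everything a generator yields, PY3 style."""
        fields = []
        try:
            while True:
                # PY2 spelled this: field_generator.next()
                fields.append(next(field_generator))
        except StopIteration:
            # PEP 479: catch StopIteration here instead of letting it escape
            on_done()
        except Exception as err:
            # PY2 syntax was: except HACHOIR_ERRORS, err:
            if on_error(err) is False:
                raise
        return fields

    print(drain(iter([1, 2, 3])))   # [1, 2, 3]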


@ -1,43 +1,51 @@
from hachoir_core.field import (FieldError,
RawBits, RawBytes,
PaddingBits, PaddingBytes,
NullBits, NullBytes,
GenericString, GenericInteger)
from hachoir_core.stream import FileOutputStream
from hachoir.field import (FieldError,
RawBits, RawBytes,
PaddingBits, PaddingBytes,
NullBits, NullBytes,
GenericString, GenericInteger)
from hachoir.stream import FileOutputStream
def createRawField(parent, size, name="raw[]", description=None):
if size <= 0:
raise FieldError("Unable to create raw field of %s bits" % size)
if (size % 8) == 0:
return RawBytes(parent, name, size/8, description)
return RawBytes(parent, name, size // 8, description)
else:
return RawBits(parent, name, size, description)
def createPaddingField(parent, nbits, name="padding[]", description=None):
if nbits <= 0:
raise FieldError("Unable to create padding of %s bits" % nbits)
if (nbits % 8) == 0:
return PaddingBytes(parent, name, nbits/8, description)
return PaddingBytes(parent, name, nbits // 8, description)
else:
return PaddingBits(parent, name, nbits, description)
def createNullField(parent, nbits, name="padding[]", description=None):
if nbits <= 0:
raise FieldError("Unable to create null padding of %s bits" % nbits)
if (nbits % 8) == 0:
return NullBytes(parent, name, nbits/8, description)
return NullBytes(parent, name, nbits // 8, description)
else:
return NullBits(parent, name, nbits, description)
def isString(field):
return issubclass(field.__class__, GenericString)
def isInteger(field):
return issubclass(field.__class__, GenericInteger)
def writeIntoFile(fieldset, filename):
output = FileOutputStream(filename)
fieldset.writeInto(output)
with output:
fieldset.writeInto(output)
def createOrphanField(fieldset, address, field_cls, *args, **kw):
"""
@ -54,4 +62,3 @@ def createOrphanField(fieldset, address, field_cls, *args, **kw):
finally:
fieldset._current_size = save_size
return field
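The `size/8` to `size // 8` edits in this file are not cosmetic: under PY3, `/` is true division and returns a float, and a float byte count would break downstream consumers such as RawBytes. `//` preserves the integer result that PY2's `/` gave for two ints:

    size = 24                  # field size in bits
    print(size / 8)            # 3.0 -- float under PY3 true division
    print(size // 8)           # 3   -- int, matching the old PY2 behaviour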


@ -4,12 +4,14 @@ Integer field classes:
- Int8, Int16, Int24, Int32, Int64: signed integer of 8, 16, 32, 64 bits.
"""
from hachoir_core.field import Bits, FieldError
from hachoir.field import Bits, FieldError
class GenericInteger(Bits):
"""
Generic integer class used to generate other classes.
"""
def __init__(self, parent, name, signed, size, description=None):
if not (8 <= size <= 16384):
raise FieldError("Invalid integer size (%s): have to be in 8..16384" % size)
@ -20,16 +22,20 @@ class GenericInteger(Bits):
return self._parent.stream.readInteger(
self.absolute_address, self.signed, self._size, self._parent.endian)
def integerFactory(name, is_signed, size, doc):
class Integer(GenericInteger):
__doc__ = doc
static_size = size
def __init__(self, parent, name, description=None):
GenericInteger.__init__(self, parent, name, is_signed, size, description)
cls = Integer
cls.__name__ = name
return cls
UInt8 = integerFactory("UInt8", False, 8, "Unsigned integer of 8 bits")
UInt16 = integerFactory("UInt16", False, 16, "Unsigned integer of 16 bits")
UInt24 = integerFactory("UInt24", False, 24, "Unsigned integer of 24 bits")
@ -41,4 +47,3 @@ Int16 = integerFactory("Int16", True, 16, "Signed integer of 16 bits")
Int24 = integerFactory("Int24", True, 24, "Signed integer of 24 bits")
Int32 = integerFactory("Int32", True, 32, "Signed integer of 32 bits")
Int64 = integerFactory("Int64", True, 64, "Signed integer of 64 bits")
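integerFactory stamps out one concrete class per width and signedness by closing over its arguments and renaming the class after creation. The same technique in a self-contained sketch (the range check is added here for illustration only, it is not part of hachoir's version):

    def int_type_factory(name, signed, size, doc):
        """Create a value class for one fixed-width integer type."""
        lo = -(1 << (size - 1)) if signed else 0
        hi = (1 << (size - 1)) - 1 if signed else (1 << size) - 1

        class FixedInt:
            __doc__ = doc
            static_size = size

            def __init__(self, value):
                if not lo <= value <= hi:
                    raise ValueError("%s out of range for %s" % (value, name))
                self.value = value

        FixedInt.__name__ = name    # same post-creation rename as integerFactory
        return FixedInt

    UInt8 = int_type_factory("UInt8", False, 8, "Unsigned integer of 8 bits")
    print(UInt8(200).value, UInt8.__name__, UInt8.static_size)   # 200 UInt8 8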


@ -1,5 +1,5 @@
from hachoir_core.field import Field, FieldSet, ParserError, Bytes, MissingField
from hachoir_core.stream import FragmentedStream
from hachoir.field import Field, FieldSet, ParserError, Bytes, MissingField
from hachoir.stream import FragmentedStream
class Link(Field):
@ -64,6 +64,7 @@ class Fragment(FieldSet):
if self._first is None:
raise ParserError("first is None")
return self
first = property(lambda self: self._feedLinks()._first)
def _getNext(self):
@ -71,7 +72,8 @@ class Fragment(FieldSet):
if callable(next):
self._next = next = next()
return next
next = property(_getNext)
next = property(_getNext)
def _createInputStream(self, **args):
first = self.first
@ -105,5 +107,4 @@ class Fragment(FieldSet):
def createFields(self):
if self._size is None:
self._size = self._getSize()
yield Bytes(self, "data", self._size/8)
yield Bytes(self, "data", self._size // 8)


@ -1,6 +1,7 @@
from hachoir_core.field import Bits, Bytes
from hachoir_core.tools import makePrintable, humanFilesize
from hachoir_core import config
from hachoir.field import Bits, Bytes
from hachoir.core.tools import makePrintable, humanFilesize
from hachoir.core import config
class PaddingBits(Bits):
"""
@ -22,7 +23,7 @@ class PaddingBits(Bits):
self._display_pattern = self.checkPattern()
def checkPattern(self):
if not(config.check_padding_pattern):
if not (config.check_padding_pattern):
return False
if self.pattern != 0:
return False
@ -45,6 +46,7 @@ class PaddingBits(Bits):
else:
return Bits.createDisplay(self)
class PaddingBytes(Bytes):
"""
Padding bytes used, for example, to align address (of next field).
@ -57,11 +59,11 @@ class PaddingBytes(Bytes):
* pattern (str): Content pattern, eg. "\0" for nul bytes
"""
static_size = staticmethod(lambda *args, **kw: args[1]*8)
static_size = staticmethod(lambda *args, **kw: args[1] * 8)
MAX_SIZE = 4096
def __init__(self, parent, name, nbytes,
description="Padding", pattern=None):
description="Padding", pattern=None):
""" pattern is None or repeated string """
assert (pattern is None) or (isinstance(pattern, str))
Bytes.__init__(self, parent, name, nbytes, description)
@ -69,12 +71,12 @@ class PaddingBytes(Bytes):
self._display_pattern = self.checkPattern()
def checkPattern(self):
if not(config.check_padding_pattern):
if not (config.check_padding_pattern):
return False
if self.pattern is None:
return False
if self.MAX_SIZE < self._size/8:
if self.MAX_SIZE < self._size // 8:
self.info("only check first %s of padding" % humanFilesize(self.MAX_SIZE))
content = self._parent.stream.readBytes(
self.absolute_address, self.MAX_SIZE)
@ -83,7 +85,7 @@ class PaddingBytes(Bytes):
index = 0
pattern_len = len(self.pattern)
while index < len(content):
if content[index:index+pattern_len] != self.pattern:
if content[index:index + pattern_len] != self.pattern:
self.warning(
"padding contents doesn't look normal"
" (invalid pattern at byte %u)!"
@ -101,6 +103,7 @@ class PaddingBytes(Bytes):
def createRawDisplay(self):
return Bytes.createDisplay(self)
class NullBits(PaddingBits):
"""
Null padding bits used, for example, to align address (of next field).
@ -119,6 +122,7 @@ class NullBits(PaddingBits):
else:
return Bits.createDisplay(self)
class NullBytes(PaddingBytes):
"""
Null padding bytes used, for example, to align address (of next field).
@ -127,6 +131,7 @@ class NullBytes(PaddingBytes):
Arguments:
* nbytes: Size of the field in bytes
"""
def __init__(self, parent, name, nbytes, description=None):
PaddingBytes.__init__(self, parent, name, nbytes, description, pattern="\0")
@ -135,4 +140,3 @@ class NullBytes(PaddingBytes):
return "<null>"
else:
return Bytes.createDisplay(self)
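checkPattern above verifies that the padding bytes really repeat the declared pattern, capping the scan at MAX_SIZE and warning on the first mismatch. A simplified standalone version of the loop (returning a bool instead of logging):

    def looks_like_padding(content, pattern):
        """True if content is just `pattern` repeated (last repeat may be cut)."""
        plen = len(pattern)
        for index in range(0, len(content), plen):
            chunk = content[index:index + plen]
            if chunk != pattern[:len(chunk)]:
                return False
        return True

    print(looks_like_padding(b"\0" * 16, b"\0"))    # True
    print(looks_like_padding(b"\0\0x\0", b"\0"))    # False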


@ -1,7 +1,8 @@
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir_core.field import GenericFieldSet
from hachoir_core.log import Logger
import hachoir_core.config as config
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir.field import GenericFieldSet
from hachoir.core.log import Logger
import hachoir.core.config as config
class Parser(GenericFieldSet):
"""
@ -21,17 +22,27 @@ class Parser(GenericFieldSet):
"""
# Check arguments
assert hasattr(self, "endian") \
and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
# Call parent constructor
GenericFieldSet.__init__(self, None, "root", stream, description, stream.askSize(self))
def close(self):
self.stream.close()
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
self.close()
def _logger(self):
return Logger._logger(self)
def _setSize(self, size):
self._truncate(size)
self.raiseEvent("field-resized", self)
size = property(lambda self: self._size, doc="Size in bits")
path = property(lambda self: "/")
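The new close()/__enter__/__exit__ methods make a Parser own its stream's lifetime, so callers can rely on `with`. Assuming the package's usual entry point hachoir.parser.createParser, usage would look roughly like this (the path is illustrative):

    from hachoir.parser import createParser

    parser = createParser("example.avi")    # illustrative path
    if parser:
        with parser:                        # __exit__ now closes the stream
            print(parser.description)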


@ -1,27 +1,38 @@
from hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField
from hachoir_core.error import HACHOIR_ERRORS
from hachoir.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField
# getgaps(int, int, [listof (int, int)]) -> generator of (int, int)
# Gets all the gaps not covered by a block in `blocks` from `start` for `length` units.
def getgaps(start, length, blocks):
'''
"""
Example:
>>> list(getgaps(0, 20, [(15,3), (6,2), (6,2), (1,2), (2,3), (11,2), (9,5)]))
[(0, 1), (5, 1), (8, 1), (14, 1), (18, 2)]
'''
"""
# done this way to avoid mutating the original
blocks = sorted(blocks, key=lambda b: b[0])
end = start+length
end = start + length
for s, l in blocks:
if s > start:
yield (start, s-start)
yield (start, s - start)
start = s
if s+l > start:
start = s+l
if s + l > start:
start = s + l
if start < end:
yield (start, end-start)
yield (start, end - start)
class RootSeekableFieldSet(GenericFieldSet):
def close(self):
self.stream.close()
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
self.close()
def seekBit(self, address, relative=True):
if not relative:
address -= self.absolute_address
@ -31,7 +42,7 @@ class RootSeekableFieldSet(GenericFieldSet):
return None
def seekByte(self, address, relative=True):
return self.seekBit(address*8, relative)
return self.seekBit(address * 8, relative)
def _fixLastField(self):
"""
@ -46,13 +57,13 @@ class RootSeekableFieldSet(GenericFieldSet):
# If last field is too big, delete it
while self._size < self._current_size:
field = self._deleteField(len(self._fields)-1)
field = self._deleteField(len(self._fields) - 1)
message.append("delete field %s" % field.path)
assert self._current_size <= self._size
blocks = [(x.absolute_address, x.size) for x in self._fields]
fields = []
self._size = max(self._size, max(a+b for a,b in blocks) - self.absolute_address)
self._size = max(self._size, max(a + b for a, b in blocks) - self.absolute_address)
for start, length in getgaps(self.absolute_address, self._size, blocks):
self.seekBit(start, relative=False)
field = createRawField(self, length, "unparsed[]")
@ -67,7 +78,6 @@ class RootSeekableFieldSet(GenericFieldSet):
return fields
def _stopFeeding(self):
new_field = None
if self._size is None:
if self._parent:
self._size = self._current_size
@ -76,6 +86,7 @@ class RootSeekableFieldSet(GenericFieldSet):
self._field_generator = None
return new_field
class SeekableFieldSet(RootSeekableFieldSet):
def __init__(self, parent, name, description=None, size=None):
assert issubclass(parent.__class__, BasicFieldSet)


@ -1,4 +1,5 @@
from hachoir_core.field import FieldSet, ParserError
from hachoir.field import FieldSet, ParserError
class StaticFieldSet(FieldSet):
"""
@ -27,7 +28,7 @@ class StaticFieldSet(FieldSet):
item_class = item[0]
if item_class.static_size is None:
raise ParserError("Unable to get static size of field type: %s"
% item_class.__name__)
% item_class.__name__)
if callable(item_class.static_size):
if isinstance(item[-1], dict):
return item_class.static_size(*item[1:-1], **item[-1])
@ -51,4 +52,3 @@ class StaticFieldSet(FieldSet):
# Initial value of static_size, it changes when first instance
# is created (see __new__)
static_size = _computeStaticSize


@ -15,17 +15,18 @@ Note: For PascalStringXX, prefixed value is the number of bytes and not
of characters!
"""
from hachoir_core.field import FieldError, Bytes
from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
from hachoir_core.tools import alignValue, makePrintable
from hachoir_core.i18n import guessBytesCharset, _
from hachoir_core import config
from hachoir.field import FieldError, Bytes
from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN
from hachoir.core.tools import alignValue, makePrintable
from hachoir.core.i18n import guessBytesCharset, _
from hachoir.core import config
from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF32_LE, BOM_UTF32_BE
# Default charset used to convert byte string to Unicode
# This charset is used if no charset is specified or on conversion error
FALLBACK_CHARSET = "ISO-8859-1"
class GenericString(Bytes):
"""
Generic string class.
@ -34,7 +35,7 @@ class GenericString(Bytes):
"""
VALID_FORMATS = ("C", "UnixLine",
"fixed", "Pascal8", "Pascal16", "Pascal32")
"fixed", "Pascal8", "Pascal16", "Pascal32")
# 8-bit charsets
CHARSET_8BIT = set((
@ -88,13 +89,13 @@ class GenericString(Bytes):
# Suffix format: value is suffix (string)
SUFFIX_FORMAT = {
"C": {
8: {LITTLE_ENDIAN: "\0", BIG_ENDIAN: "\0"},
16: {LITTLE_ENDIAN: "\0\0", BIG_ENDIAN: "\0\0"},
8: {LITTLE_ENDIAN: "\0", BIG_ENDIAN: "\0"},
16: {LITTLE_ENDIAN: "\0\0", BIG_ENDIAN: "\0\0"},
32: {LITTLE_ENDIAN: "\0\0\0\0", BIG_ENDIAN: "\0\0\0\0"},
},
"UnixLine": {
8: {LITTLE_ENDIAN: "\n", BIG_ENDIAN: "\n"},
16: {LITTLE_ENDIAN: "\n\0", BIG_ENDIAN: "\0\n"},
8: {LITTLE_ENDIAN: "\n", BIG_ENDIAN: "\n"},
16: {LITTLE_ENDIAN: "\n\0", BIG_ENDIAN: "\0\n"},
32: {LITTLE_ENDIAN: "\n\0\0\0", BIG_ENDIAN: "\0\0\0\n"},
},
@ -102,7 +103,7 @@ class GenericString(Bytes):
# Pascal format: value is the size of the prefix in bits
PASCAL_FORMATS = {
"Pascal8": 1,
"Pascal8": 1,
"Pascal16": 2,
"Pascal32": 4
}
@ -112,7 +113,7 @@ class GenericString(Bytes):
_raw_value = None
def __init__(self, parent, name, format, description=None,
strip=None, charset=None, nbytes=None, truncate=None):
strip=None, charset=None, nbytes=None, truncate=None):
Bytes.__init__(self, parent, name, 1, description)
# Is format valid?
@ -126,12 +127,12 @@ class GenericString(Bytes):
# Check charset and compute character size in bytes
# (or None when it's not possible to guess character size)
if not charset or charset in self.CHARSET_8BIT:
self._character_size = 1 # one byte per character
self._character_size = 1 # one byte per character
elif charset in self.UTF_CHARSET:
self._character_size = None
else:
raise FieldError("Invalid charset for %s: \"%s\"" %
(self.path, charset))
(self.path, charset))
self._charset = charset
# It is a fixed string?
@ -140,8 +141,8 @@ class GenericString(Bytes):
# Arbitrary limits, just to catch some bugs...
if not (1 <= nbytes <= 0xffff):
raise FieldError("Invalid string size for %s: %s" %
(self.path, nbytes))
self._content_size = nbytes # content length in bytes
(self.path, nbytes))
self._content_size = nbytes # content length in bytes
self._size = nbytes * 8
self._content_offset = 0
else:
@ -157,7 +158,7 @@ class GenericString(Bytes):
suffix, False, self.absolute_address)
if length is None:
raise FieldError("Unable to find end of string %s (format %s)!"
% (self.path, self._format))
% (self.path, self._format))
if 1 < len(suffix):
# Fix length for little endian bug with UTF-xx charset:
# u"abc" -> "a\0b\0c\0\0\0" (UTF-16-LE)
@ -165,7 +166,7 @@ class GenericString(Bytes):
length = alignValue(length, len(suffix))
# Compute sizes
self._content_size = length # in bytes
self._content_size = length # in bytes
self._size = (length + len(suffix)) * 8
# Format with a prefix: Read prefixed length in bytes
@ -178,14 +179,14 @@ class GenericString(Bytes):
# Read the prefix and compute sizes
value = self._parent.stream.readBits(
self.absolute_address, prefix_size*8, self._parent.endian)
self._content_size = value # in bytes
self.absolute_address, prefix_size * 8, self._parent.endian)
self._content_size = value # in bytes
self._size = (prefix_size + value) * 8
# For UTF-16 and UTF-32, choose the right charset using BOM
if self._charset in self.UTF_CHARSET:
# Charset requires a BOM?
bomsize, endian = self.UTF_CHARSET[self._charset]
bomsize, endian = self.UTF_CHARSET[self._charset]
if endian == "BOM":
# Read the BOM value
nbytes = bomsize // 8
@ -195,14 +196,14 @@ class GenericString(Bytes):
bom_endian = self.UTF_BOM[bomsize]
if bom not in bom_endian:
raise FieldError("String %s has invalid BOM (%s)!"
% (self.path, repr(bom)))
% (self.path, repr(bom)))
self._charset = bom_endian[bom]
self._content_size -= nbytes
self._content_offset += nbytes
# Compute length in character if possible
if self._character_size:
self._length = self._content_size // self._character_size
self._length = self._content_size // self._character_size
else:
self._length = None
@ -221,6 +222,7 @@ class GenericString(Bytes):
def _getSuffixStr(self):
return self.staticSuffixStr(
self._format, self._charset, self._parent.endian)
suffix_str = property(_getSuffixStr)
def _convertText(self, text):
@ -231,18 +233,18 @@ class GenericString(Bytes):
# Try to convert to Unicode
try:
return unicode(text, self._charset, "strict")
except UnicodeDecodeError, err:
except UnicodeDecodeError as err:
pass
#--- Conversion error ---
# --- Conversion error ---
# Fix truncated UTF-16 string like 'B\0e' (3 bytes)
# => Add missing nul byte: 'B\0e\0' (4 bytes)
if err.reason == "truncated data" \
and err.end == len(text) \
and self._charset == "UTF-16-LE":
and err.end == len(text) \
and self._charset == "UTF-16-LE":
try:
text = unicode(text+"\0", self._charset, "strict")
text = unicode(text + "\0", self._charset, "strict")
self.warning("Fix truncated %s string: add missing nul byte" % self._charset)
return text
except UnicodeDecodeError, err:
@ -321,24 +323,29 @@ class GenericString(Bytes):
if self._length is None:
self._length = len(self.value)
return self._length
length = property(_getLength, doc="String length in characters")
def _getFormat(self):
return self._format
format = property(_getFormat, doc="String format (eg. 'C')")
def _getCharset(self):
if not self._charset:
self._charset = self._guessCharset()
return self._charset
charset = property(_getCharset, doc="String charset (eg. 'ISO-8859-1')")
def _getContentSize(self):
return self._content_size
content_size = property(_getContentSize, doc="Content size in bytes")
def _getContentOffset(self):
return self._content_offset
content_offset = property(_getContentOffset, doc="Content offset in bytes")
def getFieldType(self):
@ -350,40 +357,44 @@ class GenericString(Bytes):
info += ",strip=True"
return "%s<%s>" % (Bytes.getFieldType(self), info)
def stringFactory(name, format, doc):
class NewString(GenericString):
__doc__ = doc
def __init__(self, parent, name, description=None,
strip=None, charset=None, truncate=None):
strip=None, charset=None, truncate=None):
GenericString.__init__(self, parent, name, format, description,
strip=strip, charset=charset, truncate=truncate)
strip=strip, charset=charset, truncate=truncate)
cls = NewString
cls.__name__ = name
return cls
# String which ends with nul byte ("\0")
CString = stringFactory("CString", "C",
r"""C string: string ending with nul byte.
r"""C string: string ending with nul byte.
See GenericString to get more information.""")
# Unix line of text: string which ends with "\n" (ASCII 0x0A)
UnixLine = stringFactory("UnixLine", "UnixLine",
r"""Unix line: string ending with "\n" (ASCII code 10).
r"""Unix line: string ending with "\n" (ASCII code 10).
See GenericString to get more information.""")
# String prefixed with length written in a 8-bit integer
PascalString8 = stringFactory("PascalString8", "Pascal8",
r"""Pascal string: string prefixed with 8-bit integer containing its length (endian depends on parent endian).
r"""Pascal string: string prefixed with 8-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")
# String prefixed with length written in a 16-bit integer (use parent endian)
PascalString16 = stringFactory("PascalString16", "Pascal16",
r"""Pascal string: string prefixed with 16-bit integer containing its length (endian depends on parent endian).
r"""Pascal string: string prefixed with 16-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")
# String prefixed with length written in a 32-bit integer (use parent endian)
PascalString32 = stringFactory("PascalString32", "Pascal32",
r"""Pascal string: string prefixed with 32-bit integer containing its length (endian depends on parent endian).
r"""Pascal string: string prefixed with 32-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")
@ -392,11 +403,12 @@ class String(GenericString):
String with fixed size (size in bytes).
See GenericString to get more information.
"""
static_size = staticmethod(lambda *args, **kw: args[1]*8)
static_size = staticmethod(lambda *args, **kw: args[1] * 8)
def __init__(self, parent, name, nbytes, description=None,
strip=None, charset=None, truncate=None):
strip=None, charset=None, truncate=None):
GenericString.__init__(self, parent, name, "fixed", description,
strip=strip, charset=charset, nbytes=nbytes, truncate=truncate)
String.__name__ = "FixedString"
strip=strip, charset=charset, nbytes=nbytes, truncate=truncate)
String.__name__ = "FixedString"
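The truncated-string fix in _convertText above is easy to reproduce with PY3 bytes: a UTF-16-LE payload missing its final nul byte fails to decode with reason "truncated data", and appending a single nul recovers the text:

    raw = b"B\x00e"                          # "Be" in UTF-16-LE, final nul lost
    try:
        raw.decode("UTF-16-LE", "strict")
    except UnicodeDecodeError as err:
        assert err.reason == "truncated data" and err.end == len(raw)
        print((raw + b"\x00").decode("UTF-16-LE"))   # Be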


@ -1,32 +1,37 @@
from hachoir_core.field import Bytes
from hachoir_core.tools import makePrintable, humanFilesize
from hachoir_core.stream import InputIOStream
from hachoir.field import Bytes
from hachoir.core.tools import makePrintable, humanFilesize
from hachoir.stream import InputIOStream
class SubFile(Bytes):
"""
File stored in another file
"""
def __init__(self, parent, name, length, description=None,
parser=None, filename=None, mime_type=None, parser_class=None):
parser=None, filename=None, mime_type=None, parser_class=None):
if filename:
if not isinstance(filename, unicode):
filename = makePrintable(filename, "ISO-8859-1")
if not description:
description = 'File "%s" (%s)' % (filename, humanFilesize(length))
Bytes.__init__(self, parent, name, length, description)
def createInputStream(cis, **args):
tags = args.setdefault("tags",[])
tags = args.setdefault("tags", [])
if parser_class:
tags.append(( "class", parser_class ))
tags.append(("class", parser_class))
if parser is not None:
tags.append(( "id", parser.PARSER_TAGS["id"] ))
tags.append(("id", parser.PARSER_TAGS["id"]))
if mime_type:
tags.append(( "mime", mime_type ))
tags.append(("mime", mime_type))
if filename:
tags.append(( "filename", filename ))
tags.append(("filename", filename))
return cis(**args)
self.setSubIStream(createInputStream)
class CompressedStream:
offset = 0
@ -37,7 +42,7 @@ class CompressedStream:
def read(self, size):
d = self._buffer
data = [ d[:size] ]
data = [d[:size]]
size -= len(d)
if size > 0:
d = self.decompressor(size)
@ -54,9 +59,10 @@ class CompressedStream:
d = self.decompressor(size, d)
data.append(d[:size])
size -= len(d)
self._buffer = d[size+len(d):]
self._buffer = d[size + len(d):]
return ''.join(data)
def CompressedField(field, decompressor):
def createInputStream(cis, source=None, **args):
if field._parent:
@ -68,5 +74,6 @@ def CompressedField(field, decompressor):
if source is None:
source = "Compressed source: '%s' (offset=%s)" % (stream.source, field.absolute_address)
return InputIOStream(input, source=source, **args)
field.setSubIStream(createInputStream)
return field


@ -1,9 +1,10 @@
from hachoir_core.tools import (humanDatetime, humanDuration,
timestampUNIX, timestampMac32, timestampUUID60,
timestampWin64, durationWin64)
from hachoir_core.field import Bits, FieldSet
from hachoir.core.tools import (humanDatetime, humanDuration,
timestampUNIX, timestampMac32, timestampUUID60,
timestampWin64, durationWin64)
from hachoir.field import Bits, FieldSet
from datetime import datetime
class GenericTimestamp(Bits):
def __init__(self, parent, name, size, description=None):
Bits.__init__(self, parent, name, size, description)
@ -18,6 +19,7 @@ class GenericTimestamp(Bits):
def __nonzero__(self):
return Bits.createValue(self) != 0
def timestampFactory(cls_name, handler, size):
class Timestamp(GenericTimestamp):
def __init__(self, parent, name, description=None):
@ -26,16 +28,19 @@ def timestampFactory(cls_name, handler, size):
def createValue(self):
value = Bits.createValue(self)
return handler(value)
cls = Timestamp
cls.__name__ = cls_name
return cls
TimestampUnix32 = timestampFactory("TimestampUnix32", timestampUNIX, 32)
TimestampUnix64 = timestampFactory("TimestampUnix64", timestampUNIX, 64)
TimestampMac32 = timestampFactory("TimestampMac32", timestampMac32, 32)
TimestampUUID60 = timestampFactory("TimestampUUID60", timestampUUID60, 60)
TimestampWin64 = timestampFactory("TimestampWin64", timestampWin64, 64)
class TimeDateMSDOS32(FieldSet):
"""
32-bit MS-DOS timestamp (16-bit time, 16-bit date)
@ -55,16 +60,18 @@ class TimeDateMSDOS32(FieldSet):
def createValue(self):
return datetime(
1980+self["year"].value, self["month"].value, self["day"].value,
self["hour"].value, self["minute"].value, 2*self["second"].value)
1980 + self["year"].value, self["month"].value, self["day"].value,
self["hour"].value, self["minute"].value, 2 * self["second"].value)
def createDisplay(self):
return humanDatetime(self.value)
class DateTimeMSDOS32(TimeDateMSDOS32):
"""
32-bit MS-DOS timestamp (16-bit date, 16-bit time)
"""
def createFields(self):
yield Bits(self, "day", 5)
yield Bits(self, "month", 4)
@ -73,6 +80,7 @@ class DateTimeMSDOS32(TimeDateMSDOS32):
yield Bits(self, "minute", 6)
yield Bits(self, "hour", 5)
class TimedeltaWin64(GenericTimestamp):
def __init__(self, parent, name, description=None):
GenericTimestamp.__init__(self, parent, name, 64, description)
@ -83,4 +91,3 @@ class TimedeltaWin64(GenericTimestamp):
def createValue(self):
value = Bits.createValue(self)
return durationWin64(value)
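TimeDateMSDOS32 decodes the classic packed FAT/MS-DOS stamp, which is why the value is assembled as 1980 + year and 2 * second: years are stored relative to 1980 and seconds in two-second units. A standalone decoder for the same 32-bit layout, assuming the conventional bit packing (time in the low word, date in the high word, as in TimeDateMSDOS32):

    from datetime import datetime

    def decode_msdos32(value):
        """Decode a 32-bit DOS timestamp: 16-bit time word low, date word high."""
        time_w = value & 0xFFFF
        date_w = value >> 16
        return datetime(
            1980 + (date_w >> 9),        # year, stored relative to 1980
            (date_w >> 5) & 0x0F,        # month
            date_w & 0x1F,               # day
            (time_w >> 11) & 0x1F,       # hour
            (time_w >> 5) & 0x3F,        # minute
            2 * (time_w & 0x1F))         # seconds, stored in 2-second units

    print(decode_msdos32(0x4C7C63D5))    # 2018-03-28 12:30:42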


@ -1,13 +1,14 @@
from hachoir_core.field import Field, FieldSet, ParserError
from hachoir.field import Field, FieldSet, ParserError
class GenericVector(FieldSet):
def __init__(self, parent, name, nb_items, item_class, item_name="item", description=None):
# Sanity checks
assert issubclass(item_class, Field)
assert isinstance(item_class.static_size, (int, long))
if not(0 < nb_items):
raise ParserError('Unable to create empty vector "%s" in %s' \
% (name, parent.path))
if not (0 < nb_items):
raise ParserError('Unable to create empty vector "%s" in %s'
% (name, parent.path))
size = nb_items * item_class.static_size
self.__nb_items = nb_items
self._item_class = item_class
@ -23,6 +24,7 @@ class GenericVector(FieldSet):
for index in xrange(len(self)):
yield parser(self, name)
class UserVector(GenericVector):
"""
To implement:
@ -35,4 +37,3 @@ class UserVector(GenericVector):
def __init__(self, parent, name, nb_items, description=None):
GenericVector.__init__(self, parent, name, nb_items, self.item_class, self.item_name, description)


@ -0,0 +1,12 @@
from hachoir.metadata.metadata import extractMetadata
# Just import the module,
# each module use registerExtractor() method
import hachoir.metadata.archive
import hachoir.metadata.audio
import hachoir.metadata.image
import hachoir.metadata.jpeg
import hachoir.metadata.misc
import hachoir.metadata.program
import hachoir.metadata.riff
import hachoir.metadata.video


@ -1,11 +1,12 @@
from hachoir_metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST
from hachoir_metadata.safe import fault_tolerant, getValue
from hachoir_metadata.metadata import (
from hachoir.metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST
from hachoir.metadata.safe import fault_tolerant, getValue
from hachoir.metadata.metadata import (
RootMetadata, Metadata, MultipleMetadata, registerExtractor)
from hachoir_parser.archive import (Bzip2Parser, CabFile, GzipParser,
TarFile, ZipFile, MarFile)
from hachoir_core.tools import humanUnixAttributes
from hachoir_core.i18n import _
from hachoir.parser.archive import (Bzip2Parser, CabFile, GzipParser,
TarFile, ZipFile, MarFile)
from hachoir.core.tools import humanUnixAttributes
from hachoir.core.i18n import _
def maxNbFile(meta):
if meta.quality <= QUALITY_FASTEST:
@ -14,22 +15,25 @@ def maxNbFile(meta):
return None
return 1 + int(10 * meta.quality)
def computeCompressionRate(meta):
"""
Compute compression rate, sizes have to be in byte.
"""
if not meta.has("file_size") \
or not meta.get("compr_size", 0):
if (not meta.has("file_size")
or not meta.get("compr_size", 0)):
return
file_size = meta.get("file_size")
if not file_size:
return
meta.compr_rate = float(file_size) / meta.get("compr_size")
class Bzip2Metadata(RootMetadata):
def extract(self, zip):
if "file" in zip:
self.compr_size = zip["file"].size/8
self.compr_size = zip["file"].size // 8
class GzipMetadata(RootMetadata):
def extract(self, gzip):
@ -46,18 +50,30 @@ class GzipMetadata(RootMetadata):
self.filename = getValue(gzip, "filename")
if gzip["has_comment"].value:
self.comment = getValue(gzip, "comment")
self.compr_size = gzip["file"].size/8
self.compr_size = gzip["file"].size // 8
self.file_size = gzip["size"].value
class ZipMetadata(MultipleMetadata):
def extract(self, zip):
max_nb = maxNbFile(self)
for index, field in enumerate(zip.array("file")):
if max_nb is not None and max_nb <= index:
self.warning("ZIP archive contains many files, but only first %s files are processed" % max_nb)
self.warning("ZIP archive contains many files, "
"but only first %s files are processed"
% max_nb)
break
self.processFile(field)
self.extract_end_central_directory(zip)
@fault_tolerant
def extract_end_central_directory(self, parser):
comment = parser['end_central_directory/comment'].value
if comment:
self.comment = comment
@fault_tolerant
def processFile(self, field):
meta = Metadata(self)
@ -75,12 +91,15 @@ class ZipMetadata(MultipleMetadata):
computeCompressionRate(meta)
self.addGroup(field.name, meta, "File \"%s\"" % meta.get('filename'))
class TarMetadata(MultipleMetadata):
def extract(self, tar):
max_nb = maxNbFile(self)
for index, field in enumerate(tar.array("file")):
if max_nb is not None and max_nb <= index:
self.warning("TAR archive contains many files, but only first %s files are processed" % max_nb)
self.warning("TAR archive contains many files, "
"but only first %s files are processed"
% max_nb)
break
meta = Metadata(self)
self.extractFile(field, meta)
@ -101,22 +120,26 @@ class TarMetadata(MultipleMetadata):
except ValueError:
pass
meta.file_type = field["type"].display
meta.author = "%s (uid=%s), group %s (gid=%s)" %\
(field["uname"].value, field.getOctal("uid"),
field["gname"].value, field.getOctal("gid"))
meta.author = "%s (uid=%s), group %s (gid=%s)" % \
(field["uname"].value, field.getOctal("uid"),
field["gname"].value, field.getOctal("gid"))
class CabMetadata(MultipleMetadata):
def extract(self, cab):
if "folder[0]" in cab:
self.useFolder(cab["folder[0]"])
self.format_version = "Microsoft Cabinet version %s.%s" % (cab["major_version"].display, cab["minor_version"].display)
self.format_version = "Microsoft Cabinet version %s.%s"\
% (cab["major_version"].display,
cab["minor_version"].display)
self.comment = "%s folders, %s files" % (
cab["nb_folder"].value, cab["nb_files"].value)
max_nb = maxNbFile(self)
for index, field in enumerate(cab.array("file")):
if max_nb is not None and max_nb <= index:
self.warning("CAB archive contains many files, but only first %s files are processed" % max_nb)
self.warning("CAB archive contains many files, "
"but only first %s files are processed"
% max_nb)
break
self.useFile(field)
@ -142,20 +165,26 @@ class CabMetadata(MultipleMetadata):
title = _("File")
self.addGroup(field.name, meta, title)
class MarMetadata(MultipleMetadata):
def extract(self, mar):
self.comment = "Contains %s files" % mar["nb_file"].value
self.format_version = "Microsoft Archive version %s" % mar["version"].value
self.format_version = "Microsoft Archive version %s"\
% mar["version"].value
max_nb = maxNbFile(self)
for index, field in enumerate(mar.array("file")):
if max_nb is not None and max_nb <= index:
self.warning("MAR archive contains many files, but only first %s files are processed" % max_nb)
self.warning("MAR archive contains many files, "
"but only first %s files are processed"
% max_nb)
break
meta = Metadata(self)
meta.filename = field["filename"].value
meta.compression = "None"
meta.file_size = field["filesize"].value
self.addGroup(field.name, meta, "File \"%s\"" % meta.getText('filename'))
self.addGroup(field.name, meta,
"File \"%s\"" % meta.getText('filename'))
registerExtractor(CabFile, CabMetadata)
registerExtractor(GzipParser, GzipMetadata)
@ -163,4 +192,3 @@ registerExtractor(Bzip2Parser, Bzip2Metadata)
registerExtractor(TarFile, TarMetadata)
registerExtractor(ZipFile, ZipMetadata)
registerExtractor(MarFile, MarMetadata)
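computeCompressionRate defines the rate as uncompressed size over compressed size, both in bytes, which is what the `size // 8` conversions above feed it. The arithmetic in isolation:

    file_size = 1048576            # uncompressed size in bytes
    compr_size = 262144            # compressed size in bytes (field size // 8)
    print("%.1fx" % (float(file_size) / compr_size))   # 4.0x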


@ -1,30 +1,34 @@
from hachoir_metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser
from hachoir_parser.container import OggFile, RealMediaFile
from hachoir_core.i18n import _
from hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate
from hachoir.metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir.parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser
from hachoir.parser.container import OggFile, RealMediaFile
from hachoir.core.i18n import _
from hachoir.core.tools import makePrintable, timedelta2seconds, humanBitRate
from datetime import timedelta
from hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST
from hachoir_metadata.safe import fault_tolerant, getValue
from hachoir.metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST
from hachoir.metadata.safe import fault_tolerant, getValue
def computeComprRate(meta, size):
if not meta.has("duration") \
or not meta.has("sample_rate") \
or not meta.has("bits_per_sample") \
or not meta.has("nb_channel") \
or not size:
or not meta.has("sample_rate") \
or not meta.has("bits_per_sample") \
or not meta.has("nb_channel") \
or not size:
return
orig_size = timedelta2seconds(meta.get("duration")) * meta.get('sample_rate') * meta.get('bits_per_sample') * meta.get('nb_channel')
orig_size = timedelta2seconds(meta.get("duration")) * meta.get('sample_rate') * meta.get(
'bits_per_sample') * meta.get('nb_channel')
meta.compr_rate = float(orig_size) / size
def computeBitRate(meta):
if not meta.has("bits_per_sample") \
or not meta.has("nb_channel") \
or not meta.has("sample_rate"):
or not meta.has("nb_channel") \
or not meta.has("sample_rate"):
return
meta.bit_rate = meta.get('bits_per_sample') * meta.get('nb_channel') * meta.get('sample_rate')
VORBIS_KEY_TO_ATTR = {
"ARTIST": "artist",
"ALBUM": "album",
@ -45,6 +49,7 @@ VORBIS_KEY_TO_ATTR = {
"LICENSE": "copyright",
}
@fault_tolerant
def readVorbisComment(metadata, comment):
metadata.producer = getValue(comment, "vendor")
@ -58,6 +63,7 @@ def readVorbisComment(metadata, comment):
elif value:
metadata.warning("Skip Vorbis comment %s: %s" % (key, value))
class OggMetadata(MultipleMetadata):
def extract(self, ogg):
granule_quotient = None
@ -106,7 +112,7 @@ class OggMetadata(MultipleMetadata):
def theoraHeader(self, header, meta):
meta.compression = "Theora"
meta.format_version = "Theora version %u.%u (revision %u)" % (\
meta.format_version = "Theora version %u.%u (revision %u)" % ( \
header["version_major"].value,
header["version_minor"].value,
header["version_revision"].value)
@ -126,6 +132,7 @@ class OggMetadata(MultipleMetadata):
meta.format_version = u"Vorbis version %s" % header["vorbis_version"].value
meta.bit_rate = header["bitrate_nominal"].value
class AuMetadata(RootMetadata):
def extract(self, audio):
self.sample_rate = audio["sample_rate"].value
@ -140,9 +147,10 @@ class AuMetadata(RootMetadata):
self.duration = timedelta(seconds=float(audio["audio_data"].size) / self.get('bit_rate'))
computeComprRate(self, audio["audio_data"].size)
class RealAudioMetadata(RootMetadata):
FOURCC_TO_BITRATE = {
u"28_8": 15200, # 28.8 kbit/sec (audio bit rate: 15.2 kbit/s)
u"28_8": 15200, # 28.8 kbit/sec (audio bit rate: 15.2 kbit/s)
u"14_4": 8000, # 14.4 kbit/sec
u"lpcJ": 8000, # 14.4 kbit/sec
}
@ -175,7 +183,7 @@ class RealAudioMetadata(RootMetadata):
@fault_tolerant
def useRoot(self, real):
self.bits_per_sample = 16 # FIXME: Is that correct?
self.bits_per_sample = 16 # FIXME: Is that correct?
if real["version"].value != 3:
self.sample_rate = real["sample_rate"].value
self.nb_channel = real["channels"].value
@ -190,6 +198,7 @@ class RealAudioMetadata(RootMetadata):
except LookupError:
pass
class RealMediaMetadata(MultipleMetadata):
KEY_TO_ATTR = {
"generated by": "producer",
@ -239,7 +248,8 @@ class RealMediaMetadata(MultipleMetadata):
meta.duration = timedelta(milliseconds=stream["duration"].value)
meta.mime_type = getValue(stream, "mime_type")
meta.title = getValue(stream, "desc")
self.addGroup("stream[%u]" % index, meta, "Stream #%u" % (1+index))
self.addGroup("stream[%u]" % index, meta, "Stream #%u" % (1 + index))
class MpegAudioMetadata(RootMetadata):
TAG_TO_KEY = {
@ -304,7 +314,7 @@ class MpegAudioMetadata(RootMetadata):
frame = mp3["/frames/frame[0]"]
self.nb_channel = (frame.getNbChannel(), frame["channel_mode"].display)
self.format_version = u"MPEG version %s layer %s" % \
(frame["version"].display, frame["layer"].display)
(frame["version"].display, frame["layer"].display)
self.sample_rate = frame.getSampleRate()
self.bits_per_sample = 16
if mp3["frames"].looksConstantBitRate():
@ -327,7 +337,7 @@ class MpegAudioMetadata(RootMetadata):
computeComprRate(self, mp3["frames"].size)
def computeBitrate(self, frame):
bit_rate = frame.getBitRate() # may returns None on error
bit_rate = frame.getBitRate() # may returns None on error
if not bit_rate:
return
self.bit_rate = (bit_rate, _("%s (constant)") % humanBitRate(bit_rate))
@ -356,10 +366,11 @@ class MpegAudioMetadata(RootMetadata):
return
bit_rate = total_bit_rate / count
self.bit_rate = (bit_rate,
_("%s (Variable bit rate)") % humanBitRate(bit_rate))
_("%s (Variable bit rate)") % humanBitRate(bit_rate))
duration = timedelta(seconds=float(mp3["frames"].size) / bit_rate)
self.duration = duration
class AiffMetadata(RootMetadata):
def extract(self, aiff):
if "common" in aiff:
@ -379,6 +390,7 @@ class AiffMetadata(RootMetadata):
if "codec" in info:
self.compression = info["codec"].display
class FlacMetadata(RootMetadata):
def extract(self, flac):
if "metadata/stream_info/content" in flac:
@ -396,6 +408,7 @@ class FlacMetadata(RootMetadata):
sec = float(sec) / info["sample_hertz"].value
self.duration = timedelta(seconds=sec)
registerExtractor(AuFile, AuMetadata)
registerExtractor(MpegAudioFile, MpegAudioMetadata)
registerExtractor(OggFile, OggMetadata)
@ -403,4 +416,3 @@ registerExtractor(RealMediaFile, RealMediaMetadata)
registerExtractor(RealAudioFile, RealAudioMetadata)
registerExtractor(AiffFile, AiffMetadata)
registerExtractor(FlacParser, FlacMetadata)
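For audio, computeComprRate reconstructs the uncompressed size in bits as duration x sample rate x bits per sample x channel count, then divides by the on-disk size. For one minute of CD-quality stereo against a hypothetical compressed stream of about 1 MB:

    duration = 60.0                # seconds
    sample_rate = 44100            # Hz
    bits_per_sample = 16
    nb_channel = 2

    orig_size = duration * sample_rate * bits_per_sample * nb_channel   # bits
    compr_size = 8 * 1000 * 1000   # hypothetical compressed size, in bits
    print("%.1fx" % (orig_size / compr_size))   # 10.6x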


@ -1,10 +1,11 @@
from hachoir_metadata.timezone import UTC
from hachoir.metadata.timezone import UTC
from datetime import date, datetime
# Year in 1850..2030
MIN_YEAR = 1850
MAX_YEAR = 2030
class Filter:
def __init__(self, valid_types, min=None, max=None):
self.types = valid_types
@ -20,15 +21,17 @@ class Filter:
return False
return True
class NumberFilter(Filter):
def __init__(self, min=None, max=None):
Filter.__init__(self, (int, long, float), min, max)
class DatetimeFilter(Filter):
def __init__(self, min=None, max=None):
Filter.__init__(self, (date, datetime),
datetime(MIN_YEAR, 1, 1),
datetime(MAX_YEAR, 12, 31))
datetime(MIN_YEAR, 1, 1),
datetime(MAX_YEAR, 12, 31))
self.min_date = date(MIN_YEAR, 1, 1)
self.max_date = date(MAX_YEAR, 12, 31)
self.min_tz = datetime(MIN_YEAR, 1, 1, tzinfo=UTC)
@ -42,11 +45,11 @@ class DatetimeFilter(Filter):
if not isinstance(value, self.types):
return True
if hasattr(value, "tzinfo") and value.tzinfo:
return (self.min_tz <= value <= self.max_tz)
return self.min_tz <= value <= self.max_tz
elif isinstance(value, datetime):
return (self.min <= value <= self.max)
return self.min <= value <= self.max
else:
return (self.min_date <= value <= self.max_date)
return self.min_date <= value <= self.max_date
DATETIME_FILTER = DatetimeFilter()
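These filters reject implausible values (for example, dates outside 1850..2030) before they are stored as metadata. The check reduces to a type test plus an optional min/max window, roughly:

    def in_window(value, valid_types, lo=None, hi=None):
        """Roughly the shape of the Filter check: type, then bounds."""
        if not isinstance(value, valid_types):
            return False
        if lo is not None and value < lo:
            return False
        if hi is not None and hi < value:
            return False
        return True

    print(in_window(1995, (int, float), 1850, 2030))   # True
    print(in_window(9999, (int, float), 1850, 2030))   # False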


@ -1,25 +1,30 @@
from hachoir_core.i18n import _, ngettext
from hachoir.core.i18n import _, ngettext
NB_CHANNEL_NAME = {1: _("mono"), 2: _("stereo")}
def humanAudioChannel(value):
return NB_CHANNEL_NAME.get(value, unicode(value))
def humanFrameRate(value):
if isinstance(value, (int, long, float)):
return _("%.1f fps") % value
else:
return value
def humanComprRate(rate):
return u"%.1fx" % rate
def humanAltitude(value):
return ngettext("%.1f meter", "%.1f meters", value) % value
def humanPixelSize(value):
return ngettext("%s pixel", "%s pixels", value) % value
def humanDPI(value):
return u"%s DPI" % value


@ -1,12 +1,13 @@
from hachoir_metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir_parser.image import (
from hachoir.metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir.parser.image import (
BmpFile, IcoFile, PcxFile, GifFile, PngFile, TiffFile,
XcfFile, TargaFile, WMF_File, PsdFile)
from hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel
from hachoir_parser.image.xcf import XcfProperty
from hachoir_core.i18n import _
from hachoir_metadata.safe import fault_tolerant
from hachoir.parser.image.png import getBitsPerPixel as pngBitsPerPixel
from hachoir.parser.image.xcf import XcfProperty
from hachoir.core.i18n import _
from hachoir.metadata.safe import fault_tolerant
def computeComprRate(meta, compr_size):
"""
@ -17,14 +18,15 @@ def computeComprRate(meta, compr_size):
Set "compr_data" with a string like "1.52x".
"""
if not meta.has("width") \
or not meta.has("height") \
or not meta.has("bits_per_pixel"):
or not meta.has("height") \
or not meta.has("bits_per_pixel"):
return
if not compr_size:
return
orig_size = meta.get('width') * meta.get('height') * meta.get('bits_per_pixel')
meta.compr_rate = float(orig_size) / compr_size
class BmpMetadata(RootMetadata):
def extract(self, image):
if "header" not in image:
@ -38,7 +40,8 @@ class BmpMetadata(RootMetadata):
self.nb_colors = hdr["used_colors"].value
self.bits_per_pixel = bpp
self.compression = hdr["compression"].display
self.format_version = u"Microsoft Bitmap version %s" % hdr.getFormatVersion()
self.format_version = u"Microsoft Bitmap version %s"\
% hdr.getFormatVersion()
self.width_dpi = hdr["horizontal_dpi"].value
self.height_dpi = hdr["vertical_dpi"].value
@ -46,30 +49,48 @@ class BmpMetadata(RootMetadata):
if "pixels" in image:
computeComprRate(self, image["pixels"].size)
class TiffMetadata(RootMetadata):
key_to_attr = {
"img_width": "width",
"img_height": "width",
# TODO: Enable that (need link to value)
# "description": "comment",
# "doc_name": "title",
# "orientation": "image_orientation",
"ImageWidth": "width",
"ImageLength": "height",
"Software": "producer",
"ImageDescription": "comment",
"DocumentName": "title",
"XResolution": "width_dpi",
"YResolution": "height_dpi",
"DateTime": "creation_date",
}
def extract(self, tiff):
if "ifd" in tiff:
self.useIFD(tiff["ifd"])
if "ifd[0]" in tiff:
self.useIFD(tiff["ifd[0]"])
def useIFD(self, ifd):
for field in ifd:
try:
attrname = self.key_to_attr[field.name]
except KeyError:
continue
if "value" not in field:
continue
value = field["value"].value
setattr(self, attrname, value)
attr = {}
for entry in ifd.array("entry"):
self.processIfdEntry(ifd, entry, attr)
if 'BitsPerSample' in attr and 'SamplesPerPixel' in attr:
self.bits_per_pixel = attr[
'BitsPerSample'] * attr['SamplesPerPixel']
@fault_tolerant
def processIfdEntry(self, ifd, entry, attr):
tag = entry["tag"].display
if tag in {"BitsPerSample", "SamplesPerPixel"}:
value = ifd.getEntryValues(entry)[0].value
attr[tag] = value
return
try:
attrname = self.key_to_attr[tag]
except KeyError:
return
value = ifd.getEntryValues(entry)[0].value
if tag in {"XResolution", "YResolution"}:
value = round(value)
setattr(self, attrname, value)
class IcoMetadata(MultipleMetadata):
color_to_bpp = {
@ -95,7 +116,9 @@ class IcoMetadata(MultipleMetadata):
bpp = 8
image.bits_per_pixel = bpp
image.setHeader(_("Icon #%u (%sx%s)")
% (1+index, image.get("width", "?"), image.get("height", "?")))
% (1 + index,
image.get("width", "?"),
image.get("height", "?")))
# Read compression from data (if available)
key = "icon_data[%u]/header/codec" % index
@ -108,6 +131,7 @@ class IcoMetadata(MultipleMetadata):
# Store new image
self.addGroup("image[%u]" % index, image)
class PcxMetadata(RootMetadata):
@fault_tolerant
def extract(self, pcx):
@ -123,6 +147,7 @@ class PcxMetadata(RootMetadata):
if "image_data" in pcx:
computeComprRate(self, pcx["image_data"].size)
class XcfMetadata(RootMetadata):
# Map image type to bits/pixel
TYPE_TO_BPP = {0: 24, 1: 8, 2: 8}
@ -131,7 +156,7 @@ class XcfMetadata(RootMetadata):
self.width = xcf["width"].value
self.height = xcf["height"].value
try:
self.bits_per_pixel = self.TYPE_TO_BPP[ xcf["type"].value ]
self.bits_per_pixel = self.TYPE_TO_BPP[xcf["type"].value]
except KeyError:
pass
self.format_version = xcf["type"].display
@ -156,6 +181,7 @@ class XcfMetadata(RootMetadata):
for prop in xcf.array("property"):
self.processProperty(prop)
class PngMetadata(RootMetadata):
TEXT_TO_ATTR = {
"software": "producer",
@ -181,7 +207,7 @@ class PngMetadata(RootMetadata):
self.comment = "%s=%s" % (keyword, text)
else:
self.comment = text
compr_size = sum( data.size for data in png.array("data") )
compr_size = sum(data.size for data in png.array("data"))
computeComprRate(self, compr_size)
@fault_tolerant
@ -221,17 +247,19 @@ class PngMetadata(RootMetadata):
# Read compression, timestamp, etc.
self.compression = header["compression"].display
class GifMetadata(RootMetadata):
def extract(self, gif):
self.useScreen(gif["/screen"])
if self.has("bits_per_pixel"):
self.nb_colors = (1 << self.get('bits_per_pixel'))
self.compression = _("LZW")
self.format_version = "GIF version %s" % gif["version"].value
self.format_version = "GIF version %s" % gif["version"].value
for comments in gif.array("comments"):
for comment in gif.array(comments.name + "/comment"):
self.comment = comment.value
if "graphic_ctl/has_transp" in gif and gif["graphic_ctl/has_transp"].value:
if ("graphic_ctl/has_transp" in gif
and gif["graphic_ctl/has_transp"].value):
self.pixel_format = _("Color index with transparency")
else:
self.pixel_format = _("Color index")
@ -242,6 +270,7 @@ class GifMetadata(RootMetadata):
self.height = screen["height"].value
self.bits_per_pixel = (1 + screen["size_global_map"].value)
class TargaMetadata(RootMetadata):
def extract(self, tga):
self.width = tga["width"].value
@ -253,6 +282,7 @@ class TargaMetadata(RootMetadata):
if "pixels" in tga:
computeComprRate(self, tga["pixels"].size)
class WmfMetadata(RootMetadata):
def extract(self, wmf):
if wmf.isAPM():
@ -277,6 +307,7 @@ class WmfMetadata(RootMetadata):
self.width = emf["width_px"].value
self.height = emf["height_px"].value
class PsdMetadata(RootMetadata):
@fault_tolerant
def extract(self, psd):
@ -286,6 +317,7 @@ class PsdMetadata(RootMetadata):
self.pixel_format = psd["color_mode"].display
self.compression = psd["compression"].display
registerExtractor(IcoFile, IcoMetadata)
registerExtractor(GifFile, GifMetadata)
registerExtractor(XcfFile, XcfMetadata)
@ -296,4 +328,3 @@ registerExtractor(PngFile, PngMetadata)
registerExtractor(TiffFile, TiffMetadata)
registerExtractor(WMF_File, WmfMetadata)
registerExtractor(PsdFile, PsdMetadata)


@ -1,19 +1,21 @@
from hachoir_metadata.metadata import RootMetadata, registerExtractor
from hachoir_metadata.image import computeComprRate
from hachoir_parser.image.exif import IFD, BasicIFDEntry
from hachoir_parser.image.jpeg import (
from hachoir.metadata.metadata import RootMetadata, registerExtractor
from hachoir.metadata.image import computeComprRate
from hachoir.parser.image.exif import IFD, BasicIFDEntry
from hachoir.parser.image.jpeg import (
JpegFile, JpegChunk,
QUALITY_HASH_COLOR, QUALITY_SUM_COLOR,
QUALITY_HASH_GRAY, QUALITY_SUM_GRAY)
from hachoir_core.field import MissingField
from hachoir_core.i18n import _
from hachoir_core.tools import makeUnicode
from hachoir_metadata.safe import fault_tolerant
from hachoir.field import MissingField
from hachoir.core.i18n import _
from hachoir.core.tools import makeUnicode
from hachoir.metadata.safe import fault_tolerant
from datetime import datetime
def deg2float(degree, minute, second):
return degree + (float(minute) + float(second) / 60.0) / 60.0
class JpegMetadata(RootMetadata):
EXIF_KEY = {
# Exif metadatas
@ -49,8 +51,8 @@ class JpegMetadata(RootMetadata):
}
IPTC_KEY = {
80: "author",
90: "city",
80: "author",
90: "city",
101: "country",
116: "copyright",
120: "title",
@ -133,13 +135,13 @@ class JpegMetadata(RootMetadata):
# Compute sum of all coefficients
sumcoeff = 0
for qt in qtlist:
coeff = qt.array("coeff")
for index in xrange(64):
coeff = qt.array("coeff")
for index in xrange(64):
sumcoeff += coeff[index].value
# Choose the right quality table and compute hash value
try:
hashval= qtlist[0]["coeff[2]"].value + qtlist[0]["coeff[53]"].value
hashval = qtlist[0]["coeff[2]"].value + qtlist[0]["coeff[53]"].value
if 2 <= len(qtlist):
hashval += qtlist[1]["coeff[0]"].value + qtlist[1]["coeff[63]"].value
hashtable = QUALITY_HASH_COLOR
@ -163,7 +165,7 @@ class JpegMetadata(RootMetadata):
@fault_tolerant
def extractAPP0(self, app0):
self.format_version = u"JFIF %u.%02u" \
% (app0["ver_maj"].value, app0["ver_min"].value)
% (app0["ver_maj"].value, app0["ver_min"].value)
if "y_density" in app0:
self.width_dpi = app0["x_density"].value
self.height_dpi = app0["y_density"].value
@ -190,7 +192,7 @@ class JpegMetadata(RootMetadata):
if not value:
return
if isinstance(value, float):
value = (value, u"1/%g" % (1/value))
value = (value, u"1/%g" % (1 / value))
elif entry["type"].value in (BasicIFDEntry.TYPE_RATIONAL, BasicIFDEntry.TYPE_SIGNED_RATIONAL):
value = (value, u"%.3g" % value)
@ -210,7 +212,7 @@ class JpegMetadata(RootMetadata):
datestamp = None
for entry in ifd.array("entry"):
tag = entry["tag"].display
if tag not in ["GPSLatitudeRef", "GPSLongitudeRef","GPSAltitudeRef",
if tag not in ["GPSLatitudeRef", "GPSLongitudeRef", "GPSAltitudeRef",
"GPSLatitude", "GPSLongitude", "GPSAltitude",
"GPSDateStamp", "GPSTimeStamp"]:
continue
@ -299,5 +301,5 @@ class JpegMetadata(RootMetadata):
except ValueError:
pass
registerExtractor(JpegFile, JpegMetadata)
registerExtractor(JpegFile, JpegMetadata)
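deg2float above converts an EXIF GPS degrees/minutes/seconds triple into decimal degrees: degree + (minute + second/60)/60. For example, 48 deg 51 min 24 sec:

    def deg2float(degree, minute, second):
        return degree + (float(minute) + float(second) / 60.0) / 60.0

    print(deg2float(48, 51, 24))   # 48.8566..., roughly the latitude of Paris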


@ -1,17 +1,16 @@
# -*- coding: utf-8 -*-
from hachoir_core.compatibility import any, sorted
from hachoir_core.endian import endian_name
from hachoir_core.tools import makePrintable, makeUnicode
from hachoir_core.dict import Dict
from hachoir_core.error import error, HACHOIR_ERRORS
from hachoir_core.i18n import _
from hachoir_core.log import Logger
from hachoir_metadata.metadata_item import (
from hachoir.core.endian import endian_name
from hachoir.core.tools import makePrintable, makeUnicode
from hachoir.core.dict import Dict
from hachoir.core.i18n import _
from hachoir.core.log import Logger
from hachoir.metadata.metadata_item import (
MIN_PRIORITY, MAX_PRIORITY, QUALITY_NORMAL)
from hachoir_metadata.register import registerAllItems
from hachoir.metadata.register import registerAllItems
extractors = {}
class Metadata(Logger):
header = u"Metadata"
@ -41,7 +40,8 @@ class Metadata(Logger):
"""
# Invalid key?
if key not in self.__data:
raise KeyError(_("%s has no metadata '%s'") % (self.__class__.__name__, key))
raise KeyError(_("%s has no metadata '%s'") %
(self.__class__.__name__, key))
# Skip duplicates
self.__data[key].add(value)
@ -79,7 +79,8 @@ class Metadata(Logger):
item = self.getItem(key, index)
if item is None:
if default is None:
raise ValueError("Metadata has no value '%s' (index %s)" % (key, index))
raise ValueError(
"Metadata has no value '%s' (index %s)" % (key, index))
else:
return default
return item.value
@ -89,7 +90,7 @@ class Metadata(Logger):
data = self.__data[key]
except LookupError:
raise ValueError("Metadata has no value '%s'" % key)
return [ item.value for item in data ]
return [item.value for item in data]
def getText(self, key, default=None, index=0):
"""
@ -133,7 +134,7 @@ class Metadata(Logger):
@see __unicode__() and exportPlaintext()
"""
text = self.exportPlaintext()
return "\n".join( makePrintable(line, "ASCII") for line in text )
return "\n".join(makePrintable(line, "ASCII") for line in text)
def __unicode__(self):
r"""
@ -197,15 +198,57 @@ class Metadata(Logger):
else:
return None
def exportDictionary(self, priority=None, human=True, title=None):
r"""
Convert metadata to python Dictionary and skip datas
with priority lower than specified priority.
Default priority is Metadata.MAX_PRIORITY. If human flag is True, data
key are translated to better human name (eg. "bit_rate" becomes
"Bit rate") which may be translated using gettext.
If priority is too small, metadata are empty and so None is returned.
"""
if priority is not None:
priority = max(priority, MIN_PRIORITY)
priority = min(priority, MAX_PRIORITY)
else:
priority = MAX_PRIORITY
if not title:
title = self.header
text = {}
text[title] = {}
for data in sorted(self):
if priority < data.priority:
break
if not data.values:
continue
if human:
field = data.description
else:
field = data.key
text[title][field] = {}
for item in data.values:
if human:
value = item.text
else:
value = makeUnicode(item.value)
text[title][field] = value
return text
def __nonzero__(self):
return any(item for item in self.__data.itervalues())
class RootMetadata(Metadata):
def __init__(self, quality=QUALITY_NORMAL):
Metadata.__init__(self, None, quality)
class MultipleMetadata(RootMetadata):
header = _("Common")
def __init__(self, quality=QUALITY_NORMAL):
RootMetadata.__init__(self, quality)
object.__setattr__(self, "_MultipleMetadata__groups", Dict())
@ -257,7 +300,8 @@ class MultipleMetadata(RootMetadata):
title = key
else:
title = None
value = metadata.exportPlaintext(priority, human, line_prefix, title=title)
value = metadata.exportPlaintext(
priority, human, line_prefix, title=title)
if value:
text.extend(value)
if len(text):
@ -265,6 +309,23 @@ class MultipleMetadata(RootMetadata):
else:
return None
def exportDictionary(self, priority=None, human=True):
common = Metadata.exportDictionary(self, priority, human)
if common:
text = common
else:
text = {}
for key, metadata in self.__groups.items():
if not human:
title = key
else:
title = None
value = metadata.exportDictionary(priority, human, title=title)
if value:
text.update(value)
return text
def registerExtractor(parser, extractor):
assert parser not in extractors
assert issubclass(extractor, RootMetadata)
@ -288,9 +349,9 @@ def extractMetadata(parser, quality=QUALITY_NORMAL, **kwargs):
else:
metadata.extract(parser)
meta_extract_error = False
except HACHOIR_ERRORS, err:
except Exception as err:
error("Error during metadata extraction: %s" % unicode(err))
except Exception, err:
except Exception as err:
error("Error during metadata extraction: %s" % unicode(err))
if meta_extract_error:
@ -304,4 +365,3 @@ def extractMetadata(parser, quality=QUALITY_NORMAL, **kwargs):
metadata.mime_type = parser.mime_type
metadata.endian = endian_name[parser.endian]
return metadata
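The new exportDictionary walks the same items as exportPlaintext but returns a nested dict keyed by group title and field key (or its human-readable description when human is True). Combined with the package entry points, typical use would be roughly as follows (path and printed output are illustrative):

    from hachoir.parser import createParser
    from hachoir.metadata import extractMetadata

    parser = createParser("example.mp3")          # illustrative path
    if parser:
        with parser:
            meta = extractMetadata(parser)
        if meta:
            print(meta.exportDictionary(human=True))
            # e.g. {'Metadata': {'Duration': '3 min 12 sec', ...}}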


@ -1,7 +1,6 @@
from hachoir_core.tools import makeUnicode, normalizeNewline
from hachoir_core.error import HACHOIR_ERRORS
from hachoir_metadata import config
from hachoir_metadata.setter import normalizeString
from hachoir.core.tools import makeUnicode, normalizeNewline
from hachoir.metadata import config
from hachoir.metadata.setter import normalizeString
MIN_PRIORITY = 100
MAX_PRIORITY = 999
@ -12,14 +11,16 @@ QUALITY_NORMAL = 0.5
QUALITY_GOOD = 0.75
QUALITY_BEST = 1.0
class DataValue:
def __init__(self, value, text):
self.value = value
self.text = text
class Data:
def __init__(self, key, priority, description,
text_handler=None, type=None, filter=None, conversion=None):
text_handler=None, type=None, filter=None, conversion=None):
"""
handler is only used if value is neither str nor unicode; prototype:
def handler(value) -> str/unicode
@ -38,6 +39,9 @@ class Data:
self.priority = priority
self.conversion = conversion
def __lt__(self, other):
return self.priority < other.priority
def _createItem(self, value, text=None):
if text is None:
if isinstance(value, unicode):
@ -52,7 +56,8 @@ class Data:
def add(self, value):
if isinstance(value, tuple):
if len(value) != 2:
raise ValueError("Data.add() only accept tuple of 2 elements: (value,text)")
raise ValueError("Data.add() only accept "
"tuple of 2 elements: (value,text)")
value, text = value
else:
text = None
@ -61,21 +66,17 @@ class Data:
if value is None:
return
if isinstance(value, (str, unicode)):
value = normalizeString(value)
if not value:
return
# Convert string to Unicode string using charset ISO-8859-1
if self.conversion:
try:
new_value = self.conversion(self.metadata, self.key, value)
except HACHOIR_ERRORS, err:
self.metadata.warning("Error during conversion of %r value: %s" % (
self.key, err))
except Exception as err:
self.metadata.warning("Error during conversion of %r value: %s"
% (self.key, err))
return
if new_value is None:
dest_types = " or ".join(str(item.__name__) for item in self.type)
dest_types = " or ".join(str(item.__name__)
for item in self.type)
self.metadata.warning("Unable to convert %s=%r (%s) to %s" % (
self.key, value, type(value).__name__, dest_types))
return
@ -89,6 +90,11 @@ class Data:
elif isinstance(value, str):
value = unicode(value, "ISO-8859-1")
if isinstance(value, (str, unicode)):
value = normalizeString(value)
if not value:
return
if self.type and not isinstance(value, self.type):
dest_types = " or ".join(str(item.__name__) for item in self.type)
self.metadata.warning("Key %r: value %r type (%s) is not %s" % (
@ -98,8 +104,8 @@ class Data:
# Skip empty strings
if isinstance(value, unicode):
value = normalizeNewline(value)
if config.MAX_STR_LENGTH \
and config.MAX_STR_LENGTH < len(value):
if (config.MAX_STR_LENGTH
and config.MAX_STR_LENGTH < len(value)):
value = value[:config.MAX_STR_LENGTH] + "(...)"
# Skip duplicates
@ -108,7 +114,8 @@ class Data:
# Use filter
if self.filter and not self.filter(value):
self.metadata.warning("Skip value %s=%r (filter)" % (self.key, value))
self.metadata.warning("Skip value %s=%r (filter)"
% (self.key, value))
return
# For string, if you have "verylongtext" and "verylo",
@ -143,4 +150,3 @@ class Data:
def __cmp__(self, other):
return cmp(self.priority, other.priority)
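The __lt__ added above is what lets sorted() order Data items now that Python 3 ignores __cmp__. A self-contained sketch of the same ordering contract, using a stand-in class:

class StubData:  # stand-in with the same comparison as Data
    def __init__(self, key, priority):
        self.key, self.priority = key, priority

    def __lt__(self, other):
        return self.priority < other.priority

items = [StubData("comment", 902), StubData("title", 100), StubData("album", 200)]
print([d.key for d in sorted(items)])  # ['title', 'album', 'comment']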

View file

@ -0,0 +1,265 @@
from hachoir.metadata.metadata import RootMetadata, registerExtractor
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.container import SwfFile
from hachoir.parser.misc import TorrentFile, TrueTypeFontFile, PcfFile
from hachoir.field import isString
from hachoir.core.error import warning
from hachoir.parser import guessParser
from hachoir.metadata.setter import normalizeString
class TorrentMetadata(RootMetadata):
KEY_TO_ATTR = {
u"announce": "url",
u"comment": "comment",
u"creation_date": "creation_date",
}
INFO_TO_ATTR = {
u"length": "file_size",
u"name": "filename",
}
def extract(self, torrent):
for field in torrent[0]:
self.processRoot(field)
@fault_tolerant
def processRoot(self, field):
if field.name in self.KEY_TO_ATTR:
key = self.KEY_TO_ATTR[field.name]
value = field.value
setattr(self, key, value)
elif field.name == "info" and "value" in field:
for field in field["value"]:
self.processInfo(field)
@fault_tolerant
def processInfo(self, field):
if field.name in self.INFO_TO_ATTR:
key = self.INFO_TO_ATTR[field.name]
value = field.value
setattr(self, key, value)
elif field.name == "piece_length":
self.comment = "Piece length: %s" % field.display
class TTF_Metadata(RootMetadata):
NAMEID_TO_ATTR = {
0: "copyright", # Copyright notice
3: "title", # Unique font identifier
5: "version", # Version string
8: "author", # Manufacturer name
11: "url", # URL Vendor
14: "copyright", # License info URL
}
def extract(self, ttf):
if "header" in ttf:
self.extractHeader(ttf["header"])
if "names" in ttf:
self.extractNames(ttf["names"])
@fault_tolerant
def extractHeader(self, header):
self.creation_date = header["created"].value
self.last_modification = header["modified"].value
self.comment = u"Smallest readable size in pixels: %s pixels" % header["lowest"].value
self.comment = u"Font direction: %s" % header["font_dir"].display
@fault_tolerant
def extractNames(self, names):
offset = names["offset"].value
for header in names.array("header"):
key = header["nameID"].value
foffset = offset + header["offset"].value
field = names.getFieldByAddress(foffset * 8)
if not field or not isString(field):
continue
value = field.value
if key not in self.NAMEID_TO_ATTR:
continue
key = self.NAMEID_TO_ATTR[key]
if key == "version" and value.startswith(u"Version "):
# "Version 1.2" => "1.2"
value = value[8:]
setattr(self, key, value)
# deprecated
# class OLE2_Metadata(RootMetadata):
# SUMMARY_ID_TO_ATTR = {
# 2: "title", # Title
# 3: "title", # Subject
# 4: "author",
# 6: "comment",
# 8: "author", # Last saved by
# 12: "creation_date",
# 13: "last_modification",
# 14: "nb_page",
# 18: "producer",
# }
# IGNORE_SUMMARY = set((
# 1, # Code page
# ))
#
# DOC_SUMMARY_ID_TO_ATTR = {
# 3: "title", # Subject
# 14: "author", # Manager
# }
# IGNORE_DOC_SUMMARY = set((
# 1, # Code page
# ))
#
# def extract(self, ole2):
# self._extract(ole2)
#
# def _extract(self, fieldset):
# try:
# fieldset._feedAll()
# except StopIteration:
# pass
# if "root[0]" in fieldset:
# self._extract(self.getFragment(fieldset["root[0]"]))
# doc_summary = self.getField(fieldset, "doc_summary[0]")
# if doc_summary:
# self.useSummary(doc_summary, True)
# word_doc = self.getField(fieldset, "word_doc[0]")
# if word_doc:
# self.useWordDocument(word_doc)
# summary = self.getField(fieldset, "summary[0]")
# if summary:
# self.useSummary(summary, False)
#
# def getFragment(self, frag):
# stream = frag.getSubIStream()
# ministream = guessParser(stream)
# if not ministream:
# warning("Unable to create the OLE2 mini stream parser!")
# return frag
# return ministream
#
# def getField(self, fieldset, name):
# # _feedAll() is needed to make sure that we get all fragments
# # eg. summary[0], summary[1], ..., summary[n]
# try:
# fieldset._feedAll()
# except StopIteration:
# pass
# if name not in fieldset:
# return None
# field = fieldset[name]
# return self.getFragment(field)
#
# @fault_tolerant
# def useSummary(self, summary, is_doc_summary):
# if "os" in summary:
# self.os = summary["os"].display
# if "section[0]" not in summary:
# return
# summary = summary["section[0]"]
# for property in summary.array("property_index"):
# self.useProperty(summary, property, is_doc_summary)
#
# @fault_tolerant
# def useWordDocument(self, doc):
# self.comment = "Encrypted: %s" % doc["FIB/fEncrypted"].value
#
# @fault_tolerant
# def useProperty(self, summary, property, is_doc_summary):
# field = summary.getFieldByAddress(property["offset"].value * 8)
# if not field \
# or "value" not in field:
# return
# field = field["value"]
# if not field.hasValue():
# return
#
# # Get value
# value = field.value
# if isinstance(value, (str, unicode)):
# value = normalizeString(value)
# if not value:
# return
#
# # Get property identifier
# prop_id = property["id"].value
# if is_doc_summary:
# id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR
# ignore = self.IGNORE_DOC_SUMMARY
# else:
# id_to_attr = self.SUMMARY_ID_TO_ATTR
# ignore = self.IGNORE_SUMMARY
# if prop_id in ignore:
# return
#
# # Get Hachoir metadata key
# try:
# key = id_to_attr[prop_id]
# use_prefix = False
# except LookupError:
# key = "comment"
# use_prefix = True
# if use_prefix:
# prefix = property["id"].display
# if (prefix in ("TotalEditingTime", "LastPrinted")) \
# and (not field):
# # Ignore null time delta
# return
# value = "%s: %s" % (prefix, value)
# else:
# if (key == "last_modification") and (not field):
# # Ignore null timestamp
# return
# setattr(self, key, value)
#
class PcfMetadata(RootMetadata):
PROP_TO_KEY = {
'CHARSET_REGISTRY': 'charset',
'COPYRIGHT': 'copyright',
'WEIGHT_NAME': 'font_weight',
'FOUNDRY': 'author',
'FONT': 'title',
'_XMBDFED_INFO': 'producer',
}
def extract(self, pcf):
if "properties" in pcf:
self.useProperties(pcf["properties"])
def useProperties(self, properties):
last = properties["total_str_length"]
offset0 = last.address + last.size
for index in properties.array("property"):
# Search name and value
value = properties.getFieldByAddress(offset0 + index["value_offset"].value * 8)
if not value:
continue
value = value.value
if not value:
continue
name = properties.getFieldByAddress(offset0 + index["name_offset"].value * 8)
if not name:
continue
name = name.value
if name not in self.PROP_TO_KEY:
warning("Skip %s=%r" % (name, value))
continue
key = self.PROP_TO_KEY[name]
setattr(self, key, value)
class SwfMetadata(RootMetadata):
def extract(self, swf):
self.height = swf["rect/ymax"].value # twips
self.width = swf["rect/xmax"].value # twips
self.format_version = "flash version %s" % swf["version"].value
self.frame_rate = swf["frame_rate"].value
self.comment = "Frame count: %s" % swf["frame_count"].value
registerExtractor(TorrentFile, TorrentMetadata)
registerExtractor(TrueTypeFontFile, TTF_Metadata)
# registerExtractor(OLE2_File, OLE2_Metadata)
registerExtractor(PcfFile, PcfMetadata)
registerExtractor(SwfFile, SwfMetadata)
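The registerExtractor() calls above follow one pattern: pair a parser class with a RootMetadata subclass whose extract() copies parser fields onto metadata attributes. A hypothetical extractor in that shape (illustrative names only; the registration stays commented out because each parser class may only register one extractor):

from hachoir.metadata.metadata import RootMetadata, registerExtractor
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.misc import TorrentFile

class MiniTorrentMetadata(RootMetadata):
    def extract(self, torrent):
        for field in torrent[0]:
            self.processField(field)

    @fault_tolerant
    def processField(self, field):
        if field.name == "comment":
            self.comment = field.value

# registerExtractor(TorrentFile, MiniTorrentMetadata)  # would clash with TorrentMetadata above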

View file

@ -1,6 +1,7 @@
from hachoir_metadata.metadata import RootMetadata, registerExtractor
from hachoir_parser.program import ExeFile
from hachoir_metadata.safe import fault_tolerant, getValue
from hachoir.metadata.metadata import RootMetadata, registerExtractor
from hachoir.parser.program import ExeFile
from hachoir.metadata.safe import fault_tolerant, getValue
class ExeMetadata(RootMetadata):
KEY_TO_ATTR = {
@ -46,7 +47,7 @@ class ExeMetadata(RootMetadata):
if resource and "version_info/node[0]" in resource:
for node in resource.array("version_info/node[0]/node"):
if getValue(node, "name") == "StringFileInfo" \
and "node[0]" in node:
and "node[0]" in node:
self.readVersionInfo(node["node[0]"])
@fault_tolerant
@ -96,5 +97,5 @@ class ExeMetadata(RootMetadata):
elif key not in self.SKIP_KEY:
self.comment = "%s=%s" % (key, value)
registerExtractor(ExeFile, ExeMetadata)
registerExtractor(ExeFile, ExeMetadata)

View file

@ -0,0 +1,183 @@
from hachoir.core.i18n import _
from hachoir.core.tools import (
humanDuration, humanBitRate,
humanFrequency, humanBitSize, humanFilesize,
humanDatetime)
from hachoir.core.language import Language
from hachoir.metadata.filter import Filter, NumberFilter, DATETIME_FILTER
from datetime import date, datetime, timedelta
from hachoir.metadata.formatter import (
humanAudioChannel, humanFrameRate, humanComprRate, humanAltitude,
humanPixelSize, humanDPI)
from hachoir.metadata.setter import (
setDatetime, setTrackNumber, setTrackTotal, setLanguage)
from hachoir.metadata.metadata_item import Data
MIN_SAMPLE_RATE = 1000 # 1 kHz
MAX_SAMPLE_RATE = 192000 # 192 kHz
MAX_NB_CHANNEL = 8 # 8 channels
MAX_WIDTH = 20000 # 20 000 pixels
MAX_BIT_RATE = 500 * 1024 * 1024 # 500 Mbit/s
MAX_HEIGHT = MAX_WIDTH
MAX_DPI_WIDTH = 10000
MAX_DPI_HEIGHT = MAX_DPI_WIDTH
MAX_NB_COLOR = 2 ** 24 # 16 million colors
MAX_BITS_PER_PIXEL = 256 # 256 bits/pixel
MAX_FRAME_RATE = 150 # 150 frame/sec
MAX_NB_PAGE = 20000
MAX_COMPR_RATE = 1000.0
MIN_COMPR_RATE = 0.001
MAX_TRACK = 999
DURATION_FILTER = Filter(timedelta,
timedelta(milliseconds=1),
timedelta(days=365))
def registerAllItems(meta):
meta.register(Data("title", 100, _("Title"), type=unicode))
meta.register(Data("artist", 101, _("Artist"), type=unicode))
meta.register(Data("author", 102, _("Author"), type=unicode))
meta.register(Data("music_composer", 103, _("Music composer"), type=unicode))
meta.register(Data("album", 200, _("Album"), type=unicode))
meta.register(Data("duration", 201, _("Duration"),
# integer in milliseconds
type=timedelta,
text_handler=humanDuration,
filter=DURATION_FILTER))
meta.register(Data("nb_page", 202, _("Nb page"),
filter=NumberFilter(1, MAX_NB_PAGE)))
meta.register(Data("music_genre", 203, _("Music genre"),
type=unicode))
meta.register(Data("language", 204, _("Language"),
conversion=setLanguage,
type=Language))
meta.register(Data("track_number", 205, _("Track number"),
conversion=setTrackNumber,
filter=NumberFilter(1, MAX_TRACK),
type=(int, long)))
meta.register(Data("track_total", 206, _("Track total"),
conversion=setTrackTotal,
filter=NumberFilter(1, MAX_TRACK),
type=(int, long)))
meta.register(Data("organization", 210, _("Organization"),
type=unicode))
meta.register(Data("version", 220, _("Version")))
meta.register(Data("width", 301, _("Image width"),
filter=NumberFilter(1, MAX_WIDTH),
type=(int, long),
text_handler=humanPixelSize))
meta.register(Data("height", 302, _("Image height"),
filter=NumberFilter(1, MAX_HEIGHT),
type=(int, long),
text_handler=humanPixelSize))
meta.register(Data("nb_channel", 303, _("Channel"),
text_handler=humanAudioChannel,
filter=NumberFilter(1, MAX_NB_CHANNEL),
type=(int, long)))
meta.register(Data("sample_rate", 304, _("Sample rate"),
text_handler=humanFrequency,
filter=NumberFilter(MIN_SAMPLE_RATE, MAX_SAMPLE_RATE),
type=(int, long, float)))
meta.register(Data("bits_per_sample", 305, _("Bits/sample"),
text_handler=humanBitSize,
filter=NumberFilter(1, 64),
type=(int, long)))
meta.register(Data("image_orientation", 306, _("Image orientation")))
meta.register(Data("nb_colors", 307, _("Number of colors"), filter=NumberFilter(1, MAX_NB_COLOR), type=(int, long)))
meta.register(Data("bits_per_pixel", 308, _("Bits/pixel"), filter=NumberFilter(1, MAX_BITS_PER_PIXEL), type=(int, long)))
meta.register(Data("filename", 309, _("File name"), type=unicode))
meta.register(Data("file_size", 310, _("File size"),
text_handler=humanFilesize, type=(int, long)))
meta.register(Data("pixel_format", 311, _("Pixel format")))
meta.register(Data("compr_size", 312, _("Compressed file size"),
text_handler=humanFilesize, type=(int, long)))
meta.register(Data("compr_rate", 313, _("Compression rate"),
text_handler=humanComprRate,
filter=NumberFilter(MIN_COMPR_RATE, MAX_COMPR_RATE), type=(int, long, float)))
meta.register(Data("width_dpi", 320, _("Image DPI width"),
filter=NumberFilter(1, MAX_DPI_WIDTH),
type=(int, long),
text_handler=humanDPI))
meta.register(Data("height_dpi", 321, _("Image DPI height"),
filter=NumberFilter(1, MAX_DPI_HEIGHT),
type=(int, long),
text_handler=humanDPI))
meta.register(Data("file_attr", 400, _("File attributes")))
meta.register(Data("file_type", 401, _("File type")))
meta.register(Data("subtitle_author", 402, _("Subtitle author"), type=unicode))
meta.register(Data("creation_date", 500, _("Creation date"),
text_handler=humanDatetime,
filter=DATETIME_FILTER,
type=(datetime, date),
conversion=setDatetime))
meta.register(Data("last_modification", 501, _("Last modification"),
text_handler=humanDatetime,
filter=DATETIME_FILTER,
type=(datetime, date),
conversion=setDatetime))
meta.register(Data("latitude", 510, _("Latitude"), type=float))
meta.register(Data("longitude", 511, _("Longitude"), type=float))
meta.register(Data("altitude", 512, _("Altitude"), type=float,
text_handler=humanAltitude))
meta.register(Data("location", 530, _("Location"), type=unicode))
meta.register(Data("city", 531, _("City"), type=unicode))
meta.register(Data("country", 532, _("Country"), type=unicode))
meta.register(Data("charset", 540, _("Charset"), type=unicode))
meta.register(Data("font_weight", 550, _("Font weight")))
meta.register(Data("camera_aperture", 520, _("Camera aperture")))
meta.register(Data("camera_focal", 521, _("Camera focal")))
meta.register(Data("camera_exposure", 522, _("Camera exposure")))
meta.register(Data("camera_brightness", 530, _("Camera brightness")))
meta.register(Data("camera_model", 531, _("Camera model"), type=unicode))
meta.register(Data("camera_manufacturer", 532, _("Camera manufacturer"),
type=unicode))
meta.register(Data("compression", 600, _("Compression")))
meta.register(Data("copyright", 601, _("Copyright"), type=unicode))
meta.register(Data("url", 602, _("URL"), type=unicode))
meta.register(Data("frame_rate", 603, _("Frame rate"),
text_handler=humanFrameRate,
filter=NumberFilter(1, MAX_FRAME_RATE),
type=(int, long, float)))
meta.register(Data("bit_rate", 604, _("Bit rate"),
text_handler=humanBitRate,
filter=NumberFilter(1, MAX_BIT_RATE),
type=(int, long, float)))
meta.register(Data("aspect_ratio", 604, _("Aspect ratio"),
type=(int, long, float)))
meta.register(Data("thumbnail_size", 604, _("Thumbnail size"),
text_handler=humanFilesize, type=(int, long, float)))
meta.register(Data("iso_speed_ratings", 800, _("ISO speed rating")))
meta.register(Data("exif_version", 801, _("EXIF version")))
meta.register(Data("date_time_original", 802, _("Date-time original"),
text_handler=humanDatetime,
filter=DATETIME_FILTER,
type=(datetime, date), conversion=setDatetime))
meta.register(Data("date_time_digitized", 803, _("Date-time digitized"),
text_handler=humanDatetime,
filter=DATETIME_FILTER,
type=(datetime, date), conversion=setDatetime))
meta.register(Data("compressed_bits_per_pixel", 804, _("Compressed bits per pixel"), type=(int, long, float)))
meta.register(Data("shutter_speed_value", 805, _("Shutter speed"), type=(int, long, float)))
meta.register(Data("aperture_value", 806, _("Aperture")))
meta.register(Data("exposure_bias_value", 807, _("Exposure bias")))
meta.register(Data("focal_length", 808, _("Focal length")))
meta.register(Data("flashpix_version", 809, _("Flashpix version")))
meta.register(Data("focal_plane_x_resolution", 810, _("Focal plane width")))
meta.register(Data("focal_plane_y_resolution", 811, _("Focal plane height"), type=float))
meta.register(Data("focal_length_in_35mm_film", 812, _("Focal length in 35mm film")))
meta.register(Data("os", 900, _("OS"), type=unicode))
meta.register(Data("producer", 901, _("Producer"), type=unicode))
meta.register(Data("comment", 902, _("Comment"), type=unicode))
meta.register(Data("format_version", 950, _("Format version"), type=unicode))
meta.register(Data("mime_type", 951, _("MIME type"), type=unicode))
meta.register(Data("endian", 952, _("Endianness"), type=unicode))

View file

@ -2,22 +2,23 @@
Extract metadata from RIFF file format: AVI video and WAV sound.
"""
from hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor
from hachoir_metadata.safe import fault_tolerant, getValue
from hachoir_parser.container.riff import RiffFile
from hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO
from hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds
from hachoir_core.i18n import _
from hachoir_metadata.audio import computeComprRate as computeAudioComprRate
from hachoir.metadata.metadata import Metadata, MultipleMetadata, registerExtractor
from hachoir.metadata.safe import fault_tolerant, getValue
from hachoir.parser.container.riff import RiffFile
from hachoir.parser.video.fourcc import UNCOMPRESSED_AUDIO
from hachoir.core.tools import humanFilesize, makeUnicode, timedelta2seconds
from hachoir.core.i18n import _
from hachoir.metadata.audio import computeComprRate as computeAudioComprRate
from datetime import timedelta
class RiffMetadata(MultipleMetadata):
TAG_TO_KEY = {
"INAM": "title",
"IART": "artist",
"ICMT": "comment",
"ICOP": "copyright",
"IENG": "author", # (engineer)
"IENG": "author", # (engineer)
"ISFT": "producer",
"ICRD": "creation_date",
"IDIT": "creation_date",
@ -29,7 +30,7 @@ class RiffMetadata(MultipleMetadata):
self.extractWAVE(riff)
size = getValue(riff, "audio_data/size")
if size:
computeAudioComprRate(self, size*8)
computeAudioComprRate(self, size * 8)
elif type == "AVI ":
if "headers" in riff:
if 'scan_index' in kwargs:
@ -64,15 +65,18 @@ class RiffMetadata(MultipleMetadata):
self.compression = format["codec"].display
if "nb_sample/nb_sample" in wav \
and 0 < format["sample_per_sec"].value:
self.duration = timedelta(seconds=float(wav["nb_sample/nb_sample"].value) / format["sample_per_sec"].value)
and 0 < format["sample_per_sec"].value:
self.duration = timedelta(seconds=float(
wav["nb_sample/nb_sample"].value) / format["sample_per_sec"].value)
if format["codec"].value in UNCOMPRESSED_AUDIO:
# Codec with fixed bit rate
self.bit_rate = format["nb_channel"].value * format["bit_per_sample"].value * format["sample_per_sec"].value
self.bit_rate = format[
"nb_channel"].value * format["bit_per_sample"].value * format["sample_per_sec"].value
if not self.has("duration") \
and "audio_data/size" in wav \
and self.has("bit_rate"):
duration = float(wav["audio_data/size"].value)*8 / self.get('bit_rate')
and "audio_data/size" in wav \
and self.has("bit_rate"):
duration = float(wav["audio_data/size"].value) * \
8 / self.get('bit_rate')
self.duration = timedelta(seconds=duration)
def extractInfo(self, fieldset):
@ -88,12 +92,13 @@ class RiffMetadata(MultipleMetadata):
@fault_tolerant
def extractAVIVideo(self, header, meta):
meta.compression = "%s (fourcc:\"%s\")" \
% (header["fourcc"].display, makeUnicode(header["fourcc"].value))
% (header["fourcc"].display, makeUnicode(header["fourcc"].value))
if header["rate"].value and header["scale"].value:
fps = float(header["rate"].value) / header["scale"].value
meta.frame_rate = fps
if 0 < fps:
self.duration = meta.duration = timedelta(seconds=float(header["length"].value) / fps)
self.duration = meta.duration = timedelta(
seconds=float(header["length"].value) / fps)
if "../stream_fmt/width" in header:
format = header["../stream_fmt"]
@ -114,11 +119,13 @@ class RiffMetadata(MultipleMetadata):
if "../stream_hdr" in format:
header = format["../stream_hdr"]
if header["rate"].value and header["scale"].value:
frame_rate = float(header["rate"].value) / header["scale"].value
meta.duration = timedelta(seconds=float(header["length"].value) / frame_rate)
frame_rate = float(
header["rate"].value) / header["scale"].value
meta.duration = timedelta(seconds=float(
header["length"].value) / frame_rate)
if header["fourcc"].value != "":
meta.compression = "%s (fourcc:\"%s\")" \
% (format["codec"].display, header["fourcc"].value)
% (format["codec"].display, header["fourcc"].value)
if not meta.has("compression"):
meta.compression = format["codec"].display
@ -129,7 +136,8 @@ class RiffMetadata(MultipleMetadata):
uncompr = meta.get('bit_rate', 0)
if not uncompr:
return
compr = meta.get('nb_channel') * meta.get('sample_rate') * meta.get('bits_per_sample', default=16)
compr = meta.get('nb_channel') * meta.get('sample_rate') * \
meta.get('bits_per_sample', default=16)
if not compr:
return
meta.compr_rate = float(compr) / uncompr
@ -167,13 +175,14 @@ class RiffMetadata(MultipleMetadata):
# Compute global bit rate
if self.has("duration") and "/movie/size" in headers:
self.bit_rate = float(headers["/movie/size"].value) * 8 / timedelta2seconds(self.get('duration'))
self.bit_rate = float(
headers["/movie/size"].value) * 8 / timedelta2seconds(self.get('duration'))
# Video has index?
scan_index = (True, kwargs['scan_index'])['scan_index' in kwargs]
if scan_index and "/index" in headers:
self.comment = _("Has audio/video index (%s)") \
% humanFilesize(headers["/index"].size/8)
% humanFilesize(headers["/index"].size // 8)
@fault_tolerant
def extractAnim(self, riff):
@ -190,5 +199,5 @@ class RiffMetadata(MultipleMetadata):
if not self.has("frame_rate") and "anim_hdr/jiffie_rate" in riff:
self.frame_rate = 60.0 / riff["anim_hdr/jiffie_rate"].value
registerExtractor(RiffFile, RiffMetadata)
registerExtractor(RiffFile, RiffMetadata)
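Worked numbers for the computeComprRate() logic above: the stored rate is the uncompressed-equivalent bit rate (channels x sample rate x bits per sample) divided by the stream's actual bit rate. The figures below are illustrative only:

nb_channel, sample_rate, bits_per_sample = 2, 44100, 16
uncompressed = nb_channel * sample_rate * bits_per_sample  # 1411200 bit/s
actual_bit_rate = 128000                                   # e.g. a 128 kbit/s stream
print(float(uncompressed) / actual_bit_rate)               # ~11.0 -> compr_rate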

View file

@ -1,27 +1,31 @@
from hachoir_core.error import HACHOIR_ERRORS, warning
from hachoir.core.error import warning
def fault_tolerant(func, *args):
def safe_func(*args, **kw):
try:
func(*args, **kw)
except HACHOIR_ERRORS, err:
except Exception as err:
warning("Error when calling function %s(): %s" % (
func.__name__, err))
return safe_func
def getFieldAttribute(fieldset, key, attrname):
try:
field = fieldset[key]
if field.hasValue():
return getattr(field, attrname)
except HACHOIR_ERRORS, err:
except Exception as err:
warning("Unable to get %s of field %s/%s: %s" % (
attrname, fieldset.path, key, err))
return None
def getValue(fieldset, key):
return getFieldAttribute(fieldset, key, "value")
def getDisplay(fieldset, key):
return getFieldAttribute(fieldset, key, "display")
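A quick sketch of the fault_tolerant contract after the HACHOIR_ERRORS removal above: any exception raised by the wrapped call is logged as a warning instead of propagating, and the wrapper returns None:

from hachoir.metadata.safe import fault_tolerant

@fault_tolerant
def risky(divisor):
    return 1 / divisor  # ZeroDivisionError is caught and logged

risky(0)  # emits a warning; execution continues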

View file

@ -1,10 +1,10 @@
from datetime import date, datetime
import re
from hachoir_core.language import Language
from hachoir.core.language import Language
from locale import setlocale, LC_ALL
from time import strptime
from hachoir_metadata.timezone import createTimezone
from hachoir_metadata import config
from hachoir.metadata.timezone import createTimezone
from hachoir.metadata import config
NORMALIZE_REGEX = re.compile("[-/.: ]+")
YEAR_REGEX1 = re.compile("^([0-9]{4})$")
@ -13,10 +13,12 @@ YEAR_REGEX1 = re.compile("^([0-9]{4})$")
DATE_REGEX1 = re.compile("^([0-9]{4})~([01][0-9])~([0-9]{2})$")
# Datetime regex: YYYY-MM-DD HH:MM:SS (ISO format)
DATETIME_REGEX1 = re.compile("^([0-9]{4})~([01][0-9])~([0-9]{2})~([0-9]{1,2})~([0-9]{2})~([0-9]{2})$")
DATETIME_REGEX1 = re.compile("^([0-9]{4})~([01][0-9])~([0-9]{2})"
"~([0-9]{1,2})~([0-9]{2})~([0-9]{1,2})$")
# Datetime regex: "MM-DD-YYYY HH:MM:SS" (FR format)
DATETIME_REGEX2 = re.compile("^([01]?[0-9])~([0-9]{2})~([0-9]{4})~([0-9]{1,2})~([0-9]{2})~([0-9]{2})$")
DATETIME_REGEX2 = re.compile("^([01]?[0-9])~([0-9]{2})~([0-9]{4})"
"~([0-9]{1,2})~([0-9]{2})~([0-9]{1,2})$")
# Timezone regex: "(...) +0200"
TIMEZONE_REGEX = re.compile("^(.*)~([+-][0-9]{2})00$")
@ -30,6 +32,7 @@ RIFF_TIMESTAMP = "%a~%b~%d~%H~%M~%S~%Y"
# Timestamp: 'Thu, 19 Jul 2007 09:03:57'
ISO_TIMESTAMP = "%a,~%d~%b~%Y~%H~%M~%S"
def parseDatetime(value):
"""
Year and date:
@ -125,6 +128,7 @@ def parseDatetime(value):
setlocale(LC_ALL, current_locale)
return None
def setDatetime(meta, key, value):
if isinstance(value, (str, unicode)):
return parseDatetime(value)
@ -132,6 +136,7 @@ def setDatetime(meta, key, value):
return value
return None
def setLanguage(meta, key, value):
"""
>>> setLanguage(None, None, "fre")
@ -141,6 +146,7 @@ def setLanguage(meta, key, value):
"""
return Language(value)
def setTrackTotal(meta, key, total):
"""
>>> setTrackTotal(None, None, "10")
@ -152,6 +158,7 @@ def setTrackTotal(meta, key, total):
meta.warning("Invalid track total: %r" % total)
return None
def setTrackNumber(meta, key, number):
if isinstance(number, (int, long)):
return number
@ -164,8 +171,8 @@ def setTrackNumber(meta, key, number):
meta.warning("Invalid track number: %r" % number)
return None
def normalizeString(text):
if config.RAW_OUTPUT:
return text
return text.strip(" \t\v\n\r\0")

View file

@ -1,5 +1,6 @@
from datetime import tzinfo, timedelta
class TimezoneUTC(tzinfo):
"""UTC timezone"""
ZERO = timedelta(0)
@ -16,10 +17,12 @@ class TimezoneUTC(tzinfo):
def __repr__(self):
return "<TimezoneUTC delta=0, name=u'UTC'>"
class Timezone(TimezoneUTC):
"""Fixed offset in hour from UTC."""
def __init__(self, offset):
self._offset = timedelta(minutes=offset*60)
self._offset = timedelta(minutes=offset * 60)
self._name = u"%+03u00" % offset
def utcoffset(self, dt):
@ -32,11 +35,12 @@ class Timezone(TimezoneUTC):
return "<Timezone delta=%s, name='%s'>" % (
self._offset, self._name)
UTC = TimezoneUTC()
def createTimezone(offset):
if offset:
return Timezone(offset)
else:
return UTC
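Usage sketch for the helpers above: createTimezone(0) returns the shared UTC instance, while any non-zero offset builds a fixed-offset Timezone:

from datetime import datetime
from hachoir.metadata.timezone import createTimezone

tz = createTimezone(2)  # UTC+2
print(datetime(2007, 7, 19, 9, 3, 57, tzinfo=tz).utcoffset())  # 2:00:00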

View file

@ -1,16 +1,17 @@
from hachoir_core.field import MissingField
from hachoir_metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir_metadata.metadata_item import QUALITY_GOOD
from hachoir_metadata.safe import fault_tolerant
from hachoir_parser.video import MovFile, AsfFile, FlvFile
from hachoir_parser.video.asf import Descriptor as ASF_Descriptor
from hachoir_parser.container import MkvFile
from hachoir_parser.container.mkv import dateToDatetime
from hachoir_core.i18n import _
from hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds
from hachoir.field import MissingField
from hachoir.metadata.metadata import (registerExtractor,
Metadata, RootMetadata, MultipleMetadata)
from hachoir.metadata.metadata_item import QUALITY_GOOD
from hachoir.metadata.safe import fault_tolerant
from hachoir.parser.video import AsfFile, FlvFile
from hachoir.parser.video.asf import Descriptor as ASF_Descriptor
from hachoir.parser.container import MkvFile, MP4File
from hachoir.parser.container.mkv import dateToDatetime
from hachoir.core.i18n import _
from hachoir.core.tools import makeUnicode, makePrintable, timedelta2seconds
from datetime import timedelta
class MkvMetadata(MultipleMetadata):
tag_key = {
"TITLE": "title",
@ -115,7 +116,7 @@ class MkvMetadata(MultipleMetadata):
def processSimpleTag(self, tag):
if "TagName/unicode" not in tag \
or "TagString/unicode" not in tag:
or "TagString/unicode" not in tag:
return
name = tag["TagName/unicode"].value
if name not in self.tag_key:
@ -147,6 +148,7 @@ class MkvMetadata(MultipleMetadata):
if "Title/unicode" in info:
self.title = info["Title/unicode"].value
class FlvMetadata(MultipleMetadata):
def extract(self, flv):
if "video[0]" in flv:
@ -211,7 +213,8 @@ class FlvMetadata(MultipleMetadata):
elif key == "height":
self.height = int(entry["value"].value)
class MovMetadata(RootMetadata):
class MP4Metadata(RootMetadata):
def extract(self, mov):
for atom in mov:
if "movie" in atom:
@ -222,8 +225,8 @@ class MovMetadata(RootMetadata):
self.creation_date = hdr["creation_date"].value
self.last_modification = hdr["lastmod_date"].value
self.duration = timedelta(seconds=float(hdr["duration"].value) / hdr["time_scale"].value)
self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value*100)
self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100)
self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value * 100)
self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value) * 100)
@fault_tolerant
def processTrackHeader(self, hdr):
@ -396,6 +399,8 @@ class AsfMetadata(MultipleMetadata):
# It doesn't work when the video uses /header/content/bitrate_mutex
# since the codec list are shared between streams but... how is it
# shared?
# key = "codec_list/content/codec[%u]" % index
# if key in header:
# codec = header[key]
@ -406,8 +411,7 @@ class AsfMetadata(MultipleMetadata):
# else:
# meta.compression = text
registerExtractor(MovFile, MovMetadata)
registerExtractor(AsfFile, AsfMetadata)
registerExtractor(FlvFile, FlvMetadata)
registerExtractor(MkvFile, MkvMetadata)
registerExtractor(MP4File, MP4Metadata)

View file

@ -0,0 +1,4 @@
from hachoir.parser.parser import ValidateError, HachoirParser, Parser
from hachoir.parser.parser_list import ParserList, HachoirParserList
from hachoir.parser.guess import (QueryParser, guessParser, createParser)
from hachoir.parser import (archive, audio, container, image, misc, network, program, video)
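These re-exports are the public parser surface. A minimal usage sketch (the path is a placeholder):

from hachoir.parser import createParser

parser = createParser(u"archive.cab")  # picks a parser from the registry
if parser:
    print(parser.description, parser.mime_type)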

View file

@ -0,0 +1,14 @@
from hachoir.parser.archive.ace import AceFile
from hachoir.parser.archive.ar import ArchiveFile
from hachoir.parser.archive.bomstore import BomFile
from hachoir.parser.archive.bzip2_parser import Bzip2Parser
from hachoir.parser.archive.cab import CabFile
from hachoir.parser.archive.gzip_parser import GzipParser
from hachoir.parser.archive.tar import TarFile
from hachoir.parser.archive.zip import ZipFile
from hachoir.parser.archive.rar import RarFile
from hachoir.parser.archive.rpm import RpmFile
from hachoir.parser.archive.sevenzip import SevenZipParser
from hachoir.parser.archive.mar import MarFile
from hachoir.parser.archive.mozilla_ar import MozillaArchive
from hachoir.parser.archive.zlib import ZlibData

View file

@ -11,15 +11,15 @@ Author: Christophe Gisquet <christophe.gisquet@free.fr>
Creation date: 19 January 2006
"""
from hachoir_parser import Parser
from hachoir_core.field import (StaticFieldSet, FieldSet,
Bit, Bits, NullBits, RawBytes, Enum,
UInt8, UInt16, UInt32,
PascalString8, PascalString16, String,
TimeDateMSDOS32)
from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_parser.common.msdos import MSDOSFileAttr32
from hachoir.parser import Parser
from hachoir.field import (StaticFieldSet, FieldSet,
Bit, Bits, NullBits, RawBytes, Enum,
UInt8, UInt16, UInt32,
PascalString8, PascalString16, String,
TimeDateMSDOS32)
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.parser.common.msdos import MSDOSFileAttr32
MAGIC = "**ACE**"
@ -54,8 +54,9 @@ COMPRESSION_MODE = {
4: "best",
}
# TODO: Computing the CRC16 would also prove useful
#def markerValidate(self):
# def markerValidate(self):
# return not self["extend"].value and self["signature"].value == MAGIC and \
# self["host_os"].value<12
@ -73,9 +74,11 @@ class MarkerFlags(StaticFieldSet):
(Bit, "solid", "Archive uses solid compression")
)
def markerFlags(self):
yield MarkerFlags(self, "flags", "Marker flags")
def markerHeader(self):
yield String(self, "signature", 7, "Signature")
yield UInt8(self, "ver_extract", "Version needed to extract archive")
@ -94,6 +97,7 @@ def markerHeader(self):
yield RawBytes(self, "compressed_comment", size.value, \
"Compressed comment")
class FileFlags(StaticFieldSet):
format = (
(Bit, "extend", "Whether the header is extended"),
@ -105,9 +109,11 @@ class FileFlags(StaticFieldSet):
(Bit, "solid", "File compressed using previously archived files")
)
def fileFlags(self):
yield FileFlags(self, "flags", "File flags")
def fileHeader(self):
yield filesizeHandler(UInt32(self, "compressed_size", "Size of the compressed file"))
yield filesizeHandler(UInt32(self, "uncompressed_size", "Uncompressed file size"))
@ -129,21 +135,24 @@ def fileHeader(self):
if self["comment_size"].value > 0:
yield RawBytes(self, "comment_data", self["comment_size"].value, "Comment data")
def fileBody(self):
size = self["compressed_size"].value
if size > 0:
yield RawBytes(self, "compressed_data", size, "Compressed data")
def fileDesc(self):
return "File entry: %s (%s)" % (self["filename"].value, self["compressed_size"].display)
def recoveryHeader(self):
yield filesizeHandler(UInt32(self, "rec_blk_size", "Size of recovery data"))
self.body_size = self["rec_blk_size"].size
yield String(self, "signature", 7, "Signature, normally '**ACE**'")
yield textHandler(UInt32(self, "relative_start",
"Relative start (to this block) of the data this block is mode of"),
hexadecimal)
"Relative start (to this block) of the data this block is mode of"),
hexadecimal)
yield UInt32(self, "num_blocks", "Number of blocks the data is split into")
yield UInt32(self, "size_blocks", "Size of these blocks")
yield UInt16(self, "crc16_blocks", "CRC16 over recovery data")
@ -154,9 +163,11 @@ def recoveryHeader(self):
yield RawBytes(self, "data[]", size, "Recovery block %i" % index)
yield RawBytes(self, "xor_data", size, "The XOR value of the above data blocks")
def recoveryDesc(self):
return "Recovery block, size=%u" % self["body_size"].display
def newRecoveryHeader(self):
"""
This header is described nowhere
@ -165,12 +176,13 @@ def newRecoveryHeader(self):
yield filesizeHandler(UInt32(self, "body_size", "Size of the unknown body following"))
self.body_size = self["body_size"].value
yield textHandler(UInt32(self, "unknown[]", "Unknown field, probably 0"),
hexadecimal)
hexadecimal)
yield String(self, "signature", 7, "Signature, normally '**ACE**'")
yield textHandler(UInt32(self, "relative_start",
"Offset (=crc16's) of this block in the file"), hexadecimal)
"Offset (=crc16's) of this block in the file"), hexadecimal)
yield textHandler(UInt32(self, "unknown[]",
"Unknown field, probably 0"), hexadecimal)
"Unknown field, probably 0"), hexadecimal)
class BaseFlags(StaticFieldSet):
format = (
@ -178,18 +190,22 @@ class BaseFlags(StaticFieldSet):
(NullBits, "unused", 15, "Unused bit flags")
)
def parseFlags(self):
yield BaseFlags(self, "flags", "Unknown flags")
def parseHeader(self):
if self["flags/extend"].value:
yield filesizeHandler(UInt32(self, "body_size", "Size of the unknown body following"))
self.body_size = self["body_size"].value
def parseBody(self):
if self.body_size > 0:
yield RawBytes(self, "body_data", self.body_size, "Body data, unhandled")
class Block(FieldSet):
TAG_INFO = {
0: ("header", "Archiver header", markerFlags, markerHeader, None),
@ -231,7 +247,7 @@ class Block(FieldSet):
# Rest of the header
for field in self.parseHeader(self):
yield field
size = self["head_size"].value - (self.current_size//8) + (2+2)
size = self["head_size"].value - (self.current_size // 8) + (2 + 2)
if size > 0:
yield RawBytes(self, "extra_data", size, "Extra header data, unhandled")
@ -245,6 +261,7 @@ class Block(FieldSet):
else:
return "Block: %s" % self["type"].display
class AceFile(Parser):
endian = LITTLE_ENDIAN
PARSER_TAGS = {
@ -252,16 +269,15 @@ class AceFile(Parser):
"category": "archive",
"file_ext": ("ace",),
"mime": (u"application/x-ace-compressed",),
"min_size": 50*8,
"min_size": 50 * 8,
"description": "ACE archive"
}
def validate(self):
if self.stream.readBytes(7*8, len(MAGIC)) != MAGIC:
if self.stream.readBytes(7 * 8, len(MAGIC)) != MAGIC:
return "Invalid magic"
return True
def createFields(self):
while not self.eof:
yield Block(self, "block[]")

View file

@ -2,24 +2,27 @@
GNU ar archive : archive file (.a) and Debian (.deb) archive.
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, ParserError,
String, RawBytes, UnixLine)
from hachoir_core.endian import BIG_ENDIAN
from hachoir.parser import Parser
from hachoir.field import (FieldSet, ParserError,
String, RawBytes, UnixLine)
from hachoir.core.endian import BIG_ENDIAN
class ArchiveFileEntry(FieldSet):
def createFields(self):
yield UnixLine(self, "header", "Header")
info = self["header"].value.split()
if len(info) != 7:
raise ParserError("Invalid file entry header")
size = int(info[5])
info = self["header"].value
info = info.split()
if len(info) < 3:
raise ParserError("Invalid file entry header: %r" % info)
size = int(info[-2])
if 0 < size:
yield RawBytes(self, "content", size, "File data")
def createDescription(self):
return "File entry (%s)" % self["header"].value.split()[0]
class ArchiveFile(Parser):
endian = BIG_ENDIAN
MAGIC = '!<arch>\n'
@ -31,7 +34,7 @@ class ArchiveFile(Parser):
(u"application/x-debian-package",
u"application/x-archive",
u"application/x-dpkg"),
"min_size": (8 + 13)*8, # file signature + smallest file as possible
"min_size": (8 + 13) * 8, # file signature + smallest file as possible
"magic": ((MAGIC, 0),),
"description": "Unix archive"
}
@ -49,4 +52,3 @@ class ArchiveFile(Parser):
yield RawBytes(self, "empty_line[]", 1, "Empty line")
else:
yield ArchiveFileEntry(self, "file[]", "File")
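The relaxed parsing above (info[-2] instead of a fixed info[5]) reads the member size as the second-to-last token. A worked header line from a Debian archive shows why; the trailing backquote is the entry terminator:

header = "debian-binary   1342943816  0     0     100644  4         `"
info = header.split()
print(int(info[-2]))  # 4 -> byte size of the member data that follows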

View file

@ -9,28 +9,29 @@ Author: Robert Xiao
Created: 2015-05-14
"""
from hachoir_parser import HachoirParser
from hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum,
Bits, GenericInteger, Float32, Float64, UInt8, UInt32, UInt64, Bytes, NullBytes, RawBytes, String)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import displayHandler
from hachoir_core.tools import humanDatetime
from datetime import datetime, timedelta
from hachoir.parser import HachoirParser
from hachoir.field import (RootSeekableFieldSet, FieldSet,
UInt32, Bytes, NullBytes, RawBytes)
from hachoir.core.endian import BIG_ENDIAN
class BomTrailerEntry(FieldSet):
static_size = 64 # bits
static_size = 64 # bits
def createFields(self):
yield UInt32(self, "offset")
yield UInt32(self, "size")
def createDescription(self):
return "Object at offset %d, size %d" % (self['offset'].value, self['size'].value)
class BomTrailer(FieldSet):
def createFields(self):
yield UInt32(self, "num_spaces", "Total number of entries, including blank entries")
nobj = self['/num_objects'].value
nspace = self['num_spaces'].value
for i in xrange(nobj+1):
for i in xrange(nobj + 1):
yield BomTrailerEntry(self, "entry[]")
yield NullBytes(self, "blank_entries", (nspace - nobj - 1) * (BomTrailerEntry.static_size / 8))
yield UInt32(self, "num_trail")
@ -41,15 +42,16 @@ class BomTrailer(FieldSet):
def createDescription(self):
return "Bom file trailer"
class BomFile(HachoirParser, RootSeekableFieldSet):
endian = BIG_ENDIAN
MAGIC = "BOMStore"
PARSER_TAGS = {
"id": "bom_store",
"category": "archive",
"file_ext": ("bom","car"),
"file_ext": ("bom", "car"),
"magic": ((MAGIC, 0),),
"min_size": 32, # 32-byte header
"min_size": 32, # 32-byte header
"description": "Apple bill-of-materials file",
}
@ -64,14 +66,14 @@ class BomFile(HachoirParser, RootSeekableFieldSet):
def createFields(self):
yield Bytes(self, "magic", 8, "File magic (BOMStore)")
yield UInt32(self, "version") # ?
yield UInt32(self, "version") # ?
yield UInt32(self, "num_objects")
yield UInt32(self, "trailer_offset")
yield UInt32(self, "trailer_size")
yield UInt32(self, "header_offset")
yield UInt32(self, "header_size")
yield RawBytes(self, "object[]", 512-32, "Null object (size 0, offset 0)") # null object
yield RawBytes(self, "object[]", 512 - 32, "Null object (size 0, offset 0)") # null object
self.seekByte(self['trailer_offset'].value)
yield BomTrailer(self, "trailer")

View file

@ -4,19 +4,20 @@ BZIP2 archive file
Author: Victor Stinner, Robert Xiao
"""
from hachoir_parser import Parser
from hachoir_core.tools import paddingSize
from hachoir_core.field import (Field, FieldSet, GenericVector,
ParserError, String,
PaddingBits, Bit, Bits, Character,
UInt32, Enum, CompressedField)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_parser.archive.zlib import build_tree, HuffmanCode
from hachoir.parser import Parser
from hachoir.core.tools import paddingSize
from hachoir.field import (Field, FieldSet, GenericVector,
ParserError, String,
PaddingBits, Bit, Bits, Character,
UInt32, Enum, CompressedField)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.parser.archive.zlib import build_tree, HuffmanCode
try:
from bz2 import BZ2Decompressor
class Bunzip2:
def __init__(self, stream):
self.bzip2 = BZ2Decompressor()
@ -27,12 +28,15 @@ try:
except EOFError:
return ''
has_deflate = True
except ImportError:
has_deflate = False
class ZeroTerminatedNumber(Field):
"""Zero (bit) terminated number: e.g. 11110 is 4."""
def __init__(self, parent, name, description=None):
Field.__init__(self, parent, name, 0, description)
@ -49,11 +53,14 @@ class ZeroTerminatedNumber(Field):
break
value += 1
self._value = value
def createValue(self):
return self._value
def move_to_front(l, c):
l[:] = l[c:c+1] + l[0:c] + l[c+1:]
l[:] = l[c:c + 1] + l[0:c] + l[c + 1:]
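# Worked example of the MTF step used by the bzip2 fields below
# (comment added for illustration, not part of the original source):
#   symbols = [10, 20, 30, 40]; move_to_front(symbols, 2)
#   => symbols == [30, 10, 20, 40]  (index 2 spliced out and prepended)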
class Bzip2Bitmap(FieldSet):
def __init__(self, parent, name, nb_items, start_index, *args, **kwargs):
@ -62,8 +69,9 @@ class Bzip2Bitmap(FieldSet):
self.start_index = start_index
def createFields(self):
for i in xrange(self.start_index, self.start_index+self.nb_items):
yield Bit(self, "symbol_used[%i]"%i, "Is the symbol %i (%r) used?"%(i, chr(i)))
for i in xrange(self.start_index, self.start_index + self.nb_items):
yield Bit(self, "symbol_used[%i]" % i, "Is the symbol %i (%r) used?" % (i, chr(i)))
class Bzip2Lengths(FieldSet):
def __init__(self, parent, name, symbols, *args, **kwargs):
@ -76,12 +84,13 @@ class Bzip2Lengths(FieldSet):
lengths = []
for i in xrange(self.symbols):
while True:
bit = Bit(self, "change_length[%i][]"%i, "Should the length be changed for symbol %i?"%i)
bit = Bit(self, "change_length[%i][]" % i, "Should the length be changed for symbol %i?" % i)
yield bit
if not bit.value:
break
else:
bit = Enum(Bit(self, "length_decrement[%i][]"%i, "Decrement the value?"), {True: "Decrement", False: "Increment"})
bit = Enum(Bit(self, "length_decrement[%i][]" % i, "Decrement the value?"),
{True: "Decrement", False: "Increment"})
yield bit
if bit.value:
length -= 1
@ -91,6 +100,7 @@ class Bzip2Lengths(FieldSet):
self.final_length = length
self.tree = build_tree(lengths)
class Bzip2Selectors(FieldSet):
def __init__(self, parent, name, ngroups, *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs)
@ -101,23 +111,27 @@ class Bzip2Selectors(FieldSet):
field = ZeroTerminatedNumber(self, "selector_list[]")
move_to_front(self.groups, field.value)
field.realvalue = self.groups[0]
field._description = "MTF'ed selector index: raw value %i, real value %i"%(field.value, field.realvalue)
field._description = "MTF'ed selector index: raw value %i, real value %i" % (field.value, field.realvalue)
yield field
class Bzip2Block(FieldSet):
def createFields(self):
yield textHandler(Bits(self, "blockheader", 48, "Block header"), hexadecimal)
if self["blockheader"].value != 0x314159265359: # pi
if self["blockheader"].value != 0x314159265359: # pi
raise ParserError("Invalid block header!")
yield textHandler(UInt32(self, "crc32", "CRC32 for this block"), hexadecimal)
yield Bit(self, "randomized", "Is this block randomized?")
yield Bits(self, "orig_bwt_pointer", 24, "Starting pointer into BWT after untransform")
yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used', "Bitmap showing which blocks (representing 16 literals each) are in use")
yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used',
"Bitmap showing which blocks (representing 16 literals each) are in use")
symbols_used = []
for index, block_used in enumerate(self["huffman_used_map"].array('block_used')):
if block_used.value:
start_index = index*16
field = Bzip2Bitmap(self, "huffman_used_bitmap[%i]"%index, 16, start_index, "Bitmap for block %i (literals %i to %i) showing which symbols are in use"%(index, start_index, start_index + 15))
start_index = index * 16
field = Bzip2Bitmap(self, "huffman_used_bitmap[%i]" % index, 16, start_index,
"Bitmap for block %i (literals %i to %i) showing which symbols are in use" % (
index, start_index, start_index + 15))
yield field
for i, used in enumerate(field):
if used.value:
@ -127,15 +141,15 @@ class Bzip2Block(FieldSet):
yield Bzip2Selectors(self, "selectors_list", self["huffman_groups"].value)
trees = []
for group in xrange(self["huffman_groups"].value):
field = Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used)+2)
field = Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used) + 2)
yield field
trees.append(field.tree)
counter = 0
rle_run = 0
selector_tree = None
while True:
if counter%50 == 0:
select_id = self["selectors_list"].array("selector_list")[counter//50].realvalue
if counter % 50 == 0:
select_id = self["selectors_list"].array("selector_list")[counter // 50].realvalue
selector_tree = trees[select_id]
field = HuffmanCode(self, "huffman_code[]", selector_tree)
if field.realvalue in [0, 1]:
@ -144,23 +158,27 @@ class Bzip2Block(FieldSet):
rle_power = 1
rle_run += (field.realvalue + 1) * rle_power
rle_power <<= 1
field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (field.realvalue, chr(symbols_used[0]), rle_run, field.value)
elif field.realvalue == len(symbols_used)+1:
field._description = "Block Terminator (%i) (Huffman Code %i)"%(field.realvalue, field.value)
field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (
field.realvalue, chr(symbols_used[0]), rle_run, field.value)
elif field.realvalue == len(symbols_used) + 1:
field._description = "Block Terminator (%i) (Huffman Code %i)" % (field.realvalue, field.value)
yield field
break
else:
rle_run = 0
move_to_front(symbols_used, field.realvalue-1)
field._description = "Literal %r (value %i) (Huffman Code %i)"%(chr(symbols_used[0]), field.realvalue, field.value)
move_to_front(symbols_used, field.realvalue - 1)
field._description = "Literal %r (value %i) (Huffman Code %i)" % (
chr(symbols_used[0]), field.realvalue, field.value)
yield field
if field.realvalue == len(symbols_used)+1:
if field.realvalue == len(symbols_used) + 1:
break
counter += 1
class Bzip2Stream(FieldSet):
START_BLOCK = 0x314159265359 # pi
END_STREAM = 0x177245385090 # sqrt(pi)
START_BLOCK = 0x314159265359 # pi
END_STREAM = 0x177245385090 # sqrt(pi)
def createFields(self):
end = False
while not end:
@ -175,7 +193,8 @@ class Bzip2Stream(FieldSet):
yield PaddingBits(self, "padding[]", padding)
end = True
else:
raise ParserError("Invalid marker 0x%02X!"%marker)
raise ParserError("Invalid marker 0x%02X!" % marker)
class Bzip2Parser(Parser):
PARSER_TAGS = {
@ -183,7 +202,7 @@ class Bzip2Parser(Parser):
"category": "archive",
"file_ext": ("bz2",),
"mime": (u"application/x-bzip2",),
"min_size": 10*8,
"min_size": 10 * 8,
"magic": (('BZh', 0),),
"description": "bzip2 archive"
}
@ -192,7 +211,7 @@ class Bzip2Parser(Parser):
def validate(self):
if self.stream.readBytes(0, 3) != 'BZh':
return "Wrong file signature"
if not("1" <= self["blocksize"].value <= "9"):
if not ("1" <= self["blocksize"].value <= "9"):
return "Wrong blocksize"
return True
@ -200,10 +219,10 @@ class Bzip2Parser(Parser):
yield String(self, "id", 3, "Identifier (BZh)", charset="ASCII")
yield Character(self, "blocksize", "Block size (KB of memory needed to uncompress)")
if self._size is None: # TODO: is it possible to handle piped input?
if self._size is None: # TODO: is it possible to handle piped input?
raise NotImplementedError
size = (self._size - self.current_size)/8
size = (self._size - self.current_size) // 8
if size:
for tag, filename in self.stream.tags:
if tag == "filename" and filename.endswith(".bz2"):
@ -211,13 +230,14 @@ class Bzip2Parser(Parser):
break
else:
filename = None
data = Bzip2Stream(self, "file", size=size*8)
data = Bzip2Stream(self, "file", size=size * 8)
if has_deflate:
CompressedField(self, Bunzip2)
def createInputStream(**args):
if filename:
args.setdefault("tags",[]).append(("filename", filename))
args.setdefault("tags", []).append(("filename", filename))
return self._createInputStream(**args)
data._createInputStream = createInputStream
yield data

View file

@ -8,17 +8,17 @@ Creation date: 31 january 2007
http://msdn2.microsoft.com/en-us/library/ms974336.aspx
"""
from __future__ import absolute_import
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, Enum,
CString, String,
UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits,
DateTimeMSDOS32, RawBytes)
from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.tools import paddingSize
from hachoir_core.stream import StringInputStream
from hachoir_parser.archive.lzx import LZXStream, lzx_decompress
from hachoir_parser.archive.zlib import DeflateBlock
from hachoir.parser import Parser
from hachoir.field import (FieldSet, Enum,
CString, String,
UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits,
DateTimeMSDOS32, RawBytes, CustomFragment)
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.tools import paddingSize
from hachoir.stream import StringInputStream
from hachoir.parser.archive.lzx import LZXStream, lzx_decompress
from hachoir.parser.archive.zlib import DeflateBlock
MAX_NB_FOLDER = 30
@ -30,12 +30,13 @@ COMPRESSION_NAME = {
3: "LZX",
}
class Folder(FieldSet):
def createFields(self):
yield UInt32(self, "offset", "Offset to data (from file start)")
yield UInt16(self, "data_blocks", "Number of data blocks which are in this cabinet")
yield Enum(Bits(self, "compr_method", 4, "Compression method"), COMPRESSION_NAME)
if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level
if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level
yield PaddingBits(self, "padding[]", 4)
yield Bits(self, "compr_level", 5, "Compression level")
yield PaddingBits(self, "padding[]", 3)
@ -45,11 +46,12 @@ class Folder(FieldSet):
yield RawBytes(self, "reserved_folder", self["../reserved_folder_size"].value, "Per-folder reserved area")
def createDescription(self):
text= "Folder: compression %s" % self["compr_method"].display
if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level
text += " (level %u: window size %u)" % (self["compr_level"].value, 2**self["compr_level"].value)
text = "Folder: compression %s" % self["compr_method"].display
if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level
text += " (level %u: window size %u)" % (self["compr_level"].value, 2 ** self["compr_level"].value)
return text
class CabFileAttributes(FieldSet):
def createFields(self):
yield Bit(self, "readonly")
@ -61,14 +63,15 @@ class CabFileAttributes(FieldSet):
yield Bit(self, "name_is_utf", "Is the filename using UTF-8?")
yield Bits(self, "reserved[]", 8)
class File(FieldSet):
def createFields(self):
yield filesizeHandler(UInt32(self, "filesize", "Uncompressed file size"))
yield UInt32(self, "folder_offset", "File offset in uncompressed folder")
yield Enum(UInt16(self, "folder_index", "Containing folder ID (index)"), {
0xFFFD:"Folder continued from previous cabinet (real folder ID = 0)",
0xFFFE:"Folder continued to next cabinet (real folder ID = %i)" % (self["../nb_folder"].value - 1),
0xFFFF:"Folder spanning previous, current and next cabinets (real folder ID = 0)"})
0xFFFD: "Folder continued from previous cabinet (real folder ID = 0)",
0xFFFE: "Folder continued to next cabinet (real folder ID = %i)" % (self["../nb_folder"].value - 1),
0xFFFF: "Folder spanning previous, current and next cabinets (real folder ID = 0)"})
yield DateTimeMSDOS32(self, "timestamp")
yield CabFileAttributes(self, "attributes")
if self["attributes/name_is_utf"].value:
@ -80,55 +83,21 @@ class File(FieldSet):
return "File %s (%s)" % (
self["filename"].display, self["filesize"].display)
class Flags(FieldSet):
static_size = 16
def createFields(self):
yield Bit(self, "has_previous")
yield Bit(self, "has_next")
yield Bit(self, "has_reserved")
yield NullBits(self, "padding", 13)
class FragmentGroup:
def __init__(self, parser):
self.items = []
self.parser = parser
self.args = {}
def add(self, item):
self.items.append(item)
def createInputStream(self):
# FIXME: Use lazy stream creation
data = []
for item in self.items:
data.append( item["rawdata"].value )
data = "".join(data)
# FIXME: Use smarter code to send arguments
self.args["compr_level"] = self.items[0].parent.parent.folder["compr_level"].value
tags = {"class": self.parser, "args": self.args}
tags = tags.iteritems()
return StringInputStream(data, "<fragment group>", tags=tags)
class CustomFragment(FieldSet):
def __init__(self, parent, name, size, parser, description=None, group=None):
FieldSet.__init__(self, parent, name, description, size=size)
if not group:
group = FragmentGroup(parser)
self.field_size = size
self.group = group
self.group.add(self)
def createFields(self):
yield RawBytes(self, "rawdata", self.field_size//8)
def _createInputStream(self, **args):
return self.group.createInputStream()
class DataBlock(FieldSet):
def __init__(self, *args, **kwargs):
FieldSet.__init__(self, *args, **kwargs)
size = (self["size"].value + 8) * 8 # +8 for header values
size = (self["size"].value + 8) * 8 # +8 for header values
if self["/flags/has_reserved"].value:
size += self["/reserved_data_size"].value * 8
self._size = size
@ -140,50 +109,58 @@ class DataBlock(FieldSet):
if self["/flags/has_reserved"].value and self["/reserved_data_size"].value:
yield RawBytes(self, "reserved_data", self["/reserved_data_size"].value, "Per-datablock reserved area")
compr_method = self.parent.folder["compr_method"].value
if compr_method == 0: # Uncompressed
if compr_method == 0: # Uncompressed
yield RawBytes(self, "data", self["size"].value, "Folder Data")
self.parent.uncompressed_data += self["data"].value
elif compr_method == 1: # MSZIP
elif compr_method == 1: # MSZIP
yield String(self, "mszip_signature", 2, "MSZIP Signature (CK)")
yield DeflateBlock(self, "deflate_block", self.parent.uncompressed_data)
padding = paddingSize(self.current_size, 8)
if padding:
yield PaddingBits(self, "padding[]", padding)
self.parent.uncompressed_data = self["deflate_block"].uncomp_data
elif compr_method == 2: # Quantum
elif compr_method == 2: # Quantum
yield RawBytes(self, "compr_data", self["size"].value, "Compressed Folder Data")
elif compr_method == 3: # LZX
elif compr_method == 3: # LZX
group = getattr(self.parent.folder, "lzx_group", None)
field = CustomFragment(self, "data", self["size"].value*8, LZXStream, "LZX data fragment", group)
field = CustomFragment(self, "data", self["size"].value * 8, LZXStream, "LZX data fragment", group)
if group is None:
field.group.args["compr_level"] = self.parent.folder["compr_level"].value
self.parent.folder.lzx_group = field.group
yield field
class FolderParser(Parser):
endian = LITTLE_ENDIAN
def createFields(self):
for file in sorted(self.files, key=lambda x:x["folder_offset"].value):
for file in sorted(self.files, key=lambda x: x["folder_offset"].value):
padding = self.seekByte(file["folder_offset"].value)
if padding:
yield padding
yield RawBytes(self, "file[]", file["filesize"].value, file.description)
class FolderData(FieldSet):
def __init__(self, parent, name, folder, files, *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs)
def createInputStream(cis, source=None, **args):
stream = cis(source=source)
tags = args.setdefault("tags", [])
tags.extend(stream.tags)
tags.append(("class", FolderParser))
tags.append(("args", {'files': files}))
for unused in self:
pass
if folder["compr_method"].value == 3: # LZX
self.uncompressed_data = lzx_decompress(self["block[0]/data"].getSubIStream(),
folder["compr_level"].value)
return StringInputStream(self.uncompressed_data, source=source, **args)
self.setSubIStream(createInputStream)
self.files = files
self.folder = folder # Folder fieldset
def createFields(self):
self.uncompressed_data = ""
@ -193,6 +170,7 @@ class FolderData(FieldSet):
pass
yield block
class CabFile(Parser):
endian = LITTLE_ENDIAN
MAGIC = "MSCF"
@ -202,7 +180,7 @@ class CabFile(Parser):
"file_ext": ("cab",),
"mime": (u"application/vnd.ms-cab-compressed",),
"magic": ((MAGIC, 0),),
"min_size": 1*8, # header + file entry
"min_size": 1 * 8, # header + file entry
"description": "Microsoft Cabinet archive"
}
@ -238,7 +216,8 @@ class CabFile(Parser):
yield RawBytes(self, "reserved_header", self["reserved_header_size"].value, "Per-cabinet reserved area")
if self["flags/has_previous"].value:
yield CString(self, "previous_cabinet", "File name of previous cabinet", charset="ASCII")
yield CString(self, "previous_disk", "Description of disk/media on which previous cabinet resides",
charset="ASCII")
if self["flags/has_next"].value:
yield CString(self, "next_cabinet", "File name of next cabinet", charset="ASCII")
yield CString(self, "next_disk", "Description of disk/media on which next cabinet resides", charset="ASCII")
@ -254,7 +233,7 @@ class CabFile(Parser):
yield file
files.append(file)
folders = sorted(enumerate(folders), key=lambda x: x[1]["offset"].value)
for i in xrange(len(folders)):
index, folder = folders[i]
@ -265,11 +244,11 @@ class CabFile(Parser):
for file in files:
if file["folder_index"].value == index:
files.append(file)
if i + 1 == len(folders):
size = (self.size // 8) - folder["offset"].value
else:
size = (folders[i + 1][1]["offset"].value) - folder["offset"].value
yield FolderData(self, "folder_data[%i]" % index, folder, files, size=size * 8)
end = self.seekBit(self.size, "endraw")
if end:
@ -277,4 +256,3 @@ class CabFile(Parser):
def createContentSize(self):
return self["filesize"].value * 8
@ -4,14 +4,15 @@ GZIP archive parser.
Author: Victor Stinner
"""
from hachoir.parser import Parser
from hachoir.field import (
UInt8, UInt16, UInt32, Enum, TimestampUnix32,
Bit, CString, SubFile,
NullBits, Bytes, RawBytes)
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.parser.common.deflate import Deflate
class GzipParser(Parser):
endian = LITTLE_ENDIAN
@ -20,8 +21,7 @@ class GzipParser(Parser):
"category": "archive",
"file_ext": ("gz",),
"mime": (u"application/x-gzip",),
"min_size": 18*8,
#"magic": (('\x1F\x8B\x08', 0),),
"min_size": 18 * 8,
"magic_regex": (
# (magic, compression=deflate, <flags>, <mtime>, )
('\x1F\x8B\x08.{5}[\0\2\4\6][\x00-\x0D]', 0),
@ -29,16 +29,16 @@ class GzipParser(Parser):
"description": u"gzip archive",
}
os_name = {
0: u"FAT filesystem",
1: u"Amiga",
2: u"VMS (or OpenVMS)",
3: u"Unix",
4: u"VM/CMS",
5: u"Atari TOS",
6: u"HPFS filesystem (OS/2, NT)",
7: u"Macintosh",
8: u"Z-System",
9: u"CP/M",
0: u"FAT filesystem",
1: u"Amiga",
2: u"VMS (or OpenVMS)",
3: u"Unix",
4: u"VM/CMS",
5: u"Atari TOS",
6: u"HPFS filesystem (OS/2, NT)",
7: u"Macintosh",
8: u"Z-System",
9: u"CP/M",
10: u"TOPS-20",
11: u"NTFS filesystem (NT)",
12: u"QDOS",
@ -92,9 +92,9 @@ class GzipParser(Parser):
yield CString(self, "comment", "Comment")
if self["has_crc16"].value:
yield textHandler(UInt16(self, "hdr_crc16", "CRC16 of the header"),
hexadecimal)
if self._size is None: # TODO: is it possible to handle piped input?
raise NotImplementedError()
# Read file
@ -113,7 +113,7 @@ class GzipParser(Parser):
# Footer
yield textHandler(UInt32(self, "crc32",
"Uncompressed data content CRC32"), hexadecimal)
"Uncompressed data content CRC32"), hexadecimal)
yield filesizeHandler(UInt32(self, "size", "Uncompressed size"))
def createDescription(self):
@ -126,4 +126,3 @@ class GzipParser(Parser):
if self["mtime"].value:
info.append(self["mtime"].display)
return "%s: %s" % (desc, ", ".join(info))
@ -6,37 +6,37 @@ LZX data stored in a Hachoir stream.
Author: Robert Xiao
Creation date: July 18, 2007
"""
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
UInt32, Bit, Bits, PaddingBits,
RawBytes, ParserError)
from hachoir.core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN
from hachoir.core.tools import paddingSize
from hachoir.parser.archive.zlib import build_tree, HuffmanCode, extend_data
import new # for instancemethod
class LZXPreTreeEncodedTree(FieldSet):
def __init__(self, parent, name, num_elements, *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs)
self.num_elements = num_elements
def createFields(self):
for i in xrange(20):
yield Bits(self, "pretree_lengths[]", 4)
pre_tree = build_tree([self['pretree_lengths[%d]' % x].value for x in xrange(20)])
if not hasattr(self.root, "lzx_tree_lengths_" + self.name):
self.lengths = [0] * self.num_elements
setattr(self.root, "lzx_tree_lengths_" + self.name, self.lengths)
else:
self.lengths = getattr(self.root, "lzx_tree_lengths_" + self.name)
i = 0
while i < self.num_elements:
field = HuffmanCode(self, "tree_code[]", pre_tree)
if field.realvalue <= 16:
self.lengths[i] = (self.lengths[i] - field.realvalue) % 17
field._description = "Literal tree delta length %i (new length value %i for element %i)" % (
field.realvalue, self.lengths[i], i)
i += 1
yield field
elif field.realvalue == 17:
@ -44,93 +44,97 @@ class LZXPreTreeEncodedTree(FieldSet):
yield field
extra = Bits(self, "extra[]", 4)
zeros = 4 + extra.value
extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (
zeros, i, i + zeros - 1)
yield extra
self.lengths[i:i + zeros] = [0] * zeros
i += zeros
elif field.realvalue == 18:
field._description = "Tree Code 18: Zeros for 20-51 elements"
yield field
extra = Bits(self, "extra[]", 5)
zeros = 20 + extra.value
extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (
zeros, i, i + zeros - 1)
yield extra
self.lengths[i:i + zeros] = [0] * zeros
i += zeros
elif field.realvalue == 19:
field._description = "Tree Code 19: Same code for 4-5 elements"
yield field
extra = Bits(self, "extra[]", 1)
run = 4 + extra.value
extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (run, i, i + run - 1)
yield extra
newfield = HuffmanCode(self, "tree_code[]", pre_tree)
assert newfield.realvalue <= 16
newfield._description = "Literal tree delta length %i (new length value %i for elements %i through %i)" % (
newfield.realvalue, self.lengths[i], i, i + run - 1)
self.lengths[i:i + run] = [(self.lengths[i] - newfield.realvalue) % 17] * run
i += run
yield newfield
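# Tree lengths are delta-coded against the previous block's lengths:
# new_length = (old_length - code) % 17. For example, an element whose
# previous length was 4 and whose pretree code is 16 becomes (4 - 16) % 17 = 5.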
class LZXBlock(FieldSet):
WINDOW_SIZE = {15: 30,
16: 32,
17: 34,
18: 36,
19: 38,
20: 42,
21: 50}
POSITION_SLOTS = {0: (0, 0, 0),
1: (1, 1, 0),
2: (2, 2, 0),
3: (3, 3, 0),
4: (4, 5, 1),
5: (6, 7, 1),
6: (8, 11, 2),
7: (12, 15, 2),
8: (16, 23, 3),
9: (24, 31, 3),
10: (32, 47, 4),
11: (48, 63, 4),
12: (64, 95, 5),
13: (96, 127, 5),
14: (128, 191, 6),
15: (192, 255, 6),
16: (256, 383, 7),
17: (384, 511, 7),
18: (512, 767, 8),
19: (768, 1023, 8),
20: (1024, 1535, 9),
21: (1536, 2047, 9),
22: (2048, 3071, 10),
23: (3072, 4095, 10),
24: (4096, 6143, 11),
25: (6144, 8191, 11),
26: (8192, 12287, 12),
27: (12288, 16383, 12),
28: (16384, 24575, 13),
29: (24576, 32767, 13),
30: (32768, 49151, 14),
31: (49152, 65535, 14),
32: (65536, 98303, 15),
33: (98304, 131071, 15),
34: (131072, 196607, 16),
35: (196608, 262143, 16),
36: (262144, 393215, 17),
37: (393216, 524287, 17),
38: (524288, 655359, 17),
39: (655360, 786431, 17),
40: (786432, 917503, 17),
41: (917504, 1048575, 17),
42: (1048576, 1179647, 17),
43: (1179648, 1310719, 17),
44: (1310720, 1441791, 17),
45: (1441792, 1572863, 17),
46: (1572864, 1703935, 17),
47: (1703936, 1835007, 17),
48: (1835008, 1966079, 17),
49: (1966080, 2097151, 17),
}
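# Each slot maps to (lowest position, highest position, number of extra bits);
# the extra bits select the exact match offset within the slot's range. For
# example, slot 6 = (8, 11, 2): two extra bits pick one of the four positions
# 8..11 (raw values, before the -2 bias applied while decoding below).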
def createFields(self):
yield Bits(self, "block_type", 3)
yield Bits(self, "block_size", 24)
@ -139,11 +143,11 @@ class LZXBlock(FieldSet):
self.window_size = self.WINDOW_SIZE[self.compression_level]
self.block_type = self["block_type"].value
curlen = len(self.parent.uncompressed_data)
if self.block_type in (1, 2): # Verbatim or aligned offset block
if self.block_type == 2:
for i in xrange(8):
yield Bits(self, "aligned_len[]", 3)
aligned_tree = build_tree([self['aligned_len[%d]' % i].value for i in xrange(8)])
yield LZXPreTreeEncodedTree(self, "main_tree_start", 256)
yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
main_tree = build_tree(self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
@ -151,7 +155,7 @@ class LZXBlock(FieldSet):
length_tree = build_tree(self["length_tree"].lengths)
current_decoded_size = 0
while current_decoded_size < self.uncompressed_size:
if (curlen + current_decoded_size) % 32768 == 0 and (curlen + current_decoded_size) != 0:
padding = paddingSize(self.address + self.current_size, 16)
if padding:
yield PaddingBits(self, "padding[]", padding)
@ -185,7 +189,8 @@ class LZXBlock(FieldSet):
self.parent.r0 = position
field._description = "Position Slot %i, Position %i" % (position_header, position)
else:
field._description = "Position Slot %i, Positions %i to %i" % (
position_header, info[0] - 2, info[1] - 2)
if length_header == 7:
field._description += ", Length Values 9 and up"
yield field
@ -194,33 +199,37 @@ class LZXBlock(FieldSet):
length_field._description = "Length Code %i, total length %i" % (length_field.realvalue, length)
yield length_field
else:
field._description += ", Length Value %i (Huffman Code %i)" % (length_header + 2, field.value)
yield field
length = length_header + 2
if info[2]:
if self.block_type == 1 or info[2] < 3: # verbatim
extrafield = Bits(self, "position_extra[%s" % field.name.split('[')[1], info[2])
position = extrafield.value + info[0] - 2
extrafield._description = "Position Extra Bits (%i), total position %i" % (
extrafield.value, position)
yield extrafield
else: # aligned offset
position = info[0] - 2
if info[2] > 3:
extrafield = Bits(self, "position_verbatim[%s" % field.name.split('[')[1], info[2] - 3)
position += extrafield.value * 8
extrafield._description = "Position Verbatim Bits (%i), added position %i" % (
extrafield.value, extrafield.value * 8)
yield extrafield
if info[2] >= 3:
extrafield = HuffmanCode(self, "position_aligned[%s" % field.name.split('[')[1],
aligned_tree)
position += extrafield.realvalue
extrafield._description = "Position Aligned Bits (%i), total position %i" % (
extrafield.realvalue, position)
yield extrafield
self.parent.r2 = self.parent.r1
self.parent.r1 = self.parent.r0
self.parent.r0 = position
self.parent.uncompressed_data = extend_data(self.parent.uncompressed_data, length, position)
current_decoded_size += length
elif self.block_type == 3: # Uncompressed block
padding = paddingSize(self.address + self.current_size, 16)
if padding:
yield PaddingBits(self, "padding[]", padding)
@ -234,14 +243,16 @@ class LZXBlock(FieldSet):
self.parent.r1 = self["r[1]"].value
self.parent.r2 = self["r[2]"].value
yield RawBytes(self, "data", self.uncompressed_size)
self.parent.uncompressed_data += self["data"].value
if self["block_size"].value % 2:
yield PaddingBits(self, "padding", 8)
else:
raise ParserError("Unknown block type %d!" % self.block_type)
class LZXStream(Parser):
endian = MIDDLE_ENDIAN
def createFields(self):
self.uncompressed_data = ""
self.r0 = 1
@ -259,6 +270,7 @@ class LZXStream(Parser):
yield PaddingBits(self, "padding[]", padding)
break
def lzx_decompress(stream, window_bits):
data = LZXStream(stream)
data.compr_level = window_bits
@ -5,15 +5,16 @@ Author: Victor Stinner
Creation date: 2007-03-04
"""
from hachoir.parser import Parser
from hachoir.field import FieldSet, String, UInt32, SubFile
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
MAX_NB_FILE = 100000
class FileIndex(FieldSet):
static_size = 68 * 8
def createFields(self):
yield String(self, "filename", 56, truncate="\0", charset="ASCII")
@ -25,13 +26,14 @@ class FileIndex(FieldSet):
return "File %s (%s) at %s" % (
self["filename"].value, self["filesize"].display, self["offset"].value)
class MarFile(Parser):
MAGIC = "MARC"
PARSER_TAGS = {
"id": "mar",
"category": "archive",
"file_ext": ("mar",),
"min_size": 80*8, # At least one file index
"min_size": 80 * 8, # At least one file index
"magic": ((MAGIC, 0),),
"description": "Microsoft Archive",
}
@ -42,7 +44,7 @@ class MarFile(Parser):
return "Invalid magic"
if self["version"].value != 3:
return "Invalid version"
if not (1 <= self["nb_file"].value <= MAX_NB_FILE):
return "Invalid number of file"
return True
@ -64,4 +66,3 @@ class MarFile(Parser):
size = index["filesize"].value
desc = "File %s" % index["filename"].value
yield SubFile(self, "data[]", size, desc, filename=index["filename"].value)
@ -5,12 +5,13 @@ Creation date: July 10, 2007
"""
from hachoir.core.endian import BIG_ENDIAN
from hachoir.field import (RootSeekableFieldSet, FieldSet,
String, CString, UInt32, RawBytes)
from hachoir.core.text_handler import displayHandler, filesizeHandler
from hachoir.core.tools import humanUnixAttributes
from hachoir.parser import HachoirParser
class IndexEntry(FieldSet):
def createFields(self):
@ -20,25 +21,27 @@ class IndexEntry(FieldSet):
yield CString(self, "name", "Filename (byte array)")
def createDescription(self):
return 'File %s, Size %s, Mode %s' % (
self["name"].display, self["length"].display, self["flags"].display)
class MozillaArchive(HachoirParser, RootSeekableFieldSet):
MAGIC = "MAR1"
PARSER_TAGS = {
"id": "mozilla_ar",
"category": "archive",
"file_ext": ("mar",),
"min_size": (8+4+13)*8, # Header, Index Header, 1 Index Entry
"min_size": (8 + 4 + 13) * 8, # Header, Index Header, 1 Index Entry
"magic": ((MAGIC, 0),),
"description": "Mozilla Archive",
}
endian = BIG_ENDIAN
def __init__(self, stream, **args):
RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
HachoirParser.__init__(self, stream, **args)
def validate(self):
if self.stream.readBytes(0, 4) != self.MAGIC:
return "Invalid magic"
@ -49,10 +52,11 @@ class MozillaArchive(HachoirParser, RootSeekableFieldSet):
yield UInt32(self, "index_offset", "Offset to index relative to file start")
self.seekByte(self["index_offset"].value, False)
yield UInt32(self, "index_size", "size of index in bytes")
current_index_size = 0 # bytes
while current_index_size < self["index_size"].value:
# plus 4 compensates for index_size
self.seekByte(self["index_offset"].value +
current_index_size + 4, False)
entry = IndexEntry(self, "index_entry[]")
yield entry
current_index_size += entry.size // 8
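# Each index entry is three 32-bit words (offset, length, flags) followed by a
# null-terminated name, so the loop advances by entry.size // 8 until
# index_size bytes have been consumed; index_size counts from just after the
# index_size field itself, hence the "+ 4" in the seek above.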
@ -5,15 +5,15 @@ Status: can only read higher-level structures
Author: Christophe Gisquet
"""
from hachoir.parser import Parser
from hachoir.field import (StaticFieldSet, FieldSet,
Bit, Bits, Enum,
UInt8, UInt16, UInt32, UInt64,
String, TimeDateMSDOS32,
NullBytes, NullBits, RawBytes)
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.parser.common.msdos import MSDOSFileAttr32
from datetime import timedelta
MAX_FILESIZE = 1000 * 1024 * 1024
@ -58,42 +58,49 @@ DICTIONARY_SIZE = {
7: "File is a directory",
}
def formatRARVersion(field):
"""
Decodes the RAR version stored on 1 byte
"""
return "%u.%u" % divmod(field.value, 10)
def markerFlags(s):
yield UInt16(s, "flags", "Marker flags, always 0x1a21")
commonFlags = (
(Bit, "is_ignorable", "Old versions of RAR should ignore this block when copying data"),
(Bit, "has_added_size", "Additional field indicating additional size"),
)
class ArchiveFlags(StaticFieldSet):
format = (
(Bit, "vol", "Archive volume"),
(Bit, "has_comment", "Whether there is a comment"),
(Bit, "is_locked", "Archive is locked"),
(Bit, "is_solid", "Whether files can be extracted separately"),
(Bit, "new_numbering", "New numbering, or compressed comment"), # From unrar
(Bit, "has_authenticity_information", "The integrity/authenticity of the archive can be checked"),
(Bit, "is_protected", "The integrity/authenticity of the archive can be checked"),
(Bit, "is_passworded", "Needs a password to be decrypted"),
(Bit, "is_first_vol", "Whether it is the first volume"),
(Bit, "is_encrypted", "Whether the encryption version is present"),
(NullBits, "internal", 4, "Reserved for 'internal use'"),
) + commonFlags
def archiveFlags(s):
yield ArchiveFlags(s, "flags", "Archiver block flags")
def archiveHeader(s):
yield NullBytes(s, "reserved[]", 2, "Reserved word")
yield NullBytes(s, "reserved[]", 4, "Reserved dword")
def commentHeader(s):
yield filesizeHandler(UInt16(s, "total_size", "Comment header size + comment size"))
yield filesizeHandler(UInt16(s, "uncompressed_size", "Uncompressed comment size"))
@ -101,16 +108,19 @@ def commentHeader(s):
yield UInt8(s, "packing_method", "Comment packing method")
yield UInt16(s, "comment_crc16", "Comment CRC")
def commentBody(s):
size = s["total_size"].value - s.current_size
if size > 0:
yield RawBytes(s, "comment_data", size, "Compressed comment data")
def signatureHeader(s):
yield TimeDateMSDOS32(s, "creation_time")
yield filesizeHandler(UInt16(s, "arc_name_size"))
yield filesizeHandler(UInt16(s, "user_name_size"))
def recoveryHeader(s):
yield filesizeHandler(UInt32(s, "total_size"))
yield textHandler(UInt8(s, "version"), hexadecimal)
@ -118,6 +128,7 @@ def recoveryHeader(s):
yield UInt32(s, "total_blocks")
yield RawBytes(s, "mark", 8)
def avInfoHeader(s):
yield filesizeHandler(UInt16(s, "total_size", "Total block size"))
yield UInt8(s, "version", "Version needed to decompress", handler=hexadecimal)
@ -125,13 +136,16 @@ def avInfoHeader(s):
yield UInt8(s, "av_version", "Version for AV", handler=hexadecimal)
yield UInt32(s, "av_crc", "AV info CRC32", handler=hexadecimal)
def avInfoBody(s):
size = s["total_size"].value - s.current_size
if size > 0:
yield RawBytes(s, "av_info_data", size, "AV info")
class FileFlags(FieldSet):
static_size = 16
def createFields(self):
yield Bit(self, "continued_from", "File continued from previous volume")
yield Bit(self, "continued_in", "File continued in next volume")
@ -149,17 +163,21 @@ class FileFlags(FieldSet):
for field in commonFlags:
yield field[0](self, *field[1:])
def fileFlags(s):
yield FileFlags(s, "flags", "File block flags")
class ExtTimeFlags(FieldSet):
static_size = 16
def createFields(self):
for name in ['arctime', 'atime', 'ctime', 'mtime']:
yield Bits(self, "%s_count" % name, 2, "Number of %s bytes" % name)
yield Bit(self, "%s_onesec" % name, "Add one second to the timestamp?")
yield Bit(self, "%s_present" % name, "Is %s extra time present?" % name)
class ExtTime(FieldSet):
def createFields(self):
yield ExtTimeFlags(self, "time_flags")
@ -169,7 +187,8 @@ class ExtTime(FieldSet):
yield TimeDateMSDOS32(self, "%s" % name, "%s DOS timestamp" % name)
count = self['time_flags/%s_count' % name].value
if count:
yield Bits(self, "%s_remainder" % name, 8 * count,
"%s extra precision time (in 100ns increments)" % name)
def createDescription(self):
out = 'Time extension'
@ -185,13 +204,14 @@ class ExtTime(FieldSet):
delta = timedelta()
if self['time_flags/%s_onesec' % name].value:
delta += timedelta(seconds=1)
if '%s_remainder' % name in self:
delta += timedelta(microseconds=self['%s_remainder' % name].value / 10.0)
pieces.append('%s=%s' % (name, basetime + delta))
if pieces:
out += ': ' + ', '.join(pieces)
return out
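# The optional remainder bytes refine the 2-second-resolution DOS timestamp in
# 100 ns units, converted above via value / 10.0 microseconds; for example, a
# remainder of 5000 adds 5000 * 100 ns = 0.5 ms to the base time.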
def specialHeader(s, is_file):
yield filesizeHandler(UInt32(s, "compressed_size", "Compressed size (bytes)"))
yield filesizeHandler(UInt32(s, "uncompressed_size", "Uncompressed size (bytes)"))
@ -225,9 +245,11 @@ def specialHeader(s, is_file):
if s["flags/has_ext_time"].value:
yield ExtTime(s, "extra_time")
def fileHeader(s):
return specialHeader(s, True)
def fileBody(s):
# File compressed data
size = s["compressed_size"].value
@ -236,33 +258,40 @@ def fileBody(s):
if size > 0:
yield RawBytes(s, "compressed_data", size, "File compressed data")
def fileDescription(tag):
def _fileDescription(s):
return "%s: %s (%s)" % \
(tag, s["filename"].display, s["compressed_size"].display)
return _fileDescription
def newSubHeader(s):
return specialHeader(s, False)
class EndFlags(StaticFieldSet):
format = (
(Bit, "has_next_vol", "Whether there is another next volume"),
(Bit, "has_data_crc", "Whether a CRC value is present"),
(Bit, "rev_space"),
(Bit, "has_vol_number", "Whether the volume number is present"),
(NullBits, "unused[]", 10),
) + commonFlags
def endFlags(s):
yield EndFlags(s, "flags", "End block flags")
class BlockFlags(StaticFieldSet):
static_size = 16
format = (
(NullBits, "unused[]", 14),
) + commonFlags
class Block(FieldSet):
BLOCK_INFO = {
@ -289,19 +318,19 @@ class Block(FieldSet):
self.createDescription = lambda: desc(self)
elif desc:
self._description = desc
if parseFlags: self.parseFlags = lambda: parseFlags(self)
if parseHeader: self.parseHeader = lambda: parseHeader(self)
if parseBody: self.parseBody = lambda: parseBody(self)
else:
self.info("Processing as unknown block of type %u" % t)
self._size = 8 * self["block_size"].value
if t == 0x74 or t == 0x7A:
self._size += 8 * self["compressed_size"].value
if "is_large" in self["flags"] and self["flags/is_large"].value:
self._size += 8 * self["large_size"].value
elif "has_added_size" in self:
self._size += 8 * self["added_size"].value
# TODO: check if any other member is needed here
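# Overall layout: block_size covers only the header, so the total field size
# is extended by whatever data follows it. For example, a file block
# (t == 0x74) with block_size 45 and compressed_size 1000 spans
# 45 + 1000 = 1045 bytes.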
def createFields(self):
@ -320,7 +349,7 @@ class Block(FieldSet):
yield field
# Finish header with stuff of unknown size
size = self["block_size"].value - (self.current_size // 8)
if size > 0:
yield RawBytes(self, "unknown", size, "Unknown data (UInt32 probably)")
@ -336,28 +365,29 @@ class Block(FieldSet):
def parseHeader(self):
if "has_added_size" in self["flags"] and \
self["flags/has_added_size"].value:
yield filesizeHandler(UInt32(self, "added_size",
"Supplementary block size"))
"Supplementary block size"))
def parseBody(self):
"""
Parse what is left of the block
"""
size = self["block_size"].value - (self.current_size // 8)
if "has_added_size" in self["flags"] and self["flags/has_added_size"].value:
size += self["added_size"].value
if size > 0:
yield RawBytes(self, "body", size, "Body data")
class RarFile(Parser):
MAGIC = "Rar!\x1A\x07\x00"
PARSER_TAGS = {
"id": "rar",
"category": "archive",
"file_ext": ("rar",),
"mime": (u"application/x-rar-compressed", ),
"min_size": 7*8,
"mime": (u"application/x-rar-compressed",),
"min_size": 7 * 8,
"magic": ((MAGIC, 0),),
"description": "Roshal archive (RAR)",
}
@ -378,6 +408,5 @@ class RarFile(Parser):
end = MAX_FILESIZE * 8
pos = self.stream.searchBytes("\xC4\x3D\x7B\x00\x40\x07\x00", start, end)
if pos is not None:
return pos + 7 * 8
return None
@ -4,14 +4,15 @@ RPM archive parser.
Author: Victor Stinner, 1st December 2005.
"""
from hachoir.parser import Parser
from hachoir.field import (FieldSet, ParserError,
UInt8, UInt16, UInt32, UInt64, Enum,
NullBytes, Bytes, RawBytes, SubFile,
Character, CString, String)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.parser.archive.gzip_parser import GzipParser
from hachoir.parser.archive.bzip2_parser import Bzip2Parser
class ItemContent(FieldSet):
format_type = {
@ -38,11 +39,11 @@ class ItemContent(FieldSet):
cls = self.format_type[type]
count = item["count"].value
if cls is RawBytes: # or type == 8:
if cls is RawBytes:
args = (self, "value", count)
else:
args = (self, "value") # cls is CString
count = 1
else:
if 1 < count:
@ -52,6 +53,7 @@ class ItemContent(FieldSet):
for index in xrange(count):
yield cls(*args)
class Item(FieldSet):
type_name = {
0: "NULL",
@ -74,11 +76,11 @@ class Item(FieldSet):
1005: "GnuPG signature",
1006: "PGP5 signature",
1007: "Uncompressed payload size (bytes)",
256+8: "Broken SHA1 header digest",
256+9: "Broken SHA1 header digest",
256+13: "Broken SHA1 header digest",
256+11: "DSA header signature",
256+12: "RSA header signature"
256 + 8: "Broken SHA1 header digest",
256 + 9: "Broken SHA1 header digest",
256 + 13: "Broken SHA1 header digest",
256 + 11: "DSA header signature",
256 + 12: "RSA header signature"
}
def __init__(self, parent, name, description=None, tag_name_dict=None):
@ -96,6 +98,7 @@ class Item(FieldSet):
def createDescription(self):
return "Item: %s (%s)" % (self["tag"].display, self["type"].display)
class ItemHeader(Item):
tag_name = {
61: "Current image",
@ -171,14 +174,12 @@ class ItemHeader(Item):
1068: "Trigger flags",
1069: "Trigger index",
1079: "Verify script",
# TODO: Finish the list (id 1070..1162 using rpm library source code)
}
def __init__(self, parent, name, description=None):
Item.__init__(self, parent, name, description, self.tag_name)
class PropertySet(FieldSet):
def __init__(self, parent, name, *args):
@ -203,27 +204,29 @@ class PropertySet(FieldSet):
items.append(item)
# Sort items by their offset
items.sort(key=lambda field: field["offset"].value)
# Read item content
start = self.current_size // 8
for item in items:
offset = item["offset"].value
diff = offset - (self.current_size // 8 - start)
if 0 < diff:
yield NullBytes(self, "padding[]", diff)
yield ItemContent(self, "content[]", item)
size = start + self["size"].value - self.current_size // 8
if 0 < size:
yield NullBytes(self, "padding[]", size)
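# Item headers carry (tag, type, offset, count) pointing into a shared content
# area; sorting by offset and padding any gap reconstructs that area
# sequentially. E.g. if the next item starts at offset 16 but only 10 bytes of
# content have been read so far, 6 bytes of padding are emitted first.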
class RpmFile(Parser):
PARSER_TAGS = {
"id": "rpm",
"category": "archive",
"file_ext": ("rpm",),
"mime": (u"application/x-rpm",),
"min_size": (96 + 16 + 16)*8, # file header + checksum + content header
# file header + checksum + content header
"min_size": (96 + 16 + 16) * 8,
"magic": (('\xED\xAB\xEE\xDB', 0),),
"description": "RPM package"
}
@ -255,7 +258,7 @@ class RpmFile(Parser):
yield PropertySet(self, "checksum", "Checksum (signature)")
yield PropertySet(self, "header", "Header")
if self._size is None: # TODO: is it possible to handle piped input?
raise NotImplementedError
size = (self._size - self.current_size) // 8
@ -264,4 +267,3 @@ class RpmFile(Parser):
yield SubFile(self, "content", size, "bzip2 content", parser=Bzip2Parser)
else:
yield SubFile(self, "content", size, "gzip content", parser=GzipParser)
@ -12,16 +12,15 @@ Updated by: Robert Xiao
Date: February 26 2011
"""
from hachoir.parser import Parser
from hachoir.field import (Field, FieldSet, ParserError, CString,
Enum, Bit, Bits, UInt8, UInt32, UInt64,
Bytes, RawBytes, TimestampWin64)
from hachoir.stream import StringInputStream
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import alignValue, humanFilesize, makePrintable
from hachoir.parser.common.msdos import MSDOSFileAttr32
try:
from pylzma import decompress as lzmadecompress
@ -29,11 +28,13 @@ try:
except ImportError:
has_lzma = False
class SZUInt64(Field):
"""
Variable length UInt64, where the first byte gives both the number of bytes
needed and the upper byte value.
"""
def __init__(self, parent, name, max_size=None, description=None):
Field.__init__(self, parent, name, size=8, description=description)
value = 0
@ -43,101 +44,168 @@ class SZUInt64(Field):
for i in xrange(8):
addr += 8
if not (firstByte & mask):
value += ((firstByte & (mask - 1)) << (8 * i))
break
value |= (parent.stream.readBits(addr, 8, LITTLE_ENDIAN) << (8 * i))
mask >>= 1
self._size += 8
self.createValue = lambda: value
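# Worked example of the encoding: the bytes C2 34 12 decode to 0x021234.
# The two leading 1-bits of 0xC2 pull in two extra little-endian bytes
# (0x34, 0x12), and the remaining low bits of the first byte (0x02) supply
# the most significant byte.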
kEnd = 0x00
kHeader = 0x01
kArchiveProperties = 0x02
kAdditionalStreamsInfo = 0x03
kMainStreamsInfo = 0x04
kFilesInfo = 0x05
kPackInfo = 0x06
kUnPackInfo = 0x07
kSubStreamsInfo = 0x08
kSize = 0x09
kCRC = 0x0A
kFolder = 0x0B
kCodersUnPackSize = 0x0C
kNumUnPackStream = 0x0D
kEmptyStream = 0x0E
kEmptyFile = 0x0F
kAnti = 0x10
kName = 0x11
kCreationTime = 0x12
kLastAccessTime = 0x13
kLastWriteTime = 0x14
kWinAttributes = 0x15
kComment = 0x16
kEncodedHeader = 0x17
kStartPos = 0x18
kDummy = 0x19
PROP_IDS = {
kEnd: 'kEnd',
kHeader: 'kHeader',
kArchiveProperties: 'kArchiveProperties',
kAdditionalStreamsInfo: 'kAdditionalStreamsInfo',
kMainStreamsInfo: 'kMainStreamsInfo',
kFilesInfo: 'kFilesInfo',
kPackInfo: 'kPackInfo',
kUnPackInfo: 'kUnPackInfo',
kSubStreamsInfo: 'kSubStreamsInfo',
kSize: 'kSize',
kCRC: 'kCRC',
kFolder: 'kFolder',
kCodersUnPackSize: 'kCodersUnPackSize',
kNumUnPackStream: 'kNumUnPackStream',
kEmptyStream: 'kEmptyStream',
kEmptyFile: 'kEmptyFile',
kAnti: 'kAnti',
kName: 'kName',
kCreationTime: 'kCreationTime',
kLastAccessTime: 'kLastAccessTime',
kLastWriteTime: 'kLastWriteTime',
kWinAttributes: 'kWinAttributes',
kComment: 'kComment',
kEncodedHeader: 'kEncodedHeader',
kStartPos: 'kStartPos',
kDummy: 'kDummy',
}
PROP_DESC = {
kEnd: 'End-of-header marker',
kHeader: 'Archive header',
kArchiveProperties: 'Archive properties',
kAdditionalStreamsInfo: 'AdditionalStreamsInfo',
kMainStreamsInfo: 'MainStreamsInfo',
kFilesInfo: 'FilesInfo',
kPackInfo: 'PackInfo',
kUnPackInfo: 'UnPackInfo',
kSubStreamsInfo: 'SubStreamsInfo',
kSize: 'Size',
kCRC: 'CRC',
kFolder: 'Folder',
kCodersUnPackSize: 'CodersUnPackSize',
kNumUnPackStream: 'NumUnPackStream',
kEmptyStream: 'EmptyStream',
kEmptyFile: 'EmptyFile',
kAnti: 'Anti',
kName: 'Name',
kCreationTime: 'CreationTime',
kLastAccessTime: 'LastAccessTime',
kLastWriteTime: 'LastWriteTime',
kWinAttributes: 'WinAttributes',
kComment: 'Comment',
kEncodedHeader: 'Encoded archive header',
kStartPos: 'Unknown',
kDummy: 'Dummy entry',
}
def ReadNextByte(self):
return self.stream.readBits(self.absolute_address + self.current_size, 8, self.endian)
def PropID(self, name):
return Enum(UInt8(self, name), PROP_IDS)
class SevenZipBitVector(FieldSet):
def __init__(self, parent, name, num, has_all_byte=False, **args):
FieldSet.__init__(self, parent, name, **args)
self.has_all_byte = has_all_byte
self.num = num
def createFields(self):
if self.has_all_byte:
yield Enum(UInt8(self, "all_defined"), {0: 'False', 1: 'True'})
if self['all_defined'].value:
return
nbytes = alignValue(self.num, 8) // 8
ctr = 0
for i in xrange(nbytes):
for j in reversed(xrange(8)):
yield Bit(self, "bit[%d]" % (ctr + j))
ctr += 8
def isAllDefined(self):
return self.has_all_byte and self['all_defined'].value
def isDefined(self, index):
if self.isAllDefined():
return True
return self['bit[%d]' % index].value
def createValue(self):
if self.isAllDefined():
return range(self.num)
return [i for i in xrange(self.num) if self['bit[%d]' % i].value]
def createDisplay(self):
if self.isAllDefined():
return 'all'
return ','.join(str(i) for i in self.value)
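# A compact presence map: bit[i] tells whether item i has a value, and when
# has_all_byte is set a single leading byte of 1 can declare every item
# defined so the per-bit vector is skipped entirely.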
class ArchiveProperty(FieldSet):
def createFields(self):
yield PropID(self, "id")
size = SZUInt64(self, "size")
yield size
if size.value:
yield RawBytes(self, "data", size.value)
def createDescription(self):
return self['id'].display
class ArchiveProperties(FieldSet):
def createFields(self):
yield PropID(self, "id")
@ -148,21 +216,24 @@ class ArchiveProperties(FieldSet):
break
yield ArchiveProperty(self, "prop[]")
class Digests(FieldSet):
def __init__(self, parent, name, num_digests, digest_desc=None, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.num_digests = num_digests
if digest_desc is None:
self.digest_desc = ['stream %d' % i for i in xrange(num_digests)]
else:
self.digest_desc = digest_desc
def createFields(self):
yield PropID(self, "id")
definearr = SevenZipBitVector(self, "defined", self.num_digests, has_all_byte=True)
yield definearr
for index in definearr.value:
yield textHandler(UInt32(self, "digest[]",
"Digest for %s" % self.digest_desc[index]), hexadecimal)
"Digest for %s" % self.digest_desc[index]), hexadecimal)
class PackInfo(FieldSet):
def createFields(self):
@ -186,6 +257,7 @@ class PackInfo(FieldSet):
else:
raise ParserError("Unexpected ID (%i)" % uid)
METHODS = {
"\0": "Copy",
"\3": "Delta",
@ -193,8 +265,8 @@ METHODS = {
"\5": "PowerPC",
"\6": "IA64",
"\7": "ARM_LE",
"\8": "ARMT_LE", # thumb
"\9": "SPARC",
"\x08": "ARMT_LE", # thumb
"\x09": "SPARC",
"\x21": "LZMA2",
"\2\3\2": "Common-Swap-2",
"\2\3\4": "Common-Swap-4",
@ -207,7 +279,7 @@ METHODS = {
"\3\3\5\1": "7z-Branch-ARM-LE",
"\3\3\6\5": "7z-Branch-M68-BE",
"\3\3\7\1": "7z-Branch-ARMT-LE",
"\3\3\8\5": "7z-Branch-SPARC-BE",
"\3\3\x08\5": "7z-Branch-SPARC-BE",
"\3\4\1": "7z-PPMD",
"\3\x7f\1": "7z-Experimental",
"\4\0": "Reserved",
@ -231,33 +303,37 @@ METHODS = {
"\4\5": "Z",
"\4\6": "LZH",
"\4\7": "7z-Reserved",
"\4\8": "CAB",
"\4\9\1": "NSIS-Deflate",
"\4\9\1": "NSIS-BZip2",
"\4\x08": "CAB",
"\4\x09\1": "NSIS-Deflate",
"\4\x09\2": "NSIS-BZip2",
"\6\0": "Crypto-Reserved",
"\6\1\x00": "Crypto-AES128-ECB",
"\6\1\x01": "Crypto-AES128-CBC",
"\6\1\x02": "Crypto-AES128-CFB",
"\6\1\x03": "Crypto-AES128-OFB",
"\6\1\x04": "Crypto-AES128-CTR",
"\6\1\x40": "Crypto-AES192-ECB",
"\6\1\x41": "Crypto-AES192-CBC",
"\6\1\x42": "Crypto-AES192-CFB",
"\6\1\x43": "Crypto-AES192-OFB",
"\6\1\x44": "Crypto-AES192-CTR",
"\6\1\x80": "Crypto-AES256-ECB",
"\6\1\x81": "Crypto-AES256-CBC",
"\6\1\x82": "Crypto-AES256-CFB",
"\6\1\x83": "Crypto-AES256-OFB",
"\6\1\x84": "Crypto-AES256-CTR",
"\6\1\xc0": "Crypto-AES-ECB",
"\6\1\xc1": "Crypto-AES-CBC",
"\6\1\xc2": "Crypto-AES-CFB",
"\6\1\xc3": "Crypto-AES-OFB",
"\6\1\xc4": "Crypto-AES-CTR",
"\6\7": "Crypto-Reserved",
"\6\x0f": "Crypto-Reserved",
"\6\xf0": "Crypto-Misc",
"\6\xf1\1\1": "Crypto-Zip",
"\6\xf1\3\2": "Crypto-RAR-Unknown",
"\6\xf1\3\3": "Crypto-RAR-29", # AES128
"\6\xf1\7\1": "Crypto-7z", # AES256
"\6\xf1\3\3": "Crypto-RAR-29", # AES128
"\6\xf1\7\1": "Crypto-7z", # AES256
"\7\0": "Hash-None",
"\7\1": "Hash-CRC",
"\7\2": "Hash-SHA1",
@ -265,10 +341,11 @@ METHODS = {
"\7\4": "Hash-SHA384",
"\7\5": "Hash-SHA512",
"\7\xf0": "Hash-Misc",
"\7\xf1\3\3": "Hash-RAR-29", # modified SHA1
"\7\xf1\7\1": "Hash-7z", # SHA256
"\7\xf1\3\3": "Hash-RAR-29", # modified SHA1
"\7\xf1\7\1": "Hash-7z", # SHA256
}
class Coder(FieldSet):
def createFields(self):
yield Bits(self, "id_size", 4)
@ -288,13 +365,16 @@ class Coder(FieldSet):
size = SZUInt64(self, "properties_size")
yield size
yield RawBytes(self, "properties", size.value)
def _get_num_streams(self, direction):
if self['is_not_simple'].value:
return self['num_stream_%s' % direction].value
return 1
in_streams = property(lambda self: self._get_num_streams('in'))
out_streams = property(lambda self: self._get_num_streams('out'))
class CoderList(FieldSet):
def createFields(self):
while not self.eof:
@ -303,6 +383,7 @@ class CoderList(FieldSet):
if not field['is_not_last_method'].value:
break
class BindPairInfo(FieldSet):
def createFields(self):
# 64-bit values, though they are in fact cast down to 32 bits
@ -311,6 +392,7 @@ class BindPairInfo(FieldSet):
self.info("Indexes: IN=%u OUT=%u" % \
(self["in_index"].value, self["out_index"].value))
class Folder(FieldSet):
def createFields(self):
yield SZUInt64(self, "num_coders")
@ -330,7 +412,7 @@ class Folder(FieldSet):
# Bind pairs
self.info("out streams: %u" % out_streams)
for index in xrange(out_streams - 1):
yield BindPairInfo(self, "bind_pair[]")
# Packed streams
@ -339,12 +421,15 @@ class Folder(FieldSet):
if packed_streams > 1:
for index in xrange(packed_streams):
yield SZUInt64(self, "pack_stream[]")
def _get_num_streams(self, direction):
list(self)
return getattr(self, '_' + direction + '_streams')
in_streams = property(lambda self: self._get_num_streams('in'))
out_streams = property(lambda self: self._get_num_streams('out'))
class UnpackInfo(FieldSet):
def createFields(self):
yield PropID(self, "id")
@ -370,7 +455,7 @@ class UnpackInfo(FieldSet):
for folder_index in xrange(num):
folder = self["folder[%u]" % folder_index]
for index in xrange(folder.out_streams):
yield SZUInt64(self, "unpack_size[%d][%d]" % (folder_index, index))
# Extract digests
while not self.eof:
@ -383,11 +468,12 @@ class UnpackInfo(FieldSet):
else:
raise ParserError("Unexpected ID (%i)" % uid)
class SubStreamInfo(FieldSet):
def createFields(self):
yield PropID(self, "id")
num_folders = self['../unpack_info/num_folders'].value
num_unpackstreams = [1] * num_folders
while not self.eof:
uid = ReadNextByte(self)
if uid == kEnd:
@ -403,19 +489,20 @@ class SubStreamInfo(FieldSet):
yield PropID(self, "size_marker")
for i in xrange(num_folders):
# The last substream's size is the stream size minus the other substreams.
for j in xrange(num_unpackstreams[i] - 1):
yield SZUInt64(self, "unpack_size[%d][%d]" % (i, j))
elif uid == kCRC:
digests = []
for i in xrange(num_folders):
if num_unpackstreams[i] == 1 and 'digests' in self['../unpack_info']:
continue
for j in xrange(num_unpackstreams[i]):
digests.append('folder %i, stream %i' % (i, j))
yield Digests(self, "digests", len(digests), digests)
else:
raise ParserError("Unexpected ID (%i)" % uid)
class StreamsInfo(FieldSet):
def createFields(self):
yield PropID(self, "id")
@ -433,31 +520,39 @@ class StreamsInfo(FieldSet):
else:
raise ParserError("Unexpected ID (%i)" % uid)
class EncodedHeader(StreamsInfo):
pass
class EmptyStreamProperty(FieldSet):
def createFields(self):
yield PropID(self, "id")
yield SZUInt64(self, "size")
yield SevenZipBitVector(self, "vec", self['../num_files'].value)
def createValue(self):
return self['vec'].value
def createDisplay(self):
return self['vec'].display
class EmptyFileProperty(FieldSet):
def createFields(self):
yield PropID(self, "id")
yield SZUInt64(self, "size")
empty_streams = self['../empty_streams/vec'].value
yield SevenZipBitVector(self, "vec", len(empty_streams))
def createValue(self):
empty_streams = self['../empty_streams/vec'].value
return [empty_streams[i] for i in self['vec'].value]
def createDisplay(self):
return ','.join(str(i) for i in self.value)
class FileTimeProperty(FieldSet):
def createFields(self):
yield PropID(self, "id")
@ -469,7 +564,8 @@ class FileTimeProperty(FieldSet):
yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
else:
for index in definearr.value:
yield TimestampWin64(self, "timestamp[%d]" % index)
class FileNames(FieldSet):
def createFields(self):
@ -480,7 +576,8 @@ class FileNames(FieldSet):
yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
else:
for index in xrange(self['../num_files'].value):
yield CString(self, "name[%d]" % index, charset="UTF-16-LE")
class FileAttributes(FieldSet):
def createFields(self):
@ -493,7 +590,8 @@ class FileAttributes(FieldSet):
yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
else:
for index in definearr.value:
yield MSDOSFileAttr32(self, "attributes[%d]" % index)
class FilesInfo(FieldSet):
def createFields(self):
@ -520,9 +618,12 @@ class FilesInfo(FieldSet):
yield FileNames(self, "filenames")
elif uid == kWinAttributes:
yield FileAttributes(self, "attributes")
elif uid == kDummy:
yield ArchiveProperty(self, "dummy[]")
else:
yield ArchiveProperty(self, "prop[]")
class Header(FieldSet):
def createFields(self):
yield PropID(self, "id")
@ -542,10 +643,12 @@ class Header(FieldSet):
else:
raise ParserError("Unexpected ID %u" % uid)
class NextHeader(FieldSet):
def __init__(self, parent, name, desc="Next header"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8 * self["/signature/start_hdr/next_hdr_size"].value
def createFields(self):
uid = ReadNextByte(self)
if uid == kHeader:
@ -555,6 +658,7 @@ class NextHeader(FieldSet):
else:
raise ParserError("Unexpected ID %u" % uid)
class NextHeaderParser(Parser):
PARSER_TAGS = {
}
@ -572,32 +676,37 @@ class NextHeaderParser(Parser):
def validate(self):
return True
class CompressedData(Bytes):
def __init__(self, parent, name, length, decompressor, description=None,
parser=None, filename=None, mime_type=None, parser_class=None):
if filename:
if not isinstance(filename, unicode):
filename = makePrintable(filename, "ISO-8859-1")
if not description:
description = 'File "%s" (%s)' % (
filename, humanFilesize(length))
Bytes.__init__(self, parent, name, length, description)
self.setupInputStream(decompressor, parser,
filename, mime_type, parser_class)
def setupInputStream(self, decompressor, parser, filename, mime_type, parser_class):
def createInputStream(cis, **args):
tags = args.setdefault("tags", [])
if parser_class:
tags.append(("class", parser_class))
if parser is not None:
tags.append(("id", parser.PARSER_TAGS["id"]))
if mime_type:
tags.append(("mime", mime_type))
if filename:
tags.append(("filename", filename))
# print args
return StringInputStream(decompressor(self.value), **args)
self.setSubIStream(createInputStream)
def get_header_decompressor(self):
unpack_info = self['/next_hdr/encoded_hdr/unpack_info']
assert unpack_info['num_folders'].value == 1
@ -605,21 +714,24 @@ def get_header_decompressor(self):
method = METHODS[coder['id'].value]
if method == 'Copy':
return lambda data: data
elif method == '7z-LZMA' and has_lzma:
props = coder['properties'].value
length = unpack_info['unpack_size[0][0]'].value
return lambda data: lzmadecompress(props + data, maxlength=length)
def get_header_field(self, name, size, description=None):
decompressor = get_header_decompressor(self)
if decompressor is None:
return RawBytes(self, name, size, description=description)
return CompressedData(self, name, size, decompressor, description=description, parser_class=NextHeaderParser)
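# If the header was stored with an unsupported codec (or pylzma is not
# installed), get_header_decompressor() returns None and the encoded header
# is surfaced as plain RawBytes instead of being expanded.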
class Body(FieldSet):
def __init__(self, parent, name, desc="Body data"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8 * self["/signature/start_hdr/next_hdr_offset"].value
def createFields(self):
if "encoded_hdr" in self["/next_hdr"]:
pack_size = sum([s.value for s in self.array("/next_hdr/encoded_hdr/pack_info/pack_size")])
@ -629,32 +741,37 @@ class Body(FieldSet):
# Here we could check if copy method was used to "compress" it,
# but this never happens, so just output "compressed file info"
yield get_header_field(self, "compressed_file_info", pack_size,
"Compressed file information")
size = (self._size//8) - pack_size - body_size
"Compressed file information")
size = (self._size // 8) - pack_size - body_size
if size > 0:
yield RawBytes(self, "unknown_data", size)
elif "header" in self["/next_hdr"]:
yield RawBytes(self, "compressed_data", self._size // 8, "Compressed data")
class StartHeader(FieldSet):
static_size = 160
def createFields(self):
yield textHandler(UInt64(self, "next_hdr_offset",
"Next header offset"), hexadecimal)
"Next header offset"), hexadecimal)
yield UInt64(self, "next_hdr_size", "Next header size")
yield textHandler(UInt32(self, "next_hdr_crc",
"Next header CRC"), hexadecimal)
"Next header CRC"), hexadecimal)
class SignatureHeader(FieldSet):
static_size = 96 + StartHeader.static_size
def createFields(self):
yield Bytes(self, "signature", 6, "Signature Header")
yield UInt8(self, "major_ver", "Archive major version")
yield UInt8(self, "minor_ver", "Archive minor version")
yield textHandler(UInt32(self, "start_hdr_crc",
"Start header CRC"), hexadecimal)
"Start header CRC"), hexadecimal)
yield StartHeader(self, "start_hdr", "Start header")
class SevenZipParser(Parser):
MAGIC = "7z\xbc\xaf\x27\x1c"
PARSER_TAGS = {
@ -662,7 +779,7 @@ class SevenZipParser(Parser):
"category": "archive",
"file_ext": ("7z",),
"mime": (u"application/x-7z-compressed",),
"min_size": 32*8,
"min_size": 32 * 8,
"magic": ((MAGIC, 0),),
"description": "Compressed archive in 7z format"
}
@ -674,12 +791,12 @@ class SevenZipParser(Parser):
yield NextHeader(self, "next_hdr")
def validate(self):
if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
return "Invalid signature"
return True
def createContentSize(self):
size = self["/signature/start_hdr/next_hdr_offset"].value * 8
size += self["/signature/start_hdr/next_hdr_size"].value * 8
size += SignatureHeader.static_size
return size
@ -4,17 +4,18 @@ Tar archive parser.
Author: Victor Stinner
"""
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
Enum, UInt8, SubFile, String, NullBytes)
from hachoir.core.tools import humanFilesize, paddingSize, timestampUNIX
from hachoir.core.endian import BIG_ENDIAN
import re
class FileEntry(FieldSet):
type_name = {
# 48 is "0", 49 is "1", ...
0: u"Normal disk file (old format)",
48: u"Normal disk file",
49: u"Link to previously dumped file",
50: u"Symbolic link",
@ -50,13 +51,14 @@ class FileEntry(FieldSet):
yield String(self, "gname", 32, "Group name", strip=" \0", charset="ISO-8859-1")
yield String(self, "devmajor", 8, "Dev major", strip=" \0", charset="ASCII")
yield String(self, "devminor", 8, "Dev minor", strip=" \0", charset="ASCII")
yield String(self, "prefix", 155, "Prefix for filename", strip="\0", charset="ASCII")
yield NullBytes(self, "padding", 12, "Padding (zero)")
filesize = self.getOctal("size")
if filesize:
yield SubFile(self, "content", filesize, filename=self["name"].value)
size = paddingSize(self.current_size//8, 512)
size = paddingSize(self.current_size // 8, 512)
if size:
yield NullBytes(self, "padding_end", size, "Padding (512 align)")
@ -77,11 +79,14 @@ class FileEntry(FieldSet):
desc = "(terminator, empty header)"
else:
filename = self["name"].value
if self["prefix"].value:
filename = self["prefix"].value + '/' + filename
filesize = humanFilesize(self.getOctal("size"))
desc = "(%s: %s, %s)" % \
(filename, self["type"].display, filesize)
(filename, self["type"].display, filesize)
return "Tar File " + desc
class TarFile(Parser):
endian = BIG_ENDIAN
PARSER_TAGS = {
@ -89,15 +94,15 @@ class TarFile(Parser):
"category": "archive",
"file_ext": ("tar",),
"mime": (u"application/x-tar", u"application/x-gtar"),
"min_size": 512*8,
"magic": (("ustar \0", 257*8),),
"min_size": 512 * 8,
"magic": (("ustar \0", 257 * 8),),
"subfile": "skip",
"description": "TAR archive",
}
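# Note (derived from the ustar header layout, not in the original source):
# the magic sits at byte offset 257 because it follows name(100) + mode(8)
# + uid(8) + gid(8) + size(12) + mtime(12) + chksum(8) + typeflag(1)
# + linkname(100) = 257 bytes.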
_sign = re.compile("ustar *\0|[ \0]*$")
def validate(self):
if not self._sign.match(self.stream.readBytes(257*8, 8)):
if not self._sign.match(self.stream.readBytes(257 * 8, 8)):
return "Invalid magic number"
if self[0].name == "terminator":
return "Don't contain any file"
@ -121,4 +126,3 @@ class TarFile(Parser):
def createContentSize(self):
return self["terminator"].address + self["terminator"].size

View file

@ -5,33 +5,32 @@ Status: can read most important headers
Authors: Christophe Gisquet and Victor Stinner
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, ParserError,
Bit, Bits, Enum,
TimeDateMSDOS32, SubFile,
UInt8, UInt16, UInt32, UInt64,
String, PascalString16,
RawBytes)
from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir_core.error import HACHOIR_ERRORS
from hachoir_core.tools import makeUnicode
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_parser.common.deflate import Deflate
from hachoir.parser import Parser
from hachoir.field import (FieldSet, ParserError,
Bit, Bits, Enum,
TimeDateMSDOS32, SubFile,
UInt8, UInt16, UInt32, UInt64,
String, PascalString16,
RawBytes)
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir.core.tools import makeUnicode
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.parser.common.deflate import Deflate
MAX_FILESIZE = 1000 * 1024 * 1024
COMPRESSION_DEFLATE = 8
COMPRESSION_METHOD = {
0: u"no compression",
1: u"Shrunk",
2: u"Reduced (factor 1)",
3: u"Reduced (factor 2)",
4: u"Reduced (factor 3)",
5: u"Reduced (factor 4)",
6: u"Imploded",
7: u"Tokenizing",
8: u"Deflate",
9: u"Deflate64",
0: u"no compression",
1: u"Shrunk",
2: u"Reduced (factor 1)",
3: u"Reduced (factor 2)",
4: u"Reduced (factor 3)",
5: u"Reduced (factor 4)",
6: u"Imploded",
7: u"Tokenizing",
8: u"Deflate",
9: u"Deflate64",
10: u"PKWARE Imploding",
11: u"Reserved by PKWARE",
12: u"File is compressed using BZIP2 algorithm",
@ -45,22 +44,24 @@ COMPRESSION_METHOD = {
98: u"PPMd version I, Rev 1",
}
def ZipRevision(field):
return "%u.%u" % divmod(field.value, 10)
class ZipVersion(FieldSet):
static_size = 16
HOST_OS = {
0: u"FAT file system (DOS, OS/2, NT)",
1: u"Amiga",
2: u"VMS (VAX or Alpha AXP)",
3: u"Unix",
4: u"VM/CMS",
5: u"Atari",
6: u"HPFS file system (OS/2, NT 3.x)",
7: u"Macintosh",
8: u"Z-System",
9: u"CP/M",
0: u"FAT file system (DOS, OS/2, NT)",
1: u"Amiga",
2: u"VMS (VAX or Alpha AXP)",
3: u"Unix",
4: u"VM/CMS",
5: u"Atari",
6: u"HPFS file system (OS/2, NT 3.x)",
7: u"Macintosh",
8: u"Z-System",
9: u"CP/M",
10: u"TOPS-20",
11: u"NTFS file system (NT)",
12: u"SMS/QDOS",
@ -70,15 +71,20 @@ class ZipVersion(FieldSet):
16: u"BeOS (BeBox or PowerMac)",
17: u"Tandem",
}
def createFields(self):
yield textHandler(UInt8(self, "zip_version", "ZIP version"), ZipRevision)
yield Enum(UInt8(self, "host_os", "ZIP Host OS"), self.HOST_OS)
class ZipGeneralFlags(FieldSet):
static_size = 16
def createFields(self):
# Need the compression info from the parent, and that is the byte following
method = self.stream.readBits(self.absolute_address+16, 16, LITTLE_ENDIAN)
# Need the compression info from the parent, and that is the byte
# following
method = self.stream.readBits(
self.absolute_address + 16, 16, LITTLE_ENDIAN)
yield Bit(self, "is_encrypted", "File is encrypted?")
if method == 6:
@ -92,7 +98,7 @@ class ZipGeneralFlags(FieldSet):
3: "Super Fast compression"
}
yield Enum(Bits(self, "method", 2), NAME)
elif method == 14: #LZMA
elif method == 14: # LZMA
yield Bit(self, "lzma_eos", "LZMA stream is ended with a EndOfStream marker")
yield Bit(self, "unused[]")
else:
@ -108,11 +114,12 @@ class ZipGeneralFlags(FieldSet):
yield Bit(self, "encrypted_central_dir", "Selected data values in the Local Header are masked")
yield Bits(self, "unused[]", 2, "Unused")
class ExtraField(FieldSet):
EXTRA_FIELD_ID = {
0x0007: "AV Info",
0x0009: "OS/2 extended attributes (also Info-ZIP)",
0x000a: "PKWARE Win95/WinNT FileTimes", # undocumented!
0x000a: "PKWARE Win95/WinNT FileTimes", # undocumented!
0x000c: "PKWARE VAX/VMS (also Info-ZIP)",
0x000d: "PKWARE Unix",
0x000f: "Patch Descriptor",
@ -135,6 +142,7 @@ class ExtraField(FieldSet):
0x7855: "Info-ZIP Unix (new)",
0xfb4a: "SMS/QDOS",
}
def createFields(self):
yield Enum(UInt16(self, "field_id", "Extra field ID"),
self.EXTRA_FIELD_ID)
@ -143,11 +151,13 @@ class ExtraField(FieldSet):
if size.value > 0:
yield RawBytes(self, "field_data", size.value, "Unknown field data")
class ExtraFields(FieldSet):
def createFields(self):
while self.current_size < self.size:
yield ExtraField(self, "extra[]")
def ZipStartCommonFields(self):
yield ZipVersion(self, "version_needed", "Version needed")
yield ZipGeneralFlags(self, "flags", "General purpose flag")
@ -160,14 +170,17 @@ def ZipStartCommonFields(self):
yield UInt16(self, "filename_length", "Filename length")
yield UInt16(self, "extra_length", "Extra fields length")
def zipGetCharset(self):
if self["flags/uses_unicode"].value:
return "UTF-8"
else:
return "ISO-8859-15"
class ZipCentralDirectory(FieldSet):
HEADER = 0x02014b50
def createFields(self):
yield ZipVersion(self, "version_made_by", "Version made by")
for field in ZipStartCommonFields(self):
@ -184,8 +197,8 @@ class ZipCentralDirectory(FieldSet):
yield String(self, "filename", self["filename_length"].value,
"Filename", charset=charset)
if 0 < self["extra_length"].value:
yield ExtraFields(self, "extra", size=self["extra_length"].value*8,
description="Extra fields")
yield ExtraFields(self, "extra", size=self["extra_length"].value * 8,
description="Extra fields")
if 0 < self["comment_length"].value:
yield String(self, "comment", self["comment_length"].value,
"Comment", charset=charset)
@ -193,8 +206,10 @@ class ZipCentralDirectory(FieldSet):
def createDescription(self):
return "Central directory: %s" % self["filename"].display
class Zip64EndCentralDirectory(FieldSet):
HEADER = 0x06064b50
def createFields(self):
yield UInt64(self, "zip64_end_size",
"Size of zip64 end of central directory record")
@ -213,8 +228,10 @@ class Zip64EndCentralDirectory(FieldSet):
yield RawBytes(self, "data_sector", self["zip64_end_size"].value,
"zip64 extensible data sector")
class ZipEndCentralDirectory(FieldSet):
HEADER = 0x06054b50
def createFields(self):
yield UInt16(self, "number_disk", "Number of this disk")
yield UInt16(self, "number_disk2", "Number in the central dir")
@ -226,17 +243,20 @@ class ZipEndCentralDirectory(FieldSet):
yield UInt32(self, "offset", "Offset of start of central directory")
yield PascalString16(self, "comment", "ZIP comment")
class ZipDataDescriptor(FieldSet):
HEADER_STRING = "\x50\x4B\x07\x08"
HEADER = 0x08074B50
static_size = 96
def createFields(self):
yield textHandler(UInt32(self, "file_crc32",
"Checksum (CRC32)"), hexadecimal)
"Checksum (CRC32)"), hexadecimal)
yield filesizeHandler(UInt32(self, "file_compressed_size",
"Compressed size (bytes)"))
"Compressed size (bytes)"))
yield filesizeHandler(UInt32(self, "file_uncompressed_size",
"Uncompressed size (bytes)"))
"Uncompressed size (bytes)"))
class FileEntry(FieldSet):
HEADER = 0x04034B50
@ -255,19 +275,19 @@ class FileEntry(FieldSet):
def resync(self):
# Non-seekable output, search the next data descriptor
size = self.stream.searchBytesLength(ZipDataDescriptor.HEADER_STRING, False,
self.absolute_address+self.current_size)
self.absolute_address + self.current_size)
if size <= 0:
raise ParserError("Couldn't resync to %s" %
ZipDataDescriptor.HEADER_STRING)
yield self.data(size)
yield textHandler(UInt32(self, "header[]", "Header"), hexadecimal)
data_desc = ZipDataDescriptor(self, "data_desc", "Data descriptor")
#self.info("Resynced!")
# self.info("Resynced!")
yield data_desc
# The above could be checked anytime, but we prefer trying parsing
# than aborting
if self["crc32"].value == 0 and \
data_desc["file_compressed_size"].value != size:
data_desc["file_compressed_size"].value != size:
raise ParserError("Bad resync: position=>%i but data_desc=>%i" %
(size, data_desc["file_compressed_size"].value))
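# Background (standard ZIP behaviour, assumed here): a data descriptor
# (signature PK\x07\x08) trails the file data when the writer could not
# seek back to patch the sizes into the local header, e.g. streamed
# output; resync() scans forward for that signature to recover the length
# of the compressed data.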
@ -276,15 +296,14 @@ class FileEntry(FieldSet):
yield field
length = self["filename_length"].value
if length:
filename = String(self, "filename", length, "Filename",
charset=zipGetCharset(self))
yield filename
self.filename = filename.value
if self["extra_length"].value:
yield ExtraFields(self, "extra", size=self["extra_length"].value*8,
description="Extra fields")
yield ExtraFields(self, "extra", size=self["extra_length"].value * 8,
description="Extra fields")
size = self["compressed_size"].value
if size > 0:
yield self.data(size)
@ -296,24 +315,28 @@ class FileEntry(FieldSet):
def createDescription(self):
return "File entry: %s (%s)" % \
(self["filename"].value, self["compressed_size"].display)
(self["filename"].value, self["compressed_size"].display)
def validate(self):
if self["compression"].value not in COMPRESSION_METHOD:
return "Unknown compression method (%u)" % self["compression"].value
return ""
class ZipSignature(FieldSet):
HEADER = 0x05054B50
def createFields(self):
yield PascalString16(self, "signature", "Signature")
class Zip64EndCentralDirectoryLocator(FieldSet):
HEADER = 0x07064b50
def createFields(self):
yield UInt32(self, "disk_number", \
yield UInt32(self, "disk_number",
"Number of the disk with the start of the zip64 end of central directory")
yield UInt64(self, "relative_offset", \
yield UInt64(self, "relative_offset",
"Relative offset of the zip64 end of central directory record")
yield UInt32(self, "disk_total_number", "Total number of disks")
@ -370,7 +393,7 @@ class ZipFile(Parser):
"mime": tuple(MIME_TYPES.iterkeys()),
"magic": (("PK\3\4", 0),),
"subfile": "skip",
"min_size": (4 + 26)*8, # header + file entry
"min_size": (4 + 26) * 8, # header + file entry
"description": "ZIP archive"
}
@ -379,7 +402,7 @@ class ZipFile(Parser):
return "Invalid magic"
try:
file0 = self["file[0]"]
except HACHOIR_ERRORS, err:
except Exception as err:
return "Unable to get file #0"
err = file0.validate()
if err:
@ -391,7 +414,8 @@ class ZipFile(Parser):
self.signature = None
self.central_directory = []
while not self.eof:
header = textHandler(UInt32(self, "header[]", "Header"), hexadecimal)
header = textHandler(
UInt32(self, "header[]", "Header"), hexadecimal)
yield header
header = header.value
if header == FileEntry.HEADER:
@ -411,7 +435,8 @@ class ZipFile(Parser):
elif header == Zip64EndCentralDirectoryLocator.HEADER:
yield Zip64EndCentralDirectoryLocator(self, "end_locator", "ZIP64 End of central directory locator")
else:
raise ParserError("Error, unknown ZIP header (0x%08X)." % header)
raise ParserError(
"Error, unknown ZIP header (0x%08X)." % header)
def createMimeType(self):
if self["file[0]/filename"].value == "mimetype":
@ -431,6 +456,5 @@ class ZipFile(Parser):
end = MAX_FILESIZE * 8
end = self.stream.searchBytes("PK\5\6", start, end)
if end is not None:
return end + 22*8
return end + 22 * 8
return None
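# Note (from the ZIP layout, assumed unchanged here): 22 bytes is the
# fixed size of the End of Central Directory record (4-byte signature,
# four UInt16 disk/entry counts, two UInt32 size/offset fields, 2-byte
# comment length), valid when the trailing archive comment is empty.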

View file

@ -5,13 +5,14 @@ Creation date: July 9 2007
"""
from hachoir_parser import Parser
from hachoir_core.field import (Bit, Bits, Field, Int16, UInt32,
Enum, FieldSet, GenericFieldSet,
PaddingBits, ParserError, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.tools import paddingSize, alignValue
from hachoir.parser import Parser
from hachoir.field import (Bit, Bits, Field, Int16, UInt32,
Enum, FieldSet, GenericFieldSet,
PaddingBits, ParserError, RawBytes)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import paddingSize, alignValue
def extend_data(data, length, offset):
"""Extend data using a length and an offset."""
@ -19,30 +20,33 @@ def extend_data(data, length, offset):
new_data = data[-offset:] * (alignValue(length, offset) // offset)
return data + new_data[:length]
else:
return data + data[-offset:-offset+length]
return data + data[-offset:-offset + length]
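# Illustrative doctest-style examples of extend_data (not in the original
# source):
#     >>> extend_data("abcde", 3, 2)   # copy 3 bytes from distance 2
#     'abcdeded'
#     >>> extend_data("abcde", 2, 4)   # copy 2 bytes from distance 4
#     'abcdebc'
# When length exceeds offset the copied region overlaps itself, which is
# why the first branch tiles data[-offset:] before truncating to length.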
def build_tree(lengths):
"""Build a Huffman tree from a list of lengths.
The ith entry of the input list is the length of the Huffman code corresponding to
integer i, or 0 if the integer i is unused."""
max_length = max(lengths) + 1
bit_counts = [0]*max_length
next_code = [0]*max_length
bit_counts = [0] * max_length
next_code = [0] * max_length
tree = {}
for i in lengths:
if i:
bit_counts[i] += 1
code = 0
for i in xrange(1, len(bit_counts)):
next_code[i] = code = (code + bit_counts[i-1]) << 1
next_code[i] = code = (code + bit_counts[i - 1]) << 1
for i, ln in enumerate(lengths):
if ln:
tree[(ln, next_code[ln])] = i
next_code[ln] += 1
return tree
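# Illustrative doctest-style example of build_tree (not in the original
# source): the canonical codes for lengths [2, 1, 3, 3] are 10, 0, 110
# and 111, keyed as (bit length, code value) -> symbol:
#     >>> build_tree([2, 1, 3, 3]) == {(1, 0): 1, (2, 2): 0, (3, 6): 2, (3, 7): 3}
#     True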
class HuffmanCode(Field):
"""Huffman code. Uses tree parameter as the Huffman tree."""
def __init__(self, parent, name, tree, description=None):
Field.__init__(self, parent, name, 0, description)
@ -61,111 +65,114 @@ class HuffmanCode(Field):
addr += 1
self.huffvalue = value
self.realvalue = tree[(self.size, value)]
def createValue(self):
return self.huffvalue
class DeflateBlock(FieldSet):
# code: (min, max, extrabits)
LENGTH_SYMBOLS = {257:(3,3,0),
258:(4,4,0),
259:(5,5,0),
260:(6,6,0),
261:(7,7,0),
262:(8,8,0),
263:(9,9,0),
264:(10,10,0),
265:(11,12,1),
266:(13,14,1),
267:(15,16,1),
268:(17,18,1),
269:(19,22,2),
270:(23,26,2),
271:(27,30,2),
272:(31,34,2),
273:(35,42,3),
274:(43,50,3),
275:(51,58,3),
276:(59,66,3),
277:(67,82,4),
278:(83,98,4),
279:(99,114,4),
280:(115,130,4),
281:(131,162,5),
282:(163,194,5),
283:(195,226,5),
284:(227,257,5),
285:(258,258,0)
LENGTH_SYMBOLS = {257: (3, 3, 0),
258: (4, 4, 0),
259: (5, 5, 0),
260: (6, 6, 0),
261: (7, 7, 0),
262: (8, 8, 0),
263: (9, 9, 0),
264: (10, 10, 0),
265: (11, 12, 1),
266: (13, 14, 1),
267: (15, 16, 1),
268: (17, 18, 1),
269: (19, 22, 2),
270: (23, 26, 2),
271: (27, 30, 2),
272: (31, 34, 2),
273: (35, 42, 3),
274: (43, 50, 3),
275: (51, 58, 3),
276: (59, 66, 3),
277: (67, 82, 4),
278: (83, 98, 4),
279: (99, 114, 4),
280: (115, 130, 4),
281: (131, 162, 5),
282: (163, 194, 5),
283: (195, 226, 5),
284: (227, 257, 5),
285: (258, 258, 0)
}
DISTANCE_SYMBOLS = {0:(1,1,0),
1:(2,2,0),
2:(3,3,0),
3:(4,4,0),
4:(5,6,1),
5:(7,8,1),
6:(9,12,2),
7:(13,16,2),
8:(17,24,3),
9:(25,32,3),
10:(33,48,4),
11:(49,64,4),
12:(65,96,5),
13:(97,128,5),
14:(129,192,6),
15:(193,256,6),
16:(257,384,7),
17:(385,512,7),
18:(513,768,8),
19:(769,1024,8),
20:(1025,1536,9),
21:(1537,2048,9),
22:(2049,3072,10),
23:(3073,4096,10),
24:(4097,6144,11),
25:(6145,8192,11),
26:(8193,12288,12),
27:(12289,16384,12),
28:(16385,24576,13),
29:(24577,32768,13),
DISTANCE_SYMBOLS = {0: (1, 1, 0),
1: (2, 2, 0),
2: (3, 3, 0),
3: (4, 4, 0),
4: (5, 6, 1),
5: (7, 8, 1),
6: (9, 12, 2),
7: (13, 16, 2),
8: (17, 24, 3),
9: (25, 32, 3),
10: (33, 48, 4),
11: (49, 64, 4),
12: (65, 96, 5),
13: (97, 128, 5),
14: (129, 192, 6),
15: (193, 256, 6),
16: (257, 384, 7),
17: (385, 512, 7),
18: (513, 768, 8),
19: (769, 1024, 8),
20: (1025, 1536, 9),
21: (1537, 2048, 9),
22: (2049, 3072, 10),
23: (3073, 4096, 10),
24: (4097, 6144, 11),
25: (6145, 8192, 11),
26: (8193, 12288, 12),
27: (12289, 16384, 12),
28: (16385, 24576, 13),
29: (24577, 32768, 13),
}
CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
def __init__(self, parent, name, uncomp_data="", *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs)
self.uncomp_data = uncomp_data
def createFields(self):
yield Bit(self, "final", "Is this the final block?") # BFINAL
yield Enum(Bits(self, "compression_type", 2), # BTYPE
{0:"None", 1:"Fixed Huffman", 2:"Dynamic Huffman", 3:"Reserved"})
if self["compression_type"].value == 0: # no compression
padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary
yield Bit(self, "final", "Is this the final block?") # BFINAL
yield Enum(Bits(self, "compression_type", 2), # BTYPE
{0: "None", 1: "Fixed Huffman", 2: "Dynamic Huffman", 3: "Reserved"})
if self["compression_type"].value == 0: # no compression
padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary
if padding:
yield PaddingBits(self, "padding[]", padding)
yield Int16(self, "len")
yield Int16(self, "nlen", "One's complement of len")
if self["len"].value != ~self["nlen"].value:
raise ParserError("len must be equal to the one's complement of nlen!")
if self["len"].value: # null stored blocks produced by some encoders (e.g. PIL)
if self["len"].value: # null stored blocks produced by some encoders (e.g. PIL)
yield RawBytes(self, "data", self["len"].value, "Uncompressed data")
return
elif self["compression_type"].value == 1: # Fixed Huffman
length_tree = {} # (size, huffman code): value
elif self["compression_type"].value == 1: # Fixed Huffman
length_tree = {} # (size, huffman code): value
distance_tree = {}
for i in xrange(144):
length_tree[(8, i+48)] = i
length_tree[(8, i + 48)] = i
for i in xrange(144, 256):
length_tree[(9, i+256)] = i
length_tree[(9, i + 256)] = i
for i in xrange(256, 280):
length_tree[(7, i-256)] = i
length_tree[(7, i - 256)] = i
for i in xrange(280, 288):
length_tree[(8, i-88)] = i
length_tree[(8, i - 88)] = i
for i in xrange(32):
distance_tree[(5, i)] = i
elif self["compression_type"].value == 2: # Dynamic Huffman
elif self["compression_type"].value == 2: # Dynamic Huffman
yield Bits(self, "huff_num_length_codes", 5, "Number of Literal/Length Codes, minus 257")
yield Bits(self, "huff_num_distance_codes", 5, "Number of Distance Codes, minus 1")
yield Bits(self, "huff_num_code_length_codes", 4, "Number of Code Length Codes, minus 4")
code_length_code_lengths = [0]*19 # confusing variable name...
for i in self.CODE_LENGTH_ORDER[:self["huff_num_code_length_codes"].value+4]:
code_length_code_lengths = [0] * 19 # confusing variable name...
for i in self.CODE_LENGTH_ORDER[:self["huff_num_code_length_codes"].value + 4]:
field = Bits(self, "huff_code_length_code[%i]" % i, 3, "Code lengths for the code length alphabet")
yield field
code_length_code_lengths[i] = field.value
@ -173,8 +180,8 @@ class DeflateBlock(FieldSet):
length_code_lengths = []
distance_code_lengths = []
for numcodes, name, lengths in (
(self["huff_num_length_codes"].value + 257, "length", length_code_lengths),
(self["huff_num_distance_codes"].value + 1, "distance", distance_code_lengths)):
(self["huff_num_length_codes"].value + 257, "length", length_code_lengths),
(self["huff_num_distance_codes"].value + 1, "distance", distance_code_lengths)):
while len(lengths) < numcodes:
field = HuffmanCode(self, "huff_%s_code[]" % name, code_length_tree)
value = field.realvalue
@ -184,20 +191,22 @@ class DeflateBlock(FieldSet):
yield field
lengths.append(value)
else:
info = {16: (3,6,2),
17: (3,10,3),
18: (11,138,7)}[value]
info = {16: (3, 6, 2),
17: (3, 10, 3),
18: (11, 138, 7)}[value]
if value == 16:
repvalue = prev_value
else:
repvalue = 0
field._description = "Repeat Code %i, Repeating value (%i) %i to %i times (Huffman Code %i)" % (value, repvalue, info[0], info[1], field.value)
field._description = "Repeat Code %i, Repeating value (%i) %i to %i times (Huffman Code %i)" % (
value, repvalue, info[0], info[1], field.value)
yield field
extrafield = Bits(self, "huff_%s_code_extra[%s" % (name, field.name.split('[')[1]), info[2])
num_repeats = extrafield.value+info[0]
extrafield._description = "Repeat Extra Bits (%i), total repeats %i"%(extrafield.value, num_repeats)
num_repeats = extrafield.value + info[0]
extrafield._description = "Repeat Extra Bits (%i), total repeats %i" % (
extrafield.value, num_repeats)
yield extrafield
lengths += [repvalue]*num_repeats
lengths += [repvalue] * num_repeats
length_tree = build_tree(length_code_lengths)
distance_tree = build_tree(distance_code_lengths)
else:
@ -220,11 +229,12 @@ class DeflateBlock(FieldSet):
length = info[0]
yield field
else:
field._description = "Length Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value)
field._description = "Length Code %i, Values %i to %i (Huffman Code %i)" % (
value, info[0], info[1], field.value)
yield field
extrafield = Bits(self, "length_extra[%s" % field.name.split('[')[1], info[2])
length = extrafield.value + info[0]
extrafield._description = "Length Extra Bits (%i), total length %i"%(extrafield.value, length)
extrafield._description = "Length Extra Bits (%i), total length %i" % (extrafield.value, length)
yield extrafield
field = HuffmanCode(self, "distance_code[]", distance_tree)
value = field.realvalue
@ -234,36 +244,40 @@ class DeflateBlock(FieldSet):
distance = info[0]
yield field
else:
field._description = "Distance Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value)
field._description = "Distance Code %i, Values %i to %i (Huffman Code %i)" % (
value, info[0], info[1], field.value)
yield field
extrafield = Bits(self, "distance_extra[%s" % field.name.split('[')[1], info[2])
distance = extrafield.value + info[0]
extrafield._description = "Distance Extra Bits (%i), total length %i"%(extrafield.value, distance)
extrafield._description = "Distance Extra Bits (%i), total length %i" % (extrafield.value, distance)
yield extrafield
self.uncomp_data = extend_data(self.uncomp_data, length, distance)
class DeflateData(GenericFieldSet):
endian = LITTLE_ENDIAN
def createFields(self):
uncomp_data = ""
blk=DeflateBlock(self, "compressed_block[]", uncomp_data)
blk = DeflateBlock(self, "compressed_block[]", uncomp_data)
yield blk
uncomp_data = blk.uncomp_data
while not blk["final"].value:
blk=DeflateBlock(self, "compressed_block[]", uncomp_data)
blk = DeflateBlock(self, "compressed_block[]", uncomp_data)
yield blk
uncomp_data = blk.uncomp_data
padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary
padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary
if padding:
yield PaddingBits(self, "padding[]", padding)
self.uncompressed_data = uncomp_data
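# Note on DeflateData (descriptive, not in the original source): each
# DeflateBlock receives the bytes decoded so far so its LZ77
# back-references can reach into earlier blocks; the chain stops at the
# block whose BFINAL bit is set.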
class ZlibData(Parser):
PARSER_TAGS = {
"id": "zlib",
"category": "archive",
"file_ext": ("zlib",),
"min_size": 8*8,
"min_size": 8 * 8,
"description": "ZLIB Data",
}
endian = LITTLE_ENDIAN
@ -280,17 +294,18 @@ class ZlibData(Parser):
return True
def createFields(self):
yield Enum(Bits(self, "compression_method", 4), {8:"deflate", 15:"reserved"}) # CM
yield Bits(self, "compression_info", 4, "base-2 log of the window size") # CINFO
yield Bits(self, "flag_check_bits", 5) # FCHECK
yield Bit(self, "flag_dictionary_present") # FDICT
yield Enum(Bits(self, "flag_compression_level", 2), # FLEVEL
{0:"Fastest", 1:"Fast", 2:"Default", 3:"Maximum, Slowest"})
yield Enum(Bits(self, "compression_method", 4), {8: "deflate", 15: "reserved"}) # CM
yield Bits(self, "compression_info", 4, "base-2 log of the window size") # CINFO
yield Bits(self, "flag_check_bits", 5) # FCHECK
yield Bit(self, "flag_dictionary_present") # FDICT
yield Enum(Bits(self, "flag_compression_level", 2), # FLEVEL
{0: "Fastest", 1: "Fast", 2: "Default", 3: "Maximum, Slowest"})
if self["flag_dictionary_present"].value:
yield textHandler(UInt32(self, "dict_checksum", "ADLER32 checksum of dictionary information"), hexadecimal)
yield DeflateData(self, "data", self.stream, description = "Compressed Data")
yield DeflateData(self, "data", self.stream, description="Compressed Data")
yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal)
def zlib_inflate(stream, wbits=None, prevdata=""):
if wbits is None or wbits >= 0:
return ZlibData(stream)["data"].uncompressed_data

View file

@ -0,0 +1,12 @@
from hachoir.parser.audio.aiff import AiffFile
from hachoir.parser.audio.au import AuFile
from hachoir.parser.audio.itunesdb import ITunesDBFile
from hachoir.parser.audio.ipod_playcounts import PlayCountFile
from hachoir.parser.audio.midi import MidiFile
from hachoir.parser.audio.mpeg_audio import MpegAudioFile
from hachoir.parser.audio.real_audio import RealAudioFile
from hachoir.parser.audio.xm import XMModule
from hachoir.parser.audio.s3m import S3MModule
from hachoir.parser.audio.s3m import PTMModule
from hachoir.parser.audio.mod import AmigaModule
from hachoir.parser.audio.flac import FlacParser

View file

@ -5,15 +5,15 @@ Author: Victor Stinner
Creation: 27 december 2006
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
UInt16, UInt32, Float80, TimestampMac32,
RawBytes, NullBytes,
String, Enum, PascalString32)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import filesizeHandler
from hachoir_core.tools import alignValue
from hachoir_parser.audio.id3 import ID3v2
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
UInt16, UInt32, Float80, TimestampMac32,
RawBytes, NullBytes,
String, Enum, PascalString32)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import filesizeHandler
from hachoir.core.tools import alignValue
from hachoir.parser.audio.id3 import ID3v2
CODEC_NAME = {
'ACE2': u"ACE 2-to-1",
@ -24,22 +24,27 @@ CODEC_NAME = {
'sowt': u"Little-endian, no compression",
}
class Comment(FieldSet):
def createFields(self):
yield TimestampMac32(self, "timestamp")
yield PascalString32(self, "text")
def parseText(self):
yield String(self, "text", self["size"].value)
def parseID3(self):
yield ID3v2(self, "id3v2", size=self["size"].value*8)
yield ID3v2(self, "id3v2", size=self["size"].value * 8)
def parseComment(self):
yield UInt16(self, "nb_comment")
for index in xrange(self["nb_comment"].value):
yield Comment(self, "comment[]")
def parseCommon(self):
yield UInt16(self, "nb_channel")
yield UInt32(self, "nb_sample")
@ -47,9 +52,11 @@ def parseCommon(self):
yield Float80(self, "sample_rate")
yield Enum(String(self, "codec", 4, strip="\0", charset="ASCII"), CODEC_NAME)
def parseVersion(self):
yield TimestampMac32(self, "timestamp")
def parseSound(self):
yield UInt32(self, "offset")
yield UInt32(self, "block_size")
@ -57,6 +64,7 @@ def parseSound(self):
if size:
yield RawBytes(self, "data", size)
class Chunk(FieldSet):
TAG_INFO = {
'COMM': ('common', "Common chunk", parseCommon),
@ -90,6 +98,7 @@ class Chunk(FieldSet):
else:
yield RawBytes(self, "data", size)
class AiffFile(Parser):
PARSER_TAGS = {
"id": "aiff",
@ -97,7 +106,7 @@ class AiffFile(Parser):
"file_ext": ("aif", "aiff", "aifc"),
"mime": (u"audio/x-aiff",),
"magic_regex": (("FORM.{4}AIF[CF]", 0),),
"min_size": 12*8,
"min_size": 12 * 8,
"description": "Audio Interchange File Format (AIFF)"
}
endian = BIG_ENDIAN
@ -105,7 +114,7 @@ class AiffFile(Parser):
def validate(self):
if self.stream.readBytes(0, 4) != "FORM":
return "Invalid signature"
if self.stream.readBytes(8*8, 4) not in ("AIFF", "AIFC"):
if self.stream.readBytes(8 * 8, 4) not in ("AIFF", "AIFC"):
return "Invalid type"
return True
@ -124,4 +133,3 @@ class AiffFile(Parser):
def createContentSize(self):
return self["filesize"].value * 8

View file

@ -5,11 +5,12 @@ Author: Victor Stinner
Creation: 12 july 2006
"""
from hachoir_parser import Parser
from hachoir_core.field import UInt32, Enum, String, RawBytes
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import displayHandler, filesizeHandler
from hachoir_core.tools import createDict, humanFrequency
from hachoir.parser import Parser
from hachoir.field import UInt32, Enum, String, RawBytes
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import displayHandler, filesizeHandler
from hachoir.core.tools import createDict, humanFrequency
class AuFile(Parser):
PARSER_TAGS = {
@ -17,42 +18,42 @@ class AuFile(Parser):
"category": "audio",
"file_ext": ("au", "snd"),
"mime": (u"audio/basic",),
"min_size": 24*8,
"min_size": 24 * 8,
"magic": ((".snd", 0),),
"description": "Sun/NeXT audio"
}
endian = BIG_ENDIAN
CODEC_INFO = {
1: (8, u"8-bit ISDN u-law"),
2: (8, u"8-bit linear PCM"),
3: (16, u"16-bit linear PCM"),
4: (24, u"24-bit linear PCM"),
5: (32, u"32-bit linear PCM"),
6: (32, u"32-bit IEEE floating point"),
7: (64, u"64-bit IEEE floating point"),
1: (8, u"8-bit ISDN u-law"),
2: (8, u"8-bit linear PCM"),
3: (16, u"16-bit linear PCM"),
4: (24, u"24-bit linear PCM"),
5: (32, u"32-bit linear PCM"),
6: (32, u"32-bit IEEE floating point"),
7: (64, u"64-bit IEEE floating point"),
8: (None, u"Fragmented sample data"),
9: (None, u"DSP program"),
10: (8, u"8-bit fixed point"),
11: (16, u"16-bit fixed point"),
12: (24, u"24-bit fixed point"),
13: (32, u"32-bit fixed point"),
18: (16, u"16-bit linear with emphasis"),
19: (16, u"16-bit linear compressed"),
20: (16, u"16-bit linear with emphasis and compression"),
21: (None, u"Music kit DSP commands"),
23: (None, u"4-bit ISDN u-law compressed (CCITT G.721 ADPCM)"),
24: (None, u"ITU-T G.722 ADPCM"),
25: (None, u"ITU-T G.723 3-bit ADPCM"),
26: (None, u"ITU-T G.723 5-bit ADPCM"),
27: (8, u"8-bit ISDN A-law"),
10: (8, u"8-bit fixed point"),
11: (16, u"16-bit fixed point"),
12: (24, u"24-bit fixed point"),
13: (32, u"32-bit fixed point"),
18: (16, u"16-bit linear with emphasis"),
19: (16, u"16-bit linear compressed"),
20: (16, u"16-bit linear with emphasis and compression"),
21: (None, u"Music kit DSP commands"),
23: (None, u"4-bit ISDN u-law compressed (CCITT G.721 ADPCM)"),
24: (None, u"ITU-T G.722 ADPCM"),
25: (None, u"ITU-T G.723 3-bit ADPCM"),
26: (None, u"ITU-T G.723 5-bit ADPCM"),
27: (8, u"8-bit ISDN A-law"),
}
# Create bit rate and codec name dictionaries
BITS_PER_SAMPLE = createDict(CODEC_INFO, 0)
CODEC_NAME = createDict(CODEC_INFO, 1)
VALID_NB_CHANNEL = set((1,2)) # FIXME: 4, 5, 7, 8 channels are supported?
VALID_NB_CHANNEL = set((1, 2)) # FIXME: 4, 5, 7, 8 channels are supported?
def validate(self):
if self.stream.readBytes(0, 4) != ".snd":
@ -85,4 +86,3 @@ class AuFile(Parser):
def createContentSize(self):
return (self["data_ofs"].value + self["data_size"].value) * 8

View file

@ -9,18 +9,21 @@ Author: Esteban Loiseau <baal AT tuxfamily.org>
Creation date: 2008-04-09
"""
from hachoir_parser import Parser
from hachoir_core.field import FieldSet, String, Bit, Bits, UInt16, UInt24, RawBytes, Enum, NullBytes
from hachoir_core.stream import BIG_ENDIAN, LITTLE_ENDIAN
from hachoir_core.tools import createDict
from hachoir_parser.container.ogg import parseVorbisComment
from hachoir.parser import Parser
from hachoir.field import FieldSet, String, Bit, Bits, UInt16, UInt24, RawBytes, Enum, NullBytes
from hachoir.stream import BIG_ENDIAN, LITTLE_ENDIAN
from hachoir.core.tools import createDict
from hachoir.parser.container.ogg import parseVorbisComment
class VorbisComment(FieldSet):
endian = LITTLE_ENDIAN
createFields = parseVorbisComment
class StreamInfo(FieldSet):
static_size = 34*8
static_size = 34 * 8
def createFields(self):
yield UInt16(self, "min_block_size", "The minimum block size (in samples) used in the stream")
yield UInt16(self, "max_block_size", "The maximum block size (in samples) used in the stream")
@ -32,17 +35,20 @@ class StreamInfo(FieldSet):
yield Bits(self, "total_samples", 36, "Total samples in stream")
yield RawBytes(self, "md5sum", 16, "MD5 signature of the unencoded audio data")
class SeekPoint(FieldSet):
def createFields(self):
yield Bits(self, "sample_number", 64, "Sample number")
yield Bits(self, "offset", 64, "Offset in bytes")
yield Bits(self, "nb_sample", 16)
class SeekTable(FieldSet):
def createFields(self):
while not self.eof:
yield SeekPoint(self, "point[]")
class MetadataBlock(FieldSet):
"Metadata block field: http://flac.sourceforge.net/format.html#metadata_block"
@ -80,20 +86,22 @@ class MetadataBlock(FieldSet):
except KeyError:
handler = None
if handler:
yield handler(self, "content", size=size*8)
yield handler(self, "content", size=size * 8)
elif self["block_type"].value == 1:
yield NullBytes(self, "padding", size)
else:
yield RawBytes(self, "rawdata", size)
class Metadata(FieldSet):
def createFields(self):
while not self.eof:
field = MetadataBlock(self,"metadata_block[]")
field = MetadataBlock(self, "metadata_block[]")
yield field
if field["last_metadata_block"].value:
break
class Frame(FieldSet):
SAMPLE_RATES = {
0: "get from STREAMINFO metadata block",
@ -124,6 +132,7 @@ class Frame(FieldSet):
yield Bit(self, "reserved[]")
# FIXME: Finish frame header parser
class Frames(FieldSet):
def createFields(self):
while not self.eof:
@ -131,6 +140,7 @@ class Frames(FieldSet):
# FIXME: Parse all frames
return
class FlacParser(Parser):
"Parse FLAC audio files: FLAC is a lossless audio codec"
MAGIC = "fLaC\x00"
@ -140,7 +150,7 @@ class FlacParser(Parser):
"file_ext": ("flac",),
"mime": (u"audio/x-flac",),
"magic": ((MAGIC, 0),),
"min_size": 4*8,
"min_size": 4 * 8,
"description": "FLAC audio",
}
endian = BIG_ENDIAN
@ -151,7 +161,6 @@ class FlacParser(Parser):
return True
def createFields(self):
yield String(self, "signature", 4,charset="ASCII", description="FLAC signature: fLaC string")
yield Metadata(self,"metadata")
yield Frames(self,"frames")
yield String(self, "signature", 4, charset="ASCII", description="FLAC signature: fLaC string")
yield Metadata(self, "metadata")
yield Frames(self, "frames")

View file

@ -6,118 +6,119 @@ Informations: http://www.id3.org/
Author: Victor Stinner
"""
from hachoir_core.field import (FieldSet, MatchError, ParserError,
Enum, UInt8, UInt24, UInt32,
CString, String, RawBytes,
Bit, Bits, NullBytes, NullBits)
from hachoir_core.text_handler import textHandler
from hachoir_core.tools import humanDuration
from hachoir_core.endian import NETWORK_ENDIAN
from hachoir.field import (FieldSet, MatchError, ParserError,
Enum, UInt8, UInt24, UInt32,
CString, String, RawBytes,
Bit, Bits, NullBytes, NullBits)
from hachoir.core.text_handler import textHandler
from hachoir.core.tools import humanDuration
from hachoir.core.endian import NETWORK_ENDIAN
class ID3v1(FieldSet):
static_size = 128 * 8
GENRE_NAME = {
0: u"Blues",
1: u"Classic Rock",
2: u"Country",
3: u"Dance",
4: u"Disco",
5: u"Funk",
6: u"Grunge",
7: u"Hip-Hop",
8: u"Jazz",
9: u"Metal",
10: u"New Age",
11: u"Oldies",
12: u"Other",
13: u"Pop",
14: u"R&B",
15: u"Rap",
16: u"Reggae",
17: u"Rock",
18: u"Techno",
19: u"Industrial",
20: u"Alternative",
21: u"Ska",
22: u"Death Metal",
23: u"Pranks",
24: u"Soundtrack",
25: u"Euro-Techno",
26: u"Ambient",
27: u"Trip-Hop",
28: u"Vocal",
29: u"Jazz+Funk",
30: u"Fusion",
31: u"Trance",
32: u"Classical",
33: u"Instrumental",
34: u"Acid",
35: u"House",
36: u"Game",
37: u"Sound Clip",
38: u"Gospel",
39: u"Noise",
40: u"AlternRock",
41: u"Bass",
42: u"Soul",
43: u"Punk",
44: u"Space",
45: u"Meditative",
46: u"Instrumental Pop",
47: u"Instrumental Rock",
48: u"Ethnic",
49: u"Gothic",
50: u"Darkwave",
51: u"Techno-Industrial",
52: u"Electronic",
53: u"Pop-Folk",
54: u"Eurodance",
55: u"Dream",
56: u"Southern Rock",
57: u"Comedy",
58: u"Cult",
59: u"Gangsta",
60: u"Top 40",
61: u"Christian Rap",
62: u"Pop/Funk",
63: u"Jungle",
64: u"Native American",
65: u"Cabaret",
66: u"New Wave",
67: u"Psychadelic",
68: u"Rave",
69: u"Showtunes",
70: u"Trailer",
71: u"Lo-Fi",
72: u"Tribal",
73: u"Acid Punk",
74: u"Acid Jazz",
75: u"Polka",
76: u"Retro",
77: u"Musical",
78: u"Rock & Roll",
79: u"Hard Rock",
# Following are winamp extensions
80: u"Folk",
81: u"Folk-Rock",
82: u"National Folk",
83: u"Swing",
84: u"Fast Fusion",
85: u"Bebob",
86: u"Latin",
87: u"Revival",
88: u"Celtic",
89: u"Bluegrass",
90: u"Avantgarde",
91: u"Gothic Rock",
92: u"Progressive Rock",
93: u"Psychedelic Rock",
94: u"Symphonic Rock",
95: u"Slow Rock",
96: u"Big Band",
97: u"Chorus",
98: u"Easy Listening",
99: u"Acoustic",
0: u"Blues",
1: u"Classic Rock",
2: u"Country",
3: u"Dance",
4: u"Disco",
5: u"Funk",
6: u"Grunge",
7: u"Hip-Hop",
8: u"Jazz",
9: u"Metal",
10: u"New Age",
11: u"Oldies",
12: u"Other",
13: u"Pop",
14: u"R&B",
15: u"Rap",
16: u"Reggae",
17: u"Rock",
18: u"Techno",
19: u"Industrial",
20: u"Alternative",
21: u"Ska",
22: u"Death Metal",
23: u"Pranks",
24: u"Soundtrack",
25: u"Euro-Techno",
26: u"Ambient",
27: u"Trip-Hop",
28: u"Vocal",
29: u"Jazz+Funk",
30: u"Fusion",
31: u"Trance",
32: u"Classical",
33: u"Instrumental",
34: u"Acid",
35: u"House",
36: u"Game",
37: u"Sound Clip",
38: u"Gospel",
39: u"Noise",
40: u"AlternRock",
41: u"Bass",
42: u"Soul",
43: u"Punk",
44: u"Space",
45: u"Meditative",
46: u"Instrumental Pop",
47: u"Instrumental Rock",
48: u"Ethnic",
49: u"Gothic",
50: u"Darkwave",
51: u"Techno-Industrial",
52: u"Electronic",
53: u"Pop-Folk",
54: u"Eurodance",
55: u"Dream",
56: u"Southern Rock",
57: u"Comedy",
58: u"Cult",
59: u"Gangsta",
60: u"Top 40",
61: u"Christian Rap",
62: u"Pop/Funk",
63: u"Jungle",
64: u"Native American",
65: u"Cabaret",
66: u"New Wave",
67: u"Psychadelic",
68: u"Rave",
69: u"Showtunes",
70: u"Trailer",
71: u"Lo-Fi",
72: u"Tribal",
73: u"Acid Punk",
74: u"Acid Jazz",
75: u"Polka",
76: u"Retro",
77: u"Musical",
78: u"Rock & Roll",
79: u"Hard Rock",
# Following are winamp extensions
80: u"Folk",
81: u"Folk-Rock",
82: u"National Folk",
83: u"Swing",
84: u"Fast Fusion",
85: u"Bebob",
86: u"Latin",
87: u"Revival",
88: u"Celtic",
89: u"Bluegrass",
90: u"Avantgarde",
91: u"Gothic Rock",
92: u"Progressive Rock",
93: u"Psychedelic Rock",
94: u"Symphonic Rock",
95: u"Slow Rock",
96: u"Big Band",
97: u"Chorus",
98: u"Easy Listening",
99: u"Acoustic",
100: u"Humour",
101: u"Speech",
102: u"Chanson",
@ -171,7 +172,8 @@ class ID3v1(FieldSet):
def createFields(self):
yield String(self, "signature", 3, "IDv1 signature (\"TAG\")", charset="ASCII")
if self["signature"].value != "TAG":
raise MatchError("Stream doesn't look like ID3v1 (wrong signature)!")
raise MatchError(
"Stream doesn't look like ID3v1 (wrong signature)!")
# TODO: Charset of below strings?
yield String(self, "song", 30, "Song title", strip=" \0", charset="ISO-8859-1")
yield String(self, "author", 30, "Author", strip=" \0", charset="ISO-8859-1")
@ -194,7 +196,7 @@ class ID3v1(FieldSet):
yield String(self, "comment", 31, "Comment", strip=" \0", charset="ISO-8859-1")
def getVersion(self):
addr = self.absolute_address + 126*8
addr = self.absolute_address + 126 * 8
bytes = self.stream.readBytes(addr, 2)
# last byte (127) is not space?
@ -212,6 +214,7 @@ class ID3v1(FieldSet):
return "ID3 %s: author=%s, song=%s" % (
version, self["author"].value, self["song"].value)
def getCharset(field):
try:
key = field.value
@ -219,10 +222,13 @@ def getCharset(field):
except KeyError:
raise ParserError("ID3v2: Invalid charset (%s)." % key)
class ID3_String(FieldSet):
STRIP = " \0"
def createFields(self):
yield String(self, "text", self._size/8, "Text", charset="ISO-8859-1", strip=self.STRIP)
yield String(self, "text", self._size // 8, "Text", charset="ISO-8859-1", strip=self.STRIP)
class ID3_StringCharset(ID3_String):
STRIP = " \0"
@ -238,14 +244,16 @@ class ID3_StringCharset(ID3_String):
2: "UTF-16-BE",
3: "UTF-8"
}
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
size = (self.size - self.current_size)/8
size = (self.size - self.current_size) // 8
if not size:
return
charset = getCharset(self["charset"])
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_GEOB(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
@ -258,6 +266,7 @@ class ID3_GEOB(ID3_StringCharset):
return
yield String(self, "text", size, "Text", charset=charset)
class ID3_Comment(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
@ -269,6 +278,7 @@ class ID3_Comment(ID3_StringCharset):
return
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_StringTitle(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
@ -276,33 +286,36 @@ class ID3_StringTitle(ID3_StringCharset):
return
charset = getCharset(self["charset"])
yield CString(self, "title", "Title", charset=charset, strip=self.STRIP)
size = (self.size - self.current_size)/8
size = (self.size - self.current_size) // 8
if not size:
return
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_Private(FieldSet):
def createFields(self):
size = self._size/8
size = self._size // 8
# TODO: Strings charset?
if self.stream.readBytes(self.absolute_address, 9) == "PeakValue":
yield String(self, "text", 9, "Text")
size -= 9
yield String(self, "content", size, "Content")
class ID3_TrackLength(FieldSet):
def createFields(self):
yield NullBytes(self, "zero", 1)
yield textHandler(String(self, "length", self._size/8 - 1,
"Length in ms", charset="ASCII"), self.computeLength)
yield textHandler(String(self, "length", self._size // 8 - 1,
"Length in ms", charset="ASCII"), self.computeLength)
def computeLength(self, field):
try:
ms = int(field.value)
return humanDuration(ms)
except:
except Exception:
return field.value
class ID3_Picture23(FieldSet):
pict_type_name = {
0x00: "Other",
@ -327,16 +340,18 @@ class ID3_Picture23(FieldSet):
0x13: "Band/artist logotype",
0x14: "Publisher/Studio logotype"
}
def createFields(self):
yield Enum(UInt8(self, "charset"), ID3_StringCharset.charset_desc)
charset = getCharset(self["charset"])
yield String(self, "img_fmt", 3, charset="ASCII")
yield Enum(UInt8(self, "pict_type"), self.pict_type_name)
yield CString(self, "text", "Text", charset=charset, strip=" \0")
size = (self._size - self._current_size) / 8
size = (self._size - self._current_size) // 8
if size:
yield RawBytes(self, "img_data", size)
class ID3_Picture24(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "charset"), ID3_StringCharset.charset_desc)
@ -344,10 +359,11 @@ class ID3_Picture24(FieldSet):
yield CString(self, "mime", "MIME type", charset=charset)
yield Enum(UInt8(self, "pict_type"), ID3_Picture23.pict_type_name)
yield CString(self, "description", charset=charset)
size = (self._size - self._current_size) / 8
size = (self._size - self._current_size) // 8
if size:
yield RawBytes(self, "img_data", size)
class ID3_Chunk(FieldSet):
endian = NETWORK_ENDIAN
tag22_name = {
@ -404,9 +420,9 @@ class ID3_Chunk(FieldSet):
# ID3 v2.3 and 2.4
yield Enum(String(self, "tag", 4, "Tag", charset="ASCII", strip="\0"), ID3_Chunk.tag23_name)
if 4 <= self["../ver_major"].value:
yield ID3_Size(self, "size") # ID3 v2.4
yield ID3_Size(self, "size") # ID3 v2.4
else:
yield UInt32(self, "size") # ID3 v2.3
yield UInt32(self, "size") # ID3 v2.3
yield Bit(self, "tag_alter", "Tag alter preservation")
yield Bit(self, "file_alter", "Tag alter preservation")
@ -423,19 +439,19 @@ class ID3_Chunk(FieldSet):
# ID3 v2.2
yield Enum(String(self, "tag", 3, "Tag", charset="ASCII", strip="\0"), ID3_Chunk.tag22_name)
yield UInt24(self, "size")
size = self["size"].value - self.current_size/8 + 6
size = self["size"].value - self.current_size // 8 + 6
is_compressed = False
if size:
cls = None
if not(is_compressed):
if not (is_compressed):
tag = self["tag"].value
if tag in ID3_Chunk.handler:
cls = ID3_Chunk.handler[tag]
elif tag[0] == "T":
cls = ID3_StringCharset
if cls:
yield cls(self, "content", "Content", size=size*8)
yield cls(self, "content", "Content", size=size * 8)
else:
yield RawBytes(self, "content", size, "Raw data content")
@ -445,6 +461,7 @@ class ID3_Chunk(FieldSet):
else:
return "ID3 Chunk: (terminator)"
class ID3_Size(Bits):
static_size = 32
@ -454,7 +471,8 @@ class ID3_Size(Bits):
def createValue(self):
data = self.parent.stream.readBytes(self.absolute_address, 4)
# TODO: Check that bit #7 of each byte is null: not(ord(data[i]) & 128)
return reduce(lambda x, y: x*128 + y, (ord(item) for item in data ))
return reduce(lambda x, y: x * 128 + y, (ord(item) for item in data))
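# Sketch (not in the original source): ID3v2 sizes are "synchsafe",
# storing 7 bits per byte so that no byte has bit 7 set. The reduce above
# is equivalent to:
#     >>> data = "\x00\x00\x02\x01"
#     >>> sum(ord(c) << (7 * i) for i, c in enumerate(reversed(data)))
#     257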
class ID3v2(FieldSet):
endian = NETWORK_ENDIAN
@ -467,7 +485,7 @@ class ID3v2(FieldSet):
def createDescription(self):
return "ID3 v2.%s.%s" % \
(self["ver_major"].value, self["ver_minor"].value)
(self["ver_major"].value, self["ver_minor"].value)
def createFields(self):
# Signature + version
@ -479,7 +497,7 @@ class ID3v2(FieldSet):
if self["header"].value != "ID3":
raise MatchError("Signature error, should be \"ID3\".")
if self["ver_major"].value not in self.VALID_MAJOR_VERSIONS \
or self["ver_minor"].value != 0:
or self["ver_minor"].value != 0:
raise MatchError(
"Unknown ID3 metadata version (2.%u.%u)"
% (self["ver_major"].value, self["ver_minor"].value))
@ -504,4 +522,3 @@ class ID3v2(FieldSet):
padding = self.seekBit(self._size)
if padding:
yield padding

View file

@ -9,20 +9,21 @@ Author: m42i
Creation date: 01 March 2014
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
String, Float32, NullBytes, Enum, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.tools import humanDuration
from hachoir_core.text_handler import displayHandler, filesizeHandler
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
String, Float32, NullBytes, Enum, RawBytes)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.tools import humanDuration
from hachoir.core.text_handler import displayHandler, filesizeHandler
class PlayCountFile(Parser):
PARSER_TAGS = {
"id": "playcounts",
"category": "audio",
"min_size": 44*8,
"magic": (('mhdp',0),),
"min_size": 44 * 8,
"magic": (('mhdp', 0),),
"description": "iPod Play Counts file"
}
@ -47,7 +48,7 @@ class PlayCountFile(Parser):
class PlayCountEntry(FieldSet):
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = 28*8
self._size = 28 * 8
def createFields(self):
yield UInt32(self, "play_count", "Playcount since last sync")
@ -57,4 +58,3 @@ class PlayCountEntry(FieldSet):
yield UInt32(self, "unknown", "unknown")
yield UInt32(self, "skip_count", "Number of skips since last sync")
yield TimestampMac32(self, "last_skipped", "Time of the last skip")

View file

@ -8,126 +8,127 @@ Author: Romain HERAULT
Creation date: 19 august 2006
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
String, Float32, NullBytes, Enum, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.tools import humanDuration
from hachoir_core.text_handler import displayHandler, filesizeHandler
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
String, Float32, NullBytes, Enum, RawBytes)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.tools import humanDuration
from hachoir.core.text_handler import displayHandler, filesizeHandler
list_order = {
1: "playlist order (manual sort order)",
2: "???",
3: "songtitle",
4: "album",
5: "artist",
6: "bitrate",
7: "genre",
8: "kind",
9: "date modified",
10: "track number",
11: "size",
12: "time",
13: "year",
14: "sample rate",
15: "comment",
16: "date added",
17: "equalizer",
18: "composer",
19: "???",
20: "play count",
21: "last played",
22: "disc number",
23: "my rating",
24: "release date",
25: "BPM",
26: "grouping",
27: "category",
28: "description",
29: "show",
30: "season",
31: "episode number"
}
list_order={
1 : "playlist order (manual sort order)",
2 : "???",
3 : "songtitle",
4 : "album",
5 : "artist",
6 : "bitrate",
7 : "genre",
8 : "kind",
9 : "date modified",
10 : "track number",
11 : "size",
12 : "time",
13 : "year",
14 : "sample rate",
15 : "comment",
16 : "date added",
17 : "equalizer",
18 : "composer",
19 : "???",
20 : "play count",
21 : "last played",
22 : "disc number",
23 : "my rating",
24 : "release date",
25 : "BPM",
26 : "grouping",
27 : "category",
28 : "description",
29 : "show",
30 : "season",
31 : "episode number"
}
class DataObject(FieldSet):
type_name={
1:"Title",
2:"Location",
3:"Album",
4:"Artist",
5:"Genre",
6:"Filetype",
7:"EQ Setting",
8:"Comment",
9:"Category",
12:"Composer",
13:"Grouping",
14:"Description text",
15:"Podcast Enclosure URL",
16:"Podcast RSS URL",
17:"Chapter data",
18:"Subtitle",
19:"Show (for TV Shows only)",
20:"Episode",
21:"TV Network",
22:"Album-Artist",
23:"Artist for Sorting",
24:"List of keywords pretaining track",
25:"Locale for TV show(?)",
27:"Title for Sorting",
28:"Album for Sorting",
29:"Album-Artist for Sorting",
30:"Composer for Sorting",
31:"Show for Sorting",
type_name = {
1: "Title",
2: "Location",
3: "Album",
4: "Artist",
5: "Genre",
6: "Filetype",
7: "EQ Setting",
8: "Comment",
9: "Category",
12: "Composer",
13: "Grouping",
14: "Description text",
15: "Podcast Enclosure URL",
16: "Podcast RSS URL",
17: "Chapter data",
18: "Subtitle",
19: "Show (for TV Shows only)",
20: "Episode",
21: "TV Network",
22: "Album-Artist",
23: "Artist for Sorting",
24: "List of keywords pretaining track",
25: "Locale for TV show(?)",
27: "Title for Sorting",
28: "Album for Sorting",
29: "Album-Artist for Sorting",
30: "Composer for Sorting",
31: "Show for Sorting",
# 32:"Unknown binary field for video tracks",
50:"Smart Playlist Data",
51:"Smart Playlist Rules",
52:"Library Playlist Index",
53:"Library Playlist Index letter in jump table",
100:"Ccolumn Sizing Info as well as an order indicator in playlists.",
102:"For iPhone",
200:"Album name (for album descriptions)",
201:"Album artist (for album descriptions)",
202:"Album sort artist (for album descriptions)",
203:"Podcast URL in Album List",
204:"TV Show in Album List"
50: "Smart Playlist Data",
51: "Smart Playlist Rules",
52: "Library Playlist Index",
53: "Library Playlist Index letter in jump table",
100: "Ccolumn Sizing Info as well as an order indicator in playlists.",
102: "For iPhone",
200: "Album name (for album descriptions)",
201: "Album artist (for album descriptions)",
202: "Album sort artist (for album descriptions)",
203: "Podcast URL in Album List",
204: "TV Show in Album List"
}
mhod52_sort_index_type_name={
3:"Title",
4:"Album, then Disk/Tracknumber, then Title",
5:"Artist, then Album, then Disc/Tracknumber, then Title",
7:"Genre, then Artist, then Album, then Disc/Tracknumber, then Title",
8:"Composer, then Title"
mhod52_sort_index_type_name = {
3: "Title",
4: "Album, then Disk/Tracknumber, then Title",
5: "Artist, then Album, then Disc/Tracknumber, then Title",
7: "Genre, then Artist, then Album, then Disc/Tracknumber, then Title",
8: "Composer, then Title"
}
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "Data Object Header Markup (\"mhod\")", charset="ISO-8859-1")
yield UInt32(self, "header_length", "Header Length")
yield UInt32(self, "entry_length", "Entry Length")
yield Enum(UInt32(self, "type", "type"),self.type_name)
yield Enum(UInt32(self, "type", "type"), self.type_name)
if (self["type"].value == 15) or (self["type"].value == 16):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield String(self, "string", self._size/8-self["header_length"].value, "String Data", charset="UTF-8")
yield String(self, "string", self._size // 8 - self["header_length"].value, "String Data", charset="UTF-8")
elif (self["type"].value == 52):
yield UInt32(self, "unknown[]", "unk1")
yield UInt32(self, "unknown[]", "unk2")
yield Enum(UInt32(self, "sort_index_type", "Sort Index Type"),self.mhod52_sort_index_type_name)
yield Enum(UInt32(self, "sort_index_type", "Sort Index Type"), self.mhod52_sort_index_type_name)
yield UInt32(self, "entry_count", "Entry Count")
indexes_size = self["entry_count"].value*4
indexes_size = self["entry_count"].value * 4
padding_offset = self["entry_length"].value - indexes_size
padding = self.seekByte(padding_offset, "header padding")
if padding:
yield padding
for i in xrange(self["entry_count"].value):
yield UInt32(self, "index["+str(i)+"]", "Index of the "+str(i)+"nth mhit")
elif(self["type"].value<15) or (self["type"].value>17) or (self["type"].value >= 200):
yield UInt32(self, "index[" + str(i) + "]", "Index of the " + str(i) + "nth mhit")
elif (self["type"].value < 15) or (self["type"].value > 17) or (self["type"].value >= 200):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield UInt32(self, "position", "Position")
@ -143,29 +144,31 @@ class DataObject(FieldSet):
if padding:
yield padding
class TrackItem(FieldSet):
x1_type_name={
0:"AAC or CBR MP3",
1:"VBR MP3"
x1_type_name = {
0: "AAC or CBR MP3",
1: "VBR MP3"
}
x2_type_name={
0:"AAC",
1:"MP3"
x2_type_name = {
0: "AAC",
1: "MP3"
}
media_type_name={
0x00:"Audio/Video",
0x01:"Audio",
0x02:"Video",
0x04:"Podcast",
0x06:"Video Podcast",
0x08:"Audiobook",
0x20:"Music Video",
0x40:"TV Show",
0X60:"TV Show (Music lists)",
media_type_name = {
0x00: "Audio/Video",
0x01: "Audio",
0x02: "Video",
0x04: "Podcast",
0x06: "Video Podcast",
0x08: "Audiobook",
0x20: "Music Video",
0x40: "TV Show",
0X60: "TV Show (Music lists)",
}
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "Track Item Header Markup (\"mhit\")", charset="ISO-8859-1")
@ -175,8 +178,8 @@ class TrackItem(FieldSet):
yield UInt32(self, "unique_id", "Unique ID")
yield UInt32(self, "visible_tag", "Visible Tag")
yield String(self, "file_type", 4, "File Type")
yield Enum(UInt8(self, "x1_type", "Extended Type 1"),self.x1_type_name)
yield Enum(UInt8(self, "x2_type", "Extended type 2"),self.x2_type_name)
yield Enum(UInt8(self, "x1_type", "Extended Type 1"), self.x1_type_name)
yield Enum(UInt8(self, "x2_type", "Extended type 2"), self.x2_type_name)
yield UInt8(self, "compilation_flag", "Compilation Flag")
yield UInt8(self, "rating", "Rating")
yield TimestampMac32(self, "last_modified", "Time of the last modification of the track")
@ -230,7 +233,7 @@ class TrackItem(FieldSet):
yield UInt32(self, "unknown[]")
yield UInt32(self, "postgap[]", "Number of samples of silence at the end of the song")
yield UInt32(self, "unknown[]")
yield Enum(UInt32(self, "media_type", "Media Type for video iPod"),self.media_type_name)
yield Enum(UInt32(self, "media_type", "Media Type for video iPod"), self.media_type_name)
yield UInt32(self, "season_number", "Season Number")
yield UInt32(self, "episode_number", "Episode Number")
yield UInt32(self, "unknown[]")
@ -240,7 +243,8 @@ class TrackItem(FieldSet):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield UInt32(self, "gapless_data[]","The size in bytes from first Sync Frame until the 8th before the last frame." )
yield UInt32(self, "gapless_data[]",
"The size in bytes from first Sync Frame until the 8th before the last frame.")
yield UInt32(self, "unknown[]")
yield UInt16(self, "gaplessTrackFlag[]", "1 if track has gapless data")
yield UInt16(self, "gaplessAlbumFlag[]", "1 if track uses crossfading in iTunes")
@ -257,13 +261,14 @@ class TrackItem(FieldSet):
if padding:
yield padding
#while ((self.stream.readBytes(0, 4) == 'mhod') and ((self.current_size/8) < self["entry_length"].value)):
# while ((self.stream.readBytes(0, 4) == 'mhod') and ((self.current_size/8) < self["entry_length"].value)):
for i in xrange(self["string_number"].value):
yield DataObject(self, "data[]")
padding = self.seekBit(self._size, "entry padding")
if padding:
yield padding
class TrackList(FieldSet):
def createFields(self):
yield String(self, "header_id", 4, "Track List Header Markup (\"mhlt\")", charset="ISO-8859-1")
@ -277,10 +282,11 @@ class TrackList(FieldSet):
for i in xrange(self["track_number"].value):
yield TrackItem(self, "track[]")
class PlaylistItem(FieldSet):
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "Playlist Item Header Markup (\"mhip\")", charset="ISO-8859-1")
@ -301,53 +307,53 @@ class PlaylistItem(FieldSet):
class Playlist(FieldSet):
is_master_pl_name={
0:"Regular playlist",
1:"Master playlist"
is_master_pl_name = {
0: "Regular playlist",
1: "Master playlist"
}
is_podcast_name={
0:"Normal Playlist List",
1:"Podcast Playlist List"
is_podcast_name = {
0: "Normal Playlist List",
1: "Podcast Playlist List"
}
list_sort_order_name={
1:"Manual Sort Order",
2:"???",
3:"Song Title",
4:"Album",
5:"Artist",
6:"Bitrate",
7:"Genre",
8:"Kind",
9:"Date Modified",
10:"Track Number",
11:"Size",
12:"Time",
13:"Year",
14:"Sample Rate",
15:"Comment",
16:"Date Added",
17:"Equalizer",
18:"Composer",
19:"???",
20:"Play Count",
21:"Last Played",
22:"Disc Number",
23:"My Rating",
24:"Release Date",
25:"BPM",
26:"Grouping",
27:"Category",
28:"Description",
29:"Show",
30:"Season",
31:"Episode Number"
list_sort_order_name = {
1: "Manual Sort Order",
2: "???",
3: "Song Title",
4: "Album",
5: "Artist",
6: "Bitrate",
7: "Genre",
8: "Kind",
9: "Date Modified",
10: "Track Number",
11: "Size",
12: "Time",
13: "Year",
14: "Sample Rate",
15: "Comment",
16: "Date Added",
17: "Equalizer",
18: "Composer",
19: "???",
20: "Play Count",
21: "Last Played",
22: "Disc Number",
23: "My Rating",
24: "Release Date",
25: "BPM",
26: "Grouping",
27: "Category",
28: "Description",
29: "Show",
30: "Season",
31: "Episode Number"
}
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "Playlist Header Markup (\"mhyp\")", charset="ISO-8859-1")
@ -377,7 +383,6 @@ class Playlist(FieldSet):
yield PlaylistItem(self, "playlist_item[]")
class PlaylistList(FieldSet):
def createFields(self):
yield String(self, "header_id", 4, "Playlist List Header Markup (\"mhlp\")", charset="ISO-8859-1")
@ -391,10 +396,11 @@ class PlaylistList(FieldSet):
for i in xrange(self["playlist_number"].value):
yield Playlist(self, "playlist[]")
class Album(FieldSet):
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "Album Item Header Markup (\"mhia\")", charset="ISO-8859-1")
@ -414,6 +420,7 @@ class Album(FieldSet):
for i in xrange(self["data_object_child_count"].value):
yield DataObject(self, "mhod[]")
class AlbumList(FieldSet):
def createFields(self):
yield String(self, "header_id", 4, "Album List Header Markup (\"mhla\")", charset="ISO-8859-1")
@ -427,22 +434,24 @@ class AlbumList(FieldSet):
for i in xrange(self["album_number"].value):
yield Album(self, "album[]")
class DataSet(FieldSet):
type_name={
1:"Track List",
2:"Play List",
3:"Podcast List",
4:"Album List"
}
type_name = {
1: "Track List",
2: "Play List",
3: "Podcast List",
4: "Album List"
}
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
def createFields(self):
yield String(self, "header_id", 4, "DataSet Header Markup (\"mhsd\")", charset="ISO-8859-1")
yield UInt32(self, "header_length", "Header Length")
yield UInt32(self, "entry_length", "Entry Length")
yield Enum(UInt32(self, "type", "type"),self.type_name)
yield Enum(UInt32(self, "type", "type"), self.type_name)
padding = self.seekByte(self["header_length"].value, "header_raw")
if padding:
yield padding
@ -458,10 +467,12 @@ class DataSet(FieldSet):
if padding:
yield padding
class DataBase(FieldSet):
def __init__(self, *args, **kw):
FieldSet.__init__(self, *args, **kw)
self._size = self["entry_length"].value *8
self._size = self["entry_length"].value * 8
# def createFields(self):
@ -469,8 +480,8 @@ class ITunesDBFile(Parser):
PARSER_TAGS = {
"id": "itunesdb",
"category": "audio",
"min_size": 44*8,
"magic": (('mhbd',0),),
"min_size": 44 * 8,
"magic": (('mhbd', 0),),
"description": "iPod iTunesDB file"
}
@ -501,8 +512,8 @@ class ITunesDBFile(Parser):
yield Int32(self, "timezone_offset[]", "Timezone offset in seconds")
yield UInt16(self, "unknown[]")
yield RawBytes(self, "iphone_hash[]", 45)
size = self["header_length"].value-self.current_size/ 8
if size>0:
size = self["header_length"].value - self.current_size / 8
if size > 0:
yield NullBytes(self, "padding", size)
for i in xrange(self["child_number"].value):
yield DataSet(self, "dataset[]")
@ -512,4 +523,3 @@ class ITunesDBFile(Parser):
def createContentSize(self):
return self["entry_length"].value * 8

View file

@ -8,16 +8,17 @@ Author: Victor Stinner
Creation: 27 december 2006
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, Bits, ParserError,
String, UInt32, UInt24, UInt16, UInt8, Enum, RawBits, RawBytes)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.tools import createDict, humanDurationNanosec
from hachoir_parser.common.tracker import NOTE_NAME
from hachoir.parser import Parser
from hachoir.field import (FieldSet, Bits, ParserError,
String, UInt32, UInt24, UInt16, UInt8, Enum, RawBits, RawBytes)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import createDict, humanDurationNanosec
from hachoir.parser.common.tracker import NOTE_NAME
MAX_FILESIZE = 10 * 1024 * 1024
class Integer(Bits):
def __init__(self, parent, name, description=None):
Bits.__init__(self, parent, name, 8, description)
@ -27,7 +28,7 @@ class Integer(Bits):
while True:
bits = stream.readBits(addr, 8, parent.endian)
value = (value << 7) + (bits & 127)
if not(bits & 128):
if not (bits & 128):
break
addr += 8
self._size += 8
@ -35,59 +36,76 @@ class Integer(Bits):
raise ParserError("Integer size is bigger than 32-bit")
self.createValue = lambda: value
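The Integer class above decodes MIDI's variable-length quantity: each byte contributes 7 payload bits, and a set high bit means another byte follows. A minimal standalone sketch of the same decoding under Python 3 (the helper name is illustrative, not part of hachoir):

def read_vlq(data, pos=0):
    # Decode a MIDI variable-length quantity starting at data[pos];
    # returns (value, next_pos). The high bit of each byte is the
    # continuation flag, the low 7 bits are payload.
    value = 0
    while True:
        byte = data[pos]
        pos += 1
        value = (value << 7) | (byte & 0x7F)
        if not (byte & 0x80):
            return value, pos

# 0x81 0x48 -> (1 << 7) | 0x48 = 200 ticks
assert read_vlq(bytes([0x81, 0x48])) == (200, 2)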
def parseNote(parser):
yield Enum(UInt8(parser, "note", "Note number"), NOTE_NAME)
yield UInt8(parser, "velocity")
def parseControl(parser):
yield UInt8(parser, "control", "Controller number")
yield UInt8(parser, "value", "New value")
def parsePatch(parser):
yield UInt8(parser, "program", "New program number")
def parseChannel(parser, size=1):
yield UInt8(parser, "channel", "Channel number")
def parsePitch(parser):
yield UInt8(parser, "bottom", "(least sig) 7 bits of value")
yield UInt8(parser, "top", "(most sig) 7 bits of value")
def parseText(parser, size):
yield String(parser, "text", size)
def parseSMPTEOffset(parser, size):
yield RawBits(parser, "padding", 1)
yield Enum(Bits(parser, "frame_rate", 2),
{0:"24 fps", 1:"25 fps", 2:"30 fps (drop frame)", 3:"30 fps"})
{0: "24 fps", 1: "25 fps", 2: "30 fps (drop frame)", 3: "30 fps"})
yield Bits(parser, "hour", 5)
yield UInt8(parser, "minute")
yield UInt8(parser, "second")
yield UInt8(parser, "frame")
yield UInt8(parser, "subframe", "100 subframes per frame")
def formatTempo(field):
return humanDurationNanosec(field.value*1000)
return humanDurationNanosec(field.value * 1000)
def parseTempo(parser, size):
yield textHandler(UInt24(parser, "microsec_quarter", "Microseconds per quarter note"), formatTempo)
def parseTimeSignature(parser, size):
yield UInt8(parser, "numerator", "Numerator of time signature")
yield UInt8(parser, "denominator", "denominator of time signature 2=quarter 3=eighth, etc.")
yield UInt8(parser, "nb_tick", "Number of ticks in metronome click")
yield UInt8(parser, "nb_32nd_note", "Number of 32nd notes to the quarter note")
class Command(FieldSet):
COMMAND = {}
for channel in xrange(16):
COMMAND[0x80+channel] = ("Note off (channel %u)" % channel, parseNote)
COMMAND[0x90+channel] = ("Note on (channel %u)" % channel, parseNote)
COMMAND[0xA0+channel] = ("Key after-touch (channel %u)" % channel, parseNote)
COMMAND[0xB0+channel] = ("Control change (channel %u)" % channel, parseControl)
COMMAND[0xC0+channel] = ("Program (patch) change (channel %u)" % channel, parsePatch)
COMMAND[0xD0+channel] = ("Channel after-touch (channel %u)" % channel, parseChannel)
COMMAND[0xE0+channel] = ("Pitch wheel change (channel %u)" % channel, parsePitch)
COMMAND[0x80 + channel] = ("Note off (channel %u)" %
channel, parseNote)
COMMAND[0x90 + channel] = ("Note on (channel %u)" % channel, parseNote)
COMMAND[
0xA0 + channel] = ("Key after-touch (channel %u)" % channel, parseNote)
COMMAND[0xB0 + channel] = ("Control change (channel %u)" %
channel, parseControl)
COMMAND[
0xC0 + channel] = ("Program (patch) change (channel %u)" % channel, parsePatch)
COMMAND[
0xD0 + channel] = ("Channel after-touch (channel %u)" % channel, parseChannel)
COMMAND[
0xE0 + channel] = ("Pitch wheel change (channel %u)" % channel, parsePitch)
COMMAND_DESC = createDict(COMMAND, 0)
COMMAND_PARSER = createDict(COMMAND, 1)
@ -124,11 +142,13 @@ class Command(FieldSet):
def createFields(self):
yield Integer(self, "time", "Delta time in ticks")
next = self.stream.readBits(self.absolute_address+self.current_size, 8, self.root.endian)
next = self.stream.readBits(
self.absolute_address + self.current_size, 8, self.root.endian)
if next & 0x80 == 0:
# "Running Status" command
if self.prev_command is None:
raise ParserError("Running Status command not preceded by another command.")
raise ParserError(
"Running Status command not preceded by another command.")
self.command = self.prev_command.command
else:
yield Enum(textHandler(UInt8(self, "command"), hexadecimal), self.COMMAND_DESC)
@ -150,7 +170,8 @@ class Command(FieldSet):
yield RawBytes(self, "data", size)
else:
if self.command not in self.COMMAND_PARSER:
raise ParserError("Unknown command: %s" % self["command"].display)
raise ParserError("Unknown command: %s"
% self["command"].display)
parser = self.COMMAND_PARSER[self.command]
for field in parser(self):
yield field
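For context, the "Running Status" branch above implements a MIDI size optimization: an event that starts with a data byte (high bit clear) reuses the previous event's status byte. A hedged sketch of that dispatch rule in isolation (names are illustrative):

def effective_command(first_byte, prev_command):
    # A byte with the high bit set is an explicit status byte;
    # otherwise the event reuses the previous command (Running Status).
    if first_byte & 0x80:
        return first_byte
    if prev_command is None:
        raise ValueError("Running Status with no previous command")
    return prev_command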
@ -161,6 +182,7 @@ class Command(FieldSet):
else:
return self.COMMAND_DESC[self.command]
class Track(FieldSet):
def __init__(self, *args):
FieldSet.__init__(self, *args)
@ -182,14 +204,15 @@ class Track(FieldSet):
def createDescription(self):
command = self["command[0]"]
if "meta_command" in command \
and command["meta_command"].value in (Command.META_COMMAND_TEXT, Command.META_COMMAND_NAME) \
and "text" in command:
and command["meta_command"].value in (Command.META_COMMAND_TEXT, Command.META_COMMAND_NAME) \
and "text" in command:
return command["text"].value.strip("\r\n")
else:
return ""
class Header(FieldSet):
static_size = 10*8
static_size = 10 * 8
FILE_FORMAT = {
0: "Single track",
1: "Multiple tracks, synchronous",
@ -206,13 +229,14 @@ class Header(FieldSet):
return "%s; %s tracks" % (
self["file_format"].display, self["nb_track"].value)
class MidiFile(Parser):
MAGIC = "MThd"
PARSER_TAGS = {
"id": "midi",
"category": "audio",
"file_ext": ["mid", "midi"],
"mime": (u"audio/mime", ),
"mime": (u"audio/mime",),
"magic": ((MAGIC, 0),),
"min_size": 64,
"description": "MIDI audio"
@ -239,8 +263,7 @@ class MidiFile(Parser):
count = self["/header/nb_track"].value - 1
start = self["track[%u]" % count].absolute_address
# Search "End of track" of last track
end = self.stream.searchBytes("\xff\x2f\x00", start, MAX_FILESIZE*8)
end = self.stream.searchBytes("\xff\x2f\x00", start, MAX_FILESIZE * 8)
if end is not None:
return end + 3*8
return end + 3 * 8
return None

View file

@ -18,12 +18,12 @@ Creation: 18th February 2007
"""
from math import log10
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
Bits, UInt16, UInt8,
RawBytes, String, GenericVector)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
Bits, UInt16, UInt8,
RawBytes, String, GenericVector)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler
# Old NoiseTracker 15-samples modules can have anything here.
MODULE_TYPE = {
@ -42,15 +42,19 @@ MODULE_TYPE = {
"FA08": ("Digital Tracker", 8),
}
def getFineTune(val):
return ("0", "1", "2", "3", "4", "5", "6", "7", "8",
"-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1")[val.value]
def getVolume(val):
return "%.1f dB" % (20.0*log10(val.value/64.0))
return "%.1f dB" % (20.0 * log10(val.value / 64.0))
class SampleInfo(FieldSet):
static_size = 30*8
static_size = 30 * 8
def createFields(self):
yield String(self, "name", 22, strip='\0')
yield UInt16(self, "sample_count")
@ -62,8 +66,9 @@ class SampleInfo(FieldSet):
def createValue(self):
return self["name"].value
class Header(FieldSet):
static_size = 1084*8
static_size = 1084 * 8
def createFields(self):
yield String(self, "name", 20, strip='\0')
@ -76,8 +81,10 @@ class Header(FieldSet):
def getNumChannels(self):
return MODULE_TYPE[self["type"].value][1]
class Note(FieldSet):
static_size = 8*4
static_size = 8 * 4
def createFields(self):
yield Bits(self, 4, "note_hi_nibble")
yield Bits(self, 12, "period")
@ -85,42 +92,45 @@ class Note(FieldSet):
yield Bits(self, 4, "effect")
yield UInt8(self, "parameter")
class Row(FieldSet):
def __init__(self, parent, name, channels, desc=None):
FieldSet.__init__(self, parent, name, description=desc)
self.channels = channels
self._size = 8*self.channels*4
self._size = 8 * self.channels * 4
def createFields(self):
for index in xrange(self.channels):
yield Note(self, "note[]")
class Pattern(FieldSet):
def __init__(self, parent, name, channels, desc=None):
FieldSet.__init__(self, parent, name, description=desc)
self.channels = channels
self._size = 64*8*self.channels*4
self._size = 64 * 8 * self.channels * 4
def createFields(self):
for index in xrange(64):
yield Row(self, "row[]", self.channels)
class AmigaModule(Parser):
PARSER_TAGS = {
"id": "mod",
"category": "audio",
"file_ext": ("mod", "nst", "wow", "oct", "sd0" ),
"file_ext": ("mod", "nst", "wow", "oct", "sd0"),
"mime": (u'audio/mod', u'audio/x-mod', u'audio/mod', u'audio/x-mod'),
"min_size": 1084*8,
"min_size": 1084 * 8,
"description": "Uncompressed amiga module"
}
endian = BIG_ENDIAN
def validate(self):
t = self.stream.readBytes(1080*8, 4)
if t not in MODULE_TYPE:
return "Invalid module type '%s'" % t
self.createValue = lambda t: "%s module, %u channels" % MODULE_TYPE[t]
modtype = self.stream.readBytes(1080 * 8, 4)
if modtype not in MODULE_TYPE:
return "Invalid module type %a" % modtype
self.createValue = lambda modtype: "%s module, %u channels" % MODULE_TYPE[modtype]
return True
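The validate() above keys on the 4-byte tag at byte offset 1080, which names the tracker variant and its channel count. A standalone sketch of the same check, assuming the MODULE_TYPE table defined earlier in this file (the helper name is illustrative):

def identify_module(path, module_type=MODULE_TYPE):
    # Read the 4-byte type tag at offset 1080 and look it up;
    # returns (tracker_name, channels) or None when unknown.
    with open(path, "rb") as handle:
        handle.seek(1080)
        tag = handle.read(4).decode("ascii", "replace")
    return module_type.get(tag)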
def createFields(self):
@ -144,6 +154,5 @@ class AmigaModule(Parser):
count = header["samples/info[%u]/sample_count" % index].value
if count:
self.info("Yielding sample %u: %u samples" % (index, count))
yield RawBytes(self, "sample_data[]", 2*count, \
yield RawBytes(self, "sample_data[]", 2 * count, \
"Sample %u" % index)

View file

@ -8,65 +8,79 @@ Author: Christophe GISQUET <christophe.gisquet@free.fr>
Creation: 10th February 2007
"""
from hachoir_core.field import (FieldSet,
UInt32, UInt16, UInt8, Int8, Float32,
RawBytes, String, GenericVector, ParserError)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir.field import (FieldSet,
UInt32, UInt16, UInt8, Int8, Float32,
RawBytes, String, GenericVector, ParserError)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
MAX_ENVPOINTS = 32
def parseComments(parser):
size = parser["block_size"].value
if size > 0:
yield String(parser, "comment", size)
class MidiOut(FieldSet):
static_size = 9*32*8
static_size = 9 * 32 * 8
def createFields(self):
for name in ("start", "stop", "tick", "noteon", "noteoff",
"volume", "pan", "banksel", "program"):
yield String(self, name, 32, strip='\0')
class Command(FieldSet):
static_size = 32*8
static_size = 32 * 8
def createFields(self):
start = self.absolute_address
size = self.stream.searchBytesLength("\0", False, start)
if size > 0:
self.info("Command: %s" % self.stream.readBytes(start, size))
yield String(self, "command", size, strip='\0')
yield RawBytes(self, "parameter", (self._size//8)-size)
yield RawBytes(self, "parameter", (self._size // 8) - size)
class MidiSFXExt(FieldSet):
static_size = 16*32*8
static_size = 16 * 32 * 8
def createFields(self):
for index in xrange(16):
yield Command(self, "command[]")
class MidiZXXExt(FieldSet):
static_size = 128*32*8
static_size = 128 * 32 * 8
def createFields(self):
for index in xrange(128):
yield Command(self, "command[]")
def parseMidiConfig(parser):
yield MidiOut(parser, "midi_out")
yield MidiSFXExt(parser, "sfx_ext")
yield MidiZXXExt(parser, "zxx_ext")
def parseChannelSettings(parser):
size = parser["block_size"].value//4
size = parser["block_size"].value // 4
if size > 0:
yield GenericVector(parser, "settings", size, UInt32, "mix_plugin")
def parseEQBands(parser):
size = parser["block_size"].value//4
size = parser["block_size"].value // 4
if size > 0:
yield GenericVector(parser, "gains", size, UInt32, "band")
class SoundMixPluginInfo(FieldSet):
static_size = 128*8
static_size = 128 * 8
def createFields(self):
yield textHandler(UInt32(self, "plugin_id1"), hexadecimal)
yield textHandler(UInt32(self, "plugin_id2"), hexadecimal)
@ -76,10 +90,11 @@ class SoundMixPluginInfo(FieldSet):
yield String(self, "name", 32, strip='\0')
yield String(self, "dll_name", 64, desc="Original DLL name", strip='\0')
class ExtraData(FieldSet):
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self._size = (4+self["size"].value)*8
self._size = (4 + self["size"].value) * 8
def createFields(self):
yield UInt32(self, "size")
@ -87,10 +102,11 @@ class ExtraData(FieldSet):
if size:
yield RawBytes(self, "data", size)
class XPlugData(FieldSet):
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self._size = (4+self["size"].value)*8
self._size = (4 + self["size"].value) * 8
def createFields(self):
yield UInt32(self, "size")
@ -101,97 +117,100 @@ class XPlugData(FieldSet):
elif self["marker"].value == 'PORG':
yield UInt32(self, "default_program")
def parsePlugin(parser):
yield SoundMixPluginInfo(parser, "info")
# Check if VST setchunk present
size = parser.stream.readBits(parser.absolute_address+parser.current_size, 32, LITTLE_ENDIAN)
size = parser.stream.readBits(parser.absolute_address + parser.current_size, 32, LITTLE_ENDIAN)
if 0 < size < parser.current_size + parser._size:
yield ExtraData(parser, "extra_data")
# Check if XPlugData is present
size = parser.stream.readBits(parser.absolute_address+parser.current_size, 32, LITTLE_ENDIAN)
size = parser.stream.readBits(parser.absolute_address + parser.current_size, 32, LITTLE_ENDIAN)
if 0 < size < parser.current_size + parser._size:
yield XPlugData(parser, "xplug_data")
# Format: "XXXX": (type, count, name)
EXTENSIONS = {
# WriteInstrumentHeaderStruct@Sndfile.cpp
"XTPM": {
"..Fd": (UInt32, 1, "Flags"),
"..OF": (UInt32, 1, "Fade out"),
"..VG": (UInt32, 1, "Global Volume"),
"...P": (UInt32, 1, "Panning"),
"..EV": (UInt32, 1, "Volume Envelope"),
"..EP": (UInt32, 1, "Panning Envelope"),
".EiP": (UInt32, 1, "Pitch Envelope"),
".SLV": (UInt8, 1, "Volume Loop Start"),
".ELV": (UInt8, 1, "Volume Loop End"),
".BSV": (UInt8, 1, "Volume Sustain Begin"),
".ESV": (UInt8, 1, "Volume Sustain End"),
".SLP": (UInt8, 1, "Panning Loop Start"),
".ELP": (UInt8, 1, "Panning Loop End"),
".BSP": (UInt8, 1, "Panning Substain Begin"),
".ESP": (UInt8, 1, "Padding Substain End"),
"SLiP": (UInt8, 1, "Pitch Loop Start"),
"ELiP": (UInt8, 1, "Pitch Loop End"),
"BSiP": (UInt8, 1, "Pitch Substain Begin"),
"ESiP": (UInt8, 1, "Pitch Substain End"),
".ANN": (UInt8, 1, "NNA"),
".TCD": (UInt8, 1, "DCT"),
".AND": (UInt8, 1, "DNA"),
"..SP": (UInt8, 1, "Panning Swing"),
"..SV": (UInt8, 1, "Volume Swing"),
".CFI": (UInt8, 1, "IFC"),
".RFI": (UInt8, 1, "IFR"),
"..BM": (UInt32, 1, "Midi Bank"),
"..PM": (UInt8, 1, "Midi Program"),
"..CM": (UInt8, 1, "Midi Channel"),
".KDM": (UInt8, 1, "Midi Drum Key"),
".SPP": (Int8, 1, "PPS"),
".CPP": (UInt8, 1, "PPC"),
".[PV": (UInt32, MAX_ENVPOINTS, "Volume Points"),
".[PP": (UInt32, MAX_ENVPOINTS, "Panning Points"),
"[PiP": (UInt32, MAX_ENVPOINTS, "Pitch Points"),
".[EV": (UInt8, MAX_ENVPOINTS, "Volume Enveloppe"),
".[EP": (UInt8, MAX_ENVPOINTS, "Panning Enveloppe"),
"[EiP": (UInt8, MAX_ENVPOINTS, "Pitch Enveloppe"),
".[MN": (UInt8, 128, "Note Mapping"),
"..[K": (UInt32, 128, "Keyboard"),
"..[n": (String, 32, "Name"),
".[nf": (String, 12, "Filename"),
".PiM": (UInt8, 1, "MixPlug"),
"..RV": (UInt16, 1, "Volume Ramping"),
"...R": (UInt16, 1, "Resampling"),
"..SC": (UInt8, 1, "Cut Swing"),
"..SR": (UInt8, 1, "Res Swing"),
"..MF": (UInt8, 1, "Filter Mode"),
"..Fd": (UInt32, 1, "Flags"),
"..OF": (UInt32, 1, "Fade out"),
"..VG": (UInt32, 1, "Global Volume"),
"...P": (UInt32, 1, "Panning"),
"..EV": (UInt32, 1, "Volume Envelope"),
"..EP": (UInt32, 1, "Panning Envelope"),
".EiP": (UInt32, 1, "Pitch Envelope"),
".SLV": (UInt8, 1, "Volume Loop Start"),
".ELV": (UInt8, 1, "Volume Loop End"),
".BSV": (UInt8, 1, "Volume Sustain Begin"),
".ESV": (UInt8, 1, "Volume Sustain End"),
".SLP": (UInt8, 1, "Panning Loop Start"),
".ELP": (UInt8, 1, "Panning Loop End"),
".BSP": (UInt8, 1, "Panning Substain Begin"),
".ESP": (UInt8, 1, "Padding Substain End"),
"SLiP": (UInt8, 1, "Pitch Loop Start"),
"ELiP": (UInt8, 1, "Pitch Loop End"),
"BSiP": (UInt8, 1, "Pitch Substain Begin"),
"ESiP": (UInt8, 1, "Pitch Substain End"),
".ANN": (UInt8, 1, "NNA"),
".TCD": (UInt8, 1, "DCT"),
".AND": (UInt8, 1, "DNA"),
"..SP": (UInt8, 1, "Panning Swing"),
"..SV": (UInt8, 1, "Volume Swing"),
".CFI": (UInt8, 1, "IFC"),
".RFI": (UInt8, 1, "IFR"),
"..BM": (UInt32, 1, "Midi Bank"),
"..PM": (UInt8, 1, "Midi Program"),
"..CM": (UInt8, 1, "Midi Channel"),
".KDM": (UInt8, 1, "Midi Drum Key"),
".SPP": (Int8, 1, "PPS"),
".CPP": (UInt8, 1, "PPC"),
".[PV": (UInt32, MAX_ENVPOINTS, "Volume Points"),
".[PP": (UInt32, MAX_ENVPOINTS, "Panning Points"),
"[PiP": (UInt32, MAX_ENVPOINTS, "Pitch Points"),
".[EV": (UInt8, MAX_ENVPOINTS, "Volume Enveloppe"),
".[EP": (UInt8, MAX_ENVPOINTS, "Panning Enveloppe"),
"[EiP": (UInt8, MAX_ENVPOINTS, "Pitch Enveloppe"),
".[MN": (UInt8, 128, "Note Mapping"),
"..[K": (UInt32, 128, "Keyboard"),
"..[n": (String, 32, "Name"),
".[nf": (String, 12, "Filename"),
".PiM": (UInt8, 1, "MixPlug"),
"..RV": (UInt16, 1, "Volume Ramping"),
"...R": (UInt16, 1, "Resampling"),
"..SC": (UInt8, 1, "Cut Swing"),
"..SR": (UInt8, 1, "Res Swing"),
"..MF": (UInt8, 1, "Filter Mode"),
},
# See after "CODE tag dictionary", same place, elements with [EXT]
"STPM": {
"...C": (UInt32, 1, "Channels"),
".VWC": (None, 0, "CreatedWith version"),
".VGD": (None, 0, "Default global volume"),
"..TD": (None, 0, "Default tempo"),
"HIBE": (None, 0, "Embedded instrument header"),
"VWSL": (None, 0, "LastSavedWith version"),
".MMP": (None, 0, "Plugin Mix mode"),
".BPR": (None, 0, "Rows per beat"),
".MPR": (None, 0, "Rows per measure"),
"@PES": (None, 0, "Chunk separator"),
".APS": (None, 0, "Song Pre-amplification"),
"..MT": (None, 0, "Tempo mode"),
"VTSV": (None, 0, "VSTi volume"),
"...C": (UInt32, 1, "Channels"),
".VWC": (None, 0, "CreatedWith version"),
".VGD": (None, 0, "Default global volume"),
"..TD": (None, 0, "Default tempo"),
"HIBE": (None, 0, "Embedded instrument header"),
"VWSL": (None, 0, "LastSavedWith version"),
".MMP": (None, 0, "Plugin Mix mode"),
".BPR": (None, 0, "Rows per beat"),
".MPR": (None, 0, "Rows per measure"),
"@PES": (None, 0, "Chunk separator"),
".APS": (None, 0, "Song Pre-amplification"),
"..MT": (None, 0, "Tempo mode"),
"VTSV": (None, 0, "VSTi volume"),
}
}
class MPField(FieldSet):
def __init__(self, parent, name, ext, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.ext = ext
self.info(self.createDescription())
self._size = (6+self["data_size"].value)*8
self._size = (6 + self["data_size"].value) * 8
def createFields(self):
# Identify tag
@ -222,10 +241,11 @@ class MPField(FieldSet):
return "Element '%s', size %i" % \
(self["code"]._description, self["data_size"].value)
def parseFields(parser):
# Determine field names
ext = EXTENSIONS[parser["block_type"].value]
if ext == None:
if ext is None:
raise ParserError("Unknown parent '%s'" % parser["block_type"].value)
# Parse fields
@ -237,7 +257,8 @@ def parseFields(parser):
# Abort on unknown codes
parser.info("End of extension '%s' when finding '%s'" %
(parser["block_type"].value, parser.stream.readBytes(addr, 4)))
(parser["block_type"].value, parser.stream.readBytes(addr, 4)))
class ModplugBlock(FieldSet):
BLOCK_INFO = {
@ -247,6 +268,7 @@ class ModplugBlock(FieldSet):
"XTPM": ("instrument_ext", False, "Instrument extensions", parseFields),
"STPM": ("song_ext", False, "Song extensions", parseFields),
}
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.parseBlock = parsePlugin
@ -261,7 +283,7 @@ class ModplugBlock(FieldSet):
self.parseBlock = lambda: parseBlock(self)
if self.has_size:
self._size = 8*(self["block_size"].value + 8)
self._size = 8 * (self["block_size"].value + 8)
def createFields(self):
yield String(self, "block_type", 4)
@ -273,10 +295,11 @@ class ModplugBlock(FieldSet):
yield field
if self.has_size:
size = self["block_size"].value - (self.current_size//8)
size = self["block_size"].value - (self.current_size // 8)
if size > 0:
yield RawBytes(self, "data", size, "Unknown data")
def ParseModplugMetadata(parser):
while not parser.eof:
block = ModplugBlock(parser, "block[]")
@ -285,7 +308,6 @@ def ParseModplugMetadata(parser):
break
# More undocumented stuff: date ?
size = (parser._size - parser.absolute_address - parser.current_size)//8
size = (parser._size - parser.absolute_address - parser.current_size) // 8
if size > 0:
yield RawBytes(parser, "info", size)

View file

@ -5,29 +5,29 @@ Creation: 12 decembre 2005
Author: Victor Stinner
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
MissingField, ParserError, createOrphanField,
Bit, Bits, Enum,
PaddingBits, PaddingBytes,
RawBytes)
from hachoir_parser.audio.id3 import ID3v1, ID3v2
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.tools import humanFrequency, humanBitSize
from hachoir_core.bits import long2raw
from hachoir_core.error import HACHOIR_ERRORS
from hachoir_core.stream import InputStreamError
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
MissingField, ParserError, createOrphanField,
Bit, Bits, Enum,
PaddingBits, PaddingBytes,
RawBytes)
from hachoir.parser.audio.id3 import ID3v1, ID3v2
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.tools import humanFrequency, humanBitSize
from hachoir.core.bits import long2raw
from hachoir.stream import InputStreamError
# Max MP3 filesize: 200 MB
MAX_FILESIZE = 200*1024*1024*8
MAX_FILESIZE = 200 * 1024 * 1024 * 8
class Frame(FieldSet):
VERSION_NAME = { 0: "2.5", 2: "2", 3: "1" }
VERSION_NAME = {0: "2.5", 2: "2", 3: "1"}
MPEG_I = 3
MPEG_II = 2
MPEG_II_5 = 0
LAYER_NAME = { 1: "III", 2: "II", 3: "I" }
LAYER_NAME = {1: "III", 2: "II", 3: "I"}
LAYER_I = 3
LAYER_II = 2
LAYER_III = 1
@ -35,25 +35,25 @@ class Frame(FieldSet):
# Bit rates (bit_rate * 1000 = bits/sec)
# key 15 is always invalid
BIT_RATES = {
1: ( # MPEG1
( 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 ), # layer I
( 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 ), # layer II
( 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 ), # layer III
1: ( # MPEG1
(0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448), # layer I
(0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384), # layer II
(0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320), # layer III
# - 1 2 3 4 5 6 7 8 9 10 11 12 13 14 -
),
2: ( # MPEG2 / MPEG2.5
( 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256 ), # layer I
( 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ), # layer II
( 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 ), # layer III
2: ( # MPEG2 / MPEG2.5
(0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256), # layer I
(0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160), # layer II
(0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160), # layer III
# - 1 2 3 4 5 6 7 8 9 10 11 12 13 14 -
)
}
SAMPLING_RATES = {
3: {0: 44100, 1: 48000, 2: 32000}, # MPEG1
2: {0: 22050, 1: 24000, 2: 16000}, # MPEG2
0: {0: 11025, 1: 12000, 2: 8000} # MPEG2.5
}
EMPHASIS_NAME = {0: "none", 1: "50/15 ms", 3: "CCIT J.17"}
EMPHASIS_NAME = {0: "none", 1: "50/15 ms", 3: "CCIT J.17"}
CHANNEL_MODE_NAME = {
0: "Stereo",
1: "Joint stereo",
@ -96,17 +96,17 @@ class Frame(FieldSet):
yield Bit(self, "original", "Is original?")
yield Enum(Bits(self, "emphasis", 2, "Emphasis"), self.EMPHASIS_NAME)
size = (self.size - self.current_size) / 8
size = (self.size - self.current_size) // 8
if size:
yield RawBytes(self, "data", size)
def isValid(self):
return (self["layer"].value != 0
and self["sync"].value == 2047
and self["version"].value != 1
and self["sampling_rate"].value != 3
and self["bit_rate"].value not in (0, 15)
and self["emphasis"].value != 2)
and self["sync"].value == 2047
and self["version"].value != 1
and self["sampling_rate"].value != 3
and self["bit_rate"].value not in (0, 15)
and self["emphasis"].value != 2)
def getSampleRate(self):
"""
@ -128,9 +128,9 @@ class Frame(FieldSet):
if bit_rate in (0, 15):
return None
if self["version"].value == 3:
dataset = self.BIT_RATES[1] # MPEG1
else:
dataset = self.BIT_RATES[2] # MPEG2 / MPEG2.5
try:
return dataset[layer][bit_rate] * 1000
except (KeyError, IndexError):
@ -152,15 +152,15 @@ class Frame(FieldSet):
if self["version"].value == self.MPEG_I:
return (frame_size * 144) // sample_rate + padding
else:
return (frame_size * 72) // sample_rate + padding
elif self["layer"].value == self.LAYER_II:
return (frame_size * 144) / sample_rate + padding
else: # self.LAYER_I:
frame_size = (frame_size * 12) / sample_rate
return (frame_size * 144) // sample_rate + padding
else: # self.LAYER_I:
frame_size = (frame_size * 12) // sample_rate
return (frame_size + padding) * 4
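Worked example for the MPEG1 Layer III branch above (a sketch, not hachoir API): at 128 kbit/s and 44100 Hz, a frame is (128000 * 144) // 44100 = 417 bytes, or 418 with the padding bit set.

def layer3_frame_size(bit_rate, sample_rate, padding):
    # MPEG1 Layer III frame size in bytes: 144 * bit_rate / sample_rate,
    # plus one optional padding byte.
    return (bit_rate * 144) // sample_rate + padding

assert layer3_frame_size(128000, 44100, 0) == 417
assert layer3_frame_size(128000, 44100, 1) == 418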
def getNbChannel(self):
return self.NB_CHANNEL[ self["channel_mode"].value ]
return self.NB_CHANNEL[self["channel_mode"].value]
def createDescription(self):
info = ["layer %s" % self["layer"].display]
@ -172,6 +172,7 @@ class Frame(FieldSet):
info.append(humanFrequency(sampling_rate))
return "MPEG-%s %s" % (self["version"].display, ", ".join(info))
def findSynchronizeBits(parser, start, max_size):
"""
Find synchronisation bits (11 bits set to 1)
@ -192,9 +193,9 @@ def findSynchronizeBits(parser, start, max_size):
# Strong validation of frame: create the frame
# and call method isValid()
try:
frame = createOrphanField(parser, start-address0, Frame, "frame")
frame = createOrphanField(parser, start - address0, Frame, "frame")
valid = frame.isValid()
except HACHOIR_ERRORS:
except Exception:
valid = False
if valid:
return size
@ -204,6 +205,7 @@ def findSynchronizeBits(parser, start, max_size):
size += 1
return None
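A byte-level illustration of the search above: an MPEG audio frame starts with 11 set bits, so a candidate is any position where a 0xFF byte is followed by a byte whose top three bits are set. This sketch only finds candidates; the parser above additionally builds a Frame and calls isValid() to reject false synchronizations:

def find_sync(data, start=0):
    # Return the byte offset of the first 11-bit sync pattern, or None.
    for pos in range(start, len(data) - 1):
        if data[pos] == 0xFF and (data[pos + 1] & 0xE0) == 0xE0:
            return pos
    return None

assert find_sync(bytes([0x00, 0xFF, 0xFB, 0x90])) == 1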
class Frames(FieldSet):
# Padding bytes allowed before a frame
MAX_PADDING = 256
@ -211,7 +213,7 @@ class Frames(FieldSet):
def synchronize(self):
addr = self.absolute_address
start = addr + self.current_size
end = min(start + self.MAX_PADDING*8, addr + self.size)
end = min(start + self.MAX_PADDING * 8, addr + self.size)
padding = findSynchronizeBits(self, start, end)
if padding is None:
raise ParserError("MPEG audio: Unable to find synchronization bits")
@ -246,12 +248,12 @@ class Frames(FieldSet):
while self.current_size < self.size:
yield Frame(self, "frame[]")
# padding = self.synchronize()
# if padding:
# yield padding
# Read raw bytes at the end (if any)
size = (self.size - self.current_size) / 8
size = (self.size - self.current_size) // 8
if size:
yield RawBytes(self, "raw", size)
@ -262,15 +264,15 @@ class Frames(FieldSet):
text = "Variable bit rate (VBR)"
return "Frames: %s" % text
def createMpegAudioMagic():
# ID3v1 magic
magics = [("TAG", 0)]
# ID3v2 magics
for ver_major in ID3v2.VALID_MAJOR_VERSIONS:
magic = "ID3%c\x00" % ver_major
magics.append( (magic,0) )
magic = "ID3%c\x00" % ver_major
magics.append((magic, 0))
# MPEG frame magic
# TODO: Use longer magic: 32 bits instead of 16 bits
@ -280,17 +282,18 @@ def createMpegAudioMagic():
for crc16 in (0, 1):
magic = (SYNC_BITS << 5) | (version << 3) | (layer << 1) | crc16
magic = long2raw(magic, BIG_ENDIAN, 2)
magics.append( (magic, 0) )
magics.append((magic, 0))
return magics
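For reference, the 16-bit values generated above are 0xFFE0 | (version << 3) | (layer << 1) | crc16, and MPEG1 Layer III yields the familiar MP3 file prefixes. A quick restatement mirroring the loop above (a sketch):

SYNC_BITS = 2047  # eleven 1-bits

def frame_magic(version, layer, crc16):
    # 16-bit big-endian MPEG frame header prefix.
    value = (SYNC_BITS << 5) | (version << 3) | (layer << 1) | crc16
    return value.to_bytes(2, "big")

# MPEG1 (version 3), Layer III (layer 1):
assert frame_magic(3, 1, 0) == b"\xff\xfa"
assert frame_magic(3, 1, 1) == b"\xff\xfb"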
class MpegAudioFile(Parser):
PARSER_TAGS = {
"id": "mpeg_audio",
"category": "audio",
"file_ext": ("mpa", "mp1", "mp2", "mp3"),
"mime": (u"audio/mpeg",),
"min_size": 4*8,
# "magic": createMpegAudioMagic(),
"min_size": 4 * 8,
# "magic": createMpegAudioMagic(),
"description": "MPEG audio version 1, 2, 2.5",
"subfile": "skip",
}
@ -300,7 +303,7 @@ class MpegAudioFile(Parser):
if self[0].name in ("id3v2", "id3v1"):
return True
if not self.stream.checked: # TODO: is it possible to handle piped input?
return False
# Validate first 5 frames
@ -310,7 +313,7 @@ class MpegAudioFile(Parser):
except MissingField:
# Require a least one valid frame
if (1 <= index) \
and self["frames"].done:
and self["frames"].done:
return True
return "Unable to get frame #%u" % index
except (InputStreamError, ParserError):
@ -333,16 +336,16 @@ class MpegAudioFile(Parser):
if self.stream.readBytes(0, 3) == "ID3":
yield ID3v2(self, "id3v2")
if self._size is None: # TODO: is it possible to handle piped input?
raise NotImplementedError
# Check if file is ending with ID3v1 or not and compute frames size
frames_size = self.size - self.current_size
addr = self.size - 128*8
addr = self.size - 128 * 8
if 0 <= addr:
has_id3 = (self.stream.readBytes(addr, 3) == "TAG")
if has_id3:
frames_size -= 128*8
frames_size -= 128 * 8
else:
has_id3 = False
@ -392,7 +395,7 @@ class MpegAudioFile(Parser):
# Check frame 32 bits header
if not frame.isValid():
break
except HACHOIR_ERRORS:
except Exception:
break
if MAX_FILESIZE < (size + frame.size):
break
@ -405,4 +408,3 @@ class MpegAudioFile(Parser):
except InputStreamError:
pass
return size

View file

@ -8,14 +8,15 @@ Samples:
http://samples.mplayerhq.hu/real/RA/
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
UInt8, UInt16, UInt32,
Bytes, RawBytes, String,
PascalString8)
from hachoir_core.tools import humanFrequency
from hachoir_core.text_handler import displayHandler
from hachoir_core.endian import BIG_ENDIAN
from hachoir.parser import Parser
from hachoir.field import (FieldSet,
UInt8, UInt16, UInt32,
Bytes, RawBytes, String,
PascalString8)
from hachoir.core.tools import humanFrequency
from hachoir.core.text_handler import displayHandler
from hachoir.core.endian import BIG_ENDIAN
class Metadata(FieldSet):
def createFields(self):
@ -24,6 +25,7 @@ class Metadata(FieldSet):
yield PascalString8(self, "copyright", charset="ISO-8859-1")
yield PascalString8(self, "comment", charset="ISO-8859-1")
class RealAudioFile(Parser):
MAGIC = ".ra\xFD"
PARSER_TAGS = {
@ -31,7 +33,7 @@ class RealAudioFile(Parser):
"category": "audio",
"file_ext": ["ra"],
"mime": (u"audio/x-realaudio", u"audio/x-pn-realaudio"),
"min_size": 6*8,
"min_size": 6 * 8,
"magic": ((MAGIC, 0),),
"description": u"Real audio (.ra)",
}
@ -55,7 +57,7 @@ class RealAudioFile(Parser):
yield UInt8(self, "Unknown2")
yield PascalString8(self, "FourCC")
audio_size = self["data_size"].value
else: # version = 4
yield UInt16(self, "reserved1", "Reserved, should be 0")
yield String(self, "ra4sig", 4, "'.ra4' signature")
yield UInt32(self, "filesize", "File size (minus 40 bytes)")

View file

@ -9,34 +9,36 @@ Author: Christophe GISQUET <christophe.gisquet@free.fr>
Creation: 11th February 2007
"""
from hachoir_parser import Parser
from hachoir_core.field import (StaticFieldSet, FieldSet, Field,
Bit, Bits,
UInt32, UInt16, UInt8, Enum,
PaddingBytes, RawBytes, NullBytes,
String, GenericVector, ParserError)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.tools import alignValue
from hachoir.parser import Parser
from hachoir.field import (StaticFieldSet, FieldSet, Field,
Bit, Bits,
UInt32, UInt16, UInt8, Enum,
PaddingBytes, RawBytes, NullBytes,
String, GenericVector, ParserError)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import alignValue
class Chunk:
def __init__(self, cls, name, offset, size, *args):
# Todo: swap and have None=unknown instead of now: 0=unknown
assert size != None and size>=0
assert size is not None and size >= 0
self.cls = cls
self.name = name
self.offset = offset
self.size = size
self.args = args
class ChunkIndexer:
def __init__(self):
self.chunks = [ ]
self.chunks = []
# Check if a chunk fits
def canHouse(self, chunk, index):
if index > 1:
if chunk.offset + chunk.size > self.chunks[index-1].offset:
if chunk.offset + chunk.size > self.chunks[index - 1].offset:
return False
# We could test now that it fits in the memory
return True
@ -49,7 +51,8 @@ class ChunkIndexer:
offset = self.chunks[index].offset
if offset < new_chunk.offset:
if not self.canHouse(new_chunk, index):
raise ParserError("Chunk '%s' doesn't fit!" % new_chunk.name)
raise ParserError("Chunk '%s' doesn't fit!" %
new_chunk.name)
self.chunks.insert(index, new_chunk)
return
index += 1
@ -61,15 +64,15 @@ class ChunkIndexer:
def yieldChunks(self, obj):
while len(self.chunks) > 0:
chunk = self.chunks.pop()
current_pos = obj.current_size//8
current_pos = obj.current_size // 8
# Check if padding needed
size = chunk.offset - current_pos
if size > 0:
obj.info("Padding of %u bytes needed: curr=%u offset=%u" % \
obj.info("Padding of %u bytes needed: curr=%u offset=%u" %
(size, current_pos, chunk.offset))
yield PaddingBytes(obj, "padding[]", size)
current_pos = obj.current_size//8
current_pos = obj.current_size // 8
# Find resynch point if needed
count = 0
@ -78,15 +81,15 @@ class ChunkIndexer:
count += 1
chunk = self.chunks.pop()
# Unfortunately, we also pass the underlying chunks
if chunk == None:
obj.info("Couldn't resynch: %u object skipped to reach %u" % \
if chunk is None:
obj.info("Couldn't resynch: %u object skipped to reach %u" %
(count, current_pos))
return
# Resynch
size = chunk.offset-current_pos
size = chunk.offset - current_pos
if size > 0:
obj.info("Skipped %u objects to resynch to %u; chunk offset: %u->%u" % \
obj.info("Skipped %u objects to resynch to %u; chunk offset: %u->%u" %
(count, current_pos, old_off, chunk.offset))
yield RawBytes(obj, "resynch[]", size)
@ -95,23 +98,25 @@ class ChunkIndexer:
(chunk.size, chunk.offset))
field = chunk.cls(obj, chunk.name, chunk.size, *chunk.args)
# Not tested, probably wrong:
#if chunk.size: field.static_size = 8*chunk.size
# if chunk.size: field.static_size = 8*chunk.size
yield field
if hasattr(field, "getSubChunks"):
for sub_chunk in field.getSubChunks():
obj.info("Adding sub chunk: position=%u size=%u name='%s'" % \
obj.info("Adding sub chunk: position=%u size=%u name='%s'" %
(sub_chunk.offset, sub_chunk.size, sub_chunk.name))
self.addChunk(sub_chunk)
# Let missing padding be done by next chunk
class S3MFlags(StaticFieldSet):
format = (
(Bit, "st2_vibrato", "Vibrato (File version 1/ScreamTrack 2)"),
(Bit, "st2_tempo", "Tempo (File version 1/ScreamTrack 2)"),
(Bit, "amiga_slides", "Amiga slides (File version 1/ScreamTrack 2)"),
(Bit, "zero_vol_opt", "Automatically turn off looping notes whose volume is zero for >2 note rows"),
(Bit, "zero_vol_opt",
"Automatically turn off looping notes whose volume is zero for >2 note rows"),
(Bit, "amiga_limits", "Disallow notes beyond Amiga hardware specs"),
(Bit, "sb_processing", "Enable filter/SFX with SoundBlaster"),
(Bit, "vol_slide", "Volume slide also performed on first row"),
@ -119,30 +124,36 @@ class S3MFlags(StaticFieldSet):
(Bits, "unused[]", 8)
)
def parseChannelType(val):
val = val.value
if val<8:
if val < 8:
return "Left Sample Channel %u" % val
if val<16:
return "Right Sample Channel %u" % (val-8)
if val<32:
return "Adlib channel %u" % (val-16)
if val < 16:
return "Right Sample Channel %u" % (val - 8)
if val < 32:
return "Adlib channel %u" % (val - 16)
return "Value %u unknown" % val
class ChannelSettings(FieldSet):
static_size = 8
def createFields(self):
yield textHandler(Bits(self, "type", 7), parseChannelType)
yield Bit(self, "enabled")
class ChannelPanning(FieldSet):
static_size = 8
def createFields(self):
yield Bits(self, "default_position", 4, "Default pan position")
yield Bit(self, "reserved[]")
yield Bit(self, "use_default", "Bits 0:3 specify default position")
yield Bits(self, "reserved[]", 2)
# Provide an automatic constructor
class SizeFieldSet(FieldSet):
"""
@ -161,11 +172,12 @@ class SizeFieldSet(FieldSet):
- derive a class with ALIGN = 0.
"""
ALIGN = 16
def __init__(self, parent, name, size, desc=None):
FieldSet.__init__(self, parent, name, desc)
if size:
self.real_size = size
if self.static_size == None:
if self.static_size is None:
self.setCheckedSizes(size)
def setCheckedSizes(self, size):
@ -173,7 +185,7 @@ class SizeFieldSet(FieldSet):
self.real_size = size
size *= 8
if self.ALIGN:
size = alignValue(self.absolute_address+size, 8*self.ALIGN) \
size = alignValue(self.absolute_address + size, 8 * self.ALIGN) \
- self.absolute_address
if self._parent._size:
@ -185,10 +197,11 @@ class SizeFieldSet(FieldSet):
def createFields(self):
for field in self.createUnpaddedFields():
yield field
size = (self._size - self.current_size)//8
size = (self._size - self.current_size) // 8
if size > 0:
yield PaddingBytes(self, "padding", size)
class Header(SizeFieldSet):
def createDescription(self):
return "%s (%u patterns, %u instruments)" % \
@ -225,6 +238,7 @@ class Header(SizeFieldSet):
for field in self.getHeaderEndFields():
yield field
class S3MHeader(Header):
"""
0 1 2 3 4 5 6 7 8 9 A B C D E F
@ -253,12 +267,13 @@ class S3MHeader(Header):
xxx2=70h+orders+instruments*2
xxx3=70h+orders+instruments*2+patterns*2
"""
def __init__(self, parent, name, size, desc=None):
Header.__init__(self, parent, name, size, desc)
# Overwrite real_size
size = 0x60 + self["num_orders"].value + \
2*(self["num_instruments"].value + self["num_patterns"].value)
2 * (self["num_instruments"].value + self["num_patterns"].value)
if self["panning_info"].value == 252:
size += 32
@ -288,7 +303,8 @@ class S3MHeader(Header):
yield UInt16(self, "custom_data_parapointer",
"Parapointer to special custom data (not used by ST3.01)")
def getNumOrders(self): return self["num_orders"].value
def getNumOrders(self):
return self["num_orders"].value
def getHeaderEndFields(self):
instr = self["num_instruments"].value
@ -301,30 +317,31 @@ class S3MHeader(Header):
# S3M 3.20 extension
if self["creation_version_major"].value >= 3 \
and self["creation_version_minor"].value >= 0x20 \
and self["panning_info"].value == 252:
and self["creation_version_minor"].value >= 0x20 \
and self["panning_info"].value == 252:
yield GenericVector(self, "channel_panning", 32, ChannelPanning, "channel")
# Padding required for 16B alignment
size = self._size - self.current_size
if size > 0:
yield PaddingBytes(self, "padding", size//8)
yield PaddingBytes(self, "padding", size // 8)
def getSubChunks(self):
# Instruments - no warranty that they are concatenated
for index in xrange(self["num_instruments"].value):
yield Chunk(S3MInstrument, "instrument[]",
16*self["instr_pptr/offset[%u]" % index].value,
S3MInstrument.static_size//8)
16 * self["instr_pptr/offset[%u]" % index].value,
S3MInstrument.static_size // 8)
# Patterns - size unknown but listed in their headers
for index in xrange(self["num_patterns"].value):
yield Chunk(S3MPattern, "pattern[]",
16*self["pattern_pptr/offset[%u]" % index].value, 0)
16 * self["pattern_pptr/offset[%u]" % index].value, 0)
class PTMHeader(Header):
# static_size should prime over _size, right?
static_size = 8*608
static_size = 8 * 608
def getTrackerVersion(self, val):
val = val.value
@ -336,13 +353,14 @@ class PTMHeader(Header):
def getFirstProperties(self):
yield UInt16(self, "channels")
yield UInt16(self, "flags") # 0 => NullBytes
yield UInt16(self, "flags") # 0 => NullBytes
yield UInt16(self, "reserved[]")
def getLastProperties(self):
yield RawBytes(self, "reserved[]", 16)
def getNumOrders(self): return 256
def getNumOrders(self):
return 256
def getHeaderEndFields(self):
yield GenericVector(self, "pattern_pptr", 128, UInt16, "offset")
@ -351,7 +369,7 @@ class PTMHeader(Header):
# It goes like this in the BS: patterns->instruments->instr. samples
if self._parent._size:
min_off = self.absolute_address+self._parent._size
min_off = self.absolute_address + self._parent._size
else:
min_off = 99999999999
@ -359,22 +377,25 @@ class PTMHeader(Header):
count = self["num_instruments"].value
addr = self.absolute_address
for index in xrange(count):
offset = (self.static_size+index*PTMInstrument.static_size)//8
offset = (self.static_size + index * PTMInstrument.static_size) // 8
yield Chunk(PTMInstrument, "instrument[]", offset,
PTMInstrument.static_size//8)
offset = self.stream.readBits(addr+8*(offset+18), 32, LITTLE_ENDIAN)
PTMInstrument.static_size // 8)
offset = self.stream.readBits(addr + 8 * (offset + 18), 32, LITTLE_ENDIAN)
min_off = min(min_off, offset)
# Patterns
count = self["num_patterns"].value
prev_off = 16*self["pattern_pptr/offset[0]"].value
prev_off = 16 * self["pattern_pptr/offset[0]"].value
for index in range(1, count):
offset = 16*self["pattern_pptr/offset[%u]" % index].value
yield Chunk(PTMPattern, "pattern[]", prev_off, offset-prev_off)
offset = 16 * self["pattern_pptr/offset[%u]" % index].value
yield Chunk(PTMPattern, "pattern[]", prev_off, offset - prev_off)
prev_off = offset
# Difficult to account for
yield Chunk(PTMPattern, "pattern[]", prev_off, min_off-prev_off)
yield Chunk(PTMPattern, "pattern[]", prev_off, min_off - prev_off)
class SampleFlags(StaticFieldSet):
format = (
@ -384,23 +405,29 @@ class SampleFlags(StaticFieldSet):
(Bits, "unused", 5)
)
class S3MUInt24(Field):
static_size = 24
def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, size=24, description=desc)
addr = self.absolute_address
val = parent.stream.readBits(addr, 8, LITTLE_ENDIAN) << 20
val += parent.stream.readBits(addr+8, 16, LITTLE_ENDIAN) << 4
val += parent.stream.readBits(addr + 8, 16, LITTLE_ENDIAN) << 4
self.createValue = lambda: val
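S3MUInt24 above reads ST3's "memseg" parapointer: a high byte followed by a little-endian 16-bit word, with the combined value counting 16-byte paragraphs, so the shifts (<< 20 and << 4) fold in the multiply by 16. A hedged sketch with a worked value (helper name illustrative):

def memseg_offset(raw):
    # raw holds the 3 bytes as stored: high byte, then a 16-bit
    # little-endian word. The 24-bit result counts 16-byte paragraphs,
    # hence the final * 16.
    high = raw[0]
    low = raw[1] | (raw[2] << 8)
    return ((high << 16) | low) * 16

# high 0x01, low 0x1234 -> paragraph 0x011234 -> byte offset 0x112340
assert memseg_offset(bytes([0x01, 0x34, 0x12])) == 0x112340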
class SampleData(SizeFieldSet):
def createUnpaddedFields(self):
yield RawBytes(self, "data", self.real_size)
class PTMSampleData(SampleData):
ALIGN = 0
class Instrument(SizeFieldSet):
static_size = 8*0x50
static_size = 8 * 0x50
def createDescription(self):
info = [self["c4_speed"].display]
@ -427,6 +454,7 @@ class Instrument(SizeFieldSet):
def createValue(self):
return self["name"].value
class S3MInstrument(Instrument):
"""
In fact a sample. Description follows:
@ -446,14 +474,14 @@ class S3MInstrument(Instrument):
xxxx: sampledata
"""
MAGIC = "SCRS"
PACKING = {0: "Unpacked", 1: "DP30ADPCM" }
TYPE = {0: "Unknown", 1: "Sample", 2: "adlib melody", 3: "adlib drum2" }
PACKING = {0: "Unpacked", 1: "DP30ADPCM"}
TYPE = {0: "Unknown", 1: "Sample", 2: "adlib melody", 3: "adlib drum2"}
def getType(self):
return Enum(UInt8(self, "type"), self.TYPE)
def getSampleBits(self):
return 8*(1+self["flags/16bits"].value)
return 8 * (1 + self["flags/16bits"].value)
def getInstrumentFields(self):
yield S3MUInt24(self, "sample_offset")
@ -472,15 +500,19 @@ class S3MInstrument(Instrument):
def getSubChunks(self):
size = self["sample_size"].value
if self["flags/stereo"].value: size *= 2
if self["flags/16bits"].value: size *= 2
if self["flags/stereo"].value:
size *= 2
if self["flags/16bits"].value:
size *= 2
yield Chunk(SampleData, "sample_data[]",
self["sample_offset"].value, size)
class PTMType(FieldSet):
TYPES = {0: "No sample", 1: "Regular", 2: "OPL2/OPL2 instrument", 3: "MIDI instrument" }
TYPES = {0: "No sample", 1: "Regular",
2: "OPL2/OPL2 instrument", 3: "MIDI instrument"}
static_size = 8
def createFields(self):
yield Bits(self, "unused", 2)
yield Bit(self, "is_tonable")
@ -489,22 +521,24 @@ class PTMType(FieldSet):
yield Bit(self, "loop")
yield Enum(Bits(self, "origin", 2), self.TYPES)
##class PTMType(StaticFieldSet):
## format = (
## (Bits, "unused", 2),
## (Bit, "is_tonable"),
## (Bit, "16bits"),
## (Bit, "loop_bidir"),
## (Bit, "loop"),
## (Bits, "origin", 2),
## )
# class PTMType(StaticFieldSet):
# format = (
# (Bits, "unused", 2),
# (Bit, "is_tonable"),
# (Bit, "16bits"),
# (Bit, "loop_bidir"),
# (Bit, "loop"),
# (Bits, "origin", 2),
# )
class PTMInstrument(Instrument):
MAGIC = "PTMI"
ALIGN = 0
def getType(self):
return PTMType(self, "flags") # Hack to have more common code
return PTMType(self, "flags") # Hack to have more common code
# PTM doesn't pretend to manage 16bits
def getSampleBits(self):
@ -522,7 +556,7 @@ class PTMInstrument(Instrument):
yield UInt32(self, "gus_loop_start")
yield UInt32(self, "gus_loop_end")
yield textHandler(UInt8(self, "gus_loop_flags"), hexadecimal)
yield UInt8(self, "reserved[]") # Should be 0
yield UInt8(self, "reserved[]") # Should be 0
def getSubChunks(self):
# Samples are NOT padded, and the size is already the correct one
@ -546,6 +580,7 @@ class S3MNoteInfo(StaticFieldSet):
(Bit, "has_effect")
)
class PTMNoteInfo(StaticFieldSet):
format = (
(Bits, "channel", 5),
@ -554,6 +589,7 @@ class PTMNoteInfo(StaticFieldSet):
(Bit, "has_volume")
)
class Note(FieldSet):
def createFields(self):
# Used by Row to check if end of Row
@ -568,11 +604,15 @@ class Note(FieldSet):
yield UInt8(self, "effect")
yield UInt8(self, "param")
class S3MNote(Note):
NOTE_INFO = S3MNoteInfo
class PTMNote(Note):
NOTE_INFO = PTMNoteInfo
class Row(FieldSet):
def createFields(self):
addr = self.absolute_address
@ -587,11 +627,15 @@ class Row(FieldSet):
yield note
addr += note.size
class S3MRow(Row):
NOTE = S3MNote
class PTMRow(Row):
NOTE = PTMNote
class Pattern(SizeFieldSet):
def createUnpaddedFields(self):
count = 0
@ -599,8 +643,10 @@ class Pattern(SizeFieldSet):
yield self.ROW(self, "row[]")
count += 1
class S3MPattern(Pattern):
ROW = S3MRow
def __init__(self, parent, name, size, desc=None):
Pattern.__init__(self, parent, name, size, desc)
@ -609,18 +655,20 @@ class S3MPattern(Pattern):
size = self.stream.readBits(addr, 16, LITTLE_ENDIAN)
self.setCheckedSizes(size)
class PTMPattern(Pattern):
ROW = PTMRow
class Module(Parser):
# MARKER / HEADER are defined in derived classes
endian = LITTLE_ENDIAN
def validate(self):
marker = self.stream.readBits(0x1C*8, 8, LITTLE_ENDIAN)
marker = self.stream.readBits(0x1C * 8, 8, LITTLE_ENDIAN)
if marker != 0x1A:
return "Invalid start marker %u" % marker
marker = self.stream.readBytes(0x2C*8, 4)
marker = self.stream.readBytes(0x2C * 8, 4)
if marker != self.MARKER:
return "Invalid marker %s!=%s" % (marker, self.MARKER)
return True
@ -640,28 +688,29 @@ class S3MModule(Module):
"category": "audio",
"file_ext": ("s3m",),
"mime": (u'audio/s3m', u'audio/x-s3m'),
"min_size": 64*8,
"min_size": 64 * 8,
"description": "ScreamTracker3 module"
}
MARKER = "SCRM"
HEADER = S3MHeader
## def createContentSize(self):
## hdr = Header(self, "header")
## max_offset = hdr._size//8
## instr_size = Instrument._size//8
## for index in xrange(self["header/num_instruments"].value):
## offset = 16*hdr["instr_pptr/offset[%u]" % index].value
## max_offset = max(offset+instr_size, max_offset)
## addr = self.absolute_address + 8*offset
# def createContentSize(self):
# hdr = Header(self, "header")
# max_offset = hdr._size // 8
#
# instr_size = Instrument._size // 8
# for index in xrange(self["header/num_instruments"].value):
# offset = 16*hdr["instr_pptr/offset[%u]" % index].value
# max_offset = max(offset+instr_size, max_offset)
# addr = self.absolute_address + 8*offset
class PTMModule(Module):
PARSER_TAGS = {
"id": "ptm",
"category": "audio",
"file_ext": ("ptm",),
"min_size": 64*8,
"min_size": 64 * 8,
"description": "PolyTracker module (v1.17)"
}
MARKER = "PTMF"

View file

@ -13,18 +13,20 @@ Author: Christophe GISQUET <christophe.gisquet@free.fr>
Creation: 8th February 2007
"""
from hachoir_parser import Parser
from hachoir_core.field import (StaticFieldSet, FieldSet,
Bit, RawBits, Bits,
UInt32, UInt16, UInt8, Int8, Enum,
RawBytes, String, GenericVector)
from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir_parser.audio.modplug import ParseModplugMetadata
from hachoir_parser.common.tracker import NOTE_NAME
from hachoir.parser import Parser
from hachoir.field import (StaticFieldSet, FieldSet,
Bit, RawBits, Bits,
UInt32, UInt16, UInt8, Int8, Enum,
RawBytes, String, GenericVector)
from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir.parser.audio.modplug import ParseModplugMetadata
from hachoir.parser.common.tracker import NOTE_NAME
def parseSigned(val):
return "%i" % (val.value-128)
return "%i" % (val.value - 128)
# From dumb
SEMITONE_BASE = 1.059463094359295309843105314939748495817
@ -32,16 +34,20 @@ PITCH_BASE = 1.000225659305069791926712241547647863626
SAMPLE_LOOP_MODE = ("No loop", "Forward loop", "Ping-pong loop", "Undef")
class SampleType(FieldSet):
static_size = 8
def createFields(self):
yield Bits(self, "unused[]", 4)
yield Bit(self, "16bits")
yield Bits(self, "unused[]", 1)
yield Enum(Bits(self, "loop_mode", 2), SAMPLE_LOOP_MODE)
class SampleHeader(FieldSet):
static_size = 40*8
static_size = 40 * 8
def createFields(self):
yield UInt32(self, "length")
yield UInt32(self, "loop_start")
@ -55,11 +61,12 @@ class SampleHeader(FieldSet):
yield String(self, "name", 22, charset="ASCII", strip=' \0')
def createValue(self):
bytes = 1+self["type/16bits"].value
C5_speed = int(16726.0*pow(SEMITONE_BASE, self["relative_note"].value)
*pow(PITCH_BASE, self["fine_tune"].value*2))
bytes = 1 + self["type/16bits"].value
C5_speed = int(16726.0 * pow(SEMITONE_BASE, self["relative_note"].value)
* pow(PITCH_BASE, self["fine_tune"].value * 2))
return "%s, %ubits, %u samples, %uHz" % \
(self["name"].display, 8*bytes, self["length"].value/bytes, C5_speed)
(self["name"].display, 8 * bytes, self["length"].value / bytes, C5_speed)
class StuffType(StaticFieldSet):
format = (
@ -69,8 +76,10 @@ class StuffType(StaticFieldSet):
(Bit, "on")
)
class InstrumentSecondHeader(FieldSet):
static_size = 234*8
static_size = 234 * 8
def createFields(self):
yield UInt32(self, "sample_header_size")
yield GenericVector(self, "notes", 96, UInt8, "sample")
@ -93,11 +102,12 @@ class InstrumentSecondHeader(FieldSet):
yield UInt16(self, "volume_fadeout")
yield GenericVector(self, "reserved", 11, UInt16, "word")
def createInstrumentContentSize(s, addr):
start = addr
samples = s.stream.readBits(addr+27*8, 16, LITTLE_ENDIAN)
samples = s.stream.readBits(addr + 27 * 8, 16, LITTLE_ENDIAN)
# Seek to end of header (1st + 2nd part)
addr += 8*s.stream.readBits(addr, 32, LITTLE_ENDIAN)
addr += 8 * s.stream.readBits(addr, 32, LITTLE_ENDIAN)
sample_size = 0
if samples:
@ -107,7 +117,8 @@ def createInstrumentContentSize(s, addr):
# Seek to next sample header
addr += SampleHeader.static_size
return addr - start + 8*sample_size
return addr - start + 8 * sample_size
class Instrument(FieldSet):
def __init__(self, parent, name):
@ -117,7 +128,7 @@ class Instrument(FieldSet):
# Seems to fix things...
def fixInstrumentHeader(self):
size = self["size"].value - self.current_size//8
size = self["size"].value - self.current_size // 8
if size:
yield RawBytes(self, "unknown_data", size)
@ -138,7 +149,7 @@ class Instrument(FieldSet):
yield field
# This part probably wrong
sample_size = [ ]
sample_size = []
for index in xrange(num):
sample = SampleHeader(self, "sample_header[]")
yield sample
@ -155,18 +166,21 @@ class Instrument(FieldSet):
return "Instrument '%s': %i samples, header %i bytes" % \
(self["name"].value, self["samples"].value, self["size"].value)
VOLUME_NAME = (
"Volume slide down", "Volume slide up", "Fine volume slide down",
"Fine volume slide up", "Set vibrato speed", "Vibrato",
"Set panning", "Panning slide left", "Panning slide right",
"Tone porta", "Unhandled")
def parseVolume(val):
val = val.value
if 0x10<=val<=0x50:
return "Volume %i" % val-16
if 0x10 <= val <= 0x50:
return "Volume %i" % val - 16
else:
return VOLUME_NAME[val/16 - 6]
return VOLUME_NAME[val // 16 - 6]
class RealBit(RawBits):
static_size = 1
@ -177,6 +191,7 @@ class RealBit(RawBits):
def createValue(self):
return self._parent.stream.readBits(self.absolute_address, 1, BIG_ENDIAN)
class NoteInfo(StaticFieldSet):
format = (
(RawBits, "unused", 2),
@ -187,6 +202,7 @@ class NoteInfo(StaticFieldSet):
(RealBit, "has_note")
)
EFFECT_NAME = (
"Arppegio", "Porta up", "Porta down", "Tone porta", "Vibrato",
"Tone porta+Volume slide", "Vibrato+Volume slide", "Tremolo",
@ -204,35 +220,37 @@ EFFECT_E_NAME = (
"Fine volume slide up", "Fine volume slide down", "Note cut",
"Note delay", "Pattern delay")
class Effect(RawBits):
def __init__(self, parent, name):
RawBits.__init__(self, parent, name, 8)
def createValue(self):
t = self.parent.stream.readBits(self.absolute_address, 8, LITTLE_ENDIAN)
param = self.parent.stream.readBits(self.absolute_address+8, 8, LITTLE_ENDIAN)
param = self.parent.stream.readBits(self.absolute_address + 8, 8, LITTLE_ENDIAN)
if t == 0x0E:
return EFFECT_E_NAME[param>>4] + " %i" % (param&0x07)
return EFFECT_E_NAME[param >> 4] + " %i" % (param & 0x07)
elif t == 0x21:
return ("Extra fine porta up", "Extra fine porta down")[param>>4]
return ("Extra fine porta up", "Extra fine porta down")[param >> 4]
else:
return EFFECT_NAME[t]
class Note(FieldSet):
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.flags = self.stream.readBits(self.absolute_address, 8, LITTLE_ENDIAN)
if self.flags&0x80:
if self.flags & 0x80:
# TODO: optimize bitcounting with a table:
# http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable
self._size = 8
if self.flags&0x01: self._size += 8
if self.flags&0x02: self._size += 8
if self.flags&0x04: self._size += 8
if self.flags&0x08: self._size += 8
if self.flags&0x10: self._size += 8
if self.flags & 0x01: self._size += 8
if self.flags & 0x02: self._size += 8
if self.flags & 0x04: self._size += 8
if self.flags & 0x08: self._size += 8
if self.flags & 0x10: self._size += 8
else:
self._size = 5*8
self._size = 5 * 8
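The TODO above points at table-driven bit counting (see the bithacks link); a minimal sketch of that idea applied to the five presence flags, using a hypothetical helper note_size_bits:

POPCOUNT = [bin(i).count("1") for i in range(256)]  # 256-entry lookup table

def note_size_bits(flags):
    if flags & 0x80:  # packed note: one byte per flag bit that is set
        return 8 + 8 * POPCOUNT[flags & 0x1F]
    return 5 * 8      # unpacked note: all five fields present

print(note_size_bits(0x80))         # 8  (flags byte only)
print(note_size_bits(0x80 | 0x13))  # 32 (flags byte + three fields)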
def createFields(self):
# This stupid shit gets the LSB, not the MSB...
@ -275,21 +293,24 @@ class Note(FieldSet):
desc.append("parameter %i" % self["effect_parameter"].value)
else:
desc = (self["note"].display, "instrument %i" % self["instrument"].value,
self["has_volume"].display, "effect %s" % self["effect_type"].value,
"parameter %i" % self["effect_parameter"].value)
self["has_volume"].display, "effect %s" % self["effect_type"].value,
"parameter %i" % self["effect_parameter"].value)
if desc:
return "Note %s" % ", ".join(desc)
else:
return "Note"
class Row(FieldSet):
def createFields(self):
for index in xrange(self["/header/channels"].value):
yield Note(self, "note[]")
def createPatternContentSize(s, addr):
return 8*(s.stream.readBits(addr, 32, LITTLE_ENDIAN) +
s.stream.readBits(addr+7*8, 16, LITTLE_ENDIAN))
return 8 * (s.stream.readBits(addr, 32, LITTLE_ENDIAN) +
s.stream.readBits(addr + 7 * 8, 16, LITTLE_ENDIAN))
class Pattern(FieldSet):
def __init__(self, parent, name, desc=None):
@ -309,9 +330,10 @@ class Pattern(FieldSet):
def createDescription(self):
return "Pattern with %i rows" % self["rows"].value
class Header(FieldSet):
MAGIC = "Extended Module: "
static_size = 336*8
static_size = 336 * 8
def createFields(self):
yield String(self, "signature", 17, "XM signature", charset="ASCII")
@ -337,6 +359,7 @@ class Header(FieldSet):
return "'%s' by '%s'" % (
self["title"].value, self["tracker_name"].value)
class XMModule(Parser):
PARSER_TAGS = {
"id": "fasttracker2",
@ -346,7 +369,7 @@ class XMModule(Parser):
u'audio/xm', u'audio/x-xm',
u'audio/module-xm', u'audio/mod', u'audio/x-mod'),
"magic": ((Header.MAGIC, 0),),
"min_size": Header.static_size +29*8, # Header + 1 empty instrument
"min_size": Header.static_size + 29 * 8, # Header + 1 empty instrument
"description": "FastTracker2 module"
}
endian = LITTLE_ENDIAN
@ -354,7 +377,7 @@ class XMModule(Parser):
def validate(self):
header = self.stream.readBytes(0, 17)
if header != Header.MAGIC:
return "Invalid signature '%s'" % header
return "Invalid signature %a" % header
if self["/header/header_size"].value != 276:
return "Unknown header size (%u)" % self["/header/header_size"].value
return True
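Outside hachoir, the same validation can be done with plain file I/O. A sketch with a hypothetical helper looks_like_xm, assuming the conventional XM layout where header_size is a little-endian uint32 at byte offset 60 (after the 17-byte signature, 20-byte title, 0x1A byte, 20-byte tracker name and 2-byte version):

import struct

def looks_like_xm(path):
    with open(path, "rb") as f:
        head = f.read(64)
    if len(head) < 64 or head[:17] != b"Extended Module: ":  # Header.MAGIC
        return False
    # header_size: little-endian uint32 at byte 60 (assumed offset)
    return struct.unpack_from("<I", head, 60)[0] == 276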
@ -387,4 +410,3 @@ class XMModule(Parser):
def createDescription(self):
return self["header"].description

View file

View file

@ -1,8 +1,9 @@
from hachoir_core.field import CompressedField
from hachoir.field import CompressedField
try:
from zlib import decompressobj, MAX_WBITS
class DeflateStream:
def __init__(self, stream, wbits=None):
if wbits:
@ -13,21 +14,26 @@ try:
def __call__(self, size, data=None):
if data is None:
data = ''
return self.gzip.decompress(self.gzip.unconsumed_tail+data, size)
return self.gzip.decompress(self.gzip.unconsumed_tail + data, size)
class DeflateStreamWbits(DeflateStream):
def __init__(self, stream):
DeflateStream.__init__(self, stream, True)
def Deflate(field, wbits=True):
if wbits:
CompressedField(field, DeflateStreamWbits)
else:
CompressedField(field, DeflateStream)
return field
has_deflate = True
except ImportError:
def Deflate(field, wbits=True):
return field
has_deflate = False
has_deflate = False
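DeflateStream leans on zlib.decompressobj's bounded-output mode: decompress(data, max_length) returns at most max_length bytes of output and parks any unread compressed input in unconsumed_tail, which the next call must feed back in. A quick standalone demonstration:

import zlib

data = zlib.compress(b"x" * 1000)
d = zlib.decompressobj()
part = d.decompress(data, 100)           # at most 100 bytes of output
rest = d.decompress(d.unconsumed_tail)   # feed the leftover input back in
print(len(part), len(part + rest))       # 100 1000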

View file

@ -6,8 +6,8 @@ Documentation:
http://www.cs.colorado.edu/~main/cs1300/include/ddk/winddk.h
"""
from hachoir_core.field import StaticFieldSet
from hachoir_core.field import Bit, NullBits
from hachoir.field import StaticFieldSet
from hachoir.field import Bit, NullBits
_FIELDS = (
(Bit, "read_only"),
@ -27,6 +27,7 @@ _FIELDS = (
(Bit, "encrypted"),
)
class MSDOSFileAttr16(StaticFieldSet):
"""
MSDOS 16-bit file attributes
@ -54,9 +55,9 @@ class MSDOSFileAttr16(StaticFieldSet):
else:
return "(none)"
class MSDOSFileAttr32(MSDOSFileAttr16):
"""
MSDOS 32-bit file attributes
"""
format = _FIELDS + ((NullBits, "reserved[]", 17),)
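For reference, the same attribute word can be decoded without the FieldSet machinery. A sketch with a hypothetical helper msdos_attrs, assuming the standard FAT bit order for the first six flags (read_only at bit 0 through archive at bit 5):

ATTR_BITS = ((0, "read_only"), (1, "hidden"), (2, "system"),
             (3, "volume_label"), (4, "directory"), (5, "archive"))

def msdos_attrs(value):
    names = [name for bit, name in ATTR_BITS if value & (1 << bit)]
    return ", ".join(names) or "(none)"

print(msdos_attrs(0x21))  # 'read_only, archive'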

View file

@ -6,5 +6,5 @@ NOTE_NAME = {}
NOTES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "G#", "A", "A#", "B")
for octave in xrange(10):
for index, note in enumerate(NOTES):
NOTE_NAME[octave*12+index] = "%s (octave %s)" % (note, octave)
NOTE_NAME[octave * 12 + index] = "%s (octave %s)" % (note, octave)
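As a quick usage check, the same table as a dict comprehension (range instead of xrange, so it also runs on PY3); with the 12-entry chromatic NOTES tuple, note numbers advance 12 per octave:

NOTES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
NOTE_NAME = {octave * 12 + index: "%s (octave %s)" % (note, octave)
             for octave in range(10)
             for index, note in enumerate(NOTES)}
print(NOTE_NAME[49])  # 'C# (octave 4)'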

View file

@ -1,29 +1,31 @@
from hachoir_core.field import (FieldSet,
UInt16, UInt32, Enum, String, Bytes, Bits, TimestampUUID60)
from hachoir_parser.video.fourcc import video_fourcc_name
from hachoir_core.bits import str2hex
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_parser.network.common import MAC48_Address
from hachoir.field import (FieldSet,
UInt16, UInt32, Enum, String, Bytes, Bits, TimestampUUID60)
from hachoir.parser.video.fourcc import video_fourcc_name
from hachoir.core.bits import str2hex
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.parser.network.common import MAC48_Address
# Dictionary: Windows codepage => Python charset name
CODEPAGE_CHARSET = {
874: "CP874",
# 932: Japanese Shift-JIS
# 936: Simplified Chinese GBK
# 949: Korean
# 950: Traditional Chinese Big5
1250: "WINDOWS-1250",
1251: "WINDOWS-1251",
1252: "WINDOWS-1252",
1253: "WINDOWS-1253",
1254: "WINDOWS-1254",
1255: "WINDOWS-1255",
1256: "WINDOWS-1256",
1257: "WINDOWS-1257",
1258: "WINDOWS-1258",
874: "CP874",
# 932: Japanese Shift-JIS
# 936: Simplified Chinese GBK
# 949: Korean
# 950: Traditional Chinese Big5
1250: "WINDOWS-1250",
1251: "WINDOWS-1251",
1252: "WINDOWS-1252",
1253: "WINDOWS-1253",
1254: "WINDOWS-1254",
1255: "WINDOWS-1255",
1256: "WINDOWS-1256",
1257: "WINDOWS-1257",
1258: "WINDOWS-1258",
10000: "MacRoman",
65001: "UTF-8",
}
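Typical use of this table is picking a Python codec before decoding raw bytes. A small sketch with a hypothetical helper decode_codepage, a two-entry subset of the mapping, and an assumed latin-1 fallback for unmapped codepages (the fallback is illustrative, not hachoir's behaviour):

CODEPAGE_CHARSET = {1252: "WINDOWS-1252", 65001: "UTF-8"}  # subset of the table above

def decode_codepage(raw, codepage):
    # latin-1 fallback is an assumption for the sketch
    return raw.decode(CODEPAGE_CHARSET.get(codepage, "latin-1"))

print(decode_codepage(b"\xe9t\xe9", 1252))  # 'été'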
class PascalStringWin16(FieldSet):
def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"):
FieldSet.__init__(self, parent, name, description)
@ -36,7 +38,7 @@ class PascalStringWin16(FieldSet):
yield UInt16(self, "length", "Length in widechar characters")
size = self["length"].value
if size:
yield String(self, "text", size*2, charset=self.charset, strip=self.strip)
yield String(self, "text", size * 2, charset=self.charset, strip=self.strip)
def createValue(self):
if "text" in self:
@ -44,6 +46,7 @@ class PascalStringWin16(FieldSet):
else:
return None
class PascalStringWin32(FieldSet):
def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"):
FieldSet.__init__(self, parent, name, description)
@ -56,7 +59,7 @@ class PascalStringWin32(FieldSet):
yield UInt32(self, "length", "Length in widechar characters")
size = self["length"].value
if size:
yield String(self, "text", size*2, charset=self.charset, strip=self.strip)
yield String(self, "text", size * 2, charset=self.charset, strip=self.strip)
def createValue(self):
if "text" in self:
@ -64,6 +67,7 @@ class PascalStringWin32(FieldSet):
else:
return None
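The same wire format can be read with plain struct: a uint32 character count followed by that many UTF-16-LE code units (two bytes each). A standalone sketch with a hypothetical helper read_pascal_win32:

import struct

def read_pascal_win32(buf, offset=0):
    (length,) = struct.unpack_from("<I", buf, offset)
    end = offset + 4 + length * 2
    return buf[offset + 4:end].decode("utf-16-le"), end

data = struct.pack("<I", 5) + u"héllo".encode("utf-16-le")
print(read_pascal_win32(data))  # ('héllo', 14)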
class GUID(FieldSet):
"""
Windows 128 bits Globally Unique Identifier (GUID)
@ -87,13 +91,15 @@ class GUID(FieldSet):
VARIANT_NAME = {
0: "NCS",
2: "Leach-Salz",
# 5: Microsoft Corporation?
# 5: Microsoft Corporation?
6: "Microsoft Corporation",
7: "Reserved Future",
}
def __init__(self, *args):
FieldSet.__init__(self, *args)
self.version = self.stream.readBits(self.absolute_address + 32 + 16 + 12, 4, self.endian)
self.version = self.stream.readBits(
self.absolute_address + 32 + 16 + 12, 4, self.endian)
def createFields(self):
if self.version == 1:
@ -101,7 +107,7 @@ class GUID(FieldSet):
yield Enum(Bits(self, "version", 4), self.VERSION_NAME)
yield Enum(Bits(self, "variant", 3), self.VARIANT_NAME)
yield textHandler(Bits(self, "clock", 13), hexadecimal)
# yield textHandler(Bits(self, "clock", 16), hexadecimal)
# yield textHandler(Bits(self, "clock", 16), hexadecimal)
if self.version == 1:
yield MAC48_Address(self, "mac", "IEEE 802 MAC address")
else:
@ -116,9 +122,9 @@ class GUID(FieldSet):
def createValue(self):
addr = self.absolute_address
a = self.stream.readBits (addr, 32, self.endian)
b = self.stream.readBits (addr + 32, 16, self.endian)
c = self.stream.readBits (addr + 48, 16, self.endian)
a = self.stream.readBits(addr, 32, self.endian)
b = self.stream.readBits(addr + 32, 16, self.endian)
c = self.stream.readBits(addr + 48, 16, self.endian)
d = self.stream.readBytes(addr + 64, 2)
e = self.stream.readBytes(addr + 80, 6)
return "%08X-%04X-%04X-%s-%s" % (a, b, c, str2hex(d), str2hex(e))
@ -135,9 +141,10 @@ class GUID(FieldSet):
value = self.stream.readBytes(self.absolute_address, 16)
return str2hex(value, format=r"\x%02x")
class BitmapInfoHeader(FieldSet):
""" Win32 BITMAPINFOHEADER structure from GDI """
static_size = 40*8
static_size = 40 * 8
COMPRESSION_NAME = {
0: u"Uncompressed (RGB)",
@ -170,5 +177,4 @@ class BitmapInfoHeader(FieldSet):
def createDescription(self):
return "Bitmap info header: %ux%u pixels, %u bits/pixel" % \
(self["width"].value, self["height"].value, self["bpp"].value)
(self["width"].value, self["height"].value, self["bpp"].value)

View file

@ -133,4 +133,3 @@ LANGUAGE_ID = {
0x0843: u"Uzbek Cyrillic",
0x042a: u"Vietnamese",
}

View file

@ -0,0 +1,7 @@
from hachoir.parser.container.asn1 import ASN1File
from hachoir.parser.container.mkv import MkvFile
from hachoir.parser.container.ogg import OggFile, OggStream
from hachoir.parser.container.riff import RiffFile
from hachoir.parser.container.swf import SwfFile
from hachoir.parser.container.realmedia import RealMediaFile
from hachoir.parser.container.mp4 import MP4File

Some files were not shown because too many files have changed in this diff.