diff --git a/CHANGES.md b/CHANGES.md index dad0436f..f449fa55 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,10 @@ * Remove legacy anime split home option from anime settings tab (new option located in general/interface tab) * Remove "Manage Torrents" * Update Beautiful Soup 4.3.2 to 4.4.0 (r390) +* Update Hachoir library 1.3.3 to 1.3.4 (r1383) +* Change configure quiet option in Hachoir to suppress warnings (add ref:hacks.txt) +* Add parse media content to determine quality before making final assumptions during re-scan, update, pp +* Add a postprocess folder name validation ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/HACKS.txt b/HACKS.txt index 5aea55dc..75c4b37f 100644 --- a/HACKS.txt +++ b/HACKS.txt @@ -1,7 +1,8 @@ Libs with customisations... -/tornado +/lib/cachecontrol/caches/file_cache.py +/lib/hachoir_core/config.py +/lib/pynma/pynma.py /lib/requests/packages/urllib3/connectionpool.py /lib/requests/packages/urllib3/util/ssl_.py -/lib/cachecontrol/caches/file_cache.py -/lib/pynma/pynma.py \ No newline at end of file +/tornado diff --git a/lib/hachoir_core/__init__.py b/lib/hachoir_core/__init__.py index 5fcd20a4..df1988fd 100644 --- a/lib/hachoir_core/__init__.py +++ b/lib/hachoir_core/__init__.py @@ -1,2 +1,2 @@ -from lib.hachoir_core.version import VERSION as __version__, PACKAGE, WEBSITE, LICENSE +from hachoir_core.version import VERSION as __version__, PACKAGE, WEBSITE, LICENSE diff --git a/lib/hachoir_core/benchmark.py b/lib/hachoir_core/benchmark.py index a046c9a9..f823cfae 100644 --- a/lib/hachoir_core/benchmark.py +++ b/lib/hachoir_core/benchmark.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.tools import humanDurationNanosec -from lib.hachoir_core.i18n import _ +from hachoir_core.tools import humanDurationNanosec +from hachoir_core.i18n import _ from math import floor from time import time diff --git a/lib/hachoir_core/bits.py b/lib/hachoir_core/bits.py index b18547dd..97e84af8 100644 --- a/lib/hachoir_core/bits.py +++ 
b/lib/hachoir_core/bits.py @@ -3,8 +3,8 @@ Utilities to convert integers and binary strings to binary (number), binary string, number, hexadecimal, etc. """ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.compatibility import reversed +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.compatibility import reversed from itertools import chain, repeat from struct import calcsize, unpack, error as struct_error @@ -30,6 +30,28 @@ def swap32(value): | ((value & 0x00FF0000L) >> 8) \ | ((value & 0xFF000000L) >> 24) +def arrswapmid(data): + r""" + Convert an array of characters from middle-endian to big-endian and vice-versa. + + >>> arrswapmid("badcfehg") + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + """ + assert len(data)%2 == 0 + ret = ['']*len(data) + ret[1::2] = data[0::2] + ret[0::2] = data[1::2] + return ret + +def strswapmid(data): + r""" + Convert raw data from middle-endian to big-endian and vice-versa. + + >>> strswapmid("badcfehg") + 'abcdefgh' + """ + return ''.join(arrswapmid(data)) + def bin2long(text, endian): """ Convert binary number written in a string into an integer. 
@@ -45,9 +67,10 @@ def bin2long(text, endian): assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) bits = [ (ord(character)-ord("0")) \ for character in text if character in "01" ] - assert len(bits) != 0 if endian is not BIG_ENDIAN: - bits = reversed(bits) + bits = bits[::-1] + size = len(bits) + assert 0 < size value = 0 for bit in bits: value *= 2 @@ -142,7 +165,7 @@ def long2raw(value, endian, size=None): '\x19\x12\x00\x00' """ assert (not size and 0 < value) or (0 <= value) - assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) + assert endian in (LITTLE_ENDIAN, BIG_ENDIAN, MIDDLE_ENDIAN) text = [] while (value != 0 or text == ""): byte = value % 256 @@ -153,13 +176,15 @@ def long2raw(value, endian, size=None): else: need = 0 if need: - if endian is BIG_ENDIAN: - text = chain(repeat("\0", need), reversed(text)) - else: + if endian is LITTLE_ENDIAN: text = chain(text, repeat("\0", need)) + else: + text = chain(repeat("\0", need), reversed(text)) else: - if endian is BIG_ENDIAN: + if endian is not LITTLE_ENDIAN: text = reversed(text) + if endian is MIDDLE_ENDIAN: + text = arrswapmid(text) return "".join(text) def long2bin(size, value, endian, classic_mode=False): @@ -257,6 +282,8 @@ def str2long(data, endian): True >>> str2long("\xff\xff\xff\xff\xff\xff\xff\xff", BIG_ENDIAN) == (2**64-1) True + >>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d + True """ assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits try: @@ -264,14 +291,15 @@ def str2long(data, endian): except KeyError: pass - assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) shift = 0 value = 0 if endian is BIG_ENDIAN: data = reversed(data) + elif endian is MIDDLE_ENDIAN: + data = reversed(strswapmid(data)) for character in data: byte = ord(character) value += (byte << shift) shift += 8 return value - diff --git a/lib/hachoir_core/cmd_line.py b/lib/hachoir_core/cmd_line.py index 80161712..8c4178df 100644 --- a/lib/hachoir_core/cmd_line.py +++ 
b/lib/hachoir_core/cmd_line.py @@ -1,8 +1,8 @@ from optparse import OptionGroup -from lib.hachoir_core.log import log -from lib.hachoir_core.i18n import _, getTerminalCharset -from lib.hachoir_core.tools import makePrintable -import lib.hachoir_core.config as config +from hachoir_core.log import log +from hachoir_core.i18n import _, getTerminalCharset +from hachoir_core.tools import makePrintable +import hachoir_core.config as config def getHachoirOptions(parser): """ diff --git a/lib/hachoir_core/config.py b/lib/hachoir_core/config.py index 9250b627..e9bade06 100644 --- a/lib/hachoir_core/config.py +++ b/lib/hachoir_core/config.py @@ -14,7 +14,7 @@ unicode_stdout = True # Replace stdout and stderr with Unicode compatible ob # Global options debug = False # Display many informations usefull to debug verbose = False # Display more informations -quiet = False # Don't display warnings +quiet = True # Don't display warnings # Use internationalization and localization (gettext)? if os.name == "nt": diff --git a/lib/hachoir_core/dict.py b/lib/hachoir_core/dict.py index f7eb65e6..f887683c 100644 --- a/lib/hachoir_core/dict.py +++ b/lib/hachoir_core/dict.py @@ -2,8 +2,8 @@ Dictionnary classes which store values order. """ -from lib.hachoir_core.error import HachoirError -from lib.hachoir_core.i18n import _ +from hachoir_core.error import HachoirError +from hachoir_core.i18n import _ class UniqKeyError(HachoirError): """ diff --git a/lib/hachoir_core/endian.py b/lib/hachoir_core/endian.py index 6d09e261..3568010a 100644 --- a/lib/hachoir_core/endian.py +++ b/lib/hachoir_core/endian.py @@ -2,14 +2,15 @@ Constant values about endian. 
""" -from lib.hachoir_core.i18n import _ +from hachoir_core.i18n import _ BIG_ENDIAN = "ABCD" LITTLE_ENDIAN = "DCBA" +MIDDLE_ENDIAN = "BADC" NETWORK_ENDIAN = BIG_ENDIAN endian_name = { BIG_ENDIAN: _("Big endian"), LITTLE_ENDIAN: _("Little endian"), + MIDDLE_ENDIAN: _("Middle endian"), } - diff --git a/lib/hachoir_core/error.py b/lib/hachoir_core/error.py index 78f614a2..9ec6b573 100644 --- a/lib/hachoir_core/error.py +++ b/lib/hachoir_core/error.py @@ -2,8 +2,8 @@ Functions to display an error (error, warning or information) message. """ -from lib.hachoir_core.log import log -from lib.hachoir_core.tools import makePrintable +from hachoir_core.log import log +from hachoir_core.tools import makePrintable import sys, traceback def getBacktrace(empty="Empty backtrace."): diff --git a/lib/hachoir_core/field/__init__.py b/lib/hachoir_core/field/__init__.py index 66f3ed6b..f313c9ba 100644 --- a/lib/hachoir_core/field/__init__.py +++ b/lib/hachoir_core/field/__init__.py @@ -1,44 +1,44 @@ # Field classes -from lib.hachoir_core.field.field import Field, FieldError, MissingField, joinPath -from lib.hachoir_core.field.bit_field import Bit, Bits, RawBits -from lib.hachoir_core.field.byte_field import Bytes, RawBytes -from lib.hachoir_core.field.sub_file import SubFile, CompressedField -from lib.hachoir_core.field.character import Character -from lib.hachoir_core.field.integer import ( +from hachoir_core.field.field import Field, FieldError, MissingField, joinPath +from hachoir_core.field.bit_field import Bit, Bits, RawBits +from hachoir_core.field.byte_field import Bytes, RawBytes +from hachoir_core.field.sub_file import SubFile, CompressedField +from hachoir_core.field.character import Character +from hachoir_core.field.integer import ( Int8, Int16, Int24, Int32, Int64, UInt8, UInt16, UInt24, UInt32, UInt64, GenericInteger) -from lib.hachoir_core.field.enum import Enum -from lib.hachoir_core.field.string_field import (GenericString, +from hachoir_core.field.enum import Enum 
+from hachoir_core.field.string_field import (GenericString, String, CString, UnixLine, PascalString8, PascalString16, PascalString32) -from lib.hachoir_core.field.padding import (PaddingBits, PaddingBytes, +from hachoir_core.field.padding import (PaddingBits, PaddingBytes, NullBits, NullBytes) # Functions -from lib.hachoir_core.field.helper import (isString, isInteger, +from hachoir_core.field.helper import (isString, isInteger, createPaddingField, createNullField, createRawField, writeIntoFile, createOrphanField) # FieldSet classes -from lib.hachoir_core.field.fake_array import FakeArray -from lib.hachoir_core.field.basic_field_set import (BasicFieldSet, +from hachoir_core.field.fake_array import FakeArray +from hachoir_core.field.basic_field_set import (BasicFieldSet, ParserError, MatchError) -from lib.hachoir_core.field.generic_field_set import GenericFieldSet -from lib.hachoir_core.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet -from lib.hachoir_core.field.field_set import FieldSet -from lib.hachoir_core.field.static_field_set import StaticFieldSet -from lib.hachoir_core.field.parser import Parser -from lib.hachoir_core.field.vector import GenericVector, UserVector +from hachoir_core.field.generic_field_set import GenericFieldSet +from hachoir_core.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet +from hachoir_core.field.field_set import FieldSet +from hachoir_core.field.static_field_set import StaticFieldSet +from hachoir_core.field.parser import Parser +from hachoir_core.field.vector import GenericVector, UserVector # Complex types -from lib.hachoir_core.field.float import Float32, Float64, Float80 -from lib.hachoir_core.field.timestamp import (GenericTimestamp, +from hachoir_core.field.float import Float32, Float64, Float80 +from hachoir_core.field.timestamp import (GenericTimestamp, TimestampUnix32, TimestampUnix64, TimestampMac32, TimestampUUID60, TimestampWin64, DateTimeMSDOS32, TimeDateMSDOS32, 
TimedeltaWin64) # Special Field classes -from lib.hachoir_core.field.link import Link, Fragment +from hachoir_core.field.link import Link, Fragment available_types = ( Bit, Bits, RawBits, diff --git a/lib/hachoir_core/field/basic_field_set.py b/lib/hachoir_core/field/basic_field_set.py index 5c1bf9f8..74dc0571 100644 --- a/lib/hachoir_core/field/basic_field_set.py +++ b/lib/hachoir_core/field/basic_field_set.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import Field, FieldError -from lib.hachoir_core.stream import InputStream -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.event_handler import EventHandler +from hachoir_core.field import Field, FieldError +from hachoir_core.stream import InputStream +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.event_handler import EventHandler class ParserError(FieldError): """ @@ -60,7 +60,7 @@ class BasicFieldSet(Field): self._global_event_handler = None # Sanity checks (post-conditions) - assert self.endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) if (self._size is not None) and (self._size <= 0): raise ParserError("Invalid parser '%s' size: %s" % (self.path, self._size)) diff --git a/lib/hachoir_core/field/bit_field.py b/lib/hachoir_core/field/bit_field.py index 85f5502d..8fae3c7c 100644 --- a/lib/hachoir_core/field/bit_field.py +++ b/lib/hachoir_core/field/bit_field.py @@ -5,9 +5,9 @@ Bit sized classes: - RawBits: unknown content with a size in bits. 
""" -from lib.hachoir_core.field import Field -from lib.hachoir_core.i18n import _ -from lib.hachoir_core import config +from hachoir_core.field import Field +from hachoir_core.i18n import _ +from hachoir_core import config class RawBits(Field): """ diff --git a/lib/hachoir_core/field/byte_field.py b/lib/hachoir_core/field/byte_field.py index 4591daa1..16db1810 100644 --- a/lib/hachoir_core/field/byte_field.py +++ b/lib/hachoir_core/field/byte_field.py @@ -3,10 +3,10 @@ Very basic field: raw content with a size in byte. Use this class for unknown content. """ -from lib.hachoir_core.field import Field, FieldError -from lib.hachoir_core.tools import makePrintable -from lib.hachoir_core.bits import str2hex -from lib.hachoir_core import config +from hachoir_core.field import Field, FieldError +from hachoir_core.tools import makePrintable +from hachoir_core.bits import str2hex +from hachoir_core import config MAX_LENGTH = (2**64) diff --git a/lib/hachoir_core/field/character.py b/lib/hachoir_core/field/character.py index 002a18c9..566c4332 100644 --- a/lib/hachoir_core/field/character.py +++ b/lib/hachoir_core/field/character.py @@ -2,9 +2,9 @@ Character field class: a 8-bit character """ -from lib.hachoir_core.field import Bits -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import makePrintable +from hachoir_core.field import Bits +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import makePrintable class Character(Bits): """ @@ -24,4 +24,3 @@ class Character(Bits): def createDisplay(self): return makePrintable(self.value, "ASCII", quote="'", to_unicode=True) - diff --git a/lib/hachoir_core/field/enum.py b/lib/hachoir_core/field/enum.py index cc04a29e..61873504 100644 --- a/lib/hachoir_core/field/enum.py +++ b/lib/hachoir_core/field/enum.py @@ -1,7 +1,7 @@ def Enum(field, enum, key_func=None): """ Enum is an adapter to another field: it will just change its display - attribute. 
It uses a dictionnary to associate a value to another. + attribute. It uses a dictionary to associate a value to another. key_func is an optional function with prototype "def func(key)->key" which is called to transform key. @@ -23,4 +23,3 @@ def Enum(field, enum, key_func=None): field.createDisplay = createDisplay field.getEnum = lambda: enum return field - diff --git a/lib/hachoir_core/field/fake_array.py b/lib/hachoir_core/field/fake_array.py index f5ae6d95..5535cafe 100644 --- a/lib/hachoir_core/field/fake_array.py +++ b/lib/hachoir_core/field/fake_array.py @@ -1,5 +1,5 @@ import itertools -from lib.hachoir_core.field import MissingField +from hachoir_core.field import MissingField class FakeArray: """ diff --git a/lib/hachoir_core/field/field.py b/lib/hachoir_core/field/field.py index ccb9e172..cc59e9ce 100644 --- a/lib/hachoir_core/field/field.py +++ b/lib/hachoir_core/field/field.py @@ -2,12 +2,12 @@ Parent of all (field) classes in Hachoir: Field. """ -from lib.hachoir_core.compatibility import reversed -from lib.hachoir_core.stream import InputFieldStream -from lib.hachoir_core.error import HachoirError, HACHOIR_ERRORS -from lib.hachoir_core.log import Logger -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makePrintable +from hachoir_core.compatibility import reversed +from hachoir_core.stream import InputFieldStream +from hachoir_core.error import HachoirError, HACHOIR_ERRORS +from hachoir_core.log import Logger +from hachoir_core.i18n import _ +from hachoir_core.tools import makePrintable from weakref import ref as weakref_ref class FieldError(HachoirError): @@ -70,6 +70,8 @@ class Field(Logger): assert issubclass(parent.__class__, Field) assert (size is None) or (0 <= size) self._parent = parent + if not name: + raise ValueError("empty field name") self._name = name self._address = parent.nextFieldAddress() self._size = size @@ -166,7 +168,7 @@ class Field(Logger): return '/' names = [] field = self - while field: + while field 
is not None: names.append(field._name) field = field._parent names[-1] = '' diff --git a/lib/hachoir_core/field/field_set.py b/lib/hachoir_core/field/field_set.py index c535081f..92b51926 100644 --- a/lib/hachoir_core/field/field_set.py +++ b/lib/hachoir_core/field/field_set.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import BasicFieldSet, GenericFieldSet +from hachoir_core.field import BasicFieldSet, GenericFieldSet class FieldSet(GenericFieldSet): def __init__(self, parent, name, *args, **kw): diff --git a/lib/hachoir_core/field/float.py b/lib/hachoir_core/field/float.py index 56d77867..025b57df 100644 --- a/lib/hachoir_core/field/float.py +++ b/lib/hachoir_core/field/float.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import Bit, Bits, FieldSet -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.field import Bit, Bits, FieldSet +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN import struct # Make sure that we use right struct types @@ -85,15 +85,15 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc): cls.__name__ = name return cls -# 32-bit float (standart: IEEE 754/854) +# 32-bit float (standard: IEEE 754/854) Float32 = floatFactory("Float32", "f", 23, 8, "Floating point number: format IEEE 754 int 32 bit") -# 64-bit float (standart: IEEE 754/854) +# 64-bit float (standard: IEEE 754/854) Float64 = floatFactory("Float64", "d", 52, 11, "Floating point number: format IEEE 754 in 64 bit") -# 80-bit float (standart: IEEE 754/854) +# 80-bit float (standard: IEEE 754/854) Float80 = floatFactory("Float80", None, 64, 15, "Floating point number: format IEEE 754 in 80 bit") diff --git a/lib/hachoir_core/field/generic_field_set.py b/lib/hachoir_core/field/generic_field_set.py index 66634827..4817c2fc 100644 --- a/lib/hachoir_core/field/generic_field_set.py +++ b/lib/hachoir_core/field/generic_field_set.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (MissingField, BasicFieldSet, Field, ParserError, +from 
hachoir_core.field import (MissingField, BasicFieldSet, Field, ParserError, createRawField, createNullField, createPaddingField, FakeArray) -from lib.hachoir_core.dict import Dict, UniqKeyError -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.tools import lowerBound -import lib.hachoir_core.config as config +from hachoir_core.dict import Dict, UniqKeyError +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.tools import lowerBound, makeUnicode +import hachoir_core.config as config class GenericFieldSet(BasicFieldSet): """ @@ -12,8 +12,8 @@ class GenericFieldSet(BasicFieldSet): document). Class attributes: - - endian: Bytes order (L{BIG_ENDIAN} or L{LITTLE_ENDIAN}). Optional if the - field set has a parent ; + - endian: Bytes order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN}). + Optional if the field set has a parent ; - static_size: (optional) Size of FieldSet in bits. This attribute should be used in parser of constant size. @@ -310,7 +310,7 @@ class GenericFieldSet(BasicFieldSet): """ if self._size is None or not self.autofix: return False - self.warning(unicode(exception)) + self.warning(makeUnicode(exception)) return self._fixLastField() def _feedUntil(self, field_name): diff --git a/lib/hachoir_core/field/helper.py b/lib/hachoir_core/field/helper.py index 174d79e2..ba44f68e 100644 --- a/lib/hachoir_core/field/helper.py +++ b/lib/hachoir_core/field/helper.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldError, +from hachoir_core.field import (FieldError, RawBits, RawBytes, PaddingBits, PaddingBytes, NullBits, NullBytes, GenericString, GenericInteger) -from lib.hachoir_core.stream import FileOutputStream +from hachoir_core.stream import FileOutputStream def createRawField(parent, size, name="raw[]", description=None): if size <= 0: diff --git a/lib/hachoir_core/field/integer.py b/lib/hachoir_core/field/integer.py index bad64996..1f98322b 100644 --- a/lib/hachoir_core/field/integer.py +++ 
b/lib/hachoir_core/field/integer.py @@ -4,15 +4,15 @@ Integer field classes: - Int8, Int16, Int24, Int32, Int64: signed integer of 8, 16, 32, 64 bits. """ -from lib.hachoir_core.field import Bits, FieldError +from hachoir_core.field import Bits, FieldError class GenericInteger(Bits): """ Generic integer class used to generate other classes. """ def __init__(self, parent, name, signed, size, description=None): - if not (8 <= size <= 256): - raise FieldError("Invalid integer size (%s): have to be in 8..256" % size) + if not (8 <= size <= 16384): + raise FieldError("Invalid integer size (%s): have to be in 8..16384" % size) Bits.__init__(self, parent, name, size, description) self.signed = signed diff --git a/lib/hachoir_core/field/link.py b/lib/hachoir_core/field/link.py index cccf2e67..b331c3b8 100644 --- a/lib/hachoir_core/field/link.py +++ b/lib/hachoir_core/field/link.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import Field, FieldSet, ParserError, Bytes, MissingField -from lib.hachoir_core.stream import FragmentedStream +from hachoir_core.field import Field, FieldSet, ParserError, Bytes, MissingField +from hachoir_core.stream import FragmentedStream class Link(Field): diff --git a/lib/hachoir_core/field/new_seekable_field_set.py b/lib/hachoir_core/field/new_seekable_field_set.py index 22403b6d..d145ab96 100644 --- a/lib/hachoir_core/field/new_seekable_field_set.py +++ b/lib/hachoir_core/field/new_seekable_field_set.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField -from lib.hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField +from hachoir_core.error import HACHOIR_ERRORS # getgaps(int, int, [listof (int, int)]) -> generator of (int, int) # Gets all the gaps not covered by a block in `blocks` from `start` for `length` units. 
diff --git a/lib/hachoir_core/field/padding.py b/lib/hachoir_core/field/padding.py index 3d0fcc0f..c1c4b8c0 100644 --- a/lib/hachoir_core/field/padding.py +++ b/lib/hachoir_core/field/padding.py @@ -1,6 +1,6 @@ -from lib.hachoir_core.field import Bits, Bytes -from lib.hachoir_core.tools import makePrintable, humanFilesize -from lib.hachoir_core import config +from hachoir_core.field import Bits, Bytes +from hachoir_core.tools import makePrintable, humanFilesize +from hachoir_core import config class PaddingBits(Bits): """ diff --git a/lib/hachoir_core/field/parser.py b/lib/hachoir_core/field/parser.py index f4aebadd..8c16bf13 100644 --- a/lib/hachoir_core/field/parser.py +++ b/lib/hachoir_core/field/parser.py @@ -1,13 +1,13 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.field import GenericFieldSet -from lib.hachoir_core.log import Logger -import lib.hachoir_core.config as config +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.field import GenericFieldSet +from hachoir_core.log import Logger +import hachoir_core.config as config class Parser(GenericFieldSet): """ A parser is the root of all other fields. It create first level of fields and have special attributes and methods: - - endian: Byte order (L{BIG_ENDIAN} or L{LITTLE_ENDIAN}) of input data ; + - endian: Byte order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN}) of input data ; - stream: Data input stream (set in L{__init__()}) ; - size: Field set size will be size of input stream. 
""" @@ -21,7 +21,7 @@ class Parser(GenericFieldSet): """ # Check arguments assert hasattr(self, "endian") \ - and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN) + and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) # Call parent constructor GenericFieldSet.__init__(self, None, "root", stream, description, stream.askSize(self)) diff --git a/lib/hachoir_core/field/seekable_field_set.py b/lib/hachoir_core/field/seekable_field_set.py index ee7b1127..c3a3b448 100644 --- a/lib/hachoir_core/field/seekable_field_set.py +++ b/lib/hachoir_core/field/seekable_field_set.py @@ -1,182 +1,82 @@ -from lib.hachoir_core.field import Field, BasicFieldSet, FakeArray, MissingField, ParserError -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.error import HACHOIR_ERRORS -from itertools import repeat -import lib.hachoir_core.config as config +from hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField +from hachoir_core.error import HACHOIR_ERRORS -class RootSeekableFieldSet(BasicFieldSet): - def __init__(self, parent, name, stream, description, size): - BasicFieldSet.__init__(self, parent, name, stream, description, size) - self._generator = self.createFields() - self._offset = 0 - self._current_size = 0 - if size: - self._current_max_size = size - else: - self._current_max_size = 0 - self._field_dict = {} - self._field_array = [] - - def _feedOne(self): - assert self._generator - field = self._generator.next() - self._addField(field) - return field - - def array(self, key): - return FakeArray(self, key) - - def getFieldByAddress(self, address, feed=True): - for field in self._field_array: - if field.address <= address < field.address + field.size: - return field - for field in self._readFields(): - if field.address <= address < field.address + field.size: - return field - return None - - def _stopFeed(self): - self._size = self._current_max_size - self._generator = None - done = property(lambda self: not bool(self._generator)) 
- - def _getSize(self): - if self._size is None: - self._feedAll() - return self._size - size = property(_getSize) - - def _getField(self, key, const): - field = Field._getField(self, key, const) - if field is not None: - return field - if key in self._field_dict: - return self._field_dict[key] - if self._generator and not const: - try: - while True: - field = self._feedOne() - if field.name == key: - return field - except StopIteration: - self._stopFeed() - except HACHOIR_ERRORS, err: - self.error("Error: %s" % makeUnicode(err)) - self._stopFeed() - return None - - def getField(self, key, const=True): - if isinstance(key, (int, long)): - if key < 0: - raise KeyError("Key must be positive!") - if not const: - self.readFirstFields(key+1) - if len(self._field_array) <= key: - raise MissingField(self, key) - return self._field_array[key] - return Field.getField(self, key, const) - - def _addField(self, field): - if field._name.endswith("[]"): - self.setUniqueFieldName(field) - if config.debug: - self.info("[+] DBG: _addField(%s)" % field.name) - - if field._address != self._offset: - self.warning("Set field %s address to %s (was %s)" % ( - field.path, self._offset//8, field._address//8)) - field._address = self._offset - assert field.name not in self._field_dict - - self._checkFieldSize(field) - - self._field_dict[field.name] = field - self._field_array.append(field) - self._current_size += field.size - self._offset += field.size - self._current_max_size = max(self._current_max_size, field.address + field.size) - - def _checkAddress(self, address): - if self._size is not None: - max_addr = self._size - else: - # FIXME: Use parent size - max_addr = self.stream.size - return address < max_addr - - def _checkFieldSize(self, field): - size = field.size - addr = field.address - if not self._checkAddress(addr+size-1): - raise ParserError("Unable to add %s: field is too large" % field.name) +# getgaps(int, int, [listof (int, int)]) -> generator of (int, int) +# Gets all the 
gaps not covered by a block in `blocks` from `start` for `length` units. +def getgaps(start, length, blocks): + ''' + Example: + >>> list(getgaps(0, 20, [(15,3), (6,2), (6,2), (1,2), (2,3), (11,2), (9,5)])) + [(0, 1), (5, 1), (8, 1), (14, 1), (18, 2)] + ''' + # done this way to avoid mutating the original + blocks = sorted(blocks, key=lambda b: b[0]) + end = start+length + for s, l in blocks: + if s > start: + yield (start, s-start) + start = s + if s+l > start: + start = s+l + if start < end: + yield (start, end-start) +class RootSeekableFieldSet(GenericFieldSet): def seekBit(self, address, relative=True): if not relative: address -= self.absolute_address if address < 0: raise ParserError("Seek below field set start (%s.%s)" % divmod(address, 8)) - if not self._checkAddress(address): - raise ParserError("Seek above field set end (%s.%s)" % divmod(address, 8)) - self._offset = address + self._current_size = address return None def seekByte(self, address, relative=True): return self.seekBit(address*8, relative) - def readMoreFields(self, number): - return self._readMoreFields(xrange(number)) + def _fixLastField(self): + """ + Try to fix last field when we know current field set size. + Returns new added field if any, or None. 
+ """ + assert self._size is not None - def _feedAll(self): - return self._readMoreFields(repeat(1)) + # Stop parser + message = ["stop parser"] + self._field_generator = None - def _readFields(self): - while True: - added = self._readMoreFields(xrange(1)) - if not added: - break - yield self._field_array[-1] + # If last field is too big, delete it + while self._size < self._current_size: + field = self._deleteField(len(self._fields)-1) + message.append("delete field %s" % field.path) + assert self._current_size <= self._size - def _readMoreFields(self, index_generator): - added = 0 - if self._generator: - try: - for index in index_generator: - self._feedOne() - added += 1 - except StopIteration: - self._stopFeed() - except HACHOIR_ERRORS, err: - self.error("Error: %s" % makeUnicode(err)) - self._stopFeed() - return added + blocks = [(x.absolute_address, x.size) for x in self._fields] + fields = [] + self._size = max(self._size, max(a+b for a,b in blocks) - self.absolute_address) + for start, length in getgaps(self.absolute_address, self._size, blocks): + self.seekBit(start, relative=False) + field = createRawField(self, length, "unparsed[]") + self.setUniqueFieldName(field) + self._fields.append(field.name, field) + fields.append(field) + message.append("found unparsed segment: start %s, length %s" % (start, length)) + self.seekBit(self._size + self.absolute_address, relative=False) + message = ", ".join(message) + if fields: + self.warning("[Autofix] Fix parser error: " + message) + return fields - current_length = property(lambda self: len(self._field_array)) - current_size = property(lambda self: self._offset) + def _stopFeeding(self): + new_field = None + if self._size is None: + if self._parent: + self._size = self._current_size - def __iter__(self): - for field in self._field_array: - yield field - if self._generator: - try: - while True: - yield self._feedOne() - except StopIteration: - self._stopFeed() - raise StopIteration - - def __len__(self): - if 
self._generator: - self._feedAll() - return len(self._field_array) - - def nextFieldAddress(self): - return self._offset - - def getFieldIndex(self, field): - return self._field_array.index(field) + new_field = self._fixLastField() + self._field_generator = None + return new_field class SeekableFieldSet(RootSeekableFieldSet): def __init__(self, parent, name, description=None, size=None): assert issubclass(parent.__class__, BasicFieldSet) RootSeekableFieldSet.__init__(self, parent, name, parent.stream, description, size) - diff --git a/lib/hachoir_core/field/static_field_set.py b/lib/hachoir_core/field/static_field_set.py index 2e9e689e..e3897b30 100644 --- a/lib/hachoir_core/field/static_field_set.py +++ b/lib/hachoir_core/field/static_field_set.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import FieldSet, ParserError +from hachoir_core.field import FieldSet, ParserError class StaticFieldSet(FieldSet): """ @@ -20,7 +20,7 @@ class StaticFieldSet(FieldSet): if cls._class is not cls.__name__: cls._class = cls.__name__ cls.static_size = cls._computeStaticSize() - return object.__new__(cls) + return object.__new__(cls, *args, **kw) @staticmethod def _computeItemSize(item): diff --git a/lib/hachoir_core/field/string_field.py b/lib/hachoir_core/field/string_field.py index e2bb4ed0..e44e24dc 100644 --- a/lib/hachoir_core/field/string_field.py +++ b/lib/hachoir_core/field/string_field.py @@ -15,11 +15,11 @@ Note: For PascalStringXX, prefixed value is the number of bytes and not of characters! 
""" -from lib.hachoir_core.field import FieldError, Bytes -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.tools import alignValue, makePrintable -from lib.hachoir_core.i18n import guessBytesCharset, _ -from lib.hachoir_core import config +from hachoir_core.field import FieldError, Bytes +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.tools import alignValue, makePrintable +from hachoir_core.i18n import guessBytesCharset, _ +from hachoir_core import config from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF32_LE, BOM_UTF32_BE # Default charset used to convert byte string to Unicode diff --git a/lib/hachoir_core/field/sub_file.py b/lib/hachoir_core/field/sub_file.py index b5993ba4..0f2912d4 100644 --- a/lib/hachoir_core/field/sub_file.py +++ b/lib/hachoir_core/field/sub_file.py @@ -1,6 +1,6 @@ -from lib.hachoir_core.field import Bytes -from lib.hachoir_core.tools import makePrintable, humanFilesize -from lib.hachoir_core.stream import InputIOStream +from hachoir_core.field import Bytes +from hachoir_core.tools import makePrintable, humanFilesize +from hachoir_core.stream import InputIOStream class SubFile(Bytes): """ diff --git a/lib/hachoir_core/field/timestamp.py b/lib/hachoir_core/field/timestamp.py index 57906537..8a07bcdf 100644 --- a/lib/hachoir_core/field/timestamp.py +++ b/lib/hachoir_core/field/timestamp.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.tools import (humanDatetime, humanDuration, +from hachoir_core.tools import (humanDatetime, humanDuration, timestampUNIX, timestampMac32, timestampUUID60, timestampWin64, durationWin64) -from lib.hachoir_core.field import Bits, FieldSet +from hachoir_core.field import Bits, FieldSet from datetime import datetime class GenericTimestamp(Bits): @@ -32,7 +32,7 @@ def timestampFactory(cls_name, handler, size): TimestampUnix32 = timestampFactory("TimestampUnix32", timestampUNIX, 32) TimestampUnix64 = timestampFactory("TimestampUnix64", timestampUNIX, 
64) -TimestampMac32 = timestampFactory("TimestampUnix32", timestampMac32, 32) +TimestampMac32 = timestampFactory("TimestampMac32", timestampMac32, 32) TimestampUUID60 = timestampFactory("TimestampUUID60", timestampUUID60, 60) TimestampWin64 = timestampFactory("TimestampWin64", timestampWin64, 64) diff --git a/lib/hachoir_core/field/vector.py b/lib/hachoir_core/field/vector.py index eba86471..953fdbc3 100644 --- a/lib/hachoir_core/field/vector.py +++ b/lib/hachoir_core/field/vector.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import Field, FieldSet, ParserError +from hachoir_core.field import Field, FieldSet, ParserError class GenericVector(FieldSet): def __init__(self, parent, name, nb_items, item_class, item_name="item", description=None): diff --git a/lib/hachoir_core/i18n.py b/lib/hachoir_core/i18n.py index 8babf8e9..b34c7480 100644 --- a/lib/hachoir_core/i18n.py +++ b/lib/hachoir_core/i18n.py @@ -14,8 +14,8 @@ WARNING: Loading this module indirectly calls initLocale() which sets settings. 
""" -import lib.hachoir_core.config as config -import lib.hachoir_core +import hachoir_core.config as config +import hachoir_core import locale from os import path import sys @@ -133,7 +133,7 @@ def _initGettext(): return (_dummy_gettext, _dummy_ngettext) # Gettext variables - package = lib.hachoir_core.PACKAGE + package = hachoir_core.PACKAGE locale_dir = path.join(path.dirname(__file__), "..", "locale") # Initialize gettext module diff --git a/lib/hachoir_core/iso639.py b/lib/hachoir_core/iso639.py index 61a0ba93..5da70e11 100644 --- a/lib/hachoir_core/iso639.py +++ b/lib/hachoir_core/iso639.py @@ -328,7 +328,6 @@ _ISO639 = ( (u"Micmac", "mic", None), (u"Minangkabau", "min", None), (u"Mirandese", "mwl", None), - (u"Miscellaneous languages", "mis", None), (u"Mohawk", "moh", None), (u"Moksha", "mdf", None), (u"Moldavian", "mol", "mo"), @@ -513,6 +512,7 @@ _ISO639 = ( (u"Uighur", "uig", "ug"), (u"Ukrainian", "ukr", "uk"), (u"Umbundu", "umb", None), + (u"Uncoded languages", "mis", None), (u"Undetermined", "und", None), (u"Upper Sorbian", "hsb", None), (u"Urdu", "urd", "ur"), diff --git a/lib/hachoir_core/language.py b/lib/hachoir_core/language.py index 2f80ddce..997f7a61 100644 --- a/lib/hachoir_core/language.py +++ b/lib/hachoir_core/language.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.iso639 import ISO639_2 +from hachoir_core.iso639 import ISO639_2 class Language: def __init__(self, code): diff --git a/lib/hachoir_core/log.py b/lib/hachoir_core/log.py index f777ab6b..32fca06d 100644 --- a/lib/hachoir_core/log.py +++ b/lib/hachoir_core/log.py @@ -1,6 +1,6 @@ import os, sys, time -import lib.hachoir_core.config as config -from lib.hachoir_core.i18n import _ +import hachoir_core.config as config +from hachoir_core.i18n import _ class Log: LOG_INFO = 0 @@ -75,7 +75,7 @@ class Log: level <= self.LOG_INFO and not config.verbose: return if config.debug: - from lib.hachoir_core.error import getBacktrace + from hachoir_core.error import getBacktrace backtrace = 
getBacktrace(None) if backtrace: text += "\n\n" + backtrace diff --git a/lib/hachoir_core/stream/__init__.py b/lib/hachoir_core/stream/__init__.py index c2a8f6dc..163e12a3 100644 --- a/lib/hachoir_core/stream/__init__.py +++ b/lib/hachoir_core/stream/__init__.py @@ -1,11 +1,11 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.stream.stream import StreamError -from lib.hachoir_core.stream.input import ( +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.stream.stream import StreamError +from hachoir_core.stream.input import ( InputStreamError, InputStream, InputIOStream, StringInputStream, InputSubStream, InputFieldStream, FragmentedStream, ConcatStream) -from lib.hachoir_core.stream.input_helper import FileInputStream, guessStreamCharset -from lib.hachoir_core.stream.output import (OutputStreamError, +from hachoir_core.stream.input_helper import FileInputStream, guessStreamCharset +from hachoir_core.stream.output import (OutputStreamError, FileOutputStream, StringOutputStream, OutputStream) diff --git a/lib/hachoir_core/stream/input.py b/lib/hachoir_core/stream/input.py index ec01e6e4..79ca6da0 100644 --- a/lib/hachoir_core/stream/input.py +++ b/lib/hachoir_core/stream/input.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.error import info -from lib.hachoir_core.log import Logger -from lib.hachoir_core.bits import str2long -from lib.hachoir_core.i18n import getTerminalCharset -from lib.hachoir_core.tools import lowerBound -from lib.hachoir_core.i18n import _ -from os import dup, fdopen +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.error import info +from hachoir_core.log import Logger +from hachoir_core.bits import str2long +from hachoir_core.i18n import getTerminalCharset +from hachoir_core.tools import lowerBound +from hachoir_core.i18n import _ +from hachoir_core.tools import alignValue from 
errno import ESPIPE from weakref import ref as weakref_ref -from lib.hachoir_core.stream import StreamError +from hachoir_core.stream import StreamError class InputStreamError(StreamError): pass @@ -168,13 +168,20 @@ class InputStream(Logger): raise NotImplementedError def readBits(self, address, nbits, endian): - assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) - shift, data, missing = self.read(address, nbits) + if endian is MIDDLE_ENDIAN: + # read an aligned chunk of words + wordaddr, remainder = divmod(address, 16) + wordnbits = alignValue(remainder+nbits, 16) + _, data, missing = self.read(wordaddr*16, wordnbits) + shift = remainder + else: + shift, data, missing = self.read(address, nbits) if missing: raise ReadStreamError(nbits, address) value = str2long(data, endian) - if endian is BIG_ENDIAN: + if endian in (BIG_ENDIAN, MIDDLE_ENDIAN): value >>= len(data) * 8 - shift - nbits else: value >>= shift @@ -404,6 +411,7 @@ class InputIOStream(InputStream): def file(self): if hasattr(self._input, "fileno"): + from os import dup, fdopen new_fd = dup(self._input.fileno()) new_file = fdopen(new_fd, "r") new_file.seek(0) diff --git a/lib/hachoir_core/stream/input_helper.py b/lib/hachoir_core/stream/input_helper.py index 9c222f3a..e7938310 100644 --- a/lib/hachoir_core/stream/input_helper.py +++ b/lib/hachoir_core/stream/input_helper.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.i18n import getTerminalCharset, guessBytesCharset, _ -from lib.hachoir_core.stream import InputIOStream, InputSubStream, InputStreamError +from hachoir_core.i18n import getTerminalCharset, guessBytesCharset, _ +from hachoir_core.stream import InputIOStream, InputSubStream, InputStreamError def FileInputStream(filename, real_filename=None, **args): """ diff --git a/lib/hachoir_core/stream/output.py b/lib/hachoir_core/stream/output.py index a809f76b..4300cc66 100644 --- a/lib/hachoir_core/stream/output.py +++ 
b/lib/hachoir_core/stream/output.py @@ -1,7 +1,7 @@ from cStringIO import StringIO -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.stream import StreamError +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.bits import long2raw +from hachoir_core.stream import StreamError from errno import EBADF MAX_READ_NBYTES = 2 ** 16 @@ -21,6 +21,7 @@ class OutputStream(object): filename = property(_getFilename) def writeBit(self, state, endian): + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) # middle endian not yet supported if self._bit_pos == 7: self._bit_pos = 0 if state: @@ -39,6 +40,7 @@ class OutputStream(object): self._bit_pos += 1 def writeBits(self, count, value, endian): + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) # middle endian not yet supported assert 0 <= value < 2**count # Feed bits to align to byte address diff --git a/lib/hachoir_core/stream/stream.py b/lib/hachoir_core/stream/stream.py index 101fcf68..58c9aea8 100644 --- a/lib/hachoir_core/stream/stream.py +++ b/lib/hachoir_core/stream/stream.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.error import HachoirError +from hachoir_core.error import HachoirError class StreamError(HachoirError): pass diff --git a/lib/hachoir_core/text_handler.py b/lib/hachoir_core/text_handler.py index 1a67e9be..e2c65f0a 100644 --- a/lib/hachoir_core/text_handler.py +++ b/lib/hachoir_core/text_handler.py @@ -2,12 +2,12 @@ Utilities used to convert a field to human classic reprentation of data. 
""" -from lib.hachoir_core.tools import ( +from hachoir_core.tools import ( humanDuration, humanFilesize, alignValue, durationWin64 as doDurationWin64, deprecated) from types import FunctionType, MethodType -from lib.hachoir_core.field import Field +from hachoir_core.field import Field def textHandler(field, handler): assert isinstance(handler, (FunctionType, MethodType)) diff --git a/lib/hachoir_core/tools.py b/lib/hachoir_core/tools.py index 1994dbfb..a8f77334 100644 --- a/lib/hachoir_core/tools.py +++ b/lib/hachoir_core/tools.py @@ -4,7 +4,7 @@ Various utilities. """ -from lib.hachoir_core.i18n import _, ngettext +from hachoir_core.i18n import _, ngettext import re import stat from datetime import datetime, timedelta, MAXYEAR @@ -330,7 +330,14 @@ def makeUnicode(text): if isinstance(text, str): text = unicode(text, "ISO-8859-1") elif not isinstance(text, unicode): - text = unicode(text) + try: + text = unicode(text) + except UnicodeError: + try: + text = str(text) + except Exception: + text = repr(text) + return makeUnicode(text) text = regex_control_code.sub( lambda regs: controlchars[ord(regs.group(1))], text) text = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", text) diff --git a/lib/hachoir_core/version.py b/lib/hachoir_core/version.py index c5e95447..e3506e93 100644 --- a/lib/hachoir_core/version.py +++ b/lib/hachoir_core/version.py @@ -1,5 +1,5 @@ PACKAGE = "hachoir-core" -VERSION = "1.3.3" +VERSION = "1.3.4" WEBSITE = 'http://bitbucket.org/haypo/hachoir/wiki/hachoir-core' LICENSE = 'GNU GPL v2' diff --git a/lib/hachoir_metadata/__init__.py b/lib/hachoir_metadata/__init__.py index 54adbaa8..5ab4743c 100644 --- a/lib/hachoir_metadata/__init__.py +++ b/lib/hachoir_metadata/__init__.py @@ -1,15 +1,15 @@ -from lib.hachoir_metadata.version import VERSION as __version__ -from lib.hachoir_metadata.metadata import extractMetadata +from hachoir_metadata.version import VERSION as __version__ +from hachoir_metadata.metadata import extractMetadata # Just import the 
module, # each module use registerExtractor() method -import lib.hachoir_metadata.archive -import lib.hachoir_metadata.audio -import lib.hachoir_metadata.file_system -import lib.hachoir_metadata.image -import lib.hachoir_metadata.jpeg -import lib.hachoir_metadata.misc -import lib.hachoir_metadata.program -import lib.hachoir_metadata.riff -import lib.hachoir_metadata.video +import hachoir_metadata.archive +import hachoir_metadata.audio +import hachoir_metadata.file_system +import hachoir_metadata.image +import hachoir_metadata.jpeg +import hachoir_metadata.misc +import hachoir_metadata.program +import hachoir_metadata.riff +import hachoir_metadata.video diff --git a/lib/hachoir_metadata/archive.py b/lib/hachoir_metadata/archive.py index 64289ac9..2ca16321 100644 --- a/lib/hachoir_metadata/archive.py +++ b/lib/hachoir_metadata/archive.py @@ -1,11 +1,11 @@ -from lib.hachoir_metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST -from lib.hachoir_metadata.safe import fault_tolerant, getValue -from lib.hachoir_metadata.metadata import ( +from hachoir_metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST +from hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata import ( RootMetadata, Metadata, MultipleMetadata, registerExtractor) -from lib.hachoir_parser.archive import (Bzip2Parser, CabFile, GzipParser, +from hachoir_parser.archive import (Bzip2Parser, CabFile, GzipParser, TarFile, ZipFile, MarFile) -from lib.hachoir_core.tools import humanUnixAttributes -from lib.hachoir_core.i18n import _ +from hachoir_core.tools import humanUnixAttributes +from hachoir_core.i18n import _ def maxNbFile(meta): if meta.quality <= QUALITY_FASTEST: @@ -110,7 +110,7 @@ class CabMetadata(MultipleMetadata): def extract(self, cab): if "folder[0]" in cab: self.useFolder(cab["folder[0]"]) - self.format_version = "Microsoft Cabinet version %s" % cab["cab_version"].display + self.format_version = "Microsoft Cabinet version %s.%s" % 
(cab["major_version"].display, cab["minor_version"].display) self.comment = "%s folders, %s files" % ( cab["nb_folder"].value, cab["nb_files"].value) max_nb = maxNbFile(self) diff --git a/lib/hachoir_metadata/audio.py b/lib/hachoir_metadata/audio.py index 86284c47..566613e0 100644 --- a/lib/hachoir_metadata/audio.py +++ b/lib/hachoir_metadata/audio.py @@ -1,12 +1,12 @@ -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser -from lib.hachoir_parser.container import OggFile, RealMediaFile -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate +from hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser +from hachoir_parser.container import OggFile, RealMediaFile +from hachoir_core.i18n import _ +from hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate from datetime import timedelta -from lib.hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST -from lib.hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST +from hachoir_metadata.safe import fault_tolerant, getValue def computeComprRate(meta, size): if not meta.has("duration") \ diff --git a/lib/hachoir_metadata/file_system.py b/lib/hachoir_metadata/file_system.py index ff5ff997..b111c486 100644 --- a/lib/hachoir_metadata/file_system.py +++ b/lib/hachoir_metadata/file_system.py @@ -1,6 +1,6 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.file_system import ISO9660 +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.safe import 
fault_tolerant +from hachoir_parser.file_system import ISO9660 from datetime import datetime class ISO9660_Metadata(RootMetadata): diff --git a/lib/hachoir_metadata/filter.py b/lib/hachoir_metadata/filter.py index 0807c5d7..b4af8e3c 100644 --- a/lib/hachoir_metadata/filter.py +++ b/lib/hachoir_metadata/filter.py @@ -1,4 +1,4 @@ -from lib.hachoir_metadata.timezone import UTC +from hachoir_metadata.timezone import UTC from datetime import date, datetime # Year in 1850..2030 diff --git a/lib/hachoir_metadata/formatter.py b/lib/hachoir_metadata/formatter.py index d52c12b2..0d04f920 100644 --- a/lib/hachoir_metadata/formatter.py +++ b/lib/hachoir_metadata/formatter.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.i18n import _, ngettext +from hachoir_core.i18n import _, ngettext NB_CHANNEL_NAME = {1: _("mono"), 2: _("stereo")} diff --git a/lib/hachoir_metadata/image.py b/lib/hachoir_metadata/image.py index fbe41a3c..1416a8f9 100644 --- a/lib/hachoir_metadata/image.py +++ b/lib/hachoir_metadata/image.py @@ -1,12 +1,12 @@ -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_parser.image import ( +from hachoir_parser.image import ( BmpFile, IcoFile, PcxFile, GifFile, PngFile, TiffFile, XcfFile, TargaFile, WMF_File, PsdFile) -from lib.hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel -from lib.hachoir_parser.image.xcf import XcfProperty -from lib.hachoir_core.i18n import _ -from lib.hachoir_metadata.safe import fault_tolerant +from hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel +from hachoir_parser.image.xcf import XcfProperty +from hachoir_core.i18n import _ +from hachoir_metadata.safe import fault_tolerant def computeComprRate(meta, compr_size): """ @@ -240,7 +240,7 @@ class GifMetadata(RootMetadata): def useScreen(self, screen): self.width = screen["width"].value self.height = screen["height"].value - 
self.bits_per_pixel = (1 + screen["bpp"].value) + self.bits_per_pixel = (1 + screen["size_global_map"].value) class TargaMetadata(RootMetadata): def extract(self, tga): diff --git a/lib/hachoir_metadata/jpeg.py b/lib/hachoir_metadata/jpeg.py index 9a3fe1aa..a112318f 100644 --- a/lib/hachoir_metadata/jpeg.py +++ b/lib/hachoir_metadata/jpeg.py @@ -1,14 +1,14 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.image import computeComprRate -from lib.hachoir_parser.image.exif import ExifEntry -from lib.hachoir_parser.image.jpeg import ( +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.image import computeComprRate +from hachoir_parser.image.exif import IFD, BasicIFDEntry +from hachoir_parser.image.jpeg import ( JpegFile, JpegChunk, QUALITY_HASH_COLOR, QUALITY_SUM_COLOR, QUALITY_HASH_GRAY, QUALITY_SUM_GRAY) -from lib.hachoir_core.field import MissingField -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_metadata.safe import fault_tolerant +from hachoir_core.field import MissingField +from hachoir_core.i18n import _ +from hachoir_core.tools import makeUnicode +from hachoir_metadata.safe import fault_tolerant from datetime import datetime def deg2float(degree, minute, second): @@ -17,21 +17,21 @@ def deg2float(degree, minute, second): class JpegMetadata(RootMetadata): EXIF_KEY = { # Exif metadatas - ExifEntry.TAG_CAMERA_MANUFACTURER: "camera_manufacturer", - ExifEntry.TAG_CAMERA_MODEL: "camera_model", - ExifEntry.TAG_ORIENTATION: "image_orientation", - ExifEntry.TAG_EXPOSURE: "camera_exposure", - ExifEntry.TAG_FOCAL: "camera_focal", - ExifEntry.TAG_BRIGHTNESS: "camera_brightness", - ExifEntry.TAG_APERTURE: "camera_aperture", + "Make": "camera_manufacturer", + "Model": "camera_model", + "Orientation": "image_orientation", + "ExposureTime": "camera_exposure", + "FNumber": "camera_focal", + "BrightnessValue": 
"camera_brightness", + "MaxApertureValue": "camera_aperture", # Generic metadatas - ExifEntry.TAG_IMG_TITLE: "title", - ExifEntry.TAG_SOFTWARE: "producer", - ExifEntry.TAG_FILE_TIMESTAMP: "creation_date", - ExifEntry.TAG_WIDTH: "width", - ExifEntry.TAG_HEIGHT: "height", - ExifEntry.TAG_USER_COMMENT: "comment", + "ImageDescription": "title", + "Software": "producer", + "DateTime": "creation_date", + "PixelXDimension": "width", + "PixelYDimension": "height", + "UserComment": "comment", } IPTC_KEY = { @@ -63,7 +63,8 @@ class JpegMetadata(RootMetadata): self.extractAPP0(jpeg["app0/content"]) if "exif/content" in jpeg: - for ifd in jpeg.array("exif/content/ifd"): + for ifd in jpeg['exif/content']: + if not isinstance(ifd, IFD): continue for entry in ifd.array("entry"): self.processIfdEntry(ifd, entry) self.readGPS(ifd) @@ -156,7 +157,7 @@ class JpegMetadata(RootMetadata): @fault_tolerant def processIfdEntry(self, ifd, entry): # Skip unknown tags - tag = entry["tag"].value + tag = entry["tag"].display if tag not in self.EXIF_KEY: return key = self.EXIF_KEY[tag] @@ -166,20 +167,17 @@ class JpegMetadata(RootMetadata): return # Read value - if "value" in entry: - value = entry["value"].value - else: - value = ifd["value_%s" % entry.name].value + value = ifd.getEntryValues(entry)[0].value # Convert value to string - if tag == ExifEntry.TAG_ORIENTATION: + if tag == "Orientation": value = self.orientation_name.get(value, value) - elif tag == ExifEntry.TAG_EXPOSURE: + elif tag == "ExposureTime": if not value: return if isinstance(value, float): value = (value, u"1/%g" % (1/value)) - elif entry["type"].value in (ExifEntry.TYPE_RATIONAL, ExifEntry.TYPE_SIGNED_RATIONAL): + elif entry["type"].value in (BasicIFDEntry.TYPE_RATIONAL, BasicIFDEntry.TYPE_SIGNED_RATIONAL): value = (value, u"%.3g" % value) # Store information @@ -197,35 +195,33 @@ class JpegMetadata(RootMetadata): timestamp = None datestamp = None for entry in ifd.array("entry"): - tag = entry["tag"].value - if tag == 
ExifEntry.TAG_GPS_LATITUDE_REF: - if entry["value"].value == "N": + tag = entry["tag"].display + values = [v.value for v in ifd.getEntryValues(entry)] + if tag == "GPSLatitudeRef": + if values[0] == "N": latitude_ref = 1 else: latitude_ref = -1 - elif tag == ExifEntry.TAG_GPS_LONGITUDE_REF: - if entry["value"].value == "E": + elif tag == "GPSLongitudeRef": + if values[0] == "E": longitude_ref = 1 else: longitude_ref = -1 - elif tag == ExifEntry.TAG_GPS_ALTITUDE_REF: - if entry["value"].value == 1: + elif tag == "GPSAltitudeRef": + if values[0] == 1: altitude_ref = -1 else: altitude_ref = 1 - elif tag == ExifEntry.TAG_GPS_LATITUDE: - latitude = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - elif tag == ExifEntry.TAG_GPS_LONGITUDE: - longitude = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - elif tag == ExifEntry.TAG_GPS_ALTITUDE: - altitude = ifd["value_%s" % entry.name].value - elif tag == ExifEntry.TAG_GPS_DATESTAMP: - datestamp = ifd["value_%s" % entry.name].value - elif tag == ExifEntry.TAG_GPS_TIMESTAMP: - items = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - items = map(int, items) - items = map(str, items) - timestamp = ":".join(items) + elif tag == "GPSLatitude": + latitude = values + elif tag == "GPSLongitude": + longitude = values + elif tag == "GPSAltitude": + altitude = values[0] + elif tag == "GPSDateStamp": + datestamp = values[0] + elif tag == "GPSTimeStamp": + timestamp = ':'.join(str(int(x)) for x in values) if latitude_ref and latitude: value = deg2float(*latitude) if latitude_ref < 0: diff --git a/lib/hachoir_metadata/metadata.py b/lib/hachoir_metadata/metadata.py index 489a5466..37461c9d 100644 --- a/lib/hachoir_metadata/metadata.py +++ b/lib/hachoir_metadata/metadata.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- -from lib.hachoir_core.compatibility import any, sorted -from lib.hachoir_core.endian import endian_name -from lib.hachoir_core.tools import 
makePrintable, makeUnicode -from lib.hachoir_core.dict import Dict -from lib.hachoir_core.error import error, HACHOIR_ERRORS -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.log import Logger -from lib.hachoir_metadata.metadata_item import ( +from hachoir_core.compatibility import any, sorted +from hachoir_core.endian import endian_name +from hachoir_core.tools import makePrintable, makeUnicode +from hachoir_core.dict import Dict +from hachoir_core.error import error, HACHOIR_ERRORS +from hachoir_core.i18n import _ +from hachoir_core.log import Logger +from hachoir_metadata.metadata_item import ( MIN_PRIORITY, MAX_PRIORITY, QUALITY_NORMAL) -from lib.hachoir_metadata.register import registerAllItems +from hachoir_metadata.register import registerAllItems extractors = {} diff --git a/lib/hachoir_metadata/metadata_item.py b/lib/hachoir_metadata/metadata_item.py index 4b5573af..bddd3b07 100644 --- a/lib/hachoir_metadata/metadata_item.py +++ b/lib/hachoir_metadata/metadata_item.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.tools import makeUnicode, normalizeNewline -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_metadata import config -from lib.hachoir_metadata.setter import normalizeString +from hachoir_core.tools import makeUnicode, normalizeNewline +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_metadata import config +from hachoir_metadata.setter import normalizeString MIN_PRIORITY = 100 MAX_PRIORITY = 999 diff --git a/lib/hachoir_metadata/misc.py b/lib/hachoir_metadata/misc.py index 67647784..c6bbe97f 100644 --- a/lib/hachoir_metadata/misc.py +++ b/lib/hachoir_metadata/misc.py @@ -1,11 +1,11 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.container import SwfFile -from lib.hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile -from lib.hachoir_core.field import isString -from 
lib.hachoir_core.error import warning -from lib.hachoir_parser import guessParser -from lib.hachoir_metadata.setter import normalizeString +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.safe import fault_tolerant +from hachoir_parser.container import SwfFile +from hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile +from hachoir_core.field import isString +from hachoir_core.error import warning +from hachoir_parser import guessParser +from hachoir_metadata.setter import normalizeString class TorrentMetadata(RootMetadata): KEY_TO_ATTR = { @@ -109,45 +109,42 @@ class OLE2_Metadata(RootMetadata): def extract(self, ole2): self._extract(ole2) - def _extract(self, fieldset, main_document=True): - if main_document: - # _feedAll() is needed to make sure that we get all root[*] fragments + def _extract(self, fieldset): + try: fieldset._feedAll() - if "root[0]" in fieldset: - self.useRoot(fieldset["root[0]"]) - doc_summary = self.getField(fieldset, main_document, "doc_summary[0]") + except StopIteration: + pass + if "root[0]" in fieldset: + self._extract(self.getFragment(fieldset["root[0]"])) + doc_summary = self.getField(fieldset, "doc_summary[0]") if doc_summary: self.useSummary(doc_summary, True) - word_doc = self.getField(fieldset, main_document, "word_doc[0]") + word_doc = self.getField(fieldset, "word_doc[0]") if word_doc: self.useWordDocument(word_doc) - summary = self.getField(fieldset, main_document, "summary[0]") + summary = self.getField(fieldset, "summary[0]") if summary: self.useSummary(summary, False) - @fault_tolerant - def useRoot(self, root): - stream = root.getSubIStream() + def getFragment(self, frag): + stream = frag.getSubIStream() ministream = guessParser(stream) if not ministream: warning("Unable to create the OLE2 mini stream parser!") - return - self._extract(ministream, main_document=False) + return frag + return ministream - def getField(self, fieldset, main_document, name): - 
if name not in fieldset: - return None + def getField(self, fieldset, name): # _feedAll() is needed to make sure that we get all fragments # eg. summary[0], summary[1], ..., summary[n] - fieldset._feedAll() + try: + fieldset._feedAll() + except StopIteration: + pass + if name not in fieldset: + return None field = fieldset[name] - if main_document: - stream = field.getSubIStream() - field = guessParser(stream) - if not field: - warning("Unable to create the OLE2 parser for %s!" % name) - return None - return field + return self.getFragment(field) @fault_tolerant def useSummary(self, summary, is_doc_summary): @@ -161,7 +158,7 @@ class OLE2_Metadata(RootMetadata): @fault_tolerant def useWordDocument(self, doc): - self.comment = "Encrypted: %s" % doc["fEncrypted"].value + self.comment = "Encrypted: %s" % doc["FIB/fEncrypted"].value @fault_tolerant def useProperty(self, summary, property, is_doc_summary): diff --git a/lib/hachoir_metadata/program.py b/lib/hachoir_metadata/program.py index 14027d50..a524cee6 100644 --- a/lib/hachoir_metadata/program.py +++ b/lib/hachoir_metadata/program.py @@ -1,6 +1,6 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_parser.program import ExeFile -from lib.hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_parser.program import ExeFile +from hachoir_metadata.safe import fault_tolerant, getValue class ExeMetadata(RootMetadata): KEY_TO_ATTR = { diff --git a/lib/hachoir_metadata/qt/__init__.py b/lib/hachoir_metadata/qt/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/hachoir_metadata/qt/dialog.ui b/lib/hachoir_metadata/qt/dialog.ui deleted file mode 100644 index 498a8dae..00000000 --- a/lib/hachoir_metadata/qt/dialog.ui +++ /dev/null @@ -1,64 +0,0 @@ - - Form - - - - 0 - 0 - 441 - 412 - - - - hachoir-metadata - - - - - - - - Open - - - - - - - - 0 - 0 - - - - - - - - - - true - 
- - false - - - 0 - - - 0 - - - - - - - Quit - - - - - - - - diff --git a/lib/hachoir_metadata/qt/dialog_ui.py b/lib/hachoir_metadata/qt/dialog_ui.py deleted file mode 100644 index 970257cf..00000000 --- a/lib/hachoir_metadata/qt/dialog_ui.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- - -# Form implementation generated from reading ui file 'hachoir_metadata/qt/dialog.ui' -# -# Created: Mon Jul 26 03:10:06 2010 -# by: PyQt4 UI code generator 4.7.3 -# -# WARNING! All changes made in this file will be lost! - -from PyQt4 import QtCore, QtGui - -class Ui_Form(object): - def setupUi(self, Form): - Form.setObjectName("Form") - Form.resize(441, 412) - self.verticalLayout = QtGui.QVBoxLayout(Form) - self.verticalLayout.setObjectName("verticalLayout") - self.horizontalLayout_2 = QtGui.QHBoxLayout() - self.horizontalLayout_2.setObjectName("horizontalLayout_2") - self.open_button = QtGui.QPushButton(Form) - self.open_button.setObjectName("open_button") - self.horizontalLayout_2.addWidget(self.open_button) - self.files_combo = QtGui.QComboBox(Form) - sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.files_combo.sizePolicy().hasHeightForWidth()) - self.files_combo.setSizePolicy(sizePolicy) - self.files_combo.setObjectName("files_combo") - self.horizontalLayout_2.addWidget(self.files_combo) - self.verticalLayout.addLayout(self.horizontalLayout_2) - self.metadata_table = QtGui.QTableWidget(Form) - self.metadata_table.setAlternatingRowColors(True) - self.metadata_table.setShowGrid(False) - self.metadata_table.setRowCount(0) - self.metadata_table.setColumnCount(0) - self.metadata_table.setObjectName("metadata_table") - self.metadata_table.setColumnCount(0) - self.metadata_table.setRowCount(0) - self.verticalLayout.addWidget(self.metadata_table) - self.quit_button = QtGui.QPushButton(Form) - 
self.quit_button.setObjectName("quit_button") - self.verticalLayout.addWidget(self.quit_button) - - self.retranslateUi(Form) - QtCore.QMetaObject.connectSlotsByName(Form) - - def retranslateUi(self, Form): - Form.setWindowTitle(QtGui.QApplication.translate("Form", "hachoir-metadata", None, QtGui.QApplication.UnicodeUTF8)) - self.open_button.setText(QtGui.QApplication.translate("Form", "Open", None, QtGui.QApplication.UnicodeUTF8)) - self.quit_button.setText(QtGui.QApplication.translate("Form", "Quit", None, QtGui.QApplication.UnicodeUTF8)) - diff --git a/lib/hachoir_metadata/register.py b/lib/hachoir_metadata/register.py index 9916d36f..3cbde86d 100644 --- a/lib/hachoir_metadata/register.py +++ b/lib/hachoir_metadata/register.py @@ -1,17 +1,17 @@ -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import ( +from hachoir_core.i18n import _ +from hachoir_core.tools import ( humanDuration, humanBitRate, humanFrequency, humanBitSize, humanFilesize, humanDatetime) -from lib.hachoir_core.language import Language -from lib.hachoir_metadata.filter import Filter, NumberFilter, DATETIME_FILTER +from hachoir_core.language import Language +from hachoir_metadata.filter import Filter, NumberFilter, DATETIME_FILTER from datetime import date, datetime, timedelta -from lib.hachoir_metadata.formatter import ( +from hachoir_metadata.formatter import ( humanAudioChannel, humanFrameRate, humanComprRate, humanAltitude, humanPixelSize, humanDPI) -from lib.hachoir_metadata.setter import ( +from hachoir_metadata.setter import ( setDatetime, setTrackNumber, setTrackTotal, setLanguage) -from lib.hachoir_metadata.metadata_item import Data +from hachoir_metadata.metadata_item import Data MIN_SAMPLE_RATE = 1000 # 1 kHz MAX_SAMPLE_RATE = 192000 # 192 kHz diff --git a/lib/hachoir_metadata/riff.py b/lib/hachoir_metadata/riff.py index e3bfa6f4..adcc0bd9 100644 --- a/lib/hachoir_metadata/riff.py +++ b/lib/hachoir_metadata/riff.py @@ -2,13 +2,13 @@ Extract metadata from RIFF file 
format: AVI video and WAV sound. """ -from lib.hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant, getValue -from lib.hachoir_parser.container.riff import RiffFile -from lib.hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO -from lib.hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds -from lib.hachoir_core.i18n import _ -from lib.hachoir_metadata.audio import computeComprRate as computeAudioComprRate +from hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor +from hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_parser.container.riff import RiffFile +from hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO +from hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds +from hachoir_core.i18n import _ +from hachoir_metadata.audio import computeComprRate as computeAudioComprRate from datetime import timedelta class RiffMetadata(MultipleMetadata): diff --git a/lib/hachoir_metadata/safe.py b/lib/hachoir_metadata/safe.py index 708a3c2f..e1d91abb 100644 --- a/lib/hachoir_metadata/safe.py +++ b/lib/hachoir_metadata/safe.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.error import HACHOIR_ERRORS, warning +from hachoir_core.error import HACHOIR_ERRORS, warning def fault_tolerant(func, *args): def safe_func(*args, **kw): diff --git a/lib/hachoir_metadata/setter.py b/lib/hachoir_metadata/setter.py index 77ecf668..41da4140 100644 --- a/lib/hachoir_metadata/setter.py +++ b/lib/hachoir_metadata/setter.py @@ -1,10 +1,10 @@ from datetime import date, datetime import re -from lib.hachoir_core.language import Language +from hachoir_core.language import Language from locale import setlocale, LC_ALL from time import strptime -from lib.hachoir_metadata.timezone import createTimezone -from lib.hachoir_metadata import config +from hachoir_metadata.timezone import createTimezone +from hachoir_metadata import config 
NORMALIZE_REGEX = re.compile("[-/.: ]+") YEAR_REGEX1 = re.compile("^([0-9]{4})$") diff --git a/lib/hachoir_metadata/video.py b/lib/hachoir_metadata/video.py index e7cdc682..5fcb2dd1 100644 --- a/lib/hachoir_metadata/video.py +++ b/lib/hachoir_metadata/video.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.field import MissingField -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_core.field import MissingField +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_metadata.metadata_item import QUALITY_GOOD -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.video import MovFile, AsfFile, FlvFile -from lib.hachoir_parser.video.asf import Descriptor as ASF_Descriptor -from lib.hachoir_parser.container import MkvFile -from lib.hachoir_parser.container.mkv import dateToDatetime -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds +from hachoir_metadata.metadata_item import QUALITY_GOOD +from hachoir_metadata.safe import fault_tolerant +from hachoir_parser.video import MovFile, AsfFile, FlvFile +from hachoir_parser.video.asf import Descriptor as ASF_Descriptor +from hachoir_parser.container import MkvFile +from hachoir_parser.container.mkv import dateToDatetime +from hachoir_core.i18n import _ +from hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds from datetime import timedelta class MkvMetadata(MultipleMetadata): @@ -59,9 +59,10 @@ class MkvMetadata(MultipleMetadata): def trackCommon(self, track, meta): if "Name/unicode" in track: meta.title = track["Name/unicode"].value - if "Language/string" in track \ - and track["Language/string"].value not in ("mis", "und"): + if "Language/string" in track: meta.language = track["Language/string"].value + else: + meta.language = "eng" def processVideo(self, track): video = Metadata(self) @@ -222,7 +223,7 @@ class 
MovMetadata(RootMetadata): self.last_modification = hdr["lastmod_date"].value self.duration = timedelta(seconds=float(hdr["duration"].value) / hdr["time_scale"].value) self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value*100) - self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100//255) + self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100) @fault_tolerant def processTrackHeader(self, hdr): diff --git a/lib/hachoir_parser/__init__.py b/lib/hachoir_parser/__init__.py index 0d1e0469..1b9860ab 100644 --- a/lib/hachoir_parser/__init__.py +++ b/lib/hachoir_parser/__init__.py @@ -1,7 +1,7 @@ -from lib.hachoir_parser.version import __version__ -from lib.hachoir_parser.parser import ValidateError, HachoirParser, Parser -from lib.hachoir_parser.parser_list import ParserList, HachoirParserList -from lib.hachoir_parser.guess import (QueryParser, guessParser, createParser) -from lib.hachoir_parser import (archive, audio, container, +from hachoir_parser.version import __version__ +from hachoir_parser.parser import ValidateError, HachoirParser, Parser +from hachoir_parser.parser_list import ParserList, HachoirParserList +from hachoir_parser.guess import (QueryParser, guessParser, createParser) +from hachoir_parser import (archive, audio, container, file_system, image, game, misc, network, program, video) diff --git a/lib/hachoir_parser/archive/__init__.py b/lib/hachoir_parser/archive/__init__.py index 86fbb9eb..46103c1a 100644 --- a/lib/hachoir_parser/archive/__init__.py +++ b/lib/hachoir_parser/archive/__init__.py @@ -1,12 +1,13 @@ -from lib.hachoir_parser.archive.ace import AceFile -from lib.hachoir_parser.archive.ar import ArchiveFile -from lib.hachoir_parser.archive.bzip2_parser import Bzip2Parser -from lib.hachoir_parser.archive.cab import CabFile -from lib.hachoir_parser.archive.gzip_parser import GzipParser -from lib.hachoir_parser.archive.tar import TarFile -from lib.hachoir_parser.archive.zip import ZipFile -from 
lib.hachoir_parser.archive.rar import RarFile -from lib.hachoir_parser.archive.rpm import RpmFile -from lib.hachoir_parser.archive.sevenzip import SevenZipParser -from lib.hachoir_parser.archive.mar import MarFile - +from hachoir_parser.archive.ace import AceFile +from hachoir_parser.archive.ar import ArchiveFile +from hachoir_parser.archive.bzip2_parser import Bzip2Parser +from hachoir_parser.archive.cab import CabFile +from hachoir_parser.archive.gzip_parser import GzipParser +from hachoir_parser.archive.tar import TarFile +from hachoir_parser.archive.zip import ZipFile +from hachoir_parser.archive.rar import RarFile +from hachoir_parser.archive.rpm import RpmFile +from hachoir_parser.archive.sevenzip import SevenZipParser +from hachoir_parser.archive.mar import MarFile +from hachoir_parser.archive.mozilla_ar import MozillaArchive +from hachoir_parser.archive.zlib import ZlibData diff --git a/lib/hachoir_parser/archive/ace.py b/lib/hachoir_parser/archive/ace.py index ff65bbb6..03652920 100644 --- a/lib/hachoir_parser/archive/ace.py +++ b/lib/hachoir_parser/archive/ace.py @@ -11,15 +11,15 @@ Author: Christophe Gisquet Creation date: 19 january 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, Bits, NullBits, RawBytes, Enum, UInt8, UInt16, UInt32, PascalString8, PascalString16, String, TimeDateMSDOS32) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.msdos import MSDOSFileAttr32 +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.msdos import MSDOSFileAttr32 MAGIC = "**ACE**" diff --git a/lib/hachoir_parser/archive/ar.py b/lib/hachoir_parser/archive/ar.py index e314e9a1..421cdc53 100644 
--- a/lib/hachoir_parser/archive/ar.py +++ b/lib/hachoir_parser/archive/ar.py @@ -2,10 +2,10 @@ GNU ar archive : archive file (.a) and Debian (.deb) archive. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, String, RawBytes, UnixLine) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN class ArchiveFileEntry(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/archive/bzip2_parser.py b/lib/hachoir_parser/archive/bzip2_parser.py index 50760b7d..c7df9ea7 100644 --- a/lib/hachoir_parser/archive/bzip2_parser.py +++ b/lib/hachoir_parser/archive/bzip2_parser.py @@ -1,14 +1,18 @@ """ BZIP2 archive file -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, String, - Bytes, Character, UInt8, UInt32, CompressedField) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.tools import paddingSize +from hachoir_core.field import (Field, FieldSet, GenericVector, + ParserError, String, + PaddingBits, Bit, Bits, Character, + UInt32, Enum, CompressedField) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.archive.zlib import build_tree, HuffmanCode try: from bz2 import BZ2Decompressor @@ -27,6 +31,152 @@ try: except ImportError: has_deflate = False +class ZeroTerminatedNumber(Field): + """Zero (bit) terminated number: e.g. 
11110 is 4.""" + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + while True: + bit = stream.readBits(addr, 1, endian) + addr += 1 + self._size += 1 + if not bit: + break + value += 1 + self._value = value + def createValue(self): + return self._value + +def move_to_front(l, c): + l[:] = l[c:c+1] + l[0:c] + l[c+1:] + +class Bzip2Bitmap(FieldSet): + def __init__(self, parent, name, nb_items, start_index, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.nb_items = nb_items + self.start_index = start_index + + def createFields(self): + for i in xrange(self.start_index, self.start_index+self.nb_items): + yield Bit(self, "symbol_used[%i]"%i, "Is the symbol %i (%r) used?"%(i, chr(i))) + +class Bzip2Lengths(FieldSet): + def __init__(self, parent, name, symbols, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.symbols = symbols + + def createFields(self): + yield Bits(self, "start_length", 5) + length = self["start_length"].value + lengths = [] + for i in xrange(self.symbols): + while True: + bit = Bit(self, "change_length[%i][]"%i, "Should the length be changed for symbol %i?"%i) + yield bit + if not bit.value: + break + else: + bit = Enum(Bit(self, "length_decrement[%i][]"%i, "Decrement the value?"), {True: "Decrement", False: "Increment"}) + yield bit + if bit.value: + length -= 1 + else: + length += 1 + lengths.append(length) + self.final_length = length + self.tree = build_tree(lengths) + +class Bzip2Selectors(FieldSet): + def __init__(self, parent, name, ngroups, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.groups = range(ngroups) + + def createFields(self): + for i in xrange(self["../selectors_used"].value): + field = ZeroTerminatedNumber(self, "selector_list[]") + move_to_front(self.groups, 
field.value) + field.realvalue = self.groups[0] + field._description = "MTF'ed selector index: raw value %i, real value %i"%(field.value, field.realvalue) + yield field + +class Bzip2Block(FieldSet): + def createFields(self): + yield textHandler(Bits(self, "blockheader", 48, "Block header"), hexadecimal) + if self["blockheader"].value != 0x314159265359: # pi + raise ParserError("Invalid block header!") + yield textHandler(UInt32(self, "crc32", "CRC32 for this block"), hexadecimal) + yield Bit(self, "randomized", "Is this block randomized?") + yield Bits(self, "orig_bwt_pointer", 24, "Starting pointer into BWT after untransform") + yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used', "Bitmap showing which blocks (representing 16 literals each) are in use") + symbols_used = [] + for index, block_used in enumerate(self["huffman_used_map"].array('block_used')): + if block_used.value: + start_index = index*16 + field = Bzip2Bitmap(self, "huffman_used_bitmap[%i]"%index, 16, start_index, "Bitmap for block %i (literals %i to %i) showing which symbols are in use"%(index, start_index, start_index + 15)) + yield field + for i, used in enumerate(field): + if used.value: + symbols_used.append(start_index + i) + yield Bits(self, "huffman_groups", 3, "Number of different Huffman tables in use") + yield Bits(self, "selectors_used", 15, "Number of times the Huffman tables are switched") + yield Bzip2Selectors(self, "selectors_list", self["huffman_groups"].value) + trees = [] + for group in xrange(self["huffman_groups"].value): + field = Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used)+2) + yield field + trees.append(field.tree) + counter = 0 + rle_run = 0 + selector_tree = None + while True: + if counter%50 == 0: + select_id = self["selectors_list"].array("selector_list")[counter//50].realvalue + selector_tree = trees[select_id] + field = HuffmanCode(self, "huffman_code[]", selector_tree) + if field.realvalue in [0, 1]: + # RLE codes + if rle_run == 0: + 
rle_power = 1 + rle_run += (field.realvalue + 1) * rle_power + rle_power <<= 1 + field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (field.realvalue, chr(symbols_used[0]), rle_run, field.value) + elif field.realvalue == len(symbols_used)+1: + field._description = "Block Terminator (%i) (Huffman Code %i)"%(field.realvalue, field.value) + yield field + break + else: + rle_run = 0 + move_to_front(symbols_used, field.realvalue-1) + field._description = "Literal %r (value %i) (Huffman Code %i)"%(chr(symbols_used[0]), field.realvalue, field.value) + yield field + if field.realvalue == len(symbols_used)+1: + break + counter += 1 + +class Bzip2Stream(FieldSet): + START_BLOCK = 0x314159265359 # pi + END_STREAM = 0x177245385090 # sqrt(pi) + def createFields(self): + end = False + while not end: + marker = self.stream.readBits(self.absolute_address + self.current_size, 48, self.endian) + if marker == self.START_BLOCK: + yield Bzip2Block(self, "block[]") + elif marker == self.END_STREAM: + yield textHandler(Bits(self, "stream_end", 48, "End-of-stream marker"), hexadecimal) + yield textHandler(UInt32(self, "crc32", "CRC32 for entire stream"), hexadecimal) + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) + end = True + else: + raise ParserError("Invalid marker 0x%02X!"%marker) + class Bzip2Parser(Parser): PARSER_TAGS = { "id": "bzip2", @@ -37,7 +187,7 @@ class Bzip2Parser(Parser): "magic": (('BZh', 0),), "description": "bzip2 archive" } - endian = LITTLE_ENDIAN + endian = BIG_ENDIAN def validate(self): if self.stream.readBytes(0, 3) != 'BZh': @@ -50,18 +200,6 @@ class Bzip2Parser(Parser): yield String(self, "id", 3, "Identifier (BZh)", charset="ASCII") yield Character(self, "blocksize", "Block size (KB of memory needed to uncompress)") - yield UInt8(self, "blockheader", "Block header") - if self["blockheader"].value == 0x17: - yield String(self, "id2", 4, "Identifier2 (re8P)", 
charset="ASCII") - yield UInt8(self, "id3", "Identifier3 (0x90)") - elif self["blockheader"].value == 0x31: - yield String(self, "id2", 5, "Identifier 2 (AY&SY)", charset="ASCII") - if self["id2"].value != "AY&SY": - raise ParserError("Invalid identifier 2 (AY&SY)!") - else: - raise ParserError("Invalid block header!") - yield textHandler(UInt32(self, "crc32", "CRC32"), hexadecimal) - if self._size is None: # TODO: is it possible to handle piped input? raise NotImplementedError @@ -73,7 +211,7 @@ class Bzip2Parser(Parser): break else: filename = None - data = Bytes(self, "file", size) + data = Bzip2Stream(self, "file", size=size*8) if has_deflate: CompressedField(self, Bunzip2) def createInputStream(**args): diff --git a/lib/hachoir_parser/archive/cab.py b/lib/hachoir_parser/archive/cab.py index ef6ab7c7..66c0eec1 100644 --- a/lib/hachoir_parser/archive/cab.py +++ b/lib/hachoir_parser/archive/cab.py @@ -1,18 +1,24 @@ """ Microsoft Cabinet (CAB) archive. -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation date: 31 january 2007 -""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Enum, +- Microsoft Cabinet SDK + http://msdn2.microsoft.com/en-us/library/ms974336.aspx +""" +from __future__ import absolute_import +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, CString, String, - UInt16, UInt32, Bit, Bits, PaddingBits, NullBits, + UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits, DateTimeMSDOS32, RawBytes) -from lib.hachoir_parser.common.msdos import MSDOSFileAttr16 -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import paddingSize +from hachoir_core.stream import StringInputStream +from hachoir_parser.archive.lzx import LZXStream, 
lzx_decompress +from hachoir_parser.archive.zlib import DeflateBlock MAX_NB_FOLDER = 30 @@ -26,38 +32,54 @@ COMPRESSION_NAME = { class Folder(FieldSet): def createFields(self): - yield UInt32(self, "off_data", "Offset of data") - yield UInt16(self, "cf_data") + yield UInt32(self, "offset", "Offset to data (from file start)") + yield UInt16(self, "data_blocks", "Number of data blocks which are in this cabinet") yield Enum(Bits(self, "compr_method", 4, "Compression method"), COMPRESSION_NAME) - yield Bits(self, "compr_level", 5, "Compression level") - yield PaddingBits(self, "padding", 7) + if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level + yield PaddingBits(self, "padding[]", 4) + yield Bits(self, "compr_level", 5, "Compression level") + yield PaddingBits(self, "padding[]", 3) + else: + yield PaddingBits(self, "padding[]", 12) + if self["../flags/has_reserved"].value and self["../reserved_folder_size"].value: + yield RawBytes(self, "reserved_folder", self["../reserved_folder_size"].value, "Per-folder reserved area") def createDescription(self): text= "Folder: compression %s" % self["compr_method"].display - if self["compr_method"].value != COMPRESSION_NONE: - text += " (level %u)" % self["compr_level"].value + if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level + text += " (level %u: window size %u)" % (self["compr_level"].value, 2**self["compr_level"].value) return text +class CabFileAttributes(FieldSet): + def createFields(self): + yield Bit(self, "readonly") + yield Bit(self, "hidden") + yield Bit(self, "system") + yield Bits(self, "reserved[]", 2) + yield Bit(self, "archive", "Has the file been modified since the last backup?") + yield Bit(self, "exec", "Run file after extraction?") + yield Bit(self, "name_is_utf", "Is the filename using UTF-8?") + yield Bits(self, "reserved[]", 8) + class File(FieldSet): def createFields(self): yield filesizeHandler(UInt32(self, "filesize", "Uncompressed file size")) - 
yield UInt32(self, "offset", "File offset after decompression") - yield UInt16(self, "iFolder", "file control id") + yield UInt32(self, "folder_offset", "File offset in uncompressed folder") + yield Enum(UInt16(self, "folder_index", "Containing folder ID (index)"), { + 0xFFFD:"Folder continued from previous cabinet (real folder ID = 0)", + 0xFFFE:"Folder continued to next cabinet (real folder ID = %i)" % (self["../nb_folder"].value - 1), + 0xFFFF:"Folder spanning previous, current and next cabinets (real folder ID = 0)"}) yield DateTimeMSDOS32(self, "timestamp") - yield MSDOSFileAttr16(self, "attributes") - yield CString(self, "filename", charset="ASCII") + yield CabFileAttributes(self, "attributes") + if self["attributes/name_is_utf"].value: + yield CString(self, "filename", charset="UTF-8") + else: + yield CString(self, "filename", charset="ASCII") def createDescription(self): return "File %s (%s)" % ( self["filename"].display, self["filesize"].display) -class Reserved(FieldSet): - def createFields(self): - yield UInt32(self, "size") - size = self["size"].value - if size: - yield RawBytes(self, "data", size) - class Flags(FieldSet): static_size = 16 def createFields(self): @@ -66,6 +88,111 @@ class Flags(FieldSet): yield Bit(self, "has_reserved") yield NullBits(self, "padding", 13) +class FragmentGroup: + def __init__(self, parser): + self.items = [] + self.parser = parser + self.args = {} + + def add(self, item): + self.items.append(item) + + def createInputStream(self): + # FIXME: Use lazy stream creation + data = [] + for item in self.items: + data.append( item["rawdata"].value ) + data = "".join(data) + + # FIXME: Use smarter code to send arguments + self.args["compr_level"] = self.items[0].parent.parent.folder["compr_level"].value + tags = {"class": self.parser, "args": self.args} + tags = tags.iteritems() + return StringInputStream(data, "", tags=tags) + +class CustomFragment(FieldSet): + def __init__(self, parent, name, size, parser, description=None, 
group=None): + FieldSet.__init__(self, parent, name, description, size=size) + if not group: + group = FragmentGroup(parser) + self.field_size = size + self.group = group + self.group.add(self) + + def createFields(self): + yield RawBytes(self, "rawdata", self.field_size//8) + + def _createInputStream(self, **args): + return self.group.createInputStream() + +class DataBlock(FieldSet): + def __init__(self, *args, **kwargs): + FieldSet.__init__(self, *args, **kwargs) + size = (self["size"].value + 8) * 8 # +8 for header values + if self["/flags/has_reserved"].value: + size += self["/reserved_data_size"].value * 8 + self._size = size + + def createFields(self): + yield textHandler(UInt32(self, "crc32"), hexadecimal) + yield UInt16(self, "size") + yield UInt16(self, "uncompressed_size", "If this is 0, this block is continued in a subsequent cabinet") + if self["/flags/has_reserved"].value and self["/reserved_data_size"].value: + yield RawBytes(self, "reserved_data", self["/reserved_data_size"].value, "Per-datablock reserved area") + compr_method = self.parent.folder["compr_method"].value + if compr_method == 0: # Uncompressed + yield RawBytes(self, "data", self["size"].value, "Folder Data") + self.parent.uncompressed_data += self["data"].value + elif compr_method == 1: # MSZIP + yield String(self, "mszip_signature", 2, "MSZIP Signature (CK)") + yield DeflateBlock(self, "deflate_block", self.parent.uncompressed_data) + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) + self.parent.uncompressed_data = self["deflate_block"].uncomp_data + elif compr_method == 2: # Quantum + yield RawBytes(self, "compr_data", self["size"].value, "Compressed Folder Data") + elif compr_method == 3: # LZX + group = getattr(self.parent.folder, "lzx_group", None) + field = CustomFragment(self, "data", self["size"].value*8, LZXStream, "LZX data fragment", group) + self.parent.folder.lzx_group = field.group + yield field + +class 
FolderParser(Parser): + endian = LITTLE_ENDIAN + def createFields(self): + for file in sorted(self.files, key=lambda x:x["folder_offset"].value): + padding = self.seekByte(file["folder_offset"].value) + if padding: + yield padding + yield RawBytes(self, "file[]", file["filesize"].value, file.description) + +class FolderData(FieldSet): + def __init__(self, parent, name, folder, files, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + def createInputStream(cis, source=None, **args): + stream = cis(source=source) + tags = args.setdefault("tags",[]) + tags.extend(stream.tags) + tags.append(( "class", FolderParser )) + tags.append(( "args", {'files': files} )) + for unused in self: + pass + if folder["compr_method"].value == 3: # LZX + self.uncompressed_data = lzx_decompress(self["block[0]/data"].getSubIStream(), folder["compr_level"].value) + return StringInputStream(self.uncompressed_data, source=source, **args) + self.setSubIStream(createInputStream) + self.files = files + self.folder = folder # Folder fieldset + + def createFields(self): + self.uncompressed_data = "" + for index in xrange(self.folder["data_blocks"].value): + block = DataBlock(self, "block[]") + for i in block: + pass + yield block + class CabFile(Parser): endian = LITTLE_ENDIAN MAGIC = "MSCF" @@ -82,8 +209,8 @@ class CabFile(Parser): def validate(self): if self.stream.readBytes(0, 4) != self.MAGIC: return "Invalid magic" - if self["cab_version"].value != 0x0103: - return "Unknown version (%s)" % self["cab_version"].display + if self["major_version"].value != 1 or self["minor_version"].value != 3: + return "Unknown version (%i.%i)" % (self["major_version"].value, self["minor_version"].value) if not (1 <= self["nb_folder"].value <= MAX_NB_FOLDER): return "Invalid number of folder (%s)" % self["nb_folder"].value return True @@ -95,26 +222,54 @@ class CabFile(Parser): yield textHandler(UInt32(self, "fld_checksum", "Folders checksum (0 if not used)"), hexadecimal) yield 
UInt32(self, "off_file", "Offset of first file") yield textHandler(UInt32(self, "files_checksum", "Files checksum (0 if not used)"), hexadecimal) - yield textHandler(UInt16(self, "cab_version", "Cabinet version"), hexadecimal) + yield UInt8(self, "minor_version", "Minor version (should be 3)") + yield UInt8(self, "major_version", "Major version (should be 1)") yield UInt16(self, "nb_folder", "Number of folders") yield UInt16(self, "nb_files", "Number of files") yield Flags(self, "flags") yield UInt16(self, "setid") - yield UInt16(self, "number", "Zero-based cabinet number") + yield UInt16(self, "cabinet_serial", "Zero-based cabinet number") - # --- TODO: Support flags if self["flags/has_reserved"].value: - yield Reserved(self, "reserved") - #(3) Previous cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASPREV - #(4) Previous disk name, if CAB_HEADER.flags & CAB_FLAG_HASPREV - #(5) Next cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT - #(6) Next disk name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT - # ---- + yield UInt16(self, "reserved_header_size", "Size of per-cabinet reserved area") + yield UInt8(self, "reserved_folder_size", "Size of per-folder reserved area") + yield UInt8(self, "reserved_data_size", "Size of per-datablock reserved area") + if self["reserved_header_size"].value: + yield RawBytes(self, "reserved_header", self["reserved_header_size"].value, "Per-cabinet reserved area") + if self["flags/has_previous"].value: + yield CString(self, "previous_cabinet", "File name of previous cabinet", charset="ASCII") + yield CString(self, "previous_disk", "Description of disk/media on which previous cabinet resides", charset="ASCII") + if self["flags/has_next"].value: + yield CString(self, "next_cabinet", "File name of next cabinet", charset="ASCII") + yield CString(self, "next_disk", "Description of disk/media on which next cabinet resides", charset="ASCII") + folders = [] + files = [] for index in xrange(self["nb_folder"].value): - yield Folder(self, "folder[]") + 
folder = Folder(self, "folder[]") + yield folder + folders.append(folder) for index in xrange(self["nb_files"].value): - yield File(self, "file[]") + file = File(self, "file[]") + yield file + files.append(file) + + folders = sorted(enumerate(folders), key=lambda x:x[1]["offset"].value) + + for i in xrange(len(folders)): + index, folder = folders[i] + padding = self.seekByte(folder["offset"].value) + if padding: + yield padding + files = [] + for file in files: + if file["folder_index"].value == index: + files.append(file) + if i+1 == len(folders): + size = (self.size // 8) - folder["offset"].value + else: + size = (folders[i+1][1]["offset"].value) - folder["offset"].value + yield FolderData(self, "folder_data[%i]" % index, folder, files, size=size*8) end = self.seekBit(self.size, "endraw") if end: diff --git a/lib/hachoir_parser/archive/gzip_parser.py b/lib/hachoir_parser/archive/gzip_parser.py index 5f63cbc1..c0820332 100644 --- a/lib/hachoir_parser/archive/gzip_parser.py +++ b/lib/hachoir_parser/archive/gzip_parser.py @@ -4,14 +4,14 @@ GZIP archive parser. Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( UInt8, UInt16, UInt32, Enum, TimestampUnix32, Bit, CString, SubFile, NullBits, Bytes, RawBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.deflate import Deflate +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.deflate import Deflate class GzipParser(Parser): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/archive/lzx.py b/lib/hachoir_parser/archive/lzx.py new file mode 100644 index 00000000..39f5a6ef --- /dev/null +++ b/lib/hachoir_parser/archive/lzx.py @@ -0,0 +1,267 @@ +"""LZX data stream parser. 
+ +Also includes a decompression function (slow!!) which can decompress +LZX data stored in a Hachoir stream. + +Author: Robert Xiao +Creation date: July 18, 2007 +""" +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, + UInt32, Bit, Bits, PaddingBits, + RawBytes, ParserError) +from hachoir_core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import paddingSize, alignValue +from hachoir_parser.archive.zlib import build_tree, HuffmanCode, extend_data +from hachoir_core.bits import str2long +import new # for instancemethod + +class LZXPreTreeEncodedTree(FieldSet): + def __init__(self, parent, name, num_elements, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.num_elements = num_elements + + def createFields(self): + for i in xrange(20): + yield Bits(self, "pretree_lengths[]", 4) + pre_tree = build_tree([self['pretree_lengths[%d]'%x].value for x in xrange(20)]) + if not hasattr(self.root, "lzx_tree_lengths_"+self.name): + self.lengths = [0] * self.num_elements + setattr(self.root, "lzx_tree_lengths_"+self.name, self.lengths) + else: + self.lengths = getattr(self.root, "lzx_tree_lengths_"+self.name) + i = 0 + while i < self.num_elements: + field = HuffmanCode(self, "tree_code[]", pre_tree) + if field.realvalue <= 16: + self.lengths[i] = (self.lengths[i] - field.realvalue) % 17 + field._description = "Literal tree delta length %i (new length value %i for element %i)" % ( + field.realvalue, self.lengths[i], i) + i += 1 + yield field + elif field.realvalue == 17: + field._description = "Tree Code 17: Zeros for 4-19 elements" + yield field + extra = Bits(self, "extra[]", 4) + zeros = 4 + extra.value + extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1) + yield extra + self.lengths[i:i+zeros] = [0] * zeros + i += zeros + elif field.realvalue == 18: + field._description = "Tree Code 18: Zeros for 20-51 elements" + yield field + extra = 
Bits(self, "extra[]", 5) + zeros = 20 + extra.value + extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1) + yield extra + self.lengths[i:i+zeros] = [0] * zeros + i += zeros + elif field.realvalue == 19: + field._description = "Tree Code 19: Same code for 4-5 elements" + yield field + extra = Bits(self, "extra[]", 1) + run = 4 + extra.value + extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (run, i, i+run-1) + yield extra + newfield = HuffmanCode(self, "tree_code[]", pre_tree) + assert newfield.realvalue <= 16 + newfield._description = "Literal tree delta length %i (new length value %i for elements %i through %i)" % ( + newfield.realvalue, self.lengths[i], i, i+run-1) + self.lengths[i:i+run] = [(self.lengths[i] - newfield.realvalue) % 17] * run + i += run + yield newfield + +class LZXBlock(FieldSet): + WINDOW_SIZE = {15:30, + 16:32, + 17:34, + 18:36, + 19:38, + 20:42, + 21:50} + POSITION_SLOTS = {0:(0,0,0), + 1:(1,1,0), + 2:(2,2,0), + 3:(3,3,0), + 4:(4,5,1), + 5:(6,7,1), + 6:(8,11,2), + 7:(12,15,2), + 8:(16,23,3), + 9:(24,31,3), + 10:(32,47,4), + 11:(48,63,4), + 12:(64,95,5), + 13:(96,127,5), + 14:(128,191,6), + 15:(192,255,6), + 16:(256,383,7), + 17:(384,511,7), + 18:(512,767,8), + 19:(768,1023,8), + 20:(1024,1535,9), + 21:(1536,2047,9), + 22:(2048,3071,10), + 23:(3072,4095,10), + 24:(4096,6143,11), + 25:(6144,8191,11), + 26:(8192,12287,12), + 27:(12288,16383,12), + 28:(16384,24575,13), + 29:(24576,32767,13), + 30:(32768,49151,14), + 31:(49152,65535,14), + 32:(65536,98303,15), + 33:(98304,131071,15), + 34:(131072,196607,16), + 35:(196608,262143,16), + 36:(262144,393215,17), + 37:(393216,524287,17), + 38:(524288,655359,17), + 39:(655360,786431,17), + 40:(786432,917503,17), + 41:(917504,1048575,17), + 42:(1048576,1179647,17), + 43:(1179648,1310719,17), + 44:(1310720,1441791,17), + 45:(1441792,1572863,17), + 46:(1572864,1703935,17), + 47:(1703936,1835007,17), + 
48:(1835008,1966079,17), + 49:(1966080,2097151,17), + } + def createFields(self): + yield Bits(self, "block_type", 3) + yield Bits(self, "block_size", 24) + self.uncompressed_size = self["block_size"].value + self.compression_level = self.root.compr_level + self.window_size = self.WINDOW_SIZE[self.compression_level] + self.block_type = self["block_type"].value + curlen = len(self.parent.uncompressed_data) + if self.block_type in (1, 2): # Verbatim or aligned offset block + if self.block_type == 2: + for i in xrange(8): + yield Bits(self, "aligned_len[]", 3) + aligned_tree = build_tree([self['aligned_len[%d]'%i].value for i in xrange(8)]) + yield LZXPreTreeEncodedTree(self, "main_tree_start", 256) + yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8) + main_tree = build_tree(self["main_tree_start"].lengths + self["main_tree_rest"].lengths) + yield LZXPreTreeEncodedTree(self, "length_tree", 249) + length_tree = build_tree(self["length_tree"].lengths) + current_decoded_size = 0 + while current_decoded_size < self.uncompressed_size: + if (curlen+current_decoded_size) % 32768 == 0 and (curlen+current_decoded_size) != 0: + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + field = HuffmanCode(self, "main_code[]", main_tree) + if field.realvalue < 256: + field._description = "Literal value %r" % chr(field.realvalue) + current_decoded_size += 1 + self.parent.uncompressed_data += chr(field.realvalue) + yield field + continue + position_header, length_header = divmod(field.realvalue - 256, 8) + info = self.POSITION_SLOTS[position_header] + if info[2] == 0: + if info[0] == 0: + position = self.parent.r0 + field._description = "Position Slot %i, Position [R0] (%i)" % (position_header, position) + elif info[0] == 1: + position = self.parent.r1 + self.parent.r1 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position [R1] (%i)" % (position_header, 
position) + elif info[0] == 2: + position = self.parent.r2 + self.parent.r2 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position [R2] (%i)" % (position_header, position) + else: + position = info[0] - 2 + self.parent.r2 = self.parent.r1 + self.parent.r1 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position %i" % (position_header, position) + else: + field._description = "Position Slot %i, Positions %i to %i" % (position_header, info[0] - 2, info[1] - 2) + if length_header == 7: + field._description += ", Length Values 9 and up" + yield field + length_field = HuffmanCode(self, "length_code[]", length_tree) + length = length_field.realvalue + 9 + length_field._description = "Length Code %i, total length %i" % (length_field.realvalue, length) + yield length_field + else: + field._description += ", Length Value %i (Huffman Code %i)"%(length_header + 2, field.value) + yield field + length = length_header + 2 + if info[2]: + if self.block_type == 1 or info[2] < 3: # verbatim + extrafield = Bits(self, "position_extra[%s" % field.name.split('[')[1], info[2]) + position = extrafield.value + info[0] - 2 + extrafield._description = "Position Extra Bits (%i), total position %i"%(extrafield.value, position) + yield extrafield + else: # aligned offset + position = info[0] - 2 + if info[2] > 3: + extrafield = Bits(self, "position_verbatim[%s" % field.name.split('[')[1], info[2]-3) + position += extrafield.value*8 + extrafield._description = "Position Verbatim Bits (%i), added position %i"%(extrafield.value, extrafield.value*8) + yield extrafield + if info[2] >= 3: + extrafield = HuffmanCode(self, "position_aligned[%s" % field.name.split('[')[1], aligned_tree) + position += extrafield.realvalue + extrafield._description = "Position Aligned Bits (%i), total position %i"%(extrafield.realvalue, position) + yield extrafield + self.parent.r2 = self.parent.r1 + self.parent.r1 = self.parent.r0 + 
self.parent.r0 = position + self.parent.uncompressed_data = extend_data(self.parent.uncompressed_data, length, position) + current_decoded_size += length + elif self.block_type == 3: # Uncompressed block + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + else: + yield PaddingBits(self, "padding[]", 16) + self.endian = LITTLE_ENDIAN + yield UInt32(self, "r[]", "New value of R0") + yield UInt32(self, "r[]", "New value of R1") + yield UInt32(self, "r[]", "New value of R2") + self.parent.r0 = self["r[0]"].value + self.parent.r1 = self["r[1]"].value + self.parent.r2 = self["r[2]"].value + yield RawBytes(self, "data", self.uncompressed_size) + self.parent.uncompressed_data+=self["data"].value + if self["block_size"].value % 2: + yield PaddingBits(self, "padding", 8) + else: + raise ParserError("Unknown block type %d!"%self.block_type) + +class LZXStream(Parser): + endian = MIDDLE_ENDIAN + def createFields(self): + self.uncompressed_data = "" + self.r0 = 1 + self.r1 = 1 + self.r2 = 1 + yield Bit(self, "filesize_indicator") + if self["filesize_indicator"].value: + yield UInt32(self, "filesize") + while self.current_size < self.size: + block = LZXBlock(self, "block[]") + yield block + if self.size - self.current_size < 16: + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + break + +def lzx_decompress(stream, window_bits): + data = LZXStream(stream) + data.compr_level = window_bits + for unused in data: + pass + return data.uncompressed_data diff --git a/lib/hachoir_parser/archive/mar.py b/lib/hachoir_parser/archive/mar.py index 05be1cbd..6a7e31a7 100644 --- a/lib/hachoir_parser/archive/mar.py +++ b/lib/hachoir_parser/archive/mar.py @@ -7,10 +7,10 @@ Creation date: 2007-03-04 MAX_NB_FILE = 100000 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, String, UInt32, SubFile -from 
lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, String, UInt32, SubFile +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal class FileIndex(FieldSet): static_size = 68*8 diff --git a/lib/hachoir_parser/archive/mozilla_ar.py b/lib/hachoir_parser/archive/mozilla_ar.py new file mode 100644 index 00000000..5b18f434 --- /dev/null +++ b/lib/hachoir_parser/archive/mozilla_ar.py @@ -0,0 +1,60 @@ +"""MAR (Mozilla ARchive) parser + +Author: Robert Xiao +Creation date: July 10, 2007 + +""" + +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, + String, CString, UInt32, RawBytes) +from hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_core.tools import humanUnixAttributes +from hachoir_parser import HachoirParser + +class IndexEntry(FieldSet): + def createFields(self): + yield UInt32(self, "offset", "Offset in bytes relative to start of archive") + yield filesizeHandler(UInt32(self, "length", "Length in bytes")) + yield displayHandler(UInt32(self, "flags"), humanUnixAttributes) + yield CString(self, "name", "Filename (byte array)") + + def createDescription(self): + return 'File %s, Size %s, Mode %s'%( + self["name"].display, self["length"].display, self["flags"].display) + +class MozillaArchive(HachoirParser, RootSeekableFieldSet): + MAGIC = "MAR1" + PARSER_TAGS = { + "id": "mozilla_ar", + "category": "archive", + "file_ext": ("mar",), + "min_size": (8+4+13)*8, # Header, Index Header, 1 Index Entry + "magic": ((MAGIC, 0),), + "description": "Mozilla Archive", + } + endian = BIG_ENDIAN + + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + + 
def validate(self): + if self.stream.readBytes(0, 4) != self.MAGIC: + return "Invalid magic" + return True + + def createFields(self): + yield String(self, "magic", 4, "File signature (MAR1)", charset="ASCII") + yield UInt32(self, "index_offset", "Offset to index relative to file start") + self.seekByte(self["index_offset"].value, False) + yield UInt32(self, "index_size", "size of index in bytes") + current_index_size = 0 # bytes + while current_index_size < self["index_size"].value: + # plus 4 compensates for index_size + self.seekByte(self["index_offset"].value + current_index_size + 4, False) + entry = IndexEntry(self, "index_entry[]") + yield entry + current_index_size += entry.size // 8 + self.seekByte(entry["offset"].value, False) + yield RawBytes(self, "file[]", entry["length"].value) diff --git a/lib/hachoir_parser/archive/rar.py b/lib/hachoir_parser/archive/rar.py index 166ae041..2be5887c 100644 --- a/lib/hachoir_parser/archive/rar.py +++ b/lib/hachoir_parser/archive/rar.py @@ -5,15 +5,15 @@ Status: can only read higher-level attructures Author: Christophe Gisquet """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, Bits, Enum, UInt8, UInt16, UInt32, UInt64, String, TimeDateMSDOS32, NullBytes, NullBits, RawBytes) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.msdos import MSDOSFileAttr32 +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.msdos import MSDOSFileAttr32 MAX_FILESIZE = 1000 * 1024 * 1024 diff --git a/lib/hachoir_parser/archive/rpm.py b/lib/hachoir_parser/archive/rpm.py index 60235755..ccb8d2e5 100644 --- a/lib/hachoir_parser/archive/rpm.py +++ 
b/lib/hachoir_parser/archive/rpm.py @@ -4,14 +4,14 @@ RPM archive parser. Author: Victor Stinner, 1st December 2005. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, UInt64, Enum, NullBytes, Bytes, RawBytes, SubFile, Character, CString, String) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.archive.gzip_parser import GzipParser -from lib.hachoir_parser.archive.bzip2_parser import Bzip2Parser +from hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.archive.gzip_parser import GzipParser +from hachoir_parser.archive.bzip2_parser import Bzip2Parser class ItemContent(FieldSet): format_type = { diff --git a/lib/hachoir_parser/archive/sevenzip.py b/lib/hachoir_parser/archive/sevenzip.py index 91f9716c..7a0148f5 100644 --- a/lib/hachoir_parser/archive/sevenzip.py +++ b/lib/hachoir_parser/archive/sevenzip.py @@ -9,13 +9,13 @@ Author: Olivier SCHWAB Creation date: 6 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (Field, FieldSet, ParserError, GenericVector, Enum, UInt8, UInt32, UInt64, Bytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class SZUInt64(Field): """ diff --git a/lib/hachoir_parser/archive/tar.py b/lib/hachoir_parser/archive/tar.py index bd2dc1ad..08a9040b 100644 --- a/lib/hachoir_parser/archive/tar.py +++ b/lib/hachoir_parser/archive/tar.py @@ -4,11 +4,11 @@ Tar archive parser. 
Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, UInt8, SubFile, String, NullBytes) -from lib.hachoir_core.tools import humanFilesize, paddingSize, timestampUNIX -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFilesize, paddingSize, timestampUNIX +from hachoir_core.endian import BIG_ENDIAN import re class FileEntry(FieldSet): diff --git a/lib/hachoir_parser/archive/zip.py b/lib/hachoir_parser/archive/zip.py index 98a41293..8271ac93 100644 --- a/lib/hachoir_parser/archive/zip.py +++ b/lib/hachoir_parser/archive/zip.py @@ -5,18 +5,18 @@ Status: can read most important headers Authors: Christophe Gisquet and Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Bit, Bits, Enum, TimeDateMSDOS32, SubFile, UInt8, UInt16, UInt32, UInt64, String, PascalString16, RawBytes) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.deflate import Deflate +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.tools import makeUnicode +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.deflate import Deflate MAX_FILESIZE = 1000 * 1024 * 1024 @@ -80,16 +80,7 @@ class ZipGeneralFlags(FieldSet): # Need the compression info from the parent, and that is the byte following method = self.stream.readBits(self.absolute_address+16, 16, LITTLE_ENDIAN) - yield Bits(self, "unused[]", 2, "Unused") - yield Bit(self, "encrypted_central_dir", 
"Selected data values in the Local Header are masked") - yield Bit(self, "incomplete", "Reserved by PKWARE for enhanced compression.") - yield Bit(self, "uses_unicode", "Filename and comments are in UTF-8") - yield Bits(self, "unused[]", 4, "Unused") - yield Bit(self, "strong_encrypt", "Strong encryption (version >= 50)") - yield Bit(self, "is_patched", "File is compressed with patched data?") - yield Bit(self, "enhanced_deflate", "Reserved for use with method 8") - yield Bit(self, "has_descriptor", - "Compressed data followed by descriptor?") + yield Bit(self, "is_encrypted", "File is encrypted?") if method == 6: yield Bit(self, "use_8k_sliding", "Use 8K sliding dictionary (instead of 4K)") yield Bit(self, "use_3shannon", "Use a 3 Shannon-Fano tree (instead of 2 Shannon-Fano)") @@ -106,7 +97,16 @@ class ZipGeneralFlags(FieldSet): yield Bit(self, "unused[]") else: yield Bits(self, "compression_info", 2) - yield Bit(self, "is_encrypted", "File is encrypted?") + yield Bit(self, "has_descriptor", + "Compressed data followed by descriptor?") + yield Bit(self, "enhanced_deflate", "Reserved for use with method 8") + yield Bit(self, "is_patched", "File is compressed with patched data?") + yield Bit(self, "strong_encrypt", "Strong encryption (version >= 50)") + yield Bits(self, "unused[]", 4, "Unused") + yield Bit(self, "uses_unicode", "Filename and comments are in UTF-8") + yield Bit(self, "incomplete", "Reserved by PKWARE for enhanced compression.") + yield Bit(self, "encrypted_central_dir", "Selected data values in the Local Header are masked") + yield Bits(self, "unused[]", 2, "Unused") class ExtraField(FieldSet): EXTRA_FIELD_ID = { @@ -141,7 +141,12 @@ class ExtraField(FieldSet): size = UInt16(self, "field_data_size", "Extra field data size") yield size if size.value > 0: - yield RawBytes(self, "field_data", size, "Unknown field data") + yield RawBytes(self, "field_data", size.value, "Unknown field data") + +class ExtraFields(FieldSet): + def createFields(self): + 
while self.current_size < self.size: + yield ExtraField(self, "extra[]") def ZipStartCommonFields(self): yield ZipVersion(self, "version_needed", "Version needed") @@ -179,8 +184,8 @@ class ZipCentralDirectory(FieldSet): yield String(self, "filename", self["filename_length"].value, "Filename", charset=charset) if 0 < self["extra_length"].value: - yield RawBytes(self, "extra", self["extra_length"].value, - "Extra fields") + yield ExtraFields(self, "extra", size=self["extra_length"].value*8, + description="Extra fields") if 0 < self["comment_length"].value: yield String(self, "comment", self["comment_length"].value, "Comment", charset=charset) @@ -278,14 +283,15 @@ class FileEntry(FieldSet): yield filename self.filename = filename.value if self["extra_length"].value: - yield RawBytes(self, "extra", self["extra_length"].value, "Extra") + yield ExtraFields(self, "extra", size=self["extra_length"].value*8, + description="Extra fields") size = self["compressed_size"].value if size > 0: yield self.data(size) elif self["flags/incomplete"].value: for field in self.resync(): yield field - if self["flags/has_descriptor"].value: + if self["flags/has_descriptor"].value and self['crc32'].value == 0: yield ZipDataDescriptor(self, "data_desc", "Data descriptor") def createDescription(self): diff --git a/lib/hachoir_parser/archive/zlib.py b/lib/hachoir_parser/archive/zlib.py new file mode 100644 index 00000000..bde94b1d --- /dev/null +++ b/lib/hachoir_parser/archive/zlib.py @@ -0,0 +1,301 @@ +"""Detailed ZLIB parser + +Author: Robert Xiao +Creation date: July 9 2007 + +""" + +from hachoir_parser import Parser +from hachoir_core.field import (Bit, Bits, Field, Int16, UInt32, + Enum, FieldSet, GenericFieldSet, + PaddingBits, ParserError, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import paddingSize, alignValue + +def extend_data(data, length, offset): + """Extend data using a length 
and an offset.""" + if length >= offset: + new_data = data[-offset:] * (alignValue(length, offset) // offset) + return data + new_data[:length] + else: + return data + data[-offset:-offset+length] + +def build_tree(lengths): + """Build a Huffman tree from a list of lengths. + The ith entry of the input list is the length of the Huffman code corresponding to + integer i, or 0 if the integer i is unused.""" + max_length = max(lengths) + 1 + bit_counts = [0]*max_length + next_code = [0]*max_length + tree = {} + for i in lengths: + if i: + bit_counts[i] += 1 + code = 0 + for i in xrange(1, len(bit_counts)): + next_code[i] = code = (code + bit_counts[i-1]) << 1 + for i, ln in enumerate(lengths): + if ln: + tree[(ln, next_code[ln])] = i + next_code[ln] += 1 + return tree + +class HuffmanCode(Field): + """Huffman code. Uses tree parameter as the Huffman tree.""" + def __init__(self, parent, name, tree, description=None): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + while (self.size, value) not in tree: + if self.size > 256: + raise ParserError("Huffman code too long!") + bit = stream.readBits(addr, 1, endian) + value <<= 1 + value += bit + self._size += 1 + addr += 1 + self.huffvalue = value + self.realvalue = tree[(self.size, value)] + def createValue(self): + return self.huffvalue + +class DeflateBlock(FieldSet): + # code: (min, max, extrabits) + LENGTH_SYMBOLS = {257:(3,3,0), + 258:(4,4,0), + 259:(5,5,0), + 260:(6,6,0), + 261:(7,7,0), + 262:(8,8,0), + 263:(9,9,0), + 264:(10,10,0), + 265:(11,12,1), + 266:(13,14,1), + 267:(15,16,1), + 268:(17,18,1), + 269:(19,22,2), + 270:(23,26,2), + 271:(27,30,2), + 272:(31,34,2), + 273:(35,42,3), + 274:(43,50,3), + 275:(51,58,3), + 276:(59,66,3), + 277:(67,82,4), + 278:(83,98,4), + 279:(99,114,4), + 280:(115,130,4), + 281:(131,162,5), + 282:(163,194,5), + 283:(195,226,5), + 284:(227,257,5), + 285:(258,258,0) + } + 
DISTANCE_SYMBOLS = {0:(1,1,0), + 1:(2,2,0), + 2:(3,3,0), + 3:(4,4,0), + 4:(5,6,1), + 5:(7,8,1), + 6:(9,12,2), + 7:(13,16,2), + 8:(17,24,3), + 9:(25,32,3), + 10:(33,48,4), + 11:(49,64,4), + 12:(65,96,5), + 13:(97,128,5), + 14:(129,192,6), + 15:(193,256,6), + 16:(257,384,7), + 17:(385,512,7), + 18:(513,768,8), + 19:(769,1024,8), + 20:(1025,1536,9), + 21:(1537,2048,9), + 22:(2049,3072,10), + 23:(3073,4096,10), + 24:(4097,6144,11), + 25:(6145,8192,11), + 26:(8193,12288,12), + 27:(12289,16384,12), + 28:(16385,24576,13), + 29:(24577,32768,13), + } + CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + def __init__(self, parent, name, uncomp_data="", *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.uncomp_data = uncomp_data + + def createFields(self): + yield Bit(self, "final", "Is this the final block?") # BFINAL + yield Enum(Bits(self, "compression_type", 2), # BTYPE + {0:"None", 1:"Fixed Huffman", 2:"Dynamic Huffman", 3:"Reserved"}) + if self["compression_type"].value == 0: # no compression + padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary + if padding: + yield PaddingBits(self, "padding[]", padding) + yield Int16(self, "len") + yield Int16(self, "nlen", "One's complement of len") + if self["len"].value != ~self["nlen"].value: + raise ParserError("len must be equal to the one's complement of nlen!") + if self["len"].value: # null stored blocks produced by some encoders (e.g. 
PIL) + yield RawBytes(self, "data", self["len"].value, "Uncompressed data") + return + elif self["compression_type"].value == 1: # Fixed Huffman + length_tree = {} # (size, huffman code): value + distance_tree = {} + for i in xrange(144): + length_tree[(8, i+48)] = i + for i in xrange(144, 256): + length_tree[(9, i+256)] = i + for i in xrange(256, 280): + length_tree[(7, i-256)] = i + for i in xrange(280, 288): + length_tree[(8, i-88)] = i + for i in xrange(32): + distance_tree[(5, i)] = i + elif self["compression_type"].value == 2: # Dynamic Huffman + yield Bits(self, "huff_num_length_codes", 5, "Number of Literal/Length Codes, minus 257") + yield Bits(self, "huff_num_distance_codes", 5, "Number of Distance Codes, minus 1") + yield Bits(self, "huff_num_code_length_codes", 4, "Number of Code Length Codes, minus 4") + code_length_code_lengths = [0]*19 # confusing variable name... + for i in self.CODE_LENGTH_ORDER[:self["huff_num_code_length_codes"].value+4]: + field = Bits(self, "huff_code_length_code[%i]" % i, 3, "Code lengths for the code length alphabet") + yield field + code_length_code_lengths[i] = field.value + code_length_tree = build_tree(code_length_code_lengths) + length_code_lengths = [] + distance_code_lengths = [] + for numcodes, name, lengths in ( + (self["huff_num_length_codes"].value + 257, "length", length_code_lengths), + (self["huff_num_distance_codes"].value + 1, "distance", distance_code_lengths)): + while len(lengths) < numcodes: + field = HuffmanCode(self, "huff_%s_code[]" % name, code_length_tree) + value = field.realvalue + if value < 16: + prev_value = value + field._description = "Literal Code Length %i (Huffman Code %i)" % (value, field.value) + yield field + lengths.append(value) + else: + info = {16: (3,6,2), + 17: (3,10,3), + 18: (11,138,7)}[value] + if value == 16: + repvalue = prev_value + else: + repvalue = 0 + field._description = "Repeat Code %i, Repeating value (%i) %i to %i times (Huffman Code %i)" % (value, repvalue, info[0], 
info[1], field.value) + yield field + extrafield = Bits(self, "huff_%s_code_extra[%s" % (name, field.name.split('[')[1]), info[2]) + num_repeats = extrafield.value+info[0] + extrafield._description = "Repeat Extra Bits (%i), total repeats %i"%(extrafield.value, num_repeats) + yield extrafield + lengths += [repvalue]*num_repeats + length_tree = build_tree(length_code_lengths) + distance_tree = build_tree(distance_code_lengths) + else: + raise ParserError("Unsupported compression type 3!") + while True: + field = HuffmanCode(self, "length_code[]", length_tree) + value = field.realvalue + if value < 256: + field._description = "Literal Code %r (Huffman Code %i)" % (chr(value), field.value) + yield field + self.uncomp_data += chr(value) + if value == 256: + field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value + yield field + break + elif value > 256: + info = self.LENGTH_SYMBOLS[value] + if info[2] == 0: + field._description = "Length Code %i, Value %i (Huffman Code %i)" % (value, info[0], field.value) + length = info[0] + yield field + else: + field._description = "Length Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value) + yield field + extrafield = Bits(self, "length_extra[%s" % field.name.split('[')[1], info[2]) + length = extrafield.value + info[0] + extrafield._description = "Length Extra Bits (%i), total length %i"%(extrafield.value, length) + yield extrafield + field = HuffmanCode(self, "distance_code[]", distance_tree) + value = field.realvalue + info = self.DISTANCE_SYMBOLS[value] + if info[2] == 0: + field._description = "Distance Code %i, Value %i (Huffman Code %i)" % (value, info[0], field.value) + distance = info[0] + yield field + else: + field._description = "Distance Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value) + yield field + extrafield = Bits(self, "distance_extra[%s" % field.name.split('[')[1], info[2]) + distance = extrafield.value + info[0] + 
extrafield._description = "Distance Extra Bits (%i), total length %i"%(extrafield.value, distance) + yield extrafield + self.uncomp_data = extend_data(self.uncomp_data, length, distance) + +class DeflateData(GenericFieldSet): + endian = LITTLE_ENDIAN + def createFields(self): + uncomp_data = "" + blk=DeflateBlock(self, "compressed_block[]", uncomp_data) + yield blk + uncomp_data = blk.uncomp_data + while not blk["final"].value: + blk=DeflateBlock(self, "compressed_block[]", uncomp_data) + yield blk + uncomp_data = blk.uncomp_data + padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary + if padding: + yield PaddingBits(self, "padding[]", padding) + self.uncompressed_data = uncomp_data + +class ZlibData(Parser): + PARSER_TAGS = { + "id": "zlib", + "category": "archive", + "file_ext": ("zlib",), + "min_size": 8*8, + "description": "ZLIB Data", + } + endian = LITTLE_ENDIAN + + def validate(self): + if self["compression_method"].value != 8: + return "Incorrect compression method" + if ((self["compression_info"].value << 12) + + (self["compression_method"].value << 8) + + (self["flag_compression_level"].value << 6) + + (self["flag_dictionary_present"].value << 5) + + (self["flag_check_bits"].value)) % 31 != 0: + return "Invalid flag check value" + return True + + def createFields(self): + yield Enum(Bits(self, "compression_method", 4), {8:"deflate", 15:"reserved"}) # CM + yield Bits(self, "compression_info", 4, "base-2 log of the window size") # CINFO + yield Bits(self, "flag_check_bits", 5) # FCHECK + yield Bit(self, "flag_dictionary_present") # FDICT + yield Enum(Bits(self, "flag_compression_level", 2), # FLEVEL + {0:"Fastest", 1:"Fast", 2:"Default", 3:"Maximum, Slowest"}) + if self["flag_dictionary_present"].value: + yield textHandler(UInt32(self, "dict_checksum", "ADLER32 checksum of dictionary information"), hexadecimal) + yield DeflateData(self, "data", self.stream, description = "Compressed Data") + yield 
textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal) + +def zlib_inflate(stream, wbits=None, prevdata=""): + if wbits is None or wbits >= 0: + return ZlibData(stream)["data"].uncompressed_data + else: + data = DeflateData(None, "root", stream, "", stream.askSize(None)) + for unused in data: + pass + return data.uncompressed_data diff --git a/lib/hachoir_parser/audio/8svx.py b/lib/hachoir_parser/audio/8svx.py deleted file mode 100644 index 16d0f703..00000000 --- a/lib/hachoir_parser/audio/8svx.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Audio Interchange File Format (AIFF) parser. - -Author: Victor Stinner -Creation: 27 december 2006 -""" - -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, - UInt16, UInt32, Float80, TimestampMac32, - RawBytes, NullBytes, - String, Enum, PascalString32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import alignValue -from lib.hachoir_parser.audio.id3 import ID3v2 - -CODEC_NAME = { - 'ACE2': u"ACE 2-to-1", - 'ACE8': u"ACE 8-to-3", - 'MAC3': u"MAC 3-to-1", - 'MAC6': u"MAC 6-to-1", - 'NONE': u"None", - 'sowt': u"Little-endian, no compression", -} - -class Comment(FieldSet): - def createFields(self): - yield TimestampMac32(self, "timestamp") - yield PascalString32(self, "text") - -def parseText(self): - yield String(self, "text", self["size"].value) - -def parseID3(self): - yield ID3v2(self, "id3v2", size=self["size"].value*8) - -def parseComment(self): - yield UInt16(self, "nb_comment") - for index in xrange(self["nb_comment"].value): - yield Comment(self, "comment[]") - -def parseCommon(self): - yield UInt16(self, "nb_channel") - yield UInt32(self, "nb_sample") - yield UInt16(self, "sample_size") - yield Float80(self, "sample_rate") - yield Enum(String(self, "codec", 4, strip="\0", charset="ASCII"), CODEC_NAME) - -def parseVersion(self): - yield TimestampMac32(self, 
"timestamp") - -def parseSound(self): - yield UInt32(self, "offset") - yield UInt32(self, "block_size") - size = (self.size - self.current_size) // 8 - if size: - yield RawBytes(self, "data", size) - -class Chunk(FieldSet): - TAG_INFO = { - 'COMM': ('common', "Common chunk", parseCommon), - 'COMT': ('comment', "Comment", parseComment), - 'NAME': ('name', "Name", parseText), - 'AUTH': ('author', "Author", parseText), - 'FVER': ('version', "Version", parseVersion), - 'SSND': ('sound', "Sound data", parseSound), - 'ID3 ': ('id3', "ID3", parseID3), - } - - def __init__(self, *args): - FieldSet.__init__(self, *args) - self._size = (8 + alignValue(self["size"].value, 2)) * 8 - tag = self["type"].value - if tag in self.TAG_INFO: - self._name, self._description, self._parser = self.TAG_INFO[tag] - else: - self._parser = None - - def createFields(self): - yield String(self, "type", 4, "Signature (FORM)", charset="ASCII") - yield filesizeHandler(UInt32(self, "size")) - size = self["size"].value - if size: - if self._parser: - for field in self._parser(self): - yield field - if size % 2: - yield NullBytes(self, "padding", 1) - else: - yield RawBytes(self, "data", size) - -class HeightSVX(Parser): - PARSER_TAGS = { - "id": "8svx", - "category": "audio", - "file_ext": ("8svx",), - "mime": (u"audio/x-aiff",), - "min_size": 12*8, - "description": "8SVX (audio) format" - } - endian = BIG_ENDIAN - - def validate(self): - if self.stream.readBytes(0, 4) != "FORM": - return "Invalid signature" - if self.stream.readBytes(8*8, 4) != "8SVX": - return "Invalid type" - return True - - def createFields(self): - yield String(self, "signature", 4, "Signature (FORM)", charset="ASCII") - yield filesizeHandler(UInt32(self, "filesize")) - yield String(self, "type", 4, "Form type (AIFF or AIFC)", charset="ASCII") - while not self.eof: - yield Chunk(self, "chunk[]") - - def createDescription(self): - if self["type"].value == "AIFC": - return "Audio Interchange File Format Compressed (AIFC)" - else: 
- return "Audio Interchange File Format (AIFF)" - - def createContentSize(self): - return self["filesize"].value * 8 - diff --git a/lib/hachoir_parser/audio/__init__.py b/lib/hachoir_parser/audio/__init__.py index 0c6ac749..1cc33a23 100644 --- a/lib/hachoir_parser/audio/__init__.py +++ b/lib/hachoir_parser/audio/__init__.py @@ -1,12 +1,12 @@ -from lib.hachoir_parser.audio.aiff import AiffFile -from lib.hachoir_parser.audio.au import AuFile -from lib.hachoir_parser.audio.itunesdb import ITunesDBFile -from lib.hachoir_parser.audio.midi import MidiFile -from lib.hachoir_parser.audio.mpeg_audio import MpegAudioFile -from lib.hachoir_parser.audio.real_audio import RealAudioFile -from lib.hachoir_parser.audio.xm import XMModule -from lib.hachoir_parser.audio.s3m import S3MModule -from lib.hachoir_parser.audio.s3m import PTMModule -from lib.hachoir_parser.audio.mod import AmigaModule -from lib.hachoir_parser.audio.flac import FlacParser +from hachoir_parser.audio.aiff import AiffFile +from hachoir_parser.audio.au import AuFile +from hachoir_parser.audio.itunesdb import ITunesDBFile +from hachoir_parser.audio.midi import MidiFile +from hachoir_parser.audio.mpeg_audio import MpegAudioFile +from hachoir_parser.audio.real_audio import RealAudioFile +from hachoir_parser.audio.xm import XMModule +from hachoir_parser.audio.s3m import S3MModule +from hachoir_parser.audio.s3m import PTMModule +from hachoir_parser.audio.mod import AmigaModule +from hachoir_parser.audio.flac import FlacParser diff --git a/lib/hachoir_parser/audio/aiff.py b/lib/hachoir_parser/audio/aiff.py index 89c7e61b..d8f41696 100644 --- a/lib/hachoir_parser/audio/aiff.py +++ b/lib/hachoir_parser/audio/aiff.py @@ -5,15 +5,15 @@ Author: Victor Stinner Creation: 27 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, Float80, TimestampMac32, RawBytes, NullBytes, String, 
Enum, PascalString32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import alignValue -from lib.hachoir_parser.audio.id3 import ID3v2 +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import filesizeHandler +from hachoir_core.tools import alignValue +from hachoir_parser.audio.id3 import ID3v2 CODEC_NAME = { 'ACE2': u"ACE 2-to-1", diff --git a/lib/hachoir_parser/audio/au.py b/lib/hachoir_parser/audio/au.py index 59b2f8e2..ab9d9c11 100644 --- a/lib/hachoir_parser/audio/au.py +++ b/lib/hachoir_parser/audio/au.py @@ -5,11 +5,11 @@ Author: Victor Stinner Creation: 12 july 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import UInt32, Enum, String, RawBytes -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import displayHandler, filesizeHandler -from lib.hachoir_core.tools import createDict, humanFrequency +from hachoir_parser import Parser +from hachoir_core.field import UInt32, Enum, String, RawBytes +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_core.tools import createDict, humanFrequency class AuFile(Parser): PARSER_TAGS = { diff --git a/lib/hachoir_parser/audio/flac.py b/lib/hachoir_parser/audio/flac.py index a30c6b00..f739ff70 100644 --- a/lib/hachoir_parser/audio/flac.py +++ b/lib/hachoir_parser/audio/flac.py @@ -9,11 +9,11 @@ Author: Esteban Loiseau Creation date: 2008-04-09 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, String, Bit, Bits, UInt16, UInt24, RawBytes, Enum, NullBytes -from lib.hachoir_core.stream import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.container.ogg import parseVorbisComment +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, String, Bit, Bits, UInt16, UInt24, 
RawBytes, Enum, NullBytes +from hachoir_core.stream import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import createDict +from hachoir_parser.container.ogg import parseVorbisComment class VorbisComment(FieldSet): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/audio/id3.py b/lib/hachoir_parser/audio/id3.py index 9616baa2..3cfda25f 100644 --- a/lib/hachoir_parser/audio/id3.py +++ b/lib/hachoir_parser/audio/id3.py @@ -6,13 +6,13 @@ Informations: http://www.id3.org/ Author: Victor Stinner """ -from lib.hachoir_core.field import (FieldSet, MatchError, ParserError, +from hachoir_core.field import (FieldSet, MatchError, ParserError, Enum, UInt8, UInt24, UInt32, CString, String, RawBytes, Bit, Bits, NullBytes, NullBits) -from lib.hachoir_core.text_handler import textHandler -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.endian import NETWORK_ENDIAN +from hachoir_core.text_handler import textHandler +from hachoir_core.tools import humanDuration +from hachoir_core.endian import NETWORK_ENDIAN class ID3v1(FieldSet): static_size = 128 * 8 diff --git a/lib/hachoir_parser/audio/itunesdb.py b/lib/hachoir_parser/audio/itunesdb.py index 76b4f936..a70d9cb0 100644 --- a/lib/hachoir_parser/audio/itunesdb.py +++ b/lib/hachoir_parser/audio/itunesdb.py @@ -8,13 +8,13 @@ Author: Romain HERAULT Creation date: 19 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, - UInt8, UInt16, UInt32, UInt64, TimestampMac32, - String, Float32, NullBytes, Enum) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, + UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32, + String, Float32, NullBytes, Enum, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanDuration +from 
hachoir_core.text_handler import displayHandler, filesizeHandler list_order={ 1 : "playlist order (manual sort order)", @@ -75,6 +75,9 @@ class DataObject(FieldSet): 51:"Smart Playlist Rules", 52:"Library Playlist Index", 100:"Column info", + 200:"Album name (for album descriptions)", + 201:"Album artist (for album descriptions)", + 202:"Album sort artist (for album descriptions)" } mhod52_sort_index_type_name={ @@ -94,7 +97,7 @@ class DataObject(FieldSet): yield UInt32(self, "header_length", "Header Length") yield UInt32(self, "entry_length", "Entry Length") yield Enum(UInt32(self, "type", "type"),self.type_name) - if(self["type"].value<15): + if(self["type"].value<15) or (self["type"].value >= 200): yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") yield UInt32(self, "position", "Position") @@ -162,7 +165,7 @@ class TrackItem(FieldSet): yield Enum(UInt8(self, "x2_type", "Extended type 2"),self.x2_type_name) yield UInt8(self, "compilation_flag", "Compilation Flag") yield UInt8(self, "rating", "Rating") - yield TimestampMac32(self, "added_date", "Date when the item was added") + yield TimestampMac32(self, "last_modified", "Time of the last modification of the track") yield filesizeHandler(UInt32(self, "size", "Track size in bytes")) yield displayHandler(UInt32(self, "length", "Track length in milliseconds"), humanDuration) yield UInt32(self, "track_number", "Number of this track") @@ -180,23 +183,24 @@ class TrackItem(FieldSet): yield UInt32(self, "disc_number", "disc number in multi disc sets") yield UInt32(self, "total_discs", "Total number of discs in the disc set") yield UInt32(self, "userid", "User ID in the DRM scheme") - yield TimestampMac32(self, "last_modified", "Time of the last modification of the track") + yield TimestampMac32(self, "added_date", "Date when the item was added") yield UInt32(self, "bookmark_time", "Bookmark time for AudioBook") yield UInt64(self, "dbid", "Unique DataBase ID for the song (identical in mhit and in mhii)") 
yield UInt8(self, "checked", "song is checked") yield UInt8(self, "application_rating", "Last Rating before change") yield UInt16(self, "BPM", "BPM of the track") - yield UInt16(self, "artwork_count", "number of artworks fo this item") + yield UInt16(self, "artwork_count", "number of artworks for this item") yield UInt16(self, "unknown[]") yield UInt32(self, "artwork_size", "Total size of artworks in bytes") yield UInt32(self, "unknown[]") yield Float32(self, "sample_rate_2", "Sample Rate express in float") yield UInt32(self, "released_date", "Date of release in Music Store or in Podcast") + yield UInt16(self, "unknown[]") + yield UInt16(self, "explicit_flag[]", "Explicit flag") yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") + yield UInt32(self, "skip_count[]", "Skip Count") + yield TimestampMac32(self, "last_skipped", "Date when the item was last skipped") yield UInt8(self, "has_artwork", "0x01 for track with artwork, 0x02 otherwise") yield UInt8(self, "skip_wen_shuffling", "Skip that track when shuffling") yield UInt8(self, "remember_playback_position", "Remember playback position") @@ -207,11 +211,10 @@ class TrackItem(FieldSet): yield UInt8(self, "played_mark", "Track has been played") yield UInt8(self, "unknown[]") yield UInt32(self, "unknown[]") + yield UInt32(self, "pregap[]", "Number of samples of silence before the song starts") + yield UInt64(self, "sample_count", "Number of samples in the song (only for WAV and AAC files)") yield UInt32(self, "unknown[]") - yield UInt32(self, "sample_count", "Number of samples in the song (only for WAV and AAC files)") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") + yield UInt32(self, "postgap[]", "Number of samples of silence at the end of the song") yield UInt32(self, "unknown[]") yield Enum(UInt32(self, "media_type", "Media Type for video 
iPod"),self.media_type_name) yield UInt32(self, "season_number", "Season Number") @@ -222,6 +225,20 @@ class TrackItem(FieldSet): yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "gapless_data[]","The size in bytes from first Sync Frame until the 8th before the last frame." ) + yield UInt32(self, "unknown[]") + yield UInt16(self, "gaplessTrackFlag[]", "1 if track has gapless data") + yield UInt16(self, "gaplessAlbumFlag[]", "1 if track uses crossfading in iTunes") + yield RawBytes(self, "unknown[]", 20) + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt16(self, "album_id[]", "Album ID (used to link tracks with MHIAs)") + yield RawBytes(self, "unknown[]", 52) + yield UInt32(self, "mhii_link[]", "Artwork ID (used to link tracks with MHIIs)") padding = self.seekByte(self["header_length"].value, "header padding") if padding: yield padding @@ -319,7 +336,7 @@ class Playlist(FieldSet): self._size = self["entry_length"].value *8 def createFields(self): - yield String(self, "header_id", 4, "Playlist List Header Markup (\"mhyp\")", charset="ISO-8859-1") + yield String(self, "header_id", 4, "Playlist Header Markup (\"mhyp\")", charset="ISO-8859-1") yield UInt32(self, "header_length", "Header Length") yield UInt32(self, "entry_length", "Entry Length") yield UInt32(self, "data_object_child_count", "Number of Child Data Objects") @@ -360,11 +377,48 @@ class PlaylistList(FieldSet): for i in xrange(self["playlist_number"].value): yield Playlist(self, "playlist[]") +class Album(FieldSet): + def __init__(self, *args, **kw): + FieldSet.__init__(self, *args, **kw) + self._size = self["entry_length"].value *8 + + def createFields(self): + yield String(self, "header_id", 4, "Album Item Header Markup (\"mhia\")", charset="ISO-8859-1") + yield UInt32(self, 
"header_length", "Header Length") + yield UInt32(self, "entry_length", "Entry Length") + yield UInt32(self, "data_object_child_count", "Number of Child Data Objects") + yield UInt16(self, "unknow[]") + yield UInt16(self, "album_id[]", "Album ID") + yield UInt32(self, "unknow[]") + yield UInt32(self, "unknow[]") + yield UInt32(self, "unknow[]") + + padding = self.seekByte(self["header_length"].value, "entry padding") + if padding: + yield padding + + for i in xrange(self["data_object_child_count"].value): + yield DataObject(self, "mhod[]") + +class AlbumList(FieldSet): + def createFields(self): + yield String(self, "header_id", 4, "Album List Header Markup (\"mhla\")", charset="ISO-8859-1") + yield UInt32(self, "header_length", "Header Length") + yield UInt32(self, "album_number", "Number of Albums") + + padding = self.seekByte(self["header_length"].value, "header padding") + if padding: + yield padding + + for i in xrange(self["album_number"].value): + yield Album(self, "album[]") + class DataSet(FieldSet): type_name={ 1:"Track List", 2:"Play List", - 3:"Podcast List" + 3:"Podcast List", + 4:"Album List" } def __init__(self, *args, **kw): FieldSet.__init__(self, *args, **kw) @@ -384,6 +438,8 @@ class DataSet(FieldSet): yield PlaylistList(self, "playlist_list[]"); if self["type"].value == 3: yield PlaylistList(self, "podcast_list[]"); + if self["type"].value == 4: + yield AlbumList(self, "album_list[]"); padding = self.seekBit(self._size, "entry padding") if padding: yield padding @@ -417,8 +473,20 @@ class ITunesDBFile(Parser): yield UInt32(self, "version_number", "Version Number") yield UInt32(self, "child_number", "Number of Children") yield UInt64(self, "id", "ID for this database") + yield UInt16(self, "unknown[]") yield UInt32(self, "unknown[]") - yield UInt64(self, "initial_dbid", "Initial DBID") + yield UInt64(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt16(self, "hashing_scheme[]", "Algorithm used to calculate the database hash") + yield 
NullBytes(self, "unknown[]", 20) + yield String(self, "language_id", 2, "Language ID") + yield UInt64(self, "persistent_id", "Library Persistent ID") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield RawBytes(self, "hash[]", 20) + yield Int32(self, "timezone_offset[]", "Timezone offset in seconds") + yield UInt16(self, "unknown[]") + yield RawBytes(self, "iphone_hash[]", 45) size = self["header_length"].value-self.current_size/ 8 if size>0: yield NullBytes(self, "padding", size) diff --git a/lib/hachoir_parser/audio/midi.py b/lib/hachoir_parser/audio/midi.py index 5382f2dd..211e7b78 100644 --- a/lib/hachoir_parser/audio/midi.py +++ b/lib/hachoir_parser/audio/midi.py @@ -8,13 +8,13 @@ Author: Victor Stinner Creation: 27 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Bits, ParserError, - String, UInt32, UInt24, UInt16, UInt8, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict, humanDurationNanosec -from lib.hachoir_parser.common.tracker import NOTE_NAME +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, ParserError, + String, UInt32, UInt24, UInt16, UInt8, Enum, RawBits, RawBytes) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict, humanDurationNanosec +from hachoir_parser.common.tracker import NOTE_NAME MAX_FILESIZE = 10 * 1024 * 1024 @@ -46,7 +46,7 @@ def parseControl(parser): def parsePatch(parser): yield UInt8(parser, "program", "New program number") -def parseChannel(parser): +def parseChannel(parser, size=1): yield UInt8(parser, "channel", "Channel number") def parsePitch(parser): @@ -56,6 +56,16 @@ def parsePitch(parser): def parseText(parser, size): yield String(parser, "text", size) +def parseSMPTEOffset(parser, size): + 
yield RawBits(parser, "padding", 1) + yield Enum(Bits(parser, "frame_rate", 2), + {0:"24 fps", 1:"25 fps", 2:"30 fps (drop frame)", 3:"30 fps"}) + yield Bits(parser, "hour", 5) + yield UInt8(parser, "minute") + yield UInt8(parser, "second") + yield UInt8(parser, "frame") + yield UInt8(parser, "subframe", "100 subframes per frame") + def formatTempo(field): return humanDurationNanosec(field.value*1000) @@ -92,8 +102,10 @@ class Command(FieldSet): 0x05: ("Lyric", parseText), 0x06: ("Marker", parseText), 0x07: ("Cue point", parseText), + 0x20: ("MIDI Channel Prefix", parseChannel), 0x2F: ("End of the track", None), 0x51: ("Set tempo", parseTempo), + 0x54: ("SMPTE offset", parseSMPTEOffset), 0x58: ("Time Signature", parseTimeSignature), 0x59: ("Key signature", None), 0x7F: ("Sequencer specific information", None), @@ -101,11 +113,27 @@ class Command(FieldSet): META_COMMAND_DESC = createDict(META_COMMAND, 0) META_COMMAND_PARSER = createDict(META_COMMAND, 1) + def __init__(self, *args, **kwargs): + if 'prev_command' in kwargs: + self.prev_command = kwargs['prev_command'] + del kwargs['prev_command'] + else: + self.prev_command = None + self.command = None + FieldSet.__init__(self, *args, **kwargs) + def createFields(self): yield Integer(self, "time", "Delta time in ticks") - yield Enum(textHandler(UInt8(self, "command"), hexadecimal), self.COMMAND_DESC) - command = self["command"].value - if command == 0xFF: + next = self.stream.readBits(self.absolute_address+self.current_size, 8, self.root.endian) + if next & 0x80 == 0: + # "Running Status" command + if self.prev_command is None: + raise ParserError("Running Status command not preceded by another command.") + self.command = self.prev_command.command + else: + yield Enum(textHandler(UInt8(self, "command"), hexadecimal), self.COMMAND_DESC) + self.command = self["command"].value + if self.command == 0xFF: yield Enum(textHandler(UInt8(self, "meta_command"), hexadecimal), self.META_COMMAND_DESC) yield UInt8(self, "data_len") 
size = self["data_len"].value @@ -121,9 +149,9 @@ class Command(FieldSet): else: yield RawBytes(self, "data", size) else: - if command not in self.COMMAND_PARSER: + if self.command not in self.COMMAND_PARSER: raise ParserError("Unknown command: %s" % self["command"].display) - parser = self.COMMAND_PARSER[command] + parser = self.COMMAND_PARSER[self.command] for field in parser(self): yield field @@ -131,7 +159,7 @@ class Command(FieldSet): if "meta_command" in self: return self["meta_command"].display else: - return self["command"].display + return self.COMMAND_DESC[self.command] class Track(FieldSet): def __init__(self, *args): @@ -141,9 +169,11 @@ class Track(FieldSet): def createFields(self): yield String(self, "marker", 4, "Track marker (MTrk)", charset="ASCII") yield UInt32(self, "size") + cur = None if True: while not self.eof: - yield Command(self, "command[]") + cur = Command(self, "command[]", prev_command=cur) + yield cur else: size = self["size"].value if size: diff --git a/lib/hachoir_parser/audio/mod.py b/lib/hachoir_parser/audio/mod.py index 5ed300f0..75025e0c 100644 --- a/lib/hachoir_parser/audio/mod.py +++ b/lib/hachoir_parser/audio/mod.py @@ -18,12 +18,12 @@ Creation: 18th February 2007 """ from math import log10 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, UInt16, UInt8, RawBytes, String, GenericVector) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler # Old NoiseTracker 15-samples modules can have anything here. 
MODULE_TYPE = { diff --git a/lib/hachoir_parser/audio/modplug.py b/lib/hachoir_parser/audio/modplug.py index 6790bc03..d0ea0ff4 100644 --- a/lib/hachoir_parser/audio/modplug.py +++ b/lib/hachoir_parser/audio/modplug.py @@ -8,11 +8,11 @@ Author: Christophe GISQUET Creation: 10th February 2007 """ -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, UInt32, UInt16, UInt8, Int8, Float32, RawBytes, String, GenericVector, ParserError) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal MAX_ENVPOINTS = 32 diff --git a/lib/hachoir_parser/audio/mpeg_audio.py b/lib/hachoir_parser/audio/mpeg_audio.py index b6f2ba91..04e7d327 100644 --- a/lib/hachoir_parser/audio/mpeg_audio.py +++ b/lib/hachoir_parser/audio/mpeg_audio.py @@ -5,18 +5,18 @@ Creation: 12 decembre 2005 Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, MissingField, ParserError, createOrphanField, Bit, Bits, Enum, PaddingBits, PaddingBytes, RawBytes) -from lib.hachoir_parser.audio.id3 import ID3v1, ID3v2 -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import humanFrequency, humanBitSize -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.stream import InputStreamError +from hachoir_parser.audio.id3 import ID3v1, ID3v2 +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFrequency, humanBitSize +from hachoir_core.bits import long2raw +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.stream import InputStreamError # Max MP3 filesize: 200 MB MAX_FILESIZE = 200*1024*1024*8 diff --git a/lib/hachoir_parser/audio/real_audio.py 
b/lib/hachoir_parser/audio/real_audio.py index 5a2100e6..289ed6e1 100644 --- a/lib/hachoir_parser/audio/real_audio.py +++ b/lib/hachoir_parser/audio/real_audio.py @@ -8,14 +8,14 @@ Samples: http://samples.mplayerhq.hu/real/RA/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, Bytes, RawBytes, String, PascalString8) -from lib.hachoir_core.tools import humanFrequency -from lib.hachoir_core.text_handler import displayHandler -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFrequency +from hachoir_core.text_handler import displayHandler +from hachoir_core.endian import BIG_ENDIAN class Metadata(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/audio/s3m.py b/lib/hachoir_parser/audio/s3m.py index c03d72a0..a3e28579 100644 --- a/lib/hachoir_parser/audio/s3m.py +++ b/lib/hachoir_parser/audio/s3m.py @@ -9,15 +9,15 @@ Author: Christophe GISQUET Creation: 11th February 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, Field, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Field, Bit, Bits, UInt32, UInt16, UInt8, Enum, PaddingBytes, RawBytes, NullBytes, String, GenericVector, ParserError) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import alignValue +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import alignValue class Chunk: def __init__(self, cls, name, offset, size, *args): @@ -326,7 +326,7 @@ class PTMHeader(Header): # static_size should prime over _size, right? 
static_size = 8*608 - def getTrackerVersion(val): + def getTrackerVersion(self, val): val = val.value return "ProTracker x%04X" % val diff --git a/lib/hachoir_parser/audio/xm.py b/lib/hachoir_parser/audio/xm.py index 17fd28e0..0b13b41f 100644 --- a/lib/hachoir_parser/audio/xm.py +++ b/lib/hachoir_parser/audio/xm.py @@ -13,15 +13,15 @@ Author: Christophe GISQUET Creation: 8th February 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, RawBits, Bits, UInt32, UInt16, UInt8, Int8, Enum, RawBytes, String, GenericVector) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_parser.audio.modplug import ParseModplugMetadata -from lib.hachoir_parser.common.tracker import NOTE_NAME +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_parser.audio.modplug import ParseModplugMetadata +from hachoir_parser.common.tracker import NOTE_NAME def parseSigned(val): return "%i" % (val.value-128) diff --git a/lib/hachoir_parser/common/deflate.py b/lib/hachoir_parser/common/deflate.py index ee369a29..8aa8e51a 100644 --- a/lib/hachoir_parser/common/deflate.py +++ b/lib/hachoir_parser/common/deflate.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import CompressedField +from hachoir_core.field import CompressedField try: from zlib import decompressobj, MAX_WBITS @@ -12,8 +12,8 @@ try: def __call__(self, size, data=None): if data is None: - data = self.gzip.unconsumed_tail - return self.gzip.decompress(data, size) + data = '' + return self.gzip.decompress(self.gzip.unconsumed_tail+data, size) class DeflateStreamWbits(DeflateStream): def __init__(self, stream): diff --git a/lib/hachoir_parser/common/msdos.py 
b/lib/hachoir_parser/common/msdos.py index e16920b7..addd1495 100644 --- a/lib/hachoir_parser/common/msdos.py +++ b/lib/hachoir_parser/common/msdos.py @@ -6,8 +6,8 @@ Documentation: http://www.cs.colorado.edu/~main/cs1300/include/ddk/winddk.h """ -from lib.hachoir_core.field import StaticFieldSet -from lib.hachoir_core.field import Bit, NullBits +from hachoir_core.field import StaticFieldSet +from hachoir_core.field import Bit, NullBits _FIELDS = ( (Bit, "read_only"), diff --git a/lib/hachoir_parser/common/win32.py b/lib/hachoir_parser/common/win32.py index 07a02f56..f5adf4fd 100644 --- a/lib/hachoir_parser/common/win32.py +++ b/lib/hachoir_parser/common/win32.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, UInt16, UInt32, Enum, String, Bytes, Bits, TimestampUUID60) -from lib.hachoir_parser.video.fourcc import video_fourcc_name -from lib.hachoir_core.bits import str2hex -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.network.common import MAC48_Address +from hachoir_parser.video.fourcc import video_fourcc_name +from hachoir_core.bits import str2hex +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.network.common import MAC48_Address # Dictionary: Windows codepage => Python charset name CODEPAGE_CHARSET = { @@ -24,6 +24,26 @@ CODEPAGE_CHARSET = { 65001: "UTF-8", } +class PascalStringWin16(FieldSet): + def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"): + FieldSet.__init__(self, parent, name, description) + length = self["length"].value + self._size = 16 + length * 16 + self.strip = strip + self.charset = charset + + def createFields(self): + yield UInt16(self, "length", "Length in widechar characters") + size = self["length"].value + if size: + yield String(self, "text", size*2, charset=self.charset, strip=self.strip) + + def createValue(self): + if "text" in self: + return self["text"].value + 
else: + return None + class PascalStringWin32(FieldSet): def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"): FieldSet.__init__(self, parent, name, description) diff --git a/lib/hachoir_parser/container/__init__.py b/lib/hachoir_parser/container/__init__.py index ff22d57a..6fd7d3ed 100644 --- a/lib/hachoir_parser/container/__init__.py +++ b/lib/hachoir_parser/container/__init__.py @@ -1,7 +1,7 @@ -from lib.hachoir_parser.container.asn1 import ASN1File -from lib.hachoir_parser.container.mkv import MkvFile -from lib.hachoir_parser.container.ogg import OggFile, OggStream -from lib.hachoir_parser.container.riff import RiffFile -from lib.hachoir_parser.container.swf import SwfFile -from lib.hachoir_parser.container.realmedia import RealMediaFile +from hachoir_parser.container.asn1 import ASN1File +from hachoir_parser.container.mkv import MkvFile +from hachoir_parser.container.ogg import OggFile, OggStream +from hachoir_parser.container.riff import RiffFile +from hachoir_parser.container.swf import SwfFile +from hachoir_parser.container.realmedia import RealMediaFile diff --git a/lib/hachoir_parser/container/action_script.py b/lib/hachoir_parser/container/action_script.py index 747f772f..4e22cef9 100644 --- a/lib/hachoir_parser/container/action_script.py +++ b/lib/hachoir_parser/container/action_script.py @@ -5,29 +5,64 @@ Documentation: - Alexis' SWF Reference: http://www.m2osw.com/swf_alexref.html + - Tamarin ABC format: + http://www.m2osw.com/abc_format.html -Author: Sebastien Ponce +Authors: Sebastien Ponce, Robert Xiao Creation date: 26 April 2008 """ -from lib.hachoir_core.field import (FieldSet, ParserError, - Bit, Bits, UInt8, UInt32, Int16, UInt16, Float32, CString, - RawBytes) -#from lib.hachoir_core.field import Field -from lib.hachoir_core.field.float import FloatExponent +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, + Bit, Bits, UInt8, UInt32, Int16, UInt16, Float32, Float64, 
CString, Enum, + Bytes, RawBytes, NullBits, String, SubFile, Field) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.field.float import FloatExponent from struct import unpack +class FlashPackedInteger(Bits): + def __init__(self, parent, name, signed=False, nbits=30, description=None): + Bits.__init__(self, parent, name, 8, description) + stream = self._parent.stream + addr = self.absolute_address + size = 0 + value = 0 + mult = 1 + while True: + byte = stream.readBits(addr+size, 8, LITTLE_ENDIAN) + value += mult * (byte & 0x7f) + size += 8 + mult <<= 7 + if byte < 128: + break + self._size = size + if signed and (1 << (nbits-1)) <= value: + value -= (1 << nbits) + self.createValue = lambda: value + +class FlashU30(FlashPackedInteger): + def __init__(self, parent, name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=False, nbits=30, description=description) + +class FlashS32(FlashPackedInteger): + def __init__(self, parent, name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=True, nbits=32, description=description) + +class FlashU32(FlashPackedInteger): + def __init__(self, parent, name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=False, nbits=32, description=description) + class FlashFloat64(FieldSet): def createFields(self): - yield Bits(self, "mantisa_high", 20) + yield Bits(self, "mantissa_high", 20) yield FloatExponent(self, "exponent", 11) yield Bit(self, "negative") - yield Bits(self, "mantisa_low", 32) + yield Bits(self, "mantissa_low", 32) def createValue(self): # Manual computation: - # mantisa = mantisa_high * 2^32 + mantisa_low - # float = 2^exponent + (1 + mantisa / 2^52) + # mantissa = mantissa_high * 2^32 + mantissa_low + # float = 2^exponent + (1 + mantissa / 2^52) # (and float is negative if negative=True) bytes = self.parent.stream.readBytes( self.absolute_address, self.size//8) @@ -44,8 +79,8 @@ TYPE_INFO = { 0x05: (UInt8, 
"Boolean[]"), 0x06: (FlashFloat64, "Double[]"), 0x07: (UInt32, "Integer[]"), - 0x08: (UInt8, "Dictionnary_Lookup_Index[]"), - 0x09: (UInt16, "Large_Dictionnary_Lookup_Index[]"), + 0x08: (UInt8, "Dictionary_Lookup_Index[]"), + 0x09: (UInt16, "Large_Dictionary_Lookup_Index[]"), } def parseBranch(parent, size): @@ -135,7 +170,7 @@ def parseWaitForFrame(parent, size): def parseWaitForFrameDyn(parent, size): yield UInt8(parent, "skip") -def parseDeclareDictionnary(parent, size): +def parseDeclareDictionary(parent, size): count = UInt16(parent, "count") yield count for i in range(count.value): @@ -231,7 +266,7 @@ class Instruction(FieldSet): # Objects 0x2B: ("Cast_Object[]", "Cast Object", None), 0x42: ("Declare_Array[]", "Declare Array", None), - 0x88: ("Declare_Dictionary[]", "Declare Dictionary", parseDeclareDictionnary), + 0x88: ("Declare_Dictionary[]", "Declare Dictionary", parseDeclareDictionary), 0x43: ("Declare_Object[]", "Declare Object", None), 0x3A: ("Delete[]", "Delete", None), 0x3B: ("Delete_All[]", "Delete All", None), @@ -314,3 +349,313 @@ class ActionScript(FieldSet): def parseActionScript(parent, size): yield ActionScript(parent, "action", size=size*8) +def FindABC(field): + while not getattr(field, "isABC", False): + field = field.parent + if field is None: + return None + return field + +def GetConstant(field, pool, index): + if index == 0: + return None + return FindABC(field)["constant_%s_pool/constant[%i]"%(pool, index)] + +def GetMultiname(field, index): + fld = GetConstant(field, "multiname", index) + if fld is None: + return "*" + if "name_index" not in fld: + return "?" 
+ fld2 = GetConstant(fld, "string", fld["name_index"].value) + if fld2 is None: + return "*" + return fld2.value + +class ABCStringIndex(FlashU30): + def createDisplay(self): + fld = GetConstant(self, "string", self.value) + if fld is None: + return "*" + return fld.value + +class ABCNSIndex(FlashU30): + def createDisplay(self): + fld = GetConstant(self, "namespace", self.value) + if fld is None: + return "*" + return fld.display + +class ABCMethodIndex(FlashU30): + def createDisplay(self): + fld = FindABC(self)["method_array/method[%i]"%self.value] + if fld is None: + return "*" + return fld.description + +class ABCMultinameIndex(FlashU30): + def createDisplay(self): + return GetMultiname(self, self.value) + +class ABCConstantPool(FieldSet): + def __init__(self, parent, name, klass): + FieldSet.__init__(self, parent, 'constant_%s_pool'%name) + self.klass = klass + def createFields(self): + ctr = FlashU30(self, "count") + yield ctr + for i in xrange(ctr.value-1): + yield self.klass(self, "constant[%i]"%(i+1)) + +class ABCObjectArray(FieldSet): + def __init__(self, parent, name, klass): + self.arrname = name + FieldSet.__init__(self, parent, name+'_array') + self.klass = klass + def createFields(self): + ctr = FlashU30(self, "count") + yield ctr + for i in xrange(ctr.value): + yield self.klass(self, self.arrname+"[]") + +class ABCClassArray(FieldSet): + def __init__(self, parent, name): + FieldSet.__init__(self, parent, name+'_array') + def createFields(self): + ctr = FlashU30(self, "count") + yield ctr + for i in xrange(ctr.value): + yield ABCInstanceInfo(self, "instance[]") + for i in xrange(ctr.value): + yield ABCClassInfo(self, "class[]") + +class ABCConstantString(FieldSet): + def createFields(self): + yield FlashU30(self, "length") + size = self["length"].value + if size: + yield String(self, "data", size, charset="UTF-8") + + def createDisplay(self): + if "data" in self: + return self["data"].display + else: + return "" + + def createValue(self): + if "data" 
in self: + return self["data"].value + else: + return "" + +class ABCConstantNamespace(FieldSet): + NAMESPACE_KIND = {8: "Namespace", + 5: "PrivateNamespace", + 22: "PackageNamespace", + 23: "PacakgeInternalNamespace", + 24: "ProtectedNamespace", + 25: "ExplicitNamespace", + 26: "MultinameL"} + def createFields(self): + yield Enum(UInt8(self, "kind"), self.NAMESPACE_KIND) + yield ABCStringIndex(self, "name_index") + + def createDisplay(self): + return "%s %s"%(self["kind"].display, self["name_index"].display) + + def createValue(self): + return self["name_index"].value + +class ABCConstantNamespaceSet(FieldSet): + def createFields(self): + ctr = FlashU30(self, "namespace_count") + yield ctr + for i in xrange(ctr.value): + yield ABCNSIndex(self, "namespace_index[]") + + def createDescription(self): + ret = [fld.display for fld in self.array("namespace_index")] + return ', '.join(ret) + +class ABCConstantMultiname(FieldSet): + MULTINAME_KIND = {7: "Qname", + 13: "QnameA", + 9: "Multiname", + 14: "MultinameA", + 15: "RTQname", + 16: "RTQnameA", + 27: "MultinameL", + 17: "RTQnameL", + 18: "RTQnameLA"} + def createFields(self): + yield Enum(UInt8(self, "kind"), self.MULTINAME_KIND) + kind = self["kind"].value + if kind in (7,13): # Qname + yield FlashU30(self, "namespace_index") + yield ABCStringIndex(self, "name_index") + elif kind in (9,14): # Multiname + yield ABCStringIndex(self, "name_index") + yield FlashU30(self, "namespace_set_index") + elif kind in (15,16): # RTQname + yield ABCStringIndex(self, "name_index") + elif kind == 27: # MultinameL + yield FlashU30(self, "namespace_set_index") + elif kind in (17,18): # RTQnameL + pass + + def createDisplay(self): + kind = self["kind"].display + if "name_index" in self: + return kind + " " + self["name_index"].display + return kind + + def createValue(self): + return self["kind"].value + +class ABCTrait(FieldSet): + TRAIT_KIND = {0: "slot", + 1: "method", + 2: "getter", + 3: "setter", + 4: "class", + 5: "function", + 6: 
"const",} + def createFields(self): + yield ABCMultinameIndex(self, "name_index") + yield Enum(Bits(self, "kind", 4), self.TRAIT_KIND) + yield Enum(Bit(self, "is_final"), {True:'final',False:'virtual'}) + yield Enum(Bit(self, "is_override"), {True:'override',False:'new'}) + yield Bit(self, "has_metadata") + yield Bits(self, "unused", 1) + kind = self["kind"].value + if kind in (0,6): # slot, const + yield FlashU30(self, "slot_id") + yield ABCMultinameIndex(self, "type_index") + ### TODO reference appropriate constant pool using value_kind + yield FlashU30(self, "value_index") + if self['value_index'].value != 0: + yield UInt8(self, "value_kind") + elif kind in (1,2,3): # method, getter, setter + yield FlashU30(self, "disp_id") + yield ABCMethodIndex(self, "method_info") + elif kind == 4: # class + yield FlashU30(self, "disp_id") + yield FlashU30(self, "class_info") + elif kind == 5: # function + yield FlashU30(self, "disp_id") + yield ABCMethodIndex(self, "method_info") + if self['has_metadata'].value: + yield ABCObjectArray(self, "metadata", FlashU30) + +class ABCValueKind(FieldSet): + def createFields(self): + yield FlashU30(self, "value_index") + yield UInt8(self, "value_kind") + +class ABCMethodInfo(FieldSet): + def createFields(self): + yield FlashU30(self, "param_count") + yield ABCMultinameIndex(self, "ret_type") + for i in xrange(self["param_count"].value): + yield ABCMultinameIndex(self, "param_type[]") + yield ABCStringIndex(self, "name_index") + yield Bit(self, "need_arguments") + yield Bit(self, "need_activation") + yield Bit(self, "need_rest") + yield Bit(self, "has_optional") + yield Bit(self, "ignore_rest") + yield Bit(self, "explicit") + yield Bit(self, "setsdxns") + yield Bit(self, "has_paramnames") + if self["has_optional"].value: + yield ABCObjectArray(self, "optional", ABCValueKind) + if self["has_paramnames"].value: + for i in xrange(self["param_count"].value): + yield FlashU30(self, "param_name[]") + + def createDescription(self): + ret = 
GetMultiname(self, self["ret_type"].value) + ret += " " + self["name_index"].display + ret += "(" + ", ".join(GetMultiname(self, fld.value) for fld in self.array("param_type")) + ")" + return ret + +class ABCMetadataInfo(FieldSet): + def createFields(self): + yield ABCStringIndex(self, "name_index") + yield FlashU30(self, "values_count") + count = self["values_count"].value + for i in xrange(count): + yield FlashU30(self, "key[]") + for i in xrange(count): + yield FlashU30(self, "value[]") + +class ABCInstanceInfo(FieldSet): + def createFields(self): + yield ABCMultinameIndex(self, "name_index") + yield ABCMultinameIndex(self, "super_index") + yield Bit(self, "is_sealed") + yield Bit(self, "is_final") + yield Bit(self, "is_interface") + yield Bit(self, "is_protected") + yield Bits(self, "unused", 4) + if self['is_protected'].value: + yield ABCNSIndex(self, "protectedNS") + yield FlashU30(self, "interfaces_count") + for i in xrange(self["interfaces_count"].value): + yield ABCMultinameIndex(self, "interface[]") + yield ABCMethodIndex(self, "iinit_index") + yield ABCObjectArray(self, "trait", ABCTrait) + +class ABCClassInfo(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "cinit_index") + yield ABCObjectArray(self, "trait", ABCTrait) + +class ABCScriptInfo(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "init_index") + yield ABCObjectArray(self, "trait", ABCTrait) + +class ABCException(FieldSet): + def createFields(self): + yield FlashU30(self, "start") + yield FlashU30(self, "end") + yield FlashU30(self, "target") + yield FlashU30(self, "type_index") + yield FlashU30(self, "name_index") + +class ABCMethodBody(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "method_info") + yield FlashU30(self, "max_stack") + yield FlashU30(self, "max_regs") + yield FlashU30(self, "scope_depth") + yield FlashU30(self, "max_scope") + yield FlashU30(self, "code_length") + yield RawBytes(self, "code", self['code_length'].value) + yield 
ABCObjectArray(self, "exception", ABCException) + yield ABCObjectArray(self, "trait", ABCTrait) + +def parseABC(parent, size): + code = parent["code"].value + if code == parent.TAG_DO_ABC_DEFINE: + yield UInt32(parent, "action_flags") + yield CString(parent, "action_name") + yield UInt16(parent, "minor_version") + yield UInt16(parent, "major_version") + parent.isABC = True + + yield ABCConstantPool(parent, "int", FlashS32) + yield ABCConstantPool(parent, "uint", FlashU32) + yield ABCConstantPool(parent, "double", Float64) + yield ABCConstantPool(parent, "string", ABCConstantString) + yield ABCConstantPool(parent, "namespace", ABCConstantNamespace) + yield ABCConstantPool(parent, "namespace_set", ABCConstantNamespaceSet) + yield ABCConstantPool(parent, "multiname", ABCConstantMultiname) + + yield ABCObjectArray(parent, "method", ABCMethodInfo) + yield ABCObjectArray(parent, "metadata", ABCMetadataInfo) + yield ABCClassArray(parent, "class") + yield ABCObjectArray(parent, "script", ABCScriptInfo) + yield ABCObjectArray(parent, "body", ABCMethodBody) + diff --git a/lib/hachoir_parser/container/asn1.py b/lib/hachoir_parser/container/asn1.py index d1c3d113..dfac847b 100644 --- a/lib/hachoir_parser/container/asn1.py +++ b/lib/hachoir_parser/container/asn1.py @@ -39,15 +39,15 @@ Author: Victor Stinner Creation date: 24 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, FieldError, ParserError, Bit, Bits, Bytes, UInt8, GenericInteger, String, Field, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import createDict, humanDatetime -from lib.hachoir_core.stream import InputStreamError -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import createDict, humanDatetime +from hachoir_core.stream import InputStreamError +from 
hachoir_core.text_handler import textHandler # --- Field parser --- diff --git a/lib/hachoir_parser/container/mkv.py b/lib/hachoir_parser/container/mkv.py index 65230d61..0d3974c0 100644 --- a/lib/hachoir_parser/container/mkv.py +++ b/lib/hachoir_parser/container/mkv.py @@ -4,18 +4,18 @@ # Created: 8 june 2006 # -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Link, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Link, MissingField, ParserError, Enum as _Enum, String as _String, Float32, Float64, NullBits, Bits, Bit, RawBytes, Bytes, Int16, GenericInteger) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.iso639 import ISO639_2 -from lib.hachoir_core.tools import humanDatetime -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.container.ogg import XiphInt +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.iso639 import ISO639_2 +from hachoir_core.tools import humanDatetime +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.container.ogg import XiphInt from datetime import datetime, timedelta class RawInt(GenericInteger): @@ -66,7 +66,7 @@ def SInt(parent): return GenericInteger(parent, 'signed', True, parent['size'].value*8) def String(parent): - return _String(parent, 'string', parent['size'].value, charset="ASCII") + return _String(parent, 'string', parent['size'].value, charset="ASCII", strip="\0") def EnumString(parent, enum): return _Enum(String(parent), enum) @@ -206,7 +206,7 @@ class Block(FieldSet): yield Bit(self, 'invisible') yield self.lacing() yield NullBits(self, 'reserved[]', 1) - elif self.parent._name == 'SimpleBlock[]': + elif self.parent._name.startswith('SimpleBlock'): yield Bit(self, 'keyframe') yield NullBits(self, 'reserved', 3) yield Bit(self, 'invisible') diff --git a/lib/hachoir_parser/container/ogg.py b/lib/hachoir_parser/container/ogg.py index 82b961d0..fa2d26cb 
100644 --- a/lib/hachoir_parser/container/ogg.py +++ b/lib/hachoir_parser/container/ogg.py @@ -4,15 +4,15 @@ # Created: 10 june 2006 # -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, createOrphanField, +from hachoir_parser import Parser +from hachoir_core.field import (Field, FieldSet, createOrphanField, NullBits, Bit, Bits, Enum, Fragment, MissingField, ParserError, UInt8, UInt16, UInt24, UInt32, UInt64, RawBytes, String, PascalString32, NullBytes) -from lib.hachoir_core.stream import FragmentedStream, InputStreamError -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.tools import humanDurationNanosec -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.stream import FragmentedStream, InputStreamError +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.tools import humanDurationNanosec +from hachoir_core.text_handler import textHandler, hexadecimal MAX_FILESIZE = 1000 * 1024 * 1024 diff --git a/lib/hachoir_parser/container/realmedia.py b/lib/hachoir_parser/container/realmedia.py index 337be9b0..45c8173b 100644 --- a/lib/hachoir_parser/container/realmedia.py +++ b/lib/hachoir_parser/container/realmedia.py @@ -13,12 +13,12 @@ Samples: - http://samples.mplayerhq.hu/real/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, Bit, RawBits, RawBytes, String, PascalString8, PascalString16) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import BIG_ENDIAN def parseHeader(self): yield UInt32(self, "filever", "File version") diff --git a/lib/hachoir_parser/container/riff.py b/lib/hachoir_parser/container/riff.py index d5e96b93..a5e4fc0a 100644 --- 
a/lib/hachoir_parser/container/riff.py +++ b/lib/hachoir_parser/container/riff.py @@ -29,17 +29,17 @@ Thanks to: format information """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, Enum, Bit, NullBits, NullBytes, RawBytes, String, PaddingBytes, SubFile) -from lib.hachoir_core.tools import alignValue, humanDuration -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler, textHandler -from lib.hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name -from lib.hachoir_parser.image.ico import IcoFile +from hachoir_core.tools import alignValue, humanDuration +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import filesizeHandler, textHandler +from hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name +from hachoir_parser.image.ico import IcoFile from datetime import timedelta def parseText(self): diff --git a/lib/hachoir_parser/container/swf.py b/lib/hachoir_parser/container/swf.py index 4c0e5d69..942e3d9e 100644 --- a/lib/hachoir_parser/container/swf.py +++ b/lib/hachoir_parser/container/swf.py @@ -13,18 +13,18 @@ Author: Victor Stinner Creation date: 29 october 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - Bit, Bits, UInt8, UInt32, UInt16, CString, Enum, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, + Bit, Bits, UInt8, UInt16, Int32, UInt32, Int64, CString, Enum, Bytes, RawBytes, NullBits, String, SubFile) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler -from lib.hachoir_core.tools import paddingSize, humanFrequency -from lib.hachoir_parser.image.common import RGB -from 
lib.hachoir_parser.image.jpeg import JpegChunk, JpegFile -from lib.hachoir_core.stream import StringInputStream, ConcatStream -from lib.hachoir_parser.common.deflate import Deflate, has_deflate -from lib.hachoir_parser.container.action_script import parseActionScript +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler +from hachoir_core.tools import paddingSize, humanFrequency +from hachoir_parser.image.common import RGB +from hachoir_parser.image.jpeg import JpegChunk, JpegFile +from hachoir_core.stream import StringInputStream, ConcatStream +from hachoir_parser.common.deflate import Deflate, has_deflate +from hachoir_parser.container.action_script import parseActionScript, parseABC import math # Maximum file size (50 MB) @@ -206,10 +206,35 @@ def parseExport(parent, size): for index in xrange(parent["count"].value): yield Export(parent, "export[]") +def parseProductInfo(parent, size): + yield Int32(parent, "product_id") + yield Int32(parent, "edition") + yield UInt8(parent, "major_version") + yield UInt8(parent, "minor_version") + yield Int64(parent, "build_number") + yield Int64(parent, "compilation_date") + +def parseScriptLimits(parent, size): + yield UInt16(parent, "max_recursion_limit") + yield UInt16(parent, "timeout_seconds", "Seconds of processing until the SWF is considered 'stuck'") + +def parseSymbolClass(parent, size): + yield UInt16(parent, "count") + for index in xrange(parent["count"].value): + yield UInt16(parent, "symbol_id[]") + yield CString(parent, "symbol_name[]") + +def parseBinaryData(parent, size): + yield UInt16(parent, "data_id") + yield UInt32(parent, "reserved") + if size > 6: + yield RawBytes(parent, "data", size-6) + class Tag(FieldSet): TAG_BITS = 6 TAG_BITS_JPEG2 = 32 TAG_BITS_JPEG3 = 35 + TAG_DO_ABC_DEFINE = 82 TAG_INFO = { # SWF version 1.0 0: ("end[]", "End", None), @@ -253,7 +278,7 @@ class Tag(FieldSet): 36: ("def_bits_lossless2[]", "Define bits 
lossless 2", None), 39: ("def_sprite[]", "Define sprite", None), 40: ("name_character[]", "Name character", None), - 41: ("serial_number", "Serial number", None), + 41: ("product_info", "Generator product info", parseProductInfo), 42: ("generator_text[]", "Generator text", None), 43: ("frame_label[]", "Frame label", None), 45: ("sound_hdr2[]", "Sound stream header2", parseSoundHeader), @@ -283,7 +308,7 @@ class Tag(FieldSet): 64: ("enable_debug2", "Enable debugger 2", None), # SWF version 7.0 - 65: ("script_limits[]", "Script limits", None), + 65: ("script_limits[]", "Script limits", parseScriptLimits), 66: ("tab_index[]", "Set tab index", None), # SWF version 8.0 @@ -297,6 +322,14 @@ class Tag(FieldSet): 78: ("def_scale_grid[]", "Define scaling factors", None), 83: ("def_shape4[]", "Define shape 4", None), 84: ("def_morph2[]", "Define a morphing shape 2", None), + + # SWF version 9.0 + 72: ("do_abc[]", "SWF 9 ActionScript container; actions only", parseABC), + 76: ("symbol_class[]", "Instantiate objects from a set of classes", parseSymbolClass), + 82: ("do_abc_define[]", "SWF 9 ActionScript container; identifier, name, actions", parseABC), + 86: ("def_scene_frame[]", "Define raw data for scenes and frames", None), + 87: ("def_binary_data[]", "Defines a buffer of any size with any binary user data", parseBinaryData), + 88: ("def_font_name[]", "Define the legal font name and copyright", None), } def __init__(self, *args): @@ -332,7 +365,7 @@ class Tag(FieldSet): return "Tag: %s (%s)" % (self["code"].display, self["length"].display) class SwfFile(Parser): - VALID_VERSIONS = set(xrange(1, 9+1)) + VALID_VERSIONS = set(xrange(1, 10+1)) PARSER_TAGS = { "id": "swf", "category": "container", diff --git a/lib/hachoir_parser/file_system/__init__.py b/lib/hachoir_parser/file_system/__init__.py index 92c82d26..863aae3a 100644 --- a/lib/hachoir_parser/file_system/__init__.py +++ b/lib/hachoir_parser/file_system/__init__.py @@ -1,8 +1,8 @@ -from 
lib.hachoir_parser.file_system.ext2 import EXT2_FS -from lib.hachoir_parser.file_system.fat import FAT12, FAT16, FAT32 -from lib.hachoir_parser.file_system.mbr import MSDos_HardDrive -from lib.hachoir_parser.file_system.ntfs import NTFS -from lib.hachoir_parser.file_system.iso9660 import ISO9660 -from lib.hachoir_parser.file_system.reiser_fs import REISER_FS -from lib.hachoir_parser.file_system.linux_swap import LinuxSwapFile +from hachoir_parser.file_system.ext2 import EXT2_FS +from hachoir_parser.file_system.fat import FAT12, FAT16, FAT32 +from hachoir_parser.file_system.mbr import MSDos_HardDrive +from hachoir_parser.file_system.ntfs import NTFS +from hachoir_parser.file_system.iso9660 import ISO9660 +from hachoir_parser.file_system.reiser_fs import REISER_FS +from hachoir_parser.file_system.linux_swap import LinuxSwapFile diff --git a/lib/hachoir_parser/file_system/ext2.py b/lib/hachoir_parser/file_system/ext2.py index 8b19b46b..634fe063 100644 --- a/lib/hachoir_parser/file_system/ext2.py +++ b/lib/hachoir_parser/file_system/ext2.py @@ -10,14 +10,14 @@ Sources: http://www.nondot.org/sabre/os/files/FileSystems/ext2fs/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Bit, Bits, UInt8, UInt16, UInt32, Enum, String, TimestampUnix32, RawBytes, NullBytes) -from lib.hachoir_core.tools import (alignValue, +from hachoir_core.tools import (alignValue, humanDuration, humanFilesize) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler from itertools import izip class DirectoryEntry(FieldSet): diff --git a/lib/hachoir_parser/file_system/fat.py b/lib/hachoir_parser/file_system/fat.py index fc0ae6cb..2aebe175 100644 --- a/lib/hachoir_parser/file_system/fat.py +++ 
b/lib/hachoir_parser/file_system/fat.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.compatibility import sorted -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, +from hachoir_core.compatibility import sorted +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, RawBytes, PaddingBytes, createPaddingField, Link, Fragment, Bit, Bits, UInt8, UInt16, UInt32, String, Bytes, NullBytes) -from lib.hachoir_core.field.integer import GenericInteger -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.error import error -from lib.hachoir_core.tools import humanFilesize, makePrintable +from hachoir_core.field.integer import GenericInteger +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.error import error +from hachoir_core.tools import humanFilesize, makePrintable import datetime import re diff --git a/lib/hachoir_parser/file_system/iso9660.py b/lib/hachoir_parser/file_system/iso9660.py index 9fc0dc9f..3d93593a 100644 --- a/lib/hachoir_parser/file_system/iso9660.py +++ b/lib/hachoir_parser/file_system/iso9660.py @@ -9,11 +9,11 @@ Author: Victor Stinner Creation: 11 july 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt32, UInt64, Enum, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class PrimaryVolumeDescriptor(FieldSet): static_size = 2041*8 diff --git a/lib/hachoir_parser/file_system/linux_swap.py b/lib/hachoir_parser/file_system/linux_swap.py index 72fdf909..ea2e0dba 100644 --- a/lib/hachoir_parser/file_system/linux_swap.py +++ 
b/lib/hachoir_parser/file_system/linux_swap.py @@ -9,13 +9,13 @@ Author: Victor Stinner Creation date: 25 december 2006 (christmas ;-)) """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, GenericVector, +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, GenericVector, UInt32, String, Bytes, NullBytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanFilesize -from lib.hachoir_core.bits import str2hex +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanFilesize +from hachoir_core.bits import str2hex PAGE_SIZE = 4096 diff --git a/lib/hachoir_parser/file_system/mbr.py b/lib/hachoir_parser/file_system/mbr.py index 4174fa31..d5c366f8 100644 --- a/lib/hachoir_parser/file_system/mbr.py +++ b/lib/hachoir_parser/file_system/mbr.py @@ -12,13 +12,13 @@ Master Boot Record. # 2. Ask the system (ioctl/HDIO_GETGEO). # 3. 255 heads and 63 sectors/cylinder. 
-from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, Bits, UInt8, UInt16, UInt32, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanFilesize -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanFilesize +from hachoir_core.text_handler import textHandler, hexadecimal BLOCK_SIZE = 512 # bytes diff --git a/lib/hachoir_parser/file_system/ntfs.py b/lib/hachoir_parser/file_system/ntfs.py index 0710f990..efea7e71 100644 --- a/lib/hachoir_parser/file_system/ntfs.py +++ b/lib/hachoir_parser/file_system/ntfs.py @@ -13,15 +13,15 @@ Author: Victor Stinner SECTOR_SIZE = 512 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Enum, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, UInt8, UInt16, UInt32, UInt64, TimestampWin64, String, Bytes, Bit, NullBits, NullBytes, PaddingBytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.tools import humanFilesize, createDict -from lib.hachoir_parser.common.msdos import MSDOSFileAttr32 +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.tools import humanFilesize, createDict +from hachoir_parser.common.msdos import MSDOSFileAttr32 class BiosParameterBlock(FieldSet): """ diff --git a/lib/hachoir_parser/file_system/reiser_fs.py b/lib/hachoir_parser/file_system/reiser_fs.py index 73933c27..52a0dbf8 100644 --- a/lib/hachoir_parser/file_system/reiser_fs.py +++ b/lib/hachoir_parser/file_system/reiser_fs.py @@ -1,5 +1,5 @@ """ -ReiserFS file system version 3 parser (version 1, 2 and 4 are not supported). 
+ReiserFS file system version 3 parser (other version have not been tested). Author: Frederic Weisbecker Creation date: 8 december 2006 @@ -20,10 +20,63 @@ Kurz. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Enum, - UInt16, UInt32, String, RawBytes, NullBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, + UInt16, UInt32, String, RawBytes, NullBytes, SeekableFieldSet, Bit) +from hachoir_core.endian import LITTLE_ENDIAN + + +class BlockState(Bit): + """The state (used/free) of a ReiserFs Block""" + + STATE={ + True : "used", + False : "free" + } + + block_nb = 0 + + def __init__(self, parent, name, nb_block): + """@param nb_block: Number of the block concerned""" + Bit.__init__(self, parent, name) + self.block_nb = self.__class__.block_nb + self.__class__.block_nb += 1 + + def createDescription(self): + return "State of the block %d" % self.block_nb + + def createDisplay(self): + return self.STATE[Bit.createValue(self)] + + +class BitmapBlock(SeekableFieldSet): + """ The bitmap blocks are Reiserfs blocks where each byte contains + the state of 8 blocks in the filesystem. So each bit will describe + the state of a block to tell if it is used or not. 
+ """ + def createFields(self): + block_size=self["/superblock/blocksize"].value + + for i in xrange(0, block_size * 8): + yield BlockState(self, "block[]", i) + + +class BitmapBlockGroup(SeekableFieldSet): + """The group that manages the Bitmap Blocks""" + + def createFields(self): + block_size=self["/superblock/blocksize"].value + nb_bitmap_block = self["/superblock/bmap_nr"].value + # Position of the first bitmap block + self.seekByte(REISER_FS.SUPERBLOCK_OFFSET + block_size, relative=False) + + yield BitmapBlock(self, "BitmapBlock[]", "Bitmap blocks tells for each block if it is used") + # The other bitmap blocks + for i in xrange(1, nb_bitmap_block): + self.seekByte( (block_size**2) * 8 * i, relative=False) + yield BitmapBlock(self, "BitmapBlock[]", "Bitmap blocks tells for each block if it is used") + + class Journal_params(FieldSet): static_size = 32*8 @@ -44,7 +97,7 @@ class Journal_params(FieldSet): return "Parameters of the journal" class SuperBlock(FieldSet): - static_size = 204*8 + #static_size = 204*8 UMOUNT_STATE = { 1: "unmounted", 2: "not unmounted" } HASH_FUNCTIONS = { @@ -84,6 +137,7 @@ class SuperBlock(FieldSet): yield RawBytes(self, "uuid", 16, "Filesystem unique identifier") yield String(self, "label", 16, "Filesystem volume label", strip="\0") yield NullBytes(self, "unused", 88) + yield NullBytes(self, "Bytes before end of the block", self["blocksize"].value-204) def createDescription(self): return "Superblock: ReiserFs Filesystem" @@ -108,13 +162,11 @@ class REISER_FS(Parser): def validate(self): # Let's look at the magic field in the superblock magic = self.stream.readBytes(self.MAGIC_OFFSET*8, 9).rstrip("\0") - if magic == "ReIsEr3Fs": + if magic in ("ReIsEr3Fs", "ReIsErFs", "ReIsEr2Fs"): return True - if magic in ("ReIsEr2Fs", "ReIsErFs"): - return "Unsupported version of ReiserFs" return "Invalid magic string" def createFields(self): yield NullBytes(self, "padding[]", self.SUPERBLOCK_OFFSET) yield SuperBlock(self, "superblock") - + yield 
BitmapBlockGroup(self, "Group of bitmap blocks") diff --git a/lib/hachoir_parser/game/__init__.py b/lib/hachoir_parser/game/__init__.py index 17f7cd0a..1b6447b9 100644 --- a/lib/hachoir_parser/game/__init__.py +++ b/lib/hachoir_parser/game/__init__.py @@ -1,4 +1,4 @@ -from lib.hachoir_parser.game.zsnes import ZSNESFile -from lib.hachoir_parser.game.spider_man_video import SpiderManVideoFile -from lib.hachoir_parser.game.laf import LafFile -from lib.hachoir_parser.game.blp import BLP1File, BLP2File \ No newline at end of file +from hachoir_parser.game.zsnes import ZSNESFile +from hachoir_parser.game.spider_man_video import SpiderManVideoFile +from hachoir_parser.game.laf import LafFile +from hachoir_parser.game.blp import BLP1File, BLP2File \ No newline at end of file diff --git a/lib/hachoir_parser/game/blp.py b/lib/hachoir_parser/game/blp.py index 2c81770e..218e8640 100644 --- a/lib/hachoir_parser/game/blp.py +++ b/lib/hachoir_parser/game/blp.py @@ -12,11 +12,11 @@ Creation date: July 10 2007 http://en.wikipedia.org/wiki/S3_Texture_Compression """ -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.field import String, UInt32, UInt8, Enum, FieldSet, RawBytes, GenericVector, Bit, Bits -from lib.hachoir_parser.parser import Parser -from lib.hachoir_parser.image.common import PaletteRGBA -from lib.hachoir_core.tools import alignValue +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.field import String, UInt32, UInt8, Enum, FieldSet, RawBytes, GenericVector, Bit, Bits +from hachoir_parser.parser import Parser +from hachoir_parser.image.common import PaletteRGBA +from hachoir_core.tools import alignValue class PaletteIndex(UInt8): def createDescription(self): diff --git a/lib/hachoir_parser/game/laf.py b/lib/hachoir_parser/game/laf.py index 88154629..4a8e15cf 100644 --- a/lib/hachoir_parser/game/laf.py +++ b/lib/hachoir_parser/game/laf.py @@ -7,10 +7,10 @@ Author: Cyril Zorin Creation date: 1 January 2007 """ -from 
lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, GenericVector) -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN class CharData(FieldSet): def __init__(self, chars, *args): diff --git a/lib/hachoir_parser/game/spider_man_video.py b/lib/hachoir_parser/game/spider_man_video.py index 23842617..b9092f33 100644 --- a/lib/hachoir_parser/game/spider_man_video.py +++ b/lib/hachoir_parser/game/spider_man_video.py @@ -7,10 +7,10 @@ Creation date: 2006-09-30 File samples: http://samples.mplayerhq.hu/game-formats/spiderman-segacd-bin/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, UInt32, String, RawBytes -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, UInt32, String, RawBytes +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal class Chunk(FieldSet): tag_info = { diff --git a/lib/hachoir_parser/game/zsnes.py b/lib/hachoir_parser/game/zsnes.py index e11b3528..a8f75506 100644 --- a/lib/hachoir_parser/game/zsnes.py +++ b/lib/hachoir_parser/game/zsnes.py @@ -5,11 +5,11 @@ Author: Jason Gorski Creation date: 2006-09-15 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, UInt8, UInt16, UInt32, String, PaddingBytes, Bytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN class ZSTHeader(StaticFieldSet): format = ( diff --git a/lib/hachoir_parser/guess.py b/lib/hachoir_parser/guess.py index 0a7178af..758cec65 100644 --- a/lib/hachoir_parser/guess.py +++ 
b/lib/hachoir_parser/guess.py @@ -4,10 +4,11 @@ Parser list managment: """ import os -from lib.hachoir_core.error import warning, info, HACHOIR_ERRORS -from lib.hachoir_parser import ValidateError, HachoirParserList -from lib.hachoir_core.stream import FileInputStream -from lib.hachoir_core.i18n import _ +from hachoir_core.error import warning, info, HACHOIR_ERRORS +from hachoir_parser import ValidateError, HachoirParserList +from hachoir_core.stream import FileInputStream +from hachoir_core.i18n import _ +import weakref class QueryParser(object): @@ -80,6 +81,19 @@ class QueryParser(object): return parsers def parse(self, stream, fallback=True): + if hasattr(stream, "_cached_parser"): + parser = stream._cached_parser() + else: + parser = None + if parser is not None: + if parser.__class__ in self.parsers: + return parser + parser = self.doparse(stream, fallback) + if parser is not None: + stream._cached_parser = weakref.ref(parser) + return parser + + def doparse(self, stream, fallback=True): fb = None warn = warning for parser in self.parsers: diff --git a/lib/hachoir_parser/image/__init__.py b/lib/hachoir_parser/image/__init__.py index f66ffdd7..78c9c20b 100644 --- a/lib/hachoir_parser/image/__init__.py +++ b/lib/hachoir_parser/image/__init__.py @@ -1,12 +1,12 @@ -from lib.hachoir_parser.image.bmp import BmpFile -from lib.hachoir_parser.image.gif import GifFile -from lib.hachoir_parser.image.ico import IcoFile -from lib.hachoir_parser.image.jpeg import JpegFile -from lib.hachoir_parser.image.pcx import PcxFile -from lib.hachoir_parser.image.psd import PsdFile -from lib.hachoir_parser.image.png import PngFile -from lib.hachoir_parser.image.tga import TargaFile -from lib.hachoir_parser.image.tiff import TiffFile -from lib.hachoir_parser.image.wmf import WMF_File -from lib.hachoir_parser.image.xcf import XcfFile +from hachoir_parser.image.bmp import BmpFile +from hachoir_parser.image.gif import GifFile +from hachoir_parser.image.ico import IcoFile +from 
hachoir_parser.image.jpeg import JpegFile +from hachoir_parser.image.pcx import PcxFile +from hachoir_parser.image.psd import PsdFile +from hachoir_parser.image.png import PngFile +from hachoir_parser.image.tga import TargaFile +from hachoir_parser.image.tiff import TiffFile +from hachoir_parser.image.wmf import WMF_File +from hachoir_parser.image.xcf import XcfFile diff --git a/lib/hachoir_parser/image/bmp.py b/lib/hachoir_parser/image/bmp.py index 51c94400..c4865d3b 100644 --- a/lib/hachoir_parser/image/bmp.py +++ b/lib/hachoir_parser/image/bmp.py @@ -6,15 +6,15 @@ Author: Victor Stinner Creation: 16 december 2005 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, Bits, String, RawBytes, Enum, PaddingBytes, NullBytes, createPaddingField) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.image.common import RGB, PaletteRGBA -from lib.hachoir_core.tools import alignValue +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.image.common import RGB, PaletteRGBA +from hachoir_core.tools import alignValue class Pixel4bit(Bits): static_size = 4 diff --git a/lib/hachoir_parser/image/common.py b/lib/hachoir_parser/image/common.py index ca7152a3..5046058a 100644 --- a/lib/hachoir_parser/image/common.py +++ b/lib/hachoir_parser/image/common.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import FieldSet, UserVector, UInt8 +from hachoir_core.field import FieldSet, UserVector, UInt8 class RGB(FieldSet): color_name = { diff --git a/lib/hachoir_parser/image/exif.py b/lib/hachoir_parser/image/exif.py index dcaaf77e..449c7ba0 100644 --- a/lib/hachoir_parser/image/exif.py +++ b/lib/hachoir_parser/image/exif.py @@ -1,19 +1,26 @@ """ -EXIF metadata parser (can be found in a 
JPEG picture for example) +EXIF metadata parser; also parses TIFF file headers. -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao + +References: +- Exif 2.2 Specification (JEITA CP-3451) + http://www.exif.org/Exif2-2.PDF +- TIFF 6.0 Specification + http://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError, UInt8, UInt16, UInt32, - Int32, Enum, String, - Bytes, SubFile, - NullBytes, createPaddingField) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN, NETWORK_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict + Int8, Int16, Int32, + Float32, Float64, + Enum, String, Bytes, SubFile, + NullBits, NullBytes, createPaddingField) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN, NETWORK_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict -MAX_COUNT = 1000 +MAX_COUNT = 1000 # maximum number of array entries in an IFD entry (excluding string types) def rationalFactory(class_name, size, field_class): class Rational(FieldSet): @@ -32,6 +39,16 @@ def rationalFactory(class_name, size, field_class): RationalInt32 = rationalFactory("RationalInt32", 64, Int32) RationalUInt32 = rationalFactory("RationalUInt32", 64, UInt32) +class ASCIIString(String): + def __init__(self, parent, name, nbytes, description=None, strip=' \0', charset='ISO-8859-1', *args, **kwargs): + String.__init__(self, parent, name, nbytes, description, strip, charset, *args, **kwargs) + +class IFDTag(UInt16): + def getTag(self): + return self.parent.TAG_INFO.get(self.value, (hex(self.value), "")) + def createDisplay(self): + return self.getTag()[0] + class BasicIFDEntry(FieldSet): TYPE_BYTE = 0 TYPE_UNDEFINED = 7 @@ -39,323 +56,364 @@ class BasicIFDEntry(FieldSet): TYPE_SIGNED_RATIONAL = 10 TYPE_INFO = { 1: 
(UInt8, "BYTE (8 bits)"), - 2: (String, "ASCII (8 bits)"), + 2: (ASCIIString, "ASCII (8 bits)"), 3: (UInt16, "SHORT (16 bits)"), 4: (UInt32, "LONG (32 bits)"), 5: (RationalUInt32, "RATIONAL (2x LONG, 64 bits)"), + 6: (Int8, "SBYTE (8 bits)"), 7: (Bytes, "UNDEFINED (8 bits)"), - 9: (Int32, "SIGNED LONG (32 bits)"), - 10: (RationalInt32, "SRATIONAL (2x SIGNED LONGs, 64 bits)"), + 8: (Int16, "SSHORT (16 bits)"), + 9: (Int32, "SLONG (32 bits)"), + 10: (RationalInt32, "SRATIONAL (2x SLONG, 64 bits)"), + 11: (Float32, "FLOAT (32 bits)"), + 12: (Float64, "DOUBLE (64 bits)"), } ENTRY_FORMAT = createDict(TYPE_INFO, 0) TYPE_NAME = createDict(TYPE_INFO, 1) + TAG_INFO = {} def createFields(self): - yield Enum(textHandler(UInt16(self, "tag", "Tag"), hexadecimal), self.TAG_NAME) - yield Enum(textHandler(UInt16(self, "type", "Type"), hexadecimal), self.TYPE_NAME) + yield IFDTag(self, "tag", "Tag") + yield Enum(UInt16(self, "type", "Type"), self.TYPE_NAME) + self.value_cls = self.ENTRY_FORMAT.get(self['type'].value, Bytes) + if issubclass(self.value_cls, Bytes): + self.value_size = 8 + else: + self.value_size = self.value_cls.static_size yield UInt32(self, "count", "Count") - if self["type"].value not in (self.TYPE_BYTE, self.TYPE_UNDEFINED) \ - and MAX_COUNT < self["count"].value: + + if not issubclass(self.value_cls, Bytes) \ + and self["count"].value > MAX_COUNT: raise ParserError("EXIF: Invalid count value (%s)" % self["count"].value) - value_size, array_size = self.getSizes() - # Get offset/value - if not value_size: + count = self['count'].value + totalsize = self.value_size * count + if count == 0: yield NullBytes(self, "padding", 4) - elif value_size <= 32: - if 1 < array_size: - name = "value[]" + elif totalsize <= 32: + name = "value" + if issubclass(self.value_cls, Bytes): + yield self.value_cls(self, name, count) else: - name = "value" - kw = {} - cls = self.value_cls - if cls is String: - args = (self, name, value_size/8, "Value") - kw["strip"] = " \0" - kw["charset"] 
= "ISO-8859-1" - elif cls is Bytes: - args = (self, name, value_size/8, "Value") - else: - args = (self, name, "Value") - for index in xrange(array_size): - yield cls(*args, **kw) - - size = array_size * value_size - if size < 32: - yield NullBytes(self, "padding", (32-size)//8) + if count > 1: + name += "[]" + for i in xrange(count): + yield self.value_cls(self, name) + if totalsize < 32: + yield NullBits(self, "padding", 32-totalsize) else: yield UInt32(self, "offset", "Value offset") - def getSizes(self): - """ - Returns (value_size, array_size): value_size in bits and - array_size in number of items. - """ - # Create format - self.value_cls = self.ENTRY_FORMAT.get(self["type"].value, Bytes) + def createValue(self): + if "value" in self: + return self['value'].value + return None - # Set size - count = self["count"].value - if self.value_cls in (String, Bytes): - return 8 * count, 1 - else: - return self.value_cls.static_size * count, count + def createDescription(self): + return "Entry: "+self["tag"].getTag()[1] -class ExifEntry(BasicIFDEntry): - OFFSET_JPEG_SOI = 0x0201 +class IFDEntry(BasicIFDEntry): EXIF_IFD_POINTER = 0x8769 + GPS_IFD_POINTER = 0x8825 + INTEROP_IFD_POINTER = 0xA005 - TAG_WIDTH = 0xA002 - TAG_HEIGHT = 0xA003 - - TAG_GPS_LATITUDE_REF = 0x0001 - TAG_GPS_LATITUDE = 0x0002 - TAG_GPS_LONGITUDE_REF = 0x0003 - TAG_GPS_LONGITUDE = 0x0004 - TAG_GPS_ALTITUDE_REF = 0x0005 - TAG_GPS_ALTITUDE = 0x0006 - TAG_GPS_TIMESTAMP = 0x0007 - TAG_GPS_DATESTAMP = 0x001d - - TAG_IMG_TITLE = 0x010e - TAG_FILE_TIMESTAMP = 0x0132 - TAG_SOFTWARE = 0x0131 - TAG_CAMERA_MODEL = 0x0110 - TAG_CAMERA_MANUFACTURER = 0x010f - TAG_ORIENTATION = 0x0112 - TAG_EXPOSURE = 0x829A - TAG_FOCAL = 0x829D - TAG_BRIGHTNESS = 0x9203 - TAG_APERTURE = 0x9205 - TAG_USER_COMMENT = 0x9286 - - TAG_NAME = { - # GPS - 0x0000: "GPS version ID", - 0x0001: "GPS latitude ref", - 0x0002: "GPS latitude", - 0x0003: "GPS longitude ref", - 0x0004: "GPS longitude", - 0x0005: "GPS altitude ref", - 0x0006: "GPS 
altitude", - 0x0007: "GPS timestamp", - 0x0008: "GPS satellites", - 0x0009: "GPS status", - 0x000a: "GPS measure mode", - 0x000b: "GPS DOP", - 0x000c: "GPS speed ref", - 0x000d: "GPS speed", - 0x000e: "GPS track ref", - 0x000f: "GPS track", - 0x0010: "GPS img direction ref", - 0x0011: "GPS img direction", - 0x0012: "GPS map datum", - 0x0013: "GPS dest latitude ref", - 0x0014: "GPS dest latitude", - 0x0015: "GPS dest longitude ref", - 0x0016: "GPS dest longitude", - 0x0017: "GPS dest bearing ref", - 0x0018: "GPS dest bearing", - 0x0019: "GPS dest distance ref", - 0x001a: "GPS dest distance", - 0x001b: "GPS processing method", - 0x001c: "GPS area information", - 0x001d: "GPS datestamp", - 0x001e: "GPS differential", - - 0x0100: "Image width", - 0x0101: "Image height", - 0x0102: "Number of bits per component", - 0x0103: "Compression scheme", - 0x0106: "Pixel composition", - TAG_ORIENTATION: "Orientation of image", - 0x0115: "Number of components", - 0x011C: "Image data arrangement", - 0x0212: "Subsampling ratio Y to C", - 0x0213: "Y and C positioning", - 0x011A: "Image resolution width direction", - 0x011B: "Image resolution in height direction", - 0x0128: "Unit of X and Y resolution", - - 0x0111: "Image data location", - 0x0116: "Number of rows per strip", - 0x0117: "Bytes per compressed strip", - 0x0201: "Offset to JPEG SOI", - 0x0202: "Bytes of JPEG data", - - 0x012D: "Transfer function", - 0x013E: "White point chromaticity", - 0x013F: "Chromaticities of primaries", - 0x0211: "Color space transformation matrix coefficients", - 0x0214: "Pair of blank and white reference values", - - TAG_FILE_TIMESTAMP: "File change date and time", - TAG_IMG_TITLE: "Image title", - TAG_CAMERA_MANUFACTURER: "Camera (Image input equipment) manufacturer", - TAG_CAMERA_MODEL: "Camera (Input input equipment) model", - TAG_SOFTWARE: "Software", - 0x013B: "File change date and time", - 0x8298: "Copyright holder", - 0x8769: "Exif IFD Pointer", - - TAG_EXPOSURE: "Exposure time", - TAG_FOCAL: 
"F number", - 0x8822: "Exposure program", - 0x8824: "Spectral sensitivity", - 0x8827: "ISO speed rating", - 0x8828: "Optoelectric conversion factor OECF", - 0x9201: "Shutter speed", - 0x9202: "Aperture", - TAG_BRIGHTNESS: "Brightness", - 0x9204: "Exposure bias", - TAG_APERTURE: "Maximum lens aperture", - 0x9206: "Subject distance", - 0x9207: "Metering mode", - 0x9208: "Light source", - 0x9209: "Flash", - 0x920A: "Lens focal length", - 0x9214: "Subject area", - 0xA20B: "Flash energy", - 0xA20C: "Spatial frequency response", - 0xA20E: "Focal plane X resolution", - 0xA20F: "Focal plane Y resolution", - 0xA210: "Focal plane resolution unit", - 0xA214: "Subject location", - 0xA215: "Exposure index", - 0xA217: "Sensing method", - 0xA300: "File source", - 0xA301: "Scene type", - 0xA302: "CFA pattern", - 0xA401: "Custom image processing", - 0xA402: "Exposure mode", - 0xA403: "White balance", - 0xA404: "Digital zoom ratio", - 0xA405: "Focal length in 35 mm film", - 0xA406: "Scene capture type", - 0xA407: "Gain control", - 0xA408: "Contrast", - - 0x9000: "Exif version", - 0xA000: "Supported Flashpix version", - 0xA001: "Color space information", - 0x9101: "Meaning of each component", - 0x9102: "Image compression mode", - TAG_WIDTH: "Valid image width", - TAG_HEIGHT: "Valid image height", - 0x927C: "Manufacturer notes", - TAG_USER_COMMENT: "User comments", - 0xA004: "Related audio file", - 0x9003: "Date and time of original data generation", - 0x9004: "Date and time of digital data generation", - 0x9290: "DateTime subseconds", - 0x9291: "DateTimeOriginal subseconds", - 0x9292: "DateTimeDigitized subseconds", - 0xA420: "Unique image ID", - 0xA005: "Interoperability IFD Pointer" + TAG_INFO = { + # image data structure + 0x0100: ("ImageWidth", "Image width"), + 0x0101: ("ImageLength", "Image height"), + 0x0102: ("BitsPerSample", "Number of bits per component"), + 0x0103: ("Compression", "Compression scheme"), + 0x0106: ("PhotometricInterpretation", "Pixel composition"), + 
0x0112: ("Orientation", "Orientation of image"), + 0x0115: ("SamplesPerPixel", "Number of components"), + 0x011C: ("PlanarConfiguration", "Image data arrangement"), + 0x0212: ("YCbCrSubSampling", "Subsampling ratio of Y to C"), + 0x0213: ("YCbCrPositioning", "Y and C positioning"), + 0x011A: ("XResolution", "Image resolution in width direction"), + 0x011B: ("YResolution", "Image resolution in height direction"), + 0x0128: ("ResolutionUnit", "Unit of X and Y resolution"), + # recording offset + 0x0111: ("StripOffsets", "Image data location"), + 0x0116: ("RowsPerStrip", "Number of rows per strip"), + 0x0117: ("StripByteCounts", "Bytes per compressed strip"), + 0x0201: ("JPEGInterchangeFormat", "Offset to JPEG SOI"), + 0x0202: ("JPEGInterchangeFormatLength", "Bytes of JPEG data"), + # image data characteristics + 0x012D: ("TransferFunction", "Transfer function"), + 0x013E: ("WhitePoint", "White point chromaticity"), + 0x013F: ("PrimaryChromaticities", "Chromaticities of primaries"), + 0x0211: ("YCbCrCoefficients", "Color space transformation matrix coefficients"), + 0x0214: ("ReferenceBlackWhite", "Pair of black and white reference values"), + # other tags + 0x0132: ("DateTime", "File change date and time"), + 0x010E: ("ImageDescription", "Image title"), + 0x010F: ("Make", "Image input equipment manufacturer"), + 0x0110: ("Model", "Image input equipment model"), + 0x0131: ("Software", "Software used"), + 0x013B: ("Artist", "Person who created the image"), + 0x8298: ("Copyright", "Copyright holder"), + # TIFF-specific tags + 0x00FE: ("NewSubfileType", "NewSubfileType"), + 0x00FF: ("SubfileType", "SubfileType"), + 0x0107: ("Threshholding", "Threshholding"), + 0x0108: ("CellWidth", "CellWidth"), + 0x0109: ("CellLength", "CellLength"), + 0x010A: ("FillOrder", "FillOrder"), + 0x010D: ("DocumentName", "DocumentName"), + 0x0118: ("MinSampleValue", "MinSampleValue"), + 0x0119: ("MaxSampleValue", "MaxSampleValue"), + 0x011D: ("PageName", "PageName"), + 0x011E: ("XPosition", 
"XPosition"), + 0x011F: ("YPosition", "YPosition"), + 0x0120: ("FreeOffsets", "FreeOffsets"), + 0x0121: ("FreeByteCounts", "FreeByteCounts"), + 0x0122: ("GrayResponseUnit", "GrayResponseUnit"), + 0x0123: ("GrayResponseCurve", "GrayResponseCurve"), + 0x0124: ("T4Options", "T4Options"), + 0x0125: ("T6Options", "T6Options"), + 0x0129: ("PageNumber", "PageNumber"), + 0x013C: ("HostComputer", "HostComputer"), + 0x013D: ("Predictor", "Predictor"), + 0x0140: ("ColorMap", "ColorMap"), + 0x0141: ("HalftoneHints", "HalftoneHints"), + 0x0142: ("TileWidth", "TileWidth"), + 0x0143: ("TileLength", "TileLength"), + 0x0144: ("TileOffsets", "TileOffsets"), + 0x0145: ("TileByteCounts", "TileByteCounts"), + 0x014C: ("InkSet", "InkSet"), + 0x014D: ("InkNames", "InkNames"), + 0x014E: ("NumberOfInks", "NumberOfInks"), + 0x0150: ("DotRange", "DotRange"), + 0x0151: ("TargetPrinter", "TargetPrinter"), + 0x0152: ("ExtraSamples", "ExtraSamples"), + 0x0153: ("SampleFormat", "SampleFormat"), + 0x0154: ("SMinSampleValue", "SMinSampleValue"), + 0x0155: ("SMaxSampleValue", "SMaxSampleValue"), + 0x0156: ("TransferRange", "TransferRange"), + 0x0200: ("JPEGProc", "JPEGProc"), + 0x0203: ("JPEGRestartInterval", "JPEGRestartInterval"), + 0x0205: ("JPEGLosslessPredictors", "JPEGLosslessPredictors"), + 0x0206: ("JPEGPointTransforms", "JPEGPointTransforms"), + 0x0207: ("JPEGQTables", "JPEGQTables"), + 0x0208: ("JPEGDCTables", "JPEGDCTables"), + 0x0209: ("JPEGACTables", "JPEGACTables"), + # IFD pointers + EXIF_IFD_POINTER: ("IFDExif", "Exif IFD Pointer"), + GPS_IFD_POINTER: ("IFDGPS", "GPS IFD Pointer"), + INTEROP_IFD_POINTER: ("IFDInterop", "Interoperability IFD Pointer"), } - def createDescription(self): - return "Entry: %s" % self["tag"].display +class ExifIFDEntry(BasicIFDEntry): + TAG_INFO = { + # version + 0x9000: ("ExifVersion", "Exif version"), + 0xA000: ("FlashpixVersion", "Supported Flashpix version"), + # image data characteristics + 0xA001: ("ColorSpace", "Color space information"), + # image 
configuration + 0x9101: ("ComponentsConfiguration", "Meaning of each component"), + 0x9102: ("CompressedBitsPerPixel", "Image compression mode"), + 0xA002: ("PixelXDimension", "Valid image width"), + 0xA003: ("PixelYDimension", "Valid image height"), + # user information + 0x927C: ("MakerNote", "Manufacturer notes"), + 0x9286: ("UserComment", "User comments"), + # related file information + 0xA004: ("RelatedSoundFile", "Related audio file"), + # date and time + 0x9003: ("DateTimeOriginal", "Date and time of original data generation"), + 0x9004: ("DateTimeDigitized", "Date and time of digital data generation"), + 0x9290: ("SubSecTime", "DateTime subseconds"), + 0x9291: ("SubSecTimeOriginal", "DateTimeOriginal subseconds"), + 0x9292: ("SubSecTimeDigitized", "DateTimeDigitized subseconds"), + # picture-taking conditions + 0x829A: ("ExposureTime", "Exposure time"), + 0x829D: ("FNumber", "F number"), + 0x8822: ("ExposureProgram", "Exposure program"), + 0x8824: ("SpectralSensitivity", "Spectral sensitivity"), + 0x8827: ("ISOSpeedRatings", "ISO speed rating"), + 0x8828: ("OECF", "Optoelectric conversion factor"), + 0x9201: ("ShutterSpeedValue", "Shutter speed"), + 0x9202: ("ApertureValue", "Aperture"), + 0x9203: ("BrightnessValue", "Brightness"), + 0x9204: ("ExposureBiasValue", "Exposure bias"), + 0x9205: ("MaxApertureValue", "Maximum lens aperture"), + 0x9206: ("SubjectDistance", "Subject distance"), + 0x9207: ("MeteringMode", "Metering mode"), + 0x9208: ("LightSource", "Light source"), + 0x9209: ("Flash", "Flash"), + 0x920A: ("FocalLength", "Lens focal length"), + 0x9214: ("SubjectArea", "Subject area"), + 0xA20B: ("FlashEnergy", "Flash energy"), + 0xA20C: ("SpatialFrequencyResponse", "Spatial frequency response"), + 0xA20E: ("FocalPlaneXResolution", "Focal plane X resolution"), + 0xA20F: ("FocalPlaneYResolution", "Focal plane Y resolution"), + 0xA210: ("FocalPlaneResolutionUnit", "Focal plane resolution unit"), + 0xA214: ("SubjectLocation", "Subject location"), + 
0xA215: ("ExposureIndex", "Exposure index"), + 0xA217: ("SensingMethod", "Sensing method"), + 0xA300: ("FileSource", "File source"), + 0xA301: ("SceneType", "Scene type"), + 0xA302: ("CFAPattern", "CFA pattern"), + 0xA401: ("CustomRendered", "Custom image processing"), + 0xA402: ("ExposureMode", "Exposure mode"), + 0xA403: ("WhiteBalance", "White balance"), + 0xA404: ("DigitalZoomRatio", "Digital zoom ratio"), + 0xA405: ("FocalLengthIn35mmFilm", "Focal length in 35 mm film"), + 0xA406: ("SceneCaptureType", "Scene capture type"), + 0xA407: ("GainControl", "Gain control"), + 0xA408: ("Contrast", "Contrast"), + 0xA409: ("Saturation", "Saturation"), + 0xA40A: ("Sharpness", "Sharpness"), + 0xA40B: ("DeviceSettingDescription", "Device settings description"), + 0xA40C: ("SubjectDistanceRange", "Subject distance range"), + # other tags + 0xA420: ("ImageUniqueID", "Unique image ID"), + } -def sortExifEntry(a,b): - return int( a["offset"].value - b["offset"].value ) +class GPSIFDEntry(BasicIFDEntry): + TAG_INFO = { + 0x0000: ("GPSVersionID", "GPS tag version"), + 0x0001: ("GPSLatitudeRef", "North or South Latitude"), + 0x0002: ("GPSLatitude", "Latitude"), + 0x0003: ("GPSLongitudeRef", "East or West Longitude"), + 0x0004: ("GPSLongitude", "Longitude"), + 0x0005: ("GPSAltitudeRef", "Altitude reference"), + 0x0006: ("GPSAltitude", "Altitude"), + 0x0007: ("GPSTimeStamp", "GPS time (atomic clock)"), + 0x0008: ("GPSSatellites", "GPS satellites used for measurement"), + 0x0009: ("GPSStatus", "GPS receiver status"), + 0x000A: ("GPSMeasureMode", "GPS measurement mode"), + 0x000B: ("GPSDOP", "Measurement precision"), + 0x000C: ("GPSSpeedRef", "Speed unit"), + 0x000D: ("GPSSpeed", "Speed of GPS receiver"), + 0x000E: ("GPSTrackRef", "Reference for direction of movement"), + 0x000F: ("GPSTrack", "Direction of movement"), + 0x0010: ("GPSImgDirectionRef", "Reference for direction of image"), + 0x0011: ("GPSImgDirection", "Direction of image"), + 0x0012: ("GPSMapDatum", "Geodetic survey 
data used"), + 0x0013: ("GPSDestLatitudeRef", "Reference for latitude of destination"), + 0x0014: ("GPSDestLatitude", "Latitude of destination"), + 0x0015: ("GPSDestLongitudeRef", "Reference for longitude of destination"), + 0x0016: ("GPSDestLongitude", "Longitude of destination"), + 0x0017: ("GPSDestBearingRef", "Reference for bearing of destination"), + 0x0018: ("GPSDestBearing", "Bearing of destination"), + 0x0019: ("GPSDestDistanceRef", "Reference for distance to destination"), + 0x001A: ("GPSDestDistance", "Distance to destination"), + 0x001B: ("GPSProcessingMethod", "Name of GPS processing method"), + 0x001C: ("GPSAreaInformation", "Name of GPS area"), + 0x001D: ("GPSDateStamp", "GPS date"), + 0x001E: ("GPSDifferential", "GPS differential correction"), + } -class ExifIFD(FieldSet): - def seek(self, offset): - """ - Seek to byte address relative to parent address. - """ - padding = offset - (self.address + self.current_size)/8 - if 0 < padding: - return createPaddingField(self, padding*8) - else: - return None +class InteropIFDEntry(BasicIFDEntry): + TAG_INFO = { + 0x0001: ("InteroperabilityIndex", "Interoperability Identification"), + } + +class IFD(SeekableFieldSet): + EntryClass = IFDEntry + def __init__(self, parent, name, base_addr): + self.base_addr = base_addr + SeekableFieldSet.__init__(self, parent, name) def createFields(self): - offset_diff = 6 yield UInt16(self, "count", "Number of entries") - entries = [] - next_chunk_offset = None count = self["count"].value - if not count: - return - while count: - addr = self.absolute_address + self.current_size - next = self.stream.readBits(addr, 32, NETWORK_ENDIAN) - if next in (0, 0xF0000000): - break - entry = ExifEntry(self, "entry[]") - yield entry - if entry["tag"].value in (ExifEntry.EXIF_IFD_POINTER, ExifEntry.OFFSET_JPEG_SOI): - next_chunk_offset = entry["value"].value + offset_diff - if 32 < entry.getSizes()[0]: - entries.append(entry) - count -= 1 - yield UInt32(self, "next", "Next IFD offset") - 
try: - entries.sort( sortExifEntry ) - except TypeError: - raise ParserError("Unable to sort entries!") - value_index = 0 - for entry in entries: - padding = self.seek(entry["offset"].value + offset_diff) - if padding is not None: - yield padding - - value_size, array_size = entry.getSizes() - if not array_size: + if count == 0: + raise ParserError("IFDs cannot be empty.") + for i in xrange(count): + yield self.EntryClass(self, "entry[]") + yield UInt32(self, "next", "Offset to next IFD") + for i in xrange(count): + entry = self['entry[%d]'%i] + if 'offset' not in entry: continue - cls = entry.value_cls - if 1 < array_size: - name = "value_%s[]" % entry.name + self.seekByte(entry['offset'].value+self.base_addr//8, relative=False) + count = entry['count'].value + name = "value[%s]"%i + if issubclass(entry.value_cls, Bytes): + yield entry.value_cls(self, name, count) else: - name = "value_%s" % entry.name - desc = "Value of \"%s\"" % entry["tag"].display - if cls is String: - for index in xrange(array_size): - yield cls(self, name, value_size/8, desc, strip=" \0", charset="ISO-8859-1") - elif cls is Bytes: - for index in xrange(array_size): - yield cls(self, name, value_size/8, desc) - else: - for index in xrange(array_size): - yield cls(self, name, desc) - value_index += 1 - if next_chunk_offset is not None: - padding = self.seek(next_chunk_offset) - if padding is not None: - yield padding + if count > 1: + name += "[]" + for i in xrange(count): + yield entry.value_cls(self, name) - def createDescription(self): - return "Exif IFD (id %s)" % self["id"].value + def getEntryValues(self, entry): + n = int(entry.name.rsplit('[',1)[1].strip(']')) + if 'offset' in entry: + field = 'value[%d]'%n + base = self + else: + field = 'value' + base = entry + if field in base: + return [base[field]] + else: + return base.array(field) -class Exif(FieldSet): +class ExifIFD(IFD): + EntryClass = ExifIFDEntry + +class GPSIFD(IFD): + EntryClass = GPSIFDEntry + +class InteropIFD(IFD): + 
EntryClass = InteropIFDEntry + +IFD_TAGS = { + IFDEntry.EXIF_IFD_POINTER: ('exif', ExifIFD), + IFDEntry.GPS_IFD_POINTER: ('exif_gps', GPSIFD), + IFDEntry.INTEROP_IFD_POINTER: ('exif_interop', InteropIFD), +} + +def TIFF(self): + iff_start = self.absolute_address + self.current_size + yield String(self, "endian", 2, "Endian ('II' or 'MM')", charset="ASCII") + if self["endian"].value not in ("II", "MM"): + raise ParserError("Invalid endian!") + if self["endian"].value == "II": + self.endian = LITTLE_ENDIAN + else: + self.endian = BIG_ENDIAN + + yield UInt16(self, "version", "TIFF version number") + yield UInt32(self, "img_dir_ofs", "Next image directory offset") + offsets = [(self['img_dir_ofs'].value, 'ifd[]', IFD)] + while offsets: + offset, name, klass = offsets.pop(0) + self.seekByte(offset+iff_start//8, relative=False) + ifd = klass(self, name, iff_start) + yield ifd + for entry in ifd.array('entry'): + tag = entry['tag'].value + if tag in IFD_TAGS: + name, klass = IFD_TAGS[tag] + offsets.append((ifd.getEntryValues(entry)[0].value, name+'[]', klass)) + if ifd['next'].value != 0: + offsets.append((ifd['next'].value, 'ifd[]', IFD)) + +class Exif(SeekableFieldSet): def createFields(self): # Headers yield String(self, "header", 6, "Header (Exif\\0\\0)", charset="ASCII") if self["header"].value != "Exif\0\0": raise ParserError("Invalid EXIF signature!") - yield String(self, "byte_order", 2, "Byte order", charset="ASCII") - if self["byte_order"].value not in ("II", "MM"): - raise ParserError("Invalid endian!") - if self["byte_order"].value == "II": - self.endian = LITTLE_ENDIAN - else: - self.endian = BIG_ENDIAN - yield UInt16(self, "version", "TIFF version number") - yield UInt32(self, "img_dir_ofs", "Next image directory offset") - while not self.eof: - addr = self.absolute_address + self.current_size - tag = self.stream.readBits(addr, 16, NETWORK_ENDIAN) - if tag == 0xFFD8: - size = (self._size - self.current_size) // 8 - yield SubFile(self, "thumbnail", size, 
"Thumbnail (JPEG file)", mime_type="image/jpeg") - break - elif tag == 0xFFFF: - break - yield ExifIFD(self, "ifd[]", "IFD") - padding = self.seekBit(self._size) - if padding is not None: - yield padding - + iff_start = self.absolute_address + self.current_size + ifds = [] + for field in TIFF(self): + yield field + if isinstance(field, IFD): + ifds.append(field) + for ifd in ifds: + data = {} + for i, entry in enumerate(ifd.array('entry')): + data[entry['tag'].display] = entry + if 'JPEGInterchangeFormat' in data and 'JPEGInterchangeFormatLength' in data: + offs = ifd.getEntryValues(data['JPEGInterchangeFormat'])[0].value + size = ifd.getEntryValues(data['JPEGInterchangeFormatLength'])[0].value + if size == 0: continue + self.seekByte(offs + iff_start//8, relative=False) + yield SubFile(self, "thumbnail[]", size, "Thumbnail (JPEG file)", mime_type="image/jpeg") diff --git a/lib/hachoir_parser/image/gif.py b/lib/hachoir_parser/image/gif.py index 777ba658..b870b673 100644 --- a/lib/hachoir_parser/image/gif.py +++ b/lib/hachoir_parser/image/gif.py @@ -1,25 +1,162 @@ """ GIF picture parser. 
-Author: Victor Stinner +Author: Victor Stinner, Robert Xiao + +- GIF format + http://local.wasp.uwa.edu.au/~pbourke/dataformats/gif/ +- LZW compression + http://en.wikipedia.org/wiki/LZW """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Enum, UInt8, UInt16, Bit, Bits, NullBytes, String, PascalString8, Character, NullBits, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import textHandler, displayHandler, hexadecimal +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.stream import StringInputStream +from hachoir_core.tools import humanDuration, paddingSize +from hachoir_core.text_handler import textHandler, displayHandler, hexadecimal # Maximum image dimension (in pixel) MAX_WIDTH = 6000 MAX_HEIGHT = MAX_WIDTH MAX_FILE_SIZE = 100 * 1024 * 1024 +class FragmentGroup: + def __init__(self, parser): + self.items = [] + self.parser = parser + self.args = {} + + def add(self, item): + self.items.append(item) + + def createInputStream(self): + # FIXME: Use lazy stream creation + data = [] + for item in self.items: + data.append( item["rawdata"].value ) + data = "".join(data) + + # FIXME: Use smarter code to send arguments + self.args["startbits"] = self.items[0].parent["lzw_min_code_size"].value + tags = {"class": self.parser, "args": self.args} + tags = tags.iteritems() + return StringInputStream(data, "", tags=tags) + +class CustomFragment(FieldSet): + def __init__(self, parent, name, size, parser, description=None, group=None): + FieldSet.__init__(self, parent, name, description, size=size) + if not group: + group = FragmentGroup(parser) + self.group = group + self.group.add(self) + + def createFields(self): + 
yield UInt8(self, "size") + yield RawBytes(self, "rawdata", self["size"].value) + + def _createInputStream(self, **args): + return self.group.createInputStream() + +def rle_repr(l): + """Run-length encode a list into an "eval"-able form + + Example: + >>> rle_repr([20, 16, 16, 16, 16, 16, 18, 18, 65]) + '[20] + [16]*5 + [18]*2 + [65]' + + Adapted from http://twistedmatrix.com/trac/browser/trunk/twisted/python/dxprofile.py + """ + def add_rle(previous, runlen, result): + if isinstance(previous, (list, tuple)): + previous = rle_repr(previous) + if runlen>1: + result.append('[%s]*%i'%(previous, runlen)) + else: + if result and '*' not in result[-1]: + result[-1] = '[%s, %s]'%(result[-1][1:-1], previous) + else: + result.append('[%s]'%previous) + iterable = iter(l) + runlen = 1 + result = [] + try: + previous = iterable.next() + except StopIteration: + return "[]" + for element in iterable: + if element == previous: + runlen = runlen + 1 + continue + else: + add_rle(previous, runlen, result) + previous = element + runlen = 1 + add_rle(previous, runlen, result) + return ' + '.join(result) + +class GifImageBlock(Parser): + endian = LITTLE_ENDIAN + def createFields(self): + dictionary = {} + self.nbits = self.startbits + CLEAR_CODE = 2**self.nbits + END_CODE = CLEAR_CODE + 1 + compress_code = CLEAR_CODE + 2 + obuf = [] + output = [] + while True: + if compress_code >= 2**self.nbits: + self.nbits += 1 + code = Bits(self, "code[]", self.nbits) + if code.value == CLEAR_CODE: + if compress_code == 2**(self.nbits-1): + # this fixes a bizarre edge case where the reset code could + # appear just after the bits incremented. Apparently, the + # correct behaviour is to express the reset code with the + # old number of bits, not the new... 
+ code = Bits(self, "code[]", self.nbits-1) + self.nbits = self.startbits + 1 + dictionary = {} + compress_code = CLEAR_CODE + 2 + obuf = [] + code._description = "Reset Code (LZW code %i)" % code.value + yield code + continue + elif code.value == END_CODE: + code._description = "End of Information Code (LZW code %i)" % code.value + yield code + break + if code.value < CLEAR_CODE: # literal + if obuf: + chain = obuf + [code.value] + dictionary[compress_code] = chain + compress_code += 1 + obuf = [code.value] + output.append(code.value) + code._description = "Literal Code %i" % code.value + elif code.value >= CLEAR_CODE + 2: + if code.value in dictionary: + chain = dictionary[code.value] + code._description = "Compression Code %i (found in dictionary as %s)" % (code.value, rle_repr(chain)) + else: + chain = obuf + [obuf[0]] + code._description = "Compression Code %i (not found in dictionary; guessed to be %s)" % (code.value, rle_repr(chain)) + dictionary[compress_code] = obuf + [chain[0]] + compress_code += 1 + obuf = chain + output += chain + code._description += "; Current Decoded Length %i"%len(output) + yield code + padding = paddingSize(self.current_size, 8) + if padding: + yield NullBits(self, "padding[]", padding) + class Image(FieldSet): def createFields(self): yield UInt16(self, "left", "Left") @@ -27,24 +164,26 @@ class Image(FieldSet): yield UInt16(self, "width", "Width") yield UInt16(self, "height", "Height") - yield Bits(self, "bpp", 3, "Bits / pixel minus one") - yield NullBits(self, "nul", 2) - yield Bit(self, "sorted", "Sorted??") + yield Bits(self, "size_local_map", 3, "log2(size of local map) minus one") + yield NullBits(self, "reserved", 2) + yield Bit(self, "sort_flag", "Is the local map sorted by decreasing importance?") yield Bit(self, "interlaced", "Interlaced?") yield Bit(self, "has_local_map", "Use local color map?") if self["has_local_map"].value: - nb_color = 1 << (1 + self["bpp"].value) + nb_color = 1 << (1 + self["size_local_map"].value) 
yield PaletteRGB(self, "local_map", nb_color, "Local color map") - yield UInt8(self, "code_size", "LZW Minimum Code Size") + yield UInt8(self, "lzw_min_code_size", "LZW Minimum Code Size") + group = None while True: - blen = UInt8(self, "block_len[]", "Block Length") - yield blen - if blen.value != 0: - yield RawBytes(self, "data[]", blen.value, "Image Data") - else: + size = UInt8(self, "block_size") + if size.value == 0: break + block = CustomFragment(self, "image_block[]", None, GifImageBlock, "GIF Image Block", group) + group = block.group + yield block + yield NullBytes(self, "terminator", 1, "Terminator (0)") def createDescription(self): return "Image: %ux%u pixels at (%u,%u)" % ( @@ -64,16 +203,19 @@ NETSCAPE_CODE = { def parseApplicationExtension(parent): yield PascalString8(parent, "app_name", "Application name") - yield UInt8(parent, "size") - size = parent["size"].value - if parent["app_name"].value == "NETSCAPE2.0" and size == 3: - yield Enum(UInt8(parent, "netscape_code"), NETSCAPE_CODE) - if parent["netscape_code"].value == 1: - yield UInt16(parent, "loop_count") + while True: + size = UInt8(parent, "size[]") + if size.value == 0: + break + yield size + if parent["app_name"].value == "NETSCAPE2.0" and size.value == 3: + yield Enum(UInt8(parent, "netscape_code"), NETSCAPE_CODE) + if parent["netscape_code"].value == 1: + yield UInt16(parent, "loop_count") + else: + yield RawBytes(parent, "raw[]", 2) else: - yield RawBytes(parent, "raw", 2) - else: - yield RawBytes(parent, "raw", size) + yield RawBytes(parent, "raw[]", size.value) yield NullBytes(parent, "terminator", 1, "Terminator (0)") def parseGraphicControl(parent): @@ -149,15 +291,20 @@ class ScreenDescriptor(FieldSet): def createFields(self): yield UInt16(self, "width", "Width") yield UInt16(self, "height", "Height") - yield Bits(self, "bpp", 3, "Bits per pixel minus one") - yield Bit(self, "reserved", "(reserved)") + yield Bits(self, "size_global_map", 3, "log2(size of global map) minus one") + 
yield Bit(self, "sort_flag", "Is the global map sorted by decreasing importance?") yield Bits(self, "color_res", 3, "Color resolution minus one") yield Bit(self, "global_map", "Has global map?") yield UInt8(self, "background", "Background color") - yield UInt8(self, "pixel_aspect_ratio", "Pixel Aspect Ratio") + field = UInt8(self, "pixel_aspect_ratio") + if field.value: + field._description = "Pixel aspect ratio: %f (stored as %i)"%((field.value + 15)/64., field.value) + else: + field._description = "Pixel aspect ratio: not specified" + yield field def createDescription(self): - colors = 1 << (self["bpp"].value+1) + colors = 1 << (self["size_global_map"].value+1) return "Screen descriptor: %ux%u pixels %u colors" \ % (self["width"].value, self["height"].value, colors) @@ -196,7 +343,7 @@ class GifFile(Parser): yield ScreenDescriptor(self, "screen") if self["screen/global_map"].value: - bpp = (self["screen/bpp"].value+1) + bpp = (self["screen/size_global_map"].value+1) yield PaletteRGB(self, "color_map", 1 << bpp, "Color map") self.color_map = self["color_map"] else: diff --git a/lib/hachoir_parser/image/ico.py b/lib/hachoir_parser/image/ico.py index fc4282d7..193a81c6 100644 --- a/lib/hachoir_parser/image/ico.py +++ b/lib/hachoir_parser/image/ico.py @@ -4,12 +4,12 @@ Microsoft Windows icon and cursor file format parser. 
Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, Enum, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGBA -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.win32 import BitmapInfoHeader +from hachoir_parser.image.common import PaletteRGBA +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.win32 import BitmapInfoHeader class IconHeader(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/image/iptc.py b/lib/hachoir_parser/image/iptc.py index c814ed7d..6727de7f 100644 --- a/lib/hachoir_parser/image/iptc.py +++ b/lib/hachoir_parser/image/iptc.py @@ -11,9 +11,9 @@ Sources: Author: Victor Stinner """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, String, RawBytes, NullBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.text_handler import textHandler, hexadecimal def IPTC_String(parent, name, desc=None): # Charset may be utf-8, ISO-8859-1, or ... 
diff --git a/lib/hachoir_parser/image/jpeg.py b/lib/hachoir_parser/image/jpeg.py index 89965788..4a361962 100644 --- a/lib/hachoir_parser/image/jpeg.py +++ b/lib/hachoir_parser/image/jpeg.py @@ -8,21 +8,25 @@ Information: http://java.sun.com/j2se/1.5.0/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html#color - APP12: http://search.cpan.org/~exiftool/Image-ExifTool/lib/Image/ExifTool/TagNames.pod +- JPEG Data Format + http://www.w3.org/Graphics/JPEG/itu-t81.pdf -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao """ -from lib.hachoir_core.error import HachoirError -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, Enum, - Bit, Bits, NullBits, NullBytes, +from hachoir_core.error import HachoirError +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, FieldError, + UInt8, UInt16, Enum, Field, + Bit, Bits, NullBits, NullBytes, PaddingBits, String, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.image.exif import Exif -from lib.hachoir_parser.image.photoshop_metadata import PhotoshopMetadata +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.image.exif import Exif +from hachoir_parser.image.photoshop_metadata import PhotoshopMetadata +from hachoir_parser.archive.zlib import build_tree +from hachoir_core.tools import paddingSize, alignValue MAX_FILESIZE = 100 * 1024 * 1024 @@ -144,6 +148,13 @@ class APP12(FieldSet): while not self.eof: yield Ducky(self, "item[]") +class SOFComponent(FieldSet): + def createFields(self): + yield UInt8(self, "component_id") + yield Bits(self, "horiz_sample", 4, "Horizontal sampling factor") + yield Bits(self, "vert_sample", 4, "Vertical 
sampling factor") + yield UInt8(self, "quant_table", "Quantization table destination selector") + class StartOfFrame(FieldSet): def createFields(self): yield UInt8(self, "precision") @@ -153,9 +164,7 @@ class StartOfFrame(FieldSet): yield UInt8(self, "nr_components") for index in range(self["nr_components"].value): - yield UInt8(self, "component_id[]") - yield UInt8(self, "high[]") - yield UInt8(self, "low[]") + yield SOFComponent(self, "component[]") class Comment(FieldSet): def createFields(self): @@ -178,17 +187,25 @@ class AdobeChunk(FieldSet): yield NullBytes(self, "flags1", 2) yield Enum(UInt8(self, "color_transform", "Colorspace transformation code"), self.COLORSPACE_TRANSFORMATION) +class SOSComponent(FieldSet): + def createFields(self): + comp_id = UInt8(self, "component_id") + yield comp_id + if not(1 <= comp_id.value <= self["../nr_components"].value): + raise ParserError("JPEG error: Invalid component-id") + yield Bits(self, "dc_coding_table", 4, "DC entropy coding table destination selector") + yield Bits(self, "ac_coding_table", 4, "AC entropy coding table destination selector") + class StartOfScan(FieldSet): def createFields(self): yield UInt8(self, "nr_components") for index in range(self["nr_components"].value): - comp_id = UInt8(self, "component_id[]") - yield comp_id - if not(1 <= comp_id.value <= self["nr_components"].value): - raise ParserError("JPEG error: Invalid component-id") - yield UInt8(self, "value[]") - yield RawBytes(self, "raw", 3) # TODO: What's this??? 
+ yield SOSComponent(self, "component[]") + yield UInt8(self, "spectral_start", "Start of spectral or predictor selection") + yield UInt8(self, "spectral_end", "End of spectral selection") + yield Bits(self, "bit_pos_high", 4, "Successive approximation bit position high") + yield Bits(self, "bit_pos_low", 4, "Successive approximation bit position low or point transform") class RestartInterval(FieldSet): def createFields(self): @@ -217,6 +234,182 @@ class DefineQuantizationTable(FieldSet): while self.current_size < self.size: yield QuantizationTable(self, "qt[]") +class HuffmanTable(FieldSet): + def createFields(self): + # http://www.w3.org/Graphics/JPEG/itu-t81.pdf, page 40-41 + yield Enum(Bits(self, "table_class", 4, "Table class"), { + 0:"DC or Lossless Table", + 1:"AC Table"}) + yield Bits(self, "index", 4, "Huffman table destination identifier") + for i in xrange(1, 17): + yield UInt8(self, "count[%i]" % i, "Number of codes of length %i" % i) + lengths = [] + remap = {} + for i in xrange(1, 17): + for j in xrange(self["count[%i]" % i].value): + field = UInt8(self, "value[%i][%i]" % (i, j), "Value of code #%i of length %i" % (j, i)) + yield field + remap[len(lengths)] = field.value + lengths.append(i) + self.tree = {} + for i,j in build_tree(lengths).iteritems(): + self.tree[i] = remap[j] + +class DefineHuffmanTable(FieldSet): + def createFields(self): + while self.current_size < self.size: + yield HuffmanTable(self, "huffman_table[]") + +class HuffmanCode(Field): + """Huffman code. 
Uses tree parameter as the Huffman tree.""" + def __init__(self, parent, name, tree, description=""): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + met_ff = False + while (self.size, value) not in tree: + if addr % 8 == 0: + last_byte = stream.readBytes(addr - 8, 1) + if last_byte == '\xFF': + next_byte = stream.readBytes(addr, 1) + if next_byte != '\x00': + raise FieldError("Unexpected byte sequence %r!"%(last_byte + next_byte)) + addr += 8 # hack hack hack + met_ff = True + self._description = "[skipped 8 bits after 0xFF] " + bit = stream.readBits(addr, 1, endian) + value <<= 1 + value += bit + self._size += 1 + addr += 1 + self.createValue = lambda: value + self.realvalue = tree[(self.size, value)] + if met_ff: + self._size += 8 + +class JpegHuffmanImageUnit(FieldSet): + """8x8 block of sample/coefficient values""" + def __init__(self, parent, name, dc_tree, ac_tree, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.dc_tree = dc_tree + self.ac_tree = ac_tree + + def createFields(self): + field = HuffmanCode(self, "dc_data", self.dc_tree) + field._description = "DC Code %i (Huffman Code %i)" % (field.realvalue, field.value) + field._description + yield field + if field.realvalue != 0: + extra = Bits(self, "dc_data_extra", field.realvalue) + if extra.value < 2**(field.realvalue - 1): + corrected_value = extra.value + (-1 << field.realvalue) + 1 + else: + corrected_value = extra.value + extra._description = "Extra Bits: Corrected DC Value %i" % corrected_value + yield extra + data = [] + while len(data) < 63: + field = HuffmanCode(self, "ac_data[]", self.ac_tree) + value_r = field.realvalue >> 4 + if value_r: + data += [0] * value_r + value_s = field.realvalue & 0x0F + if value_r == value_s == 0: + field._description = "AC Code Block Terminator (0, 0) (Huffman Code %i)" % field.value + field._description + yield field + 
return + field._description = "AC Code %i, %i (Huffman Code %i)" % (value_r, value_s, field.value) + field._description + yield field + if value_s != 0: + extra = Bits(self, "ac_data_extra[%s" % field.name.split('[')[1], value_s) + if extra.value < 2**(value_s - 1): + corrected_value = extra.value + (-1 << value_s) + 1 + else: + corrected_value = extra.value + extra._description = "Extra Bits: Corrected AC Value %i" % corrected_value + data.append(corrected_value) + yield extra + else: + data.append(0) + +class JpegImageData(FieldSet): + def __init__(self, parent, name, frame, scan, restart_interval, restart_offset=0, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.frame = frame + self.scan = scan + self.restart_interval = restart_interval + self.restart_offset = restart_offset + # try to figure out where this field ends + start = self.absolute_address + while True: + end = self.stream.searchBytes("\xff", start, MAX_FILESIZE*8) + if end is None: + # this is a bad sign, since it means there is no terminator + # we ignore this; it likely means a truncated image + break + if self.stream.readBytes(end, 2) == '\xff\x00': + # padding: false alarm + start=end+16 + continue + else: + self._size = end-self.absolute_address + break + + def createFields(self): + if self.frame["../type"].value in [0xC0, 0xC1]: + # yay, huffman coding! 
+ if not hasattr(self, "huffman_tables"): + self.huffman_tables = {} + for huffman in self.parent.array("huffman"): + for table in huffman["content"].array("huffman_table"): + for _dummy_ in table: + # exhaust table, so the huffman tree is built + pass + self.huffman_tables[table["table_class"].value, table["index"].value] = table.tree + components = [] # sos_comp, samples + max_vert = 0 + max_horiz = 0 + for component in self.scan.array("component"): + for sof_comp in self.frame.array("component"): + if sof_comp["component_id"].value == component["component_id"].value: + vert = sof_comp["vert_sample"].value + horiz = sof_comp["horiz_sample"].value + components.append((component, vert * horiz)) + max_vert = max(max_vert, vert) + max_horiz = max(max_horiz, horiz) + mcu_height = alignValue(self.frame["height"].value, 8 * max_vert) // (8 * max_vert) + mcu_width = alignValue(self.frame["width"].value, 8 * max_horiz) // (8 * max_horiz) + if self.restart_interval and self.restart_offset > 0: + mcu_number = self.restart_interval * self.restart_offset + else: + mcu_number = 0 + initial_mcu = mcu_number + while True: + if (self.restart_interval and mcu_number != initial_mcu and mcu_number % self.restart_interval == 0) or\ + mcu_number == mcu_height * mcu_width: + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) # all 1s + last_byte = self.stream.readBytes(self.absolute_address + self.current_size - 8, 1) + if last_byte == '\xFF': + next_byte = self.stream.readBytes(self.absolute_address + self.current_size, 1) + if next_byte != '\x00': + raise FieldError("Unexpected byte sequence %r!"%(last_byte + next_byte)) + yield NullBytes(self, "stuffed_byte[]", 1) + break + for sos_comp, num_units in components: + for interleave_count in range(num_units): + yield JpegHuffmanImageUnit(self, "block[%i]component[%i][]" % (mcu_number, sos_comp["component_id"].value), + self.huffman_tables[0, sos_comp["dc_coding_table"].value], + 
self.huffman_tables[1, sos_comp["ac_coding_table"].value]) + mcu_number += 1 + else: + self.warning("Sorry, only supporting Baseline & Extended Sequential JPEG images so far!") + return + class JpegChunk(FieldSet): TAG_SOI = 0xD8 TAG_EOI = 0xD9 @@ -224,10 +417,18 @@ class JpegChunk(FieldSet): TAG_DQT = 0xDB TAG_DRI = 0xDD TAG_INFO = { - 0xC4: ("huffman[]", "Define Huffman Table (DHT)", None), + 0xC4: ("huffman[]", "Define Huffman Table (DHT)", DefineHuffmanTable), 0xD8: ("start_image", "Start of image (SOI)", None), 0xD9: ("end_image", "End of image (EOI)", None), - 0xDA: ("start_scan", "Start Of Scan (SOS)", StartOfScan), + 0xD0: ("restart_marker_0[]", "Restart Marker (RST0)", None), + 0xD1: ("restart_marker_1[]", "Restart Marker (RST1)", None), + 0xD2: ("restart_marker_2[]", "Restart Marker (RST2)", None), + 0xD3: ("restart_marker_3[]", "Restart Marker (RST3)", None), + 0xD4: ("restart_marker_4[]", "Restart Marker (RST4)", None), + 0xD5: ("restart_marker_5[]", "Restart Marker (RST5)", None), + 0xD6: ("restart_marker_6[]", "Restart Marker (RST6)", None), + 0xD7: ("restart_marker_7[]", "Restart Marker (RST7)", None), + 0xDA: ("start_scan[]", "Start Of Scan (SOS)", StartOfScan), 0xDB: ("quantization[]", "Define Quantization Table (DQT)", DefineQuantizationTable), 0xDC: ("nb_line", "Define number of Lines (DNL)", None), 0xDD: ("restart_interval", "Define Restart Interval (DRI)", RestartInterval), @@ -280,7 +481,7 @@ class JpegChunk(FieldSet): raise ParserError("JPEG: Invalid chunk header!") yield textHandler(UInt8(self, "type", "Type"), hexadecimal) tag = self["type"].value - if tag in (self.TAG_SOI, self.TAG_EOI): + if tag in [self.TAG_SOI, self.TAG_EOI] + range(0xD0, 0xD8): # D0 - D7 inclusive are the restart markers return yield UInt16(self, "size", "Size") size = (self["size"].value - 2) @@ -326,12 +527,31 @@ class JpegFile(Parser): return True def createFields(self): + frame = None + scan = None + restart_interval = None + restart_offset = 0 while not self.eof: 
chunk = JpegChunk(self, "chunk[]") yield chunk + if chunk["type"].value in JpegChunk.START_OF_FRAME: + if chunk["type"].value not in [0xC0, 0xC1]: # SOF0 [Baseline], SOF1 [Extended Sequential] + self.warning("Only supporting Baseline & Extended Sequential JPEG images so far!") + frame = chunk["content"] if chunk["type"].value == JpegChunk.TAG_SOS: - # TODO: Read JPEG image data... - break + if not frame: + self.warning("Missing or invalid SOF marker before SOS!") + continue + scan = chunk["content"] + # hack: scan only the fields seen so far (in _fields): don't use the generator + if "restart_interval" in self._fields: + restart_interval = self["restart_interval/content/interval"].value + else: + restart_interval = None + yield JpegImageData(self, "image_data[]", frame, scan, restart_interval) + elif chunk["type"].value in range(0xD0, 0xD8): + restart_offset += 1 + yield JpegImageData(self, "image_data[]", frame, scan, restart_interval, restart_offset) # TODO: is it possible to handle piped input? if self._size is None: @@ -350,8 +570,8 @@ class JpegFile(Parser): def createDescription(self): desc = "JPEG picture" - if "sof/content" in self: - header = self["sof/content"] + if "start_frame/content" in self: + header = self["start_frame/content"] desc += ": %ux%u pixels" % (header["width"].value, header["height"].value) return desc @@ -365,4 +585,3 @@ class JpegFile(Parser): if end is not None: return end + 16 return None - diff --git a/lib/hachoir_parser/image/pcx.py b/lib/hachoir_parser/image/pcx.py index cf23a7cb..cb2a63bf 100644 --- a/lib/hachoir_parser/image/pcx.py +++ b/lib/hachoir_parser/image/pcx.py @@ -2,13 +2,13 @@ PCX picture filter. 
""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( UInt8, UInt16, PaddingBytes, RawBytes, Enum) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import LITTLE_ENDIAN class PcxFile(Parser): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/image/photoshop_metadata.py b/lib/hachoir_parser/image/photoshop_metadata.py index 89670bcb..15fed726 100644 --- a/lib/hachoir_parser/image/photoshop_metadata.py +++ b/lib/hachoir_parser/image/photoshop_metadata.py @@ -1,11 +1,19 @@ -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, UInt32, - String, CString, PascalString8, +""" Photoshop metadata parser. + +References: +- http://www.scribd.com/doc/32900475/Photoshop-File-Formats +""" + +from hachoir_core.field import (FieldSet, ParserError, + UInt8, UInt16, UInt32, Float32, Enum, + SubFile, String, CString, PascalString8, NullBytes, RawBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import alignValue, createDict -from lib.hachoir_parser.image.iptc import IPTC -from lib.hachoir_parser.common.win32 import PascalStringWin32 +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import alignValue, createDict +from hachoir_parser.image.iptc import IPTC +from hachoir_parser.common.win32 import PascalStringWin32 + +BOOL = {0: False, 1: True} class Version(FieldSet): def createFields(self): @@ -18,25 +26,102 @@ class Version(FieldSet): if size: yield NullBytes(self, "padding", size) +class FixedFloat32(FieldSet): + def createFields(self): + yield UInt16(self, "int_part") + yield UInt16(self, "float_part") + + def createValue(self): + return self["int_part"].value + float(self["float_part"].value) / (1<<16) + +class 
ResolutionInfo(FieldSet): + def createFields(self): + yield FixedFloat32(self, "horiz_res") + yield Enum(UInt16(self, "horiz_res_unit"), {1:'px/in', 2:'px/cm'}) + yield Enum(UInt16(self, "width_unit"), {1:'inches', 2:'cm', 3:'points', 4:'picas', 5:'columns'}) + yield FixedFloat32(self, "vert_res") + yield Enum(UInt16(self, "vert_res_unit"), {1:'px/in', 2:'px/cm'}) + yield Enum(UInt16(self, "height_unit"), {1:'inches', 2:'cm', 3:'points', 4:'picas', 5:'columns'}) + +class PrintScale(FieldSet): + def createFields(self): + yield Enum(UInt16(self, "style"), {0:'centered', 1:'size to fit', 2:'user defined'}) + yield Float32(self, "x_location") + yield Float32(self, "y_location") + yield Float32(self, "scale") + +class PrintFlags(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "labels"), BOOL) + yield Enum(UInt8(self, "crop_marks"), BOOL) + yield Enum(UInt8(self, "color_bars"), BOOL) + yield Enum(UInt8(self, "reg_marks"), BOOL) + yield Enum(UInt8(self, "negative"), BOOL) + yield Enum(UInt8(self, "flip"), BOOL) + yield Enum(UInt8(self, "interpolate"), BOOL) + yield Enum(UInt8(self, "caption"), BOOL) + yield Enum(UInt8(self, "print_flags"), BOOL) + yield Enum(UInt8(self, "unknown"), BOOL) + + def createValue(self): + return [field.name for field in self if field.value] + + def createDisplay(self): + return ', '.join(self.value) + +class PrintFlags2(FieldSet): + def createFields(self): + yield UInt16(self, "version") + yield UInt8(self, "center_crop_marks") + yield UInt8(self, "reserved") + yield UInt32(self, "bleed_width") + yield UInt16(self, "bleed_width_scale") + +class GridGuides(FieldSet): + def createFields(self): + yield UInt32(self, "version") + yield UInt32(self, "horiz_cycle", "Horizontal grid spacing, in quarter inches") + yield UInt32(self, "vert_cycle", "Vertical grid spacing, in quarter inches") + yield UInt32(self, "guide_count", "Number of guide resource blocks (can be 0)") + +class Thumbnail(FieldSet): + def createFields(self): + yield 
Enum(UInt32(self, "format"), {0:'Raw RGB', 1:'JPEG RGB'}) + yield UInt32(self, "width", "Width of thumbnail in pixels") + yield UInt32(self, "height", "Height of thumbnail in pixels") + yield UInt32(self, "widthbytes", "Padded row bytes = (width * bits per pixel + 31) / 32 * 4") + yield UInt32(self, "uncompressed_size", "Total size = widthbytes * height * planes") + yield UInt32(self, "compressed_size", "Size after compression. Used for consistency check") + yield UInt16(self, "bits_per_pixel") + yield UInt16(self, "num_planes") + yield SubFile(self, "thumbnail", self['compressed_size'].value, "Thumbnail (JPEG file)", mime_type="image/jpeg") + class Photoshop8BIM(FieldSet): TAG_INFO = { - 0x03ed: ("res_info", None, "Resolution information"), - 0x03f3: ("print_flag", None, "Print flags: labels, crop marks, colour bars, etc."), + 0x03ed: ("res_info", ResolutionInfo, "Resolution information"), + 0x03f3: ("print_flag", PrintFlags, "Print flags: labels, crop marks, colour bars, etc."), 0x03f5: ("col_half_info", None, "Colour half-toning information"), 0x03f8: ("color_trans_func", None, "Colour transfer function"), 0x0404: ("iptc", IPTC, "IPTC/NAA"), 0x0406: ("jpeg_qual", None, "JPEG quality"), - 0x0408: ("grid_guide", None, "Grid guides informations"), - 0x040a: ("copyright_flag", None, "Copyright flag"), - 0x040c: ("thumb_res2", None, "Thumbnail resource (2)"), - 0x040d: ("glob_angle", None, "Global lighting angle for effects"), + 0x0408: ("grid_guide", GridGuides, "Grid guides informations"), + 0x0409: ("thumb_res", Thumbnail, "Thumbnail resource (PS 4.0)"), + 0x0410: ("watermark", UInt8, "Watermark"), + 0x040a: ("copyright_flag", UInt8, "Copyright flag"), + 0x040b: ("url", None, "URL"), + 0x040c: ("thumb_res2", Thumbnail, "Thumbnail resource (PS 5.0)"), + 0x040d: ("glob_angle", UInt32, "Global lighting angle for effects"), 0x0411: ("icc_tagged", None, "ICC untagged (1 means intentionally untagged)"), - 0x0414: ("base_layer_id", None, "Base value for new layers 
ID's"), - 0x0419: ("glob_altitude", None, "Global altitude"), + 0x0414: ("base_layer_id", UInt32, "Base value for new layers ID's"), + 0x0416: ("indexed_colors", UInt16, "Number of colors in table that are actually defined"), + 0x0417: ("transparency_index", UInt16, "Index of transparent color"), + 0x0419: ("glob_altitude", UInt32, "Global altitude"), 0x041a: ("slices", None, "Slices"), - 0x041e: ("url_list", None, "Unicode URL's"), + 0x041e: ("url_list", None, "Unicode URLs"), 0x0421: ("version", Version, "Version information"), - 0x2710: ("print_flag2", None, "Print flags (2)"), + 0x0425: ("caption_digest", None, "16-byte MD5 caption digest"), + 0x0426: ("printscale", PrintScale, "Printer scaling"), + 0x2710: ("print_flag2", PrintFlags2, "Print flags (2)"), } TAG_NAME = createDict(TAG_INFO, 0) CONTENT_HANDLER = createDict(TAG_INFO, 1) @@ -67,7 +152,10 @@ class Photoshop8BIM(FieldSet): if not size: return if self.handler: - yield self.handler(self, "content", size=size*8) + if issubclass(self.handler, FieldSet): + yield self.handler(self, "content", size=size*8) + else: + yield self.handler(self, "content") else: yield RawBytes(self, "content", size) diff --git a/lib/hachoir_parser/image/png.py b/lib/hachoir_parser/image/png.py index 6ef8fd8e..acbfc850 100644 --- a/lib/hachoir_parser/image/png.py +++ b/lib/hachoir_parser/image/png.py @@ -8,21 +8,21 @@ Documents: Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Fragment, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Fragment, ParserError, MissingField, UInt8, UInt16, UInt32, String, CString, Bytes, RawBytes, Bit, NullBits, Enum, CompressedField) -from lib.hachoir_parser.image.common import RGB -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import NETWORK_ENDIAN -from lib.hachoir_core.tools import humanFilesize +from hachoir_parser.image.common import RGB +from 
hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import NETWORK_ENDIAN +from hachoir_core.tools import humanFilesize from datetime import datetime -MAX_FILESIZE = 500 * 1024 * 1024 +MAX_FILESIZE = 500 * 1024 * 1024 # 500 MB try: from zlib import decompressobj @@ -44,7 +44,7 @@ UNIT_NAME = {1: "Meter"} COMPRESSION_NAME = { 0: u"deflate" # with 32K sliding window } -MAX_CHUNK_SIZE = 500 * 1024 # Maximum chunk size (500 KB) +MAX_CHUNK_SIZE = 5 * 1024 * 1024 # Maximum chunk size (5 MB) def headerParse(parent): yield UInt32(parent, "width", "Width (pixels)") diff --git a/lib/hachoir_parser/image/psd.py b/lib/hachoir_parser/image/psd.py index 5eb8f76e..6ea09fb1 100644 --- a/lib/hachoir_parser/image/psd.py +++ b/lib/hachoir_parser/image/psd.py @@ -5,11 +5,11 @@ Creation date: 8 january 2006 Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, String, NullBytes, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.image.photoshop_metadata import Photoshop8BIM +from hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.image.photoshop_metadata import Photoshop8BIM class Config(FieldSet): def __init__(self, *args): diff --git a/lib/hachoir_parser/image/tga.py b/lib/hachoir_parser/image/tga.py index b1d699f8..716ab28a 100644 --- a/lib/hachoir_parser/image/tga.py +++ b/lib/hachoir_parser/image/tga.py @@ -5,10 +5,10 @@ Author: Victor Stinner Creation: 18 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, UInt8, UInt16, Enum, RawBytes -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.image.common import PaletteRGB +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, UInt8, UInt16, Enum, RawBytes +from hachoir_core.endian import LITTLE_ENDIAN +from 
hachoir_parser.image.common import PaletteRGB class Line(FieldSet): def __init__(self, *args): diff --git a/lib/hachoir_parser/image/tiff.py b/lib/hachoir_parser/image/tiff.py index ceeba537..30dedd8b 100644 --- a/lib/hachoir_parser/image/tiff.py +++ b/lib/hachoir_parser/image/tiff.py @@ -1,165 +1,35 @@ """ TIFF image parser. -Authors: Victor Stinner and Sebastien Ponce +Authors: Victor Stinner, Sebastien Ponce, Robert Xiao Creation date: 30 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError, RootSeekableFieldSet, - UInt16, UInt32, Bytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.image.exif import BasicIFDEntry -from lib.hachoir_core.tools import createDict +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, SeekableFieldSet, RootSeekableFieldSet, Bytes +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser.image.exif import TIFF -MAX_COUNT = 250 - -class IFDEntry(BasicIFDEntry): - static_size = 12*8 - - TAG_INFO = { - 254: ("new_subfile_type", "New subfile type"), - 255: ("subfile_type", "Subfile type"), - 256: ("img_width", "Image width in pixels"), - 257: ("img_height", "Image height in pixels"), - 258: ("bits_per_sample", "Bits per sample"), - 259: ("compression", "Compression method"), - 262: ("photo_interpret", "Photometric interpretation"), - 263: ("thres", "Thresholding"), - 264: ("cell_width", "Cellule width"), - 265: ("cell_height", "Cellule height"), - 266: ("fill_order", "Fill order"), - 269: ("doc_name", "Document name"), - 270: ("description", "Image description"), - 271: ("make", "Make"), - 272: ("model", "Model"), - 273: ("strip_ofs", "Strip offsets"), - 274: ("orientation", "Orientation"), - 277: ("sample_pixel", "Samples per pixel"), - 278: ("row_per_strip", "Rows per strip"), - 279: ("strip_byte", "Strip byte counts"), - 280: ("min_sample_value", "Min sample 
value"), - 281: ("max_sample_value", "Max sample value"), - 282: ("xres", "X resolution"), - 283: ("yres", "Y resolution"), - 284: ("planar_conf", "Planar configuration"), - 285: ("page_name", "Page name"), - 286: ("xpos", "X position"), - 287: ("ypos", "Y position"), - 288: ("free_ofs", "Free offsets"), - 289: ("free_byte", "Free byte counts"), - 290: ("gray_resp_unit", "Gray response unit"), - 291: ("gray_resp_curve", "Gray response curve"), - 292: ("group3_opt", "Group 3 options"), - 293: ("group4_opt", "Group 4 options"), - 296: ("res_unit", "Resolution unit"), - 297: ("page_nb", "Page number"), - 301: ("color_respt_curve", "Color response curves"), - 305: ("software", "Software"), - 306: ("date_time", "Date time"), - 315: ("artist", "Artist"), - 316: ("host_computer", "Host computer"), - 317: ("predicator", "Predicator"), - 318: ("white_pt", "White point"), - 319: ("prim_chomat", "Primary chromaticities"), - 320: ("color_map", "Color map"), - 321: ("half_tone_hints", "Halftone Hints"), - 322: ("tile_width", "TileWidth"), - 323: ("tile_length", "TileLength"), - 324: ("tile_offsets", "TileOffsets"), - 325: ("tile_byte_counts", "TileByteCounts"), - 332: ("ink_set", "InkSet"), - 333: ("ink_names", "InkNames"), - 334: ("number_of_inks", "NumberOfInks"), - 336: ("dot_range", "DotRange"), - 337: ("target_printer", "TargetPrinter"), - 338: ("extra_samples", "ExtraSamples"), - 339: ("sample_format", "SampleFormat"), - 340: ("smin_sample_value", "SMinSampleValue"), - 341: ("smax_sample_value", "SMaxSampleValue"), - 342: ("transfer_range", "TransferRange"), - 512: ("jpeg_proc", "JPEGProc"), - 513: ("jpeg_interchange_format", "JPEGInterchangeFormat"), - 514: ("jpeg_interchange_format_length", "JPEGInterchangeFormatLength"), - 515: ("jpeg_restart_interval", "JPEGRestartInterval"), - 517: ("jpeg_lossless_predictors", "JPEGLosslessPredictors"), - 518: ("jpeg_point_transforms", "JPEGPointTransforms"), - 519: ("jpeg_qtables", "JPEGQTables"), - 520: ("jpeg_dctables", 
"JPEGDCTables"), - 521: ("jpeg_actables", "JPEGACTables"), - 529: ("ycbcr_coefficients", "YCbCrCoefficients"), - 530: ("ycbcr_subsampling", "YCbCrSubSampling"), - 531: ("ycbcr_positioning", "YCbCrPositioning"), - 532: ("reference_blackwhite", "ReferenceBlackWhite"), - 33432: ("copyright", "Copyright"), - 0x8769: ("ifd_pointer", "Pointer to next IFD entry"), - } - TAG_NAME = createDict(TAG_INFO, 0) - - def __init__(self, *args): - FieldSet.__init__(self, *args) - tag = self["tag"].value - if tag in self.TAG_INFO: - self._name, self._description = self.TAG_INFO[tag] - else: - self._parser = None - -class IFD(FieldSet): - def __init__(self, *args): - FieldSet.__init__(self, *args) - self._size = 16 + self["count"].value * IFDEntry.static_size - self._has_offset = False - - def createFields(self): - yield UInt16(self, "count") - if MAX_COUNT < self["count"].value: - raise ParserError("TIFF IFD: Invalid count (%s)" - % self["count"].value) - for index in xrange(self["count"].value): - yield IFDEntry(self, "entry[]") +def getStrips(ifd): + data = {} + for i, entry in enumerate(ifd.array('entry')): + data[entry['tag'].display] = entry + # image data + if "StripOffsets" in data and "StripByteCounts" in data: + offs = ifd.getEntryValues(data["StripOffsets"]) + bytes = ifd.getEntryValues(data["StripByteCounts"]) + for off, byte in zip(offs, bytes): + yield off.value, byte.value class ImageFile(SeekableFieldSet): def __init__(self, parent, name, description, ifd): SeekableFieldSet.__init__(self, parent, name, description, None) - self._has_offset = False self._ifd = ifd def createFields(self): - datas = {} - for entry in self._ifd: - if type(entry) != IFDEntry: - continue - for c in entry: - if c.name != "offset": - continue - self.seekByte(c.value, False) - desc = "data of ifd entry " + entry.name, - entryType = BasicIFDEntry.ENTRY_FORMAT[entry["type"].value] - count = entry["count"].value - if entryType == String: - yield String(self, entry.name, count, desc, "\0", 
"ISO-8859-1") - else: - d = Data(self, entry.name, desc, entryType, count) - datas[d.name] = d - yield d - break - # image data - if "strip_ofs" in datas and "strip_byte" in datas: - for i in xrange(datas["strip_byte"]._count): - self.seekByte(datas["strip_ofs"]["value["+str(i)+"]"].value, False) - yield Bytes(self, "strip[]", datas["strip_byte"]["value["+str(i)+"]"].value) - -class Data(FieldSet): - - def __init__(self, parent, name, desc, type, count): - size = type.static_size * count - FieldSet.__init__(self, parent, name, desc, size) - self._count = count - self._type = type - - def createFields(self): - for i in xrange(self._count): - yield self._type(self, "value[]") + for off, byte in getStrips(self._ifd): + self.seekByte(off, relative=False) + yield Bytes(self, "strip[]", byte) class TiffFile(RootSeekableFieldSet, Parser): PARSER_TAGS = { @@ -168,7 +38,6 @@ class TiffFile(RootSeekableFieldSet, Parser): "file_ext": ("tif", "tiff"), "mime": (u"image/tiff",), "min_size": 8*8, -# TODO: Re-enable magic "magic": (("II\x2A\0", 0), ("MM\0\x2A", 0)), "description": "TIFF picture" } @@ -191,21 +60,11 @@ class TiffFile(RootSeekableFieldSet, Parser): return True def createFields(self): - yield String(self, "endian", 2, 'Endian ("II" or "MM")', charset="ASCII") - yield UInt16(self, "version", "TIFF version number") - offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)") - yield offset - ifds = [] - while True: - if offset.value == 0: - break + for field in TIFF(self): + yield field - self.seekByte(offset.value, relative=False) - ifd = IFD(self, "ifd[]", "Image File Directory", None) - ifds.append(ifd) - yield ifd - offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)") - yield offset - for ifd in ifds: + for ifd in self.array('ifd'): + offs = (off for off, byte in getStrips(ifd)) + self.seekByte(min(offs), relative=False) image = ImageFile(self, "image[]", "Image File", ifd) 
yield image diff --git a/lib/hachoir_parser/image/wmf.py b/lib/hachoir_parser/image/wmf.py index c45b530a..86f9840b 100644 --- a/lib/hachoir_parser/image/wmf.py +++ b/lib/hachoir_parser/image/wmf.py @@ -16,14 +16,14 @@ Creation date: 26 december 2006 MAX_FILESIZE = 50 * 1024 * 1024 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, Enum, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, Enum, MissingField, ParserError, UInt32, Int32, UInt16, Int16, UInt8, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.image.common import RGBA +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict +from hachoir_parser.image.common import RGBA POLYFILL_MODE = {1: "Alternate", 2: "Winding"} diff --git a/lib/hachoir_parser/image/xcf.py b/lib/hachoir_parser/image/xcf.py index e365001f..f0bfa30c 100644 --- a/lib/hachoir_parser/image/xcf.py +++ b/lib/hachoir_parser/image/xcf.py @@ -9,11 +9,11 @@ CVS online: Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, ParserError, UInt8, UInt32, Enum, Float32, String, PascalString32, RawBytes) -from lib.hachoir_parser.image.common import RGBA -from lib.hachoir_core.endian import NETWORK_ENDIAN +from hachoir_parser.image.common import RGBA +from hachoir_core.endian import NETWORK_ENDIAN class XcfCompression(FieldSet): static_size = 8 diff --git a/lib/hachoir_parser/misc/__init__.py b/lib/hachoir_parser/misc/__init__.py index 938d4aa5..f1392015 100644 --- a/lib/hachoir_parser/misc/__init__.py +++ 
b/lib/hachoir_parser/misc/__init__.py @@ -1,14 +1,18 @@ -from lib.hachoir_parser.misc.file_3do import File3do -from lib.hachoir_parser.misc.file_3ds import File3ds -from lib.hachoir_parser.misc.torrent import TorrentFile -from lib.hachoir_parser.misc.ttf import TrueTypeFontFile -from lib.hachoir_parser.misc.chm import ChmFile -from lib.hachoir_parser.misc.lnk import LnkFile -from lib.hachoir_parser.misc.pcf import PcfFile -from lib.hachoir_parser.misc.ole2 import OLE2_File -from lib.hachoir_parser.misc.pdf import PDFDocument -from lib.hachoir_parser.misc.pifv import PIFVFile -from lib.hachoir_parser.misc.hlp import HlpFile -from lib.hachoir_parser.misc.gnome_keyring import GnomeKeyring -from lib.hachoir_parser.misc.bplist import BPList - +from hachoir_parser.misc.file_3do import File3do +from hachoir_parser.misc.file_3ds import File3ds +from hachoir_parser.misc.torrent import TorrentFile +from hachoir_parser.misc.ttf import TrueTypeFontFile +from hachoir_parser.misc.chm import ChmFile +from hachoir_parser.misc.lnk import LnkFile +from hachoir_parser.misc.pcf import PcfFile +from hachoir_parser.misc.ole2 import OLE2_File +from hachoir_parser.misc.pdf import PDFDocument +from hachoir_parser.misc.pifv import PIFVFile +from hachoir_parser.misc.hlp import HlpFile +from hachoir_parser.misc.gnome_keyring import GnomeKeyring +from hachoir_parser.misc.bplist import BPList +from hachoir_parser.misc.dsstore import DSStore +from hachoir_parser.misc.word_doc import WordDocumentParser +from hachoir_parser.misc.word_2 import Word2DocumentParser +from hachoir_parser.misc.mstask import MSTaskFile +from hachoir_parser.misc.mapsforge_map import MapsforgeMapFile diff --git a/lib/hachoir_parser/misc/bplist.py b/lib/hachoir_parser/misc/bplist.py index 22282790..5411b488 100644 --- a/lib/hachoir_parser/misc/bplist.py +++ b/lib/hachoir_parser/misc/bplist.py @@ -15,12 +15,12 @@ Author: Robert Xiao Created: 2008-09-21 """ -from lib.hachoir_parser import HachoirParser -from 
lib.hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum, +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum, Bits, GenericInteger, Float32, Float64, UInt8, UInt64, Bytes, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import displayHandler -from lib.hachoir_core.tools import humanDatetime +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler +from hachoir_core.tools import humanDatetime from datetime import datetime, timedelta class BPListTrailer(FieldSet): @@ -157,9 +157,16 @@ class BPListObject(FieldSet): elif markertype == 3: # Date yield Bits(self, "extra", 4, "Extra value, should be 3") - cvt_time=lambda v:datetime(2001,1,1) + timedelta(seconds=v) + # Use a heuristic to determine which epoch to use + def cvt_time(v): + v=timedelta(seconds=v) + epoch2001 = datetime(2001,1,1) + epoch1970 = datetime(1970,1,1) + if (epoch2001 + v - datetime.today()).days > 5*365: + return epoch1970 + v + return epoch2001 + v yield displayHandler(Float64(self, "value"),lambda x:humanDatetime(cvt_time(x))) - self.xml=lambda prefix:prefix + "%s"%(cvt_time(self['value'].value).isoformat()) + self.xml=lambda prefix:prefix + "%sZ"%(cvt_time(self['value'].value).isoformat()) elif markertype == 4: # Data @@ -175,7 +182,7 @@ class BPListObject(FieldSet): yield BPListSize(self, "size") if self['size'].value: yield String(self, "value", self['size'].value, charset="ASCII") - self.xml=lambda prefix:prefix + "%s"%(self['value'].value.encode('iso-8859-1')) + self.xml=lambda prefix:prefix + "%s"%(self['value'].value.replace('&','&').encode('iso-8859-1')) else: self.xml=lambda prefix:prefix + '' @@ -184,7 +191,7 @@ class BPListObject(FieldSet): yield BPListSize(self, "size") if self['size'].value: yield String(self, "value", self['size'].value*2, charset="UTF-16-BE") - self.xml=lambda prefix:prefix + 
"%s"%(self['value'].value.encode('utf-8')) + self.xml=lambda prefix:prefix + "%s"%(self['value'].value.replace('&','&').encode('utf-8')) else: self.xml=lambda prefix:prefix + '' diff --git a/lib/hachoir_parser/misc/chm.py b/lib/hachoir_parser/misc/chm.py index 3a158e54..37c5cae0 100644 --- a/lib/hachoir_parser/misc/chm.py +++ b/lib/hachoir_parser/misc/chm.py @@ -6,20 +6,24 @@ Document: http://www.wotsit.org (search "chm") - chmlib library http://www.jedrea.com/chmlib/ +- Unofficial CHM Spec + http://savannah.nongnu.org/projects/chmspec +- Microsoft's HTML Help (.chm) format + http://www.speakeasy.org/~russotto/chm/chmformat.html Author: Victor Stinner Creation date: 2007-03-04 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, ParserError, - Int32, UInt32, UInt64, +from hachoir_core.field import (Field, FieldSet, ParserError, RootSeekableFieldSet, + Int32, UInt16, UInt32, UInt64, RawBytes, PaddingBytes, Enum, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.win32 import GUID -from lib.hachoir_parser.common.win32_lang_id import LANGUAGE_ID -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_parser.common.win32 import GUID +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class CWord(Field): """ @@ -42,6 +46,7 @@ class CWord(Field): raise ParserError("CHM: CWord is limited to 64 bits") addr += 8 byte = stream.readBits(addr, 8, endian) + value <<= 7 value += byte self.createValue = lambda: value @@ -84,7 +89,7 @@ class ITSF(FieldSet): yield UInt32(self, "version") yield UInt32(self, "header_size", "Total header length (in bytes)") yield UInt32(self, "one") - yield UInt32(self, "last_modified") + yield UInt32(self, "last_modified", "Lower 32 bits of the time 
expressed in units of 0.1 us") yield Enum(UInt32(self, "lang_id", "Windows Language ID"), LANGUAGE_ID) yield GUID(self, "dir_uuid", "{7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}") yield GUID(self, "stream_uuid", "{7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}") @@ -99,9 +104,9 @@ class PMGL_Entry(FieldSet): def createFields(self): yield CWord(self, "name_len") yield String(self, "name", self["name_len"].value, charset="UTF-8") - yield CWord(self, "space") - yield CWord(self, "start") - yield filesizeHandler(CWord(self, "length")) + yield CWord(self, "section", "Section number that the entry data is in.") + yield CWord(self, "start", "Start offset of the data") + yield filesizeHandler(CWord(self, "length", "Length of the data")) def createDescription(self): return "%s (%s)" % (self["name"].value, self["length"].display) @@ -118,13 +123,23 @@ class PMGL(FieldSet): # Entries stop = self.size - self["free_space"].value * 8 + entry_count = 0 while self.current_size < stop: yield PMGL_Entry(self, "entry[]") + entry_count+=1 # Padding - padding = (self.size - self.current_size) // 8 + quickref_frequency = 1 + (1 << self["/dir/itsp/density"].value) + num_quickref = (entry_count // quickref_frequency) + if entry_count % quickref_frequency == 0: + num_quickref -= 1 + print self.current_size//8, quickref_frequency, num_quickref + padding = (self["free_space"].value - (num_quickref*2+2)) if padding: yield PaddingBytes(self, "padding", padding) + for i in range(num_quickref*quickref_frequency, 0, -quickref_frequency): + yield UInt16(self, "quickref[%i]"%i) + yield UInt16(self, "entry_count") class PMGI_Entry(FieldSet): def createFields(self): @@ -164,36 +179,145 @@ class Directory(FieldSet): if self.current_size < self.size: yield PMGI(self, "pmgi", size=block_size) -class ChmFile(Parser): +class NameList(FieldSet): + def createFields(self): + yield UInt16(self, "length", "Length of name list in 2-byte blocks") + yield UInt16(self, "count", "Number of entries in name list") + for index 
in range(self["count"].value): + length=UInt16(self, "name_len[]", "Length of name in 2-byte blocks, excluding terminating null") + yield length + yield String(self, "name[]", length.value*2+2, charset="UTF-16-LE") + +class ControlData(FieldSet): + def createFields(self): + yield UInt32(self, "count", "Number of DWORDS in this struct") + yield String(self, "type", 4, "Type of compression") + if self["type"].value!='LZXC': return + yield UInt32(self, "version", "Compression version") + version=self["version"].value + if version==1: block='bytes' + else: block='32KB blocks' + yield UInt32(self, "reset_interval", "LZX: Reset interval in %s"%block) + yield UInt32(self, "window_size", "LZX: Window size in %s"%block) + yield UInt32(self, "cache_size", "LZX: Cache size in %s"%block) + yield UInt32(self, "unknown[]") + +class ResetTable(FieldSet): + def createFields(self): + yield UInt32(self, "unknown[]", "Version number?") + yield UInt32(self, "count", "Number of entries") + yield UInt32(self, "entry_size", "Size of each entry") + yield UInt32(self, "header_size", "Size of this header") + yield UInt64(self, "uncompressed_size") + yield UInt64(self, "compressed_size") + yield UInt64(self, "block_size", "Block size in bytes") + for i in xrange(self["count"].value): + yield UInt64(self, "block_location[]", "location in compressed data of 1st block boundary in uncompressed data") + +class SystemEntry(FieldSet): + ENTRY_TYPE={0:"HHP: [OPTIONS]: Contents File", + 1:"HHP: [OPTIONS]: Index File", + 2:"HHP: [OPTIONS]: Default Topic", + 3:"HHP: [OPTIONS]: Title", + 4:"File Metadata", + 5:"HHP: [OPTIONS]: Default Window", + 6:"HHP: [OPTIONS]: Compiled file", + # 7 present only in files with Binary Index; unknown function + # 8 unknown function + 9: "Version", + 10: "Timestamp", + # 11 only in Binary TOC files + 12: "Number of Info Types", + 13: "#IDXHDR file", + # 14 unknown function + # 15 checksum?? 
+ 16:"HHP: [OPTIONS]: Default Font", + } + def createFields(self): + yield Enum(UInt16(self, "type", "Type of entry"),self.ENTRY_TYPE) + yield UInt16(self, "length", "Length of entry") + yield RawBytes(self, "data", self["length"].value) + def createDescription(self): + return '#SYSTEM Entry, Type %s'%self["type"].display + +class SystemFile(FieldSet): + def createFields(self): + yield UInt32(self, "version", "Either 2 or 3") + while self.current_size < self.size: + yield SystemEntry(self, "entry[]") + +class ChmFile(HachoirParser, RootSeekableFieldSet): + MAGIC = "ITSF\3\0\0\0" PARSER_TAGS = { "id": "chm", "category": "misc", "file_ext": ("chm",), "min_size": 4*8, - "magic": (("ITSF\3\0\0\0", 0),), + "magic": ((MAGIC, 0),), "description": "Microsoft's HTML Help (.chm)", } endian = LITTLE_ENDIAN + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + def validate(self): - if self.stream.readBytes(0, 4) != "ITSF": + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: return "Invalid magic" - if self["itsf/version"].value != 3: - return "Invalid version" return True def createFields(self): yield ITSF(self, "itsf") yield Filesize_Header(self, "file_size", size=self["itsf/filesize_len"].value*8) - padding = self.seekByte(self["itsf/dir_offset"].value) - if padding: - yield padding - yield Directory(self, "dir", size=self["itsf/dir_len"].value*8) + self.seekByte(self["itsf/dir_offset"].value) + directory=Directory(self, "dir", size=self["itsf/dir_len"].value*8) + yield directory - size = (self.size - self.current_size) // 8 - if size: - yield RawBytes(self, "raw_end", size) + otherentries = {} + for pmgl in directory.array("pmgl"): + for entry in pmgl.array("entry"): + if entry["section"].value != 0: + otherentries.setdefault(entry["section"].value,[]).append(entry) + continue + if entry["length"].value == 0: + continue + 
self.seekByte(self["itsf/data_offset"].value+entry["start"].value) + name = entry["name"].value + if name == "::DataSpace/NameList": + yield NameList(self, "name_list") + elif name.startswith('::DataSpace/Storage/'): + sectname = str(name.split('/')[2]) + if name.endswith('/SpanInfo'): + yield UInt64(self, "%s_spaninfo"%sectname, "Size of uncompressed data in the %s section"%sectname) + elif name.endswith('/ControlData'): + yield ControlData(self, "%s_controldata"%sectname, "Data about the compression scheme", size=entry["length"].value*8) + elif name.endswith('/Transform/List'): + yield String(self, "%s_transform_list"%sectname, 38, description="Transform/List element", charset="UTF-16-LE") + elif name.endswith('/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable'): + yield ResetTable(self, "%s_reset_table"%sectname, "LZX Reset Table", size=entry["length"].value*8) + elif name.endswith('/Content'): + # eventually, a LZX wrapper will appear here, we hope! + yield RawBytes(self, "%s_content"%sectname, entry["length"].value, "Content for the %s section"%sectname) + else: + yield RawBytes(self, "entry_data[]", entry["length"].value, name) + elif name=="/#SYSTEM": + yield SystemFile(self, "system_file", size=entry["length"].value*8) + else: + yield RawBytes(self, "entry_data[]", entry["length"].value, name) + + def getFile(self, filename): + page=0 + if 'pmgi' in self['/dir']: + for entry in self['/dir/pmgi'].array('entry'): + if entry['name'].value <= filename: + page=entry['page'].value + pmgl=self['/dir/pmgl[%i]'%page] + for entry in pmgl.array('entry'): + if entry['name'].value == filename: + return entry + raise ParserError("File '%s' not found!"%filename) def createContentSize(self): return self["file_size/file_size"].value * 8 diff --git a/lib/hachoir_parser/misc/common.py b/lib/hachoir_parser/misc/common.py index b73c8212..38d9f823 100644 --- a/lib/hachoir_parser/misc/common.py +++ b/lib/hachoir_parser/misc/common.py @@ -1,4 +1,4 @@ -from 
lib.hachoir_core.field import StaticFieldSet, Float32 +from hachoir_core.field import StaticFieldSet, Float32 class Vertex(StaticFieldSet): format = ((Float32, "x"), (Float32, "y"), (Float32, "z")) diff --git a/lib/hachoir_parser/misc/dsstore.py b/lib/hachoir_parser/misc/dsstore.py new file mode 100644 index 00000000..02792ad5 --- /dev/null +++ b/lib/hachoir_parser/misc/dsstore.py @@ -0,0 +1,211 @@ +""" +Mac OS X .DS_Store parser. + +Documents: +- http://search.cpan.org/~wiml/Mac-Finder-DSStore-0.95/DSStoreFormat.pod +Author: Robert Xiao +Created: 2010-09-01 +""" + +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, + NullBytes, RawBytes, PaddingBytes, Bytes, SubFile, String, PascalString8, + Bits, UInt8, UInt16, UInt32, + Link, + ParserError) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler, hexadecimal +from hachoir_core.tools import paddingSize + +class BlockAddress(FieldSet): + static_size = 32 + + def createFields(self): + yield displayHandler(Bits(self, "offset", 27, description="Offset into file divided by 32"), lambda x: hex(x*32).strip('L')) + yield displayHandler(Bits(self, "size", 5, description="Power-of-2 size of the block"), lambda x: hex(1< 0: + self.seekByte(offs.value+4) + yield RawBytes(self, "free[]", size) diff --git a/lib/hachoir_parser/misc/file_3do.py b/lib/hachoir_parser/misc/file_3do.py index 3d909d56..3108d0ae 100644 --- a/lib/hachoir_parser/misc/file_3do.py +++ b/lib/hachoir_parser/misc/file_3do.py @@ -7,12 +7,12 @@ Author: Cyril Zorin Creation date: 28 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt32, Int32, String, Float32, RawBytes, PaddingBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.misc.common import Vertex, MapUV +from hachoir_core.endian 
import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser.misc.common import Vertex, MapUV class Vector(FieldSet): def __init__(self, parent, name, diff --git a/lib/hachoir_parser/misc/file_3ds.py b/lib/hachoir_parser/misc/file_3ds.py index de05fa94..aaf4fbf4 100644 --- a/lib/hachoir_parser/misc/file_3ds.py +++ b/lib/hachoir_parser/misc/file_3ds.py @@ -3,13 +3,13 @@ Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, UInt16, UInt32, RawBytes, Enum, CString) -from lib.hachoir_parser.image.common import RGB -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.misc.common import Vertex, MapUV +from hachoir_parser.image.common import RGB +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.misc.common import Vertex, MapUV def readObject(parent): yield CString(parent, "name", "Object name") diff --git a/lib/hachoir_parser/misc/gnome_keyring.py b/lib/hachoir_parser/misc/gnome_keyring.py index 56b26a67..0bade36f 100644 --- a/lib/hachoir_parser/misc/gnome_keyring.py +++ b/lib/hachoir_parser/misc/gnome_keyring.py @@ -9,14 +9,14 @@ Author: Victor Stinner Creation date: 2008-04-09 """ -from lib.hachoir_core.tools import paddingSize -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.tools import paddingSize +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bit, NullBits, NullBytes, UInt8, UInt32, String, RawBytes, Enum, TimestampUnix64, CompressedField, SubFile) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN try: import hashlib diff --git a/lib/hachoir_parser/misc/hlp.py b/lib/hachoir_parser/misc/hlp.py index 
24da2077..167dc7a6 100644 --- a/lib/hachoir_parser/misc/hlp.py +++ b/lib/hachoir_parser/misc/hlp.py @@ -10,12 +10,12 @@ Author: Victor Stinner Creation date: 2007-09-03 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, Int32, UInt16, UInt32, NullBytes, RawBytes, PaddingBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import (textHandler, hexadecimal, +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import (textHandler, hexadecimal, displayHandler, humanFilesize) class FileEntry(FieldSet): diff --git a/lib/hachoir_parser/misc/lnk.py b/lib/hachoir_parser/misc/lnk.py index b0512baa..3844d37f 100644 --- a/lib/hachoir_parser/misc/lnk.py +++ b/lib/hachoir_parser/misc/lnk.py @@ -22,20 +22,20 @@ Changes: * Creation of the parser """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, CString, String, UInt32, UInt16, UInt8, Bit, Bits, PaddingBits, TimestampWin64, DateTimeMSDOS32, NullBytes, PaddingBytes, RawBytes, Enum) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.common.win32 import GUID -from lib.hachoir_parser.common.msdos import MSDOSFileAttr16, MSDOSFileAttr32 -from lib.hachoir_core.text_handler import filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.common.win32 import GUID +from hachoir_parser.common.msdos import MSDOSFileAttr16, MSDOSFileAttr32 +from hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import paddingSize +from hachoir_core.tools import paddingSize class ItemIdList(FieldSet): def __init__(self, *args, **kw): @@ -56,7 
+56,7 @@ class ItemId(FieldSet): 0x23: "Drive", 0x25: "Drive", 0x29: "Drive", - 0x2E: "GUID", + 0x2E: "Shell Extension", 0x2F: "Drive", 0x30: "Dir/File", 0x31: "Directory", @@ -66,6 +66,7 @@ class ItemId(FieldSet): 0x42: "Computer", 0x46: "Net Provider", 0x47: "Whole Network", + 0x4C: "Web Folder", 0x61: "MSITStore", 0x70: "Printer/RAS Connection", 0xB1: "History/Favorite", @@ -86,16 +87,26 @@ class ItemId(FieldSet): yield Enum(UInt8(self, "type"),self.ITEM_TYPE) entrytype=self["type"].value - if entrytype in (0x1F, 0x2E, 0x70): + if entrytype in (0x1F, 0x70): # GUID yield RawBytes(self, "dummy", 1, "should be 0x50") yield GUID(self, "guid") + elif entrytype == 0x2E: + # Shell extension + yield RawBytes(self, "dummy", 1, "should be 0x50") + if self["dummy"].value == '\0': + yield UInt16(self, "length_data", "Length of shell extension-specific data") + if self["length_data"].value: + yield RawBytes(self, "data", self["length_data"].value, "Shell extension-specific data") + yield GUID(self, "handler_guid") + yield GUID(self, "guid") + elif entrytype in (0x23, 0x25, 0x29, 0x2F): # Drive yield String(self, "drive", self["length"].value-3, strip="\0") - elif entrytype in (0x30, 0x31, 0x32): + elif entrytype in (0x30, 0x31, 0x32, 0x61, 0xb1): yield RawBytes(self, "dummy", 1, "should be 0x00") yield UInt32(self, "size", "size of file; 0 for folders") yield DateTimeMSDOS32(self, "date_time", "File/folder date and time") @@ -111,8 +122,11 @@ class ItemId(FieldSet): yield RawBytes(self, "unknown[]", 6) yield DateTimeMSDOS32(self, "creation_date_time", "File/folder creation date and time") yield DateTimeMSDOS32(self, "access_date_time", "File/folder last access date and time") - yield RawBytes(self, "unknown[]", 4) + yield RawBytes(self, "unknown[]", 2) + yield UInt16(self, "length_next", "Length of next two strings (if zero, ignore this field)") yield CString(self, "unicode_name", "File/folder name", charset="UTF-16-LE") + if self["length_next"].value: + yield CString(self, 
"localized_name", "Localized name") yield RawBytes(self, "unknown[]", 2) else: yield CString(self, "name_short", "File/folder short name") @@ -136,6 +150,19 @@ class ItemId(FieldSet): yield CString(self, "description") yield RawBytes(self, "unknown[]", 2) + elif entrytype == 0x4C: + # Web Folder + yield RawBytes(self, "unknown[]", 5) + yield TimestampWin64(self, "modification_time") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield LnkString(self, "name") + yield RawBytes(self, "padding[]", 2) + yield LnkString(self, "address") + if self["address/length"].value: + yield RawBytes(self, "padding[]", 2) + else: yield RawBytes(self, "raw", self["length"].value-3) @@ -249,13 +276,17 @@ class FileLocationInfo(FieldSet): class LnkString(FieldSet): def createFields(self): yield UInt16(self, "length", "Length of this string") - if self.root.hasUnicodeNames(): - yield String(self, "data", self["length"].value*2, charset="UTF-16-LE") - else: - yield String(self, "data", self["length"].value, charset="ASCII") + if self["length"].value: + if self.root.hasUnicodeNames(): + yield String(self, "data", self["length"].value*2, charset="UTF-16-LE") + else: + yield String(self, "data", self["length"].value, charset="ASCII") def createValue(self): - return self["data"].value + if self["length"].value: + return self["data"].value + else: + return "" class ColorRef(FieldSet): ''' COLORREF struct, 0x00bbggrr ''' diff --git a/lib/hachoir_parser/misc/mapsforge_map.py b/lib/hachoir_parser/misc/mapsforge_map.py new file mode 100644 index 00000000..4b99653a --- /dev/null +++ b/lib/hachoir_parser/misc/mapsforge_map.py @@ -0,0 +1,357 @@ +""" +Mapsforge map file parser (for version 3 files). 
+ +Author: Oliver Gerlich + +References: +- http://code.google.com/p/mapsforge/wiki/SpecificationBinaryMapFile +- http://mapsforge.org/ +""" + +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, + Bit, Bits, UInt8, UInt16, UInt32, UInt64, String, RawBytes, + PaddingBits, PaddingBytes, + Enum, Field, FieldSet, SeekableFieldSet, RootSeekableFieldSet) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN + + +# micro-degrees factor: +UDEG = float(1000*1000) + + +CoordinateEncoding = { + 0: "single delta encoding", + 1: "double delta encoding", +} + + +class UIntVbe(Field): + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, description=description) + + value = 0 + size = 0 + while True: + byteValue = ord( self._parent.stream.readBytes(self.absolute_address + (size*8), 1) ) + + haveMoreData = (byteValue & 0x80) + value = value | ((byteValue & 0x7f) << (size*7)) + size += 1 + assert size < 100, "UIntVBE is too large" + + if not(haveMoreData): + break + + self._size = size*8 + self.createValue = lambda: value + + +class IntVbe(Field): + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, description=description) + + value = 0 + size = 0 + shift = 0 + while True: + byteValue = ord( self._parent.stream.readBytes(self.absolute_address + (size*8), 1) ) + + haveMoreData = (byteValue & 0x80) + if size == 0: + isNegative = (byteValue & 0x40) + value = (byteValue & 0x3f) + shift += 6 + else: + value = value | ((byteValue & 0x7f) << shift) + shift += 7 + size += 1 + assert size < 100, "IntVBE is too large" + + if not(haveMoreData): + break + + if isNegative: + value *= -1 + + self._size = size*8 + self.createValue = lambda: value + + +class VbeString(FieldSet): + def createFields(self): + yield UIntVbe(self, "length") + yield String(self, "chars", self["length"].value, charset="UTF-8") + + def createDescription (self): + return '(%d B) "%s"' % (self["length"].value, 
self["chars"].value) + + +class TagStringList(FieldSet): + def createFields(self): + yield UInt16(self, "num_tags") + for i in range(self["num_tags"].value): + yield VbeString(self, "tag[]") + + def createDescription (self): + return "%d tag strings" % self["num_tags"].value + + +class ZoomIntervalCfg(FieldSet): + def createFields(self): + yield UInt8(self, "base_zoom_level") + yield UInt8(self, "min_zoom_level") + yield UInt8(self, "max_zoom_level") + yield UInt64(self, "subfile_start") + yield UInt64(self, "subfile_size") + + def createDescription (self): + return "zoom level around %d (%d - %d)" % (self["base_zoom_level"].value, + self["min_zoom_level"].value, self["max_zoom_level"].value) + + +class TileIndexEntry(FieldSet): + def createFields(self): + yield Bit(self, "is_water_tile") + yield Bits(self, "offset", 39) + + +class TileZoomTable(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_pois") + yield UIntVbe(self, "num_ways") + + def createDescription (self): + return "%d POIs, %d ways" % (self["num_pois"].value, self["num_ways"].value) + + +class TileHeader(FieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + FieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1 + assert(numLevels < 50) + for i in range(numLevels): + yield TileZoomTable(self, "zoom_table_entry[]") + yield UIntVbe(self, "first_way_offset") + + +class POIData(FieldSet): + def createFields(self): + yield IntVbe(self, "lat_diff") + yield IntVbe(self, "lon_diff") + yield Bits(self, "layer", 4) + yield Bits(self, "num_tags", 4) + + for i in range(self["num_tags"].value): + yield UIntVbe(self, "tag_id[]") + + yield Bit(self, "have_name") + yield Bit(self, "have_house_number") + yield Bit(self, "have_ele") + yield PaddingBits(self, "pad[]", 5) + + if self["have_name"].value: + yield 
VbeString(self, "name") + if self["have_house_number"].value: + yield VbeString(self, "house_number") + if self["have_ele"].value: + yield IntVbe(self, "ele") + + def createDescription (self): + s = "POI" + if self["have_name"].value: + s += ' "%s"' % self["name"]["chars"].value + s += " @ %f/%f" % (self["lat_diff"].value / UDEG, self["lon_diff"].value / UDEG) + return s + + + +class SubTileBitmap(FieldSet): + static_size = 2*8 + def createFields(self): + for y in range(4): + for x in range(4): + yield Bit(self, "is_used[%d,%d]" % (x,y)) + + +class WayProperties(FieldSet): + def createFields(self): + yield UIntVbe(self, "way_data_size") + + # WayProperties is split into an outer and an inner field, to allow specifying data size for inner part: + yield WayPropertiesInner(self, "inner", size=self["way_data_size"].value * 8) + + +class WayPropertiesInner(FieldSet): + def createFields(self): + yield SubTileBitmap(self, "sub_tile_bitmap") + #yield Bits(self, "sub_tile_bitmap", 16) + + yield Bits(self, "layer", 4) + yield Bits(self, "num_tags", 4) + + for i in range(self["num_tags"].value): + yield UIntVbe(self, "tag_id[]") + + yield Bit(self, "have_name") + yield Bit(self, "have_house_number") + yield Bit(self, "have_ref") + yield Bit(self, "have_label_position") + yield Bit(self, "have_num_way_blocks") + yield Enum(Bit(self, "coord_encoding"), CoordinateEncoding) + yield PaddingBits(self, "pad[]", 2) + + if self["have_name"].value: + yield VbeString(self, "name") + if self["have_house_number"].value: + yield VbeString(self, "house_number") + if self["have_ref"].value: + yield VbeString(self, "ref") + if self["have_label_position"].value: + yield IntVbe(self, "label_lat_diff") + yield IntVbe(self, "label_lon_diff") + numWayDataBlocks = 1 + if self["have_num_way_blocks"].value: + yield UIntVbe(self, "num_way_blocks") + numWayDataBlocks = self["num_way_blocks"].value + + for i in range(numWayDataBlocks): + yield WayData(self, "way_data[]") + + def createDescription 
(self): + s = "way" + if self["have_name"].value: + s += ' "%s"' % self["name"]["chars"].value + return s + + +class WayData(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_coord_blocks") + for i in range(self["num_coord_blocks"].value): + yield WayCoordBlock(self, "way_coord_block[]") + +class WayCoordBlock(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_way_nodes") + yield IntVbe(self, "first_lat_diff") + yield IntVbe(self, "first_lon_diff") + + for i in range(self["num_way_nodes"].value-1): + yield IntVbe(self, "lat_diff[]") + yield IntVbe(self, "lon_diff[]") + + +class TileData(FieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + FieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + yield TileHeader(self, "tile_header", self.zoomIntervalCfg) + + numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1 + for zoomLevel in range(numLevels): + zoomTableEntry = self["tile_header"]["zoom_table_entry[%d]" % zoomLevel] + for poiIndex in range(zoomTableEntry["num_pois"].value): + yield POIData(self, "poi_data[%d,%d]" % (zoomLevel, poiIndex)) + + for zoomLevel in range(numLevels): + zoomTableEntry = self["tile_header"]["zoom_table_entry[%d]" % zoomLevel] + for wayIndex in range(zoomTableEntry["num_ways"].value): + yield WayProperties(self, "way_props[%d,%d]" % (zoomLevel, wayIndex)) + + + +class ZoomSubFile(SeekableFieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + SeekableFieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + indexEntries = [] + numTiles = None + i = 0 + while True: + entry = TileIndexEntry(self, "tile_index_entry[]") + indexEntries.append(entry) + yield entry + + i+=1 + if numTiles is None: + # calculate number of tiles (TODO: better calc this from map bounding box) + firstOffset = 
self["tile_index_entry[0]"]["offset"].value + numTiles = firstOffset / 5 + if i >= numTiles: + break + + for indexEntry in indexEntries: + self.seekByte(indexEntry["offset"].value, relative=True) + yield TileData(self, "tile_data[]", zoomIntervalCfg=self.zoomIntervalCfg) + + + +class MapsforgeMapFile(Parser, RootSeekableFieldSet): + PARSER_TAGS = { + "id": "mapsforge_map", + "category": "misc", + "file_ext": ("map",), + "min_size": 62*8, + "description": "Mapsforge map file", + } + + endian = BIG_ENDIAN + + def validate(self): + return self["file_magic"].value == "mapsforge binary OSM" and self["file_version"].value == 3 + + def createFields(self): + yield String(self, "file_magic", 20) + yield UInt32(self, "header_size") + yield UInt32(self, "file_version") + yield UInt64(self, "file_size") + yield UInt64(self, "creation_date") + yield UInt32(self, "min_lat") + yield UInt32(self, "min_lon") + yield UInt32(self, "max_lat") + yield UInt32(self, "max_lon") + yield UInt16(self, "tile_size") + yield VbeString(self, "projection") + + # flags + yield Bit(self, "have_debug") + yield Bit(self, "have_map_start") + yield Bit(self, "have_start_zoom") + yield Bit(self, "have_language_preference") + yield Bit(self, "have_comment") + yield Bit(self, "have_created_by") + yield Bits(self, "reserved[]", 2) + + if self["have_map_start"].value: + yield UInt32(self, "start_lat") + yield UInt32(self, "start_lon") + if self["have_start_zoom"].value: + yield UInt8(self, "start_zoom") + if self["have_language_preference"].value: + yield VbeString(self, "language_preference") + if self["have_comment"].value: + yield VbeString(self, "comment") + if self["have_created_by"].value: + yield VbeString(self, "created_by") + + yield TagStringList(self, "poi_tags") + yield TagStringList(self, "way_tags") + + + yield UInt8(self, "num_zoom_intervals") + for i in range(self["num_zoom_intervals"].value): + yield ZoomIntervalCfg(self, "zoom_interval_cfg[]") + + for i in 
range(self["num_zoom_intervals"].value): + zoomIntervalCfg = self["zoom_interval_cfg[%d]" % i] + self.seekByte(zoomIntervalCfg["subfile_start"].value, relative=False) + yield ZoomSubFile(self, "subfile[]", size=zoomIntervalCfg["subfile_size"].value * 8, zoomIntervalCfg=zoomIntervalCfg) + diff --git a/lib/hachoir_parser/misc/msoffice.py b/lib/hachoir_parser/misc/msoffice.py index 960ec825..825c5637 100644 --- a/lib/hachoir_parser/misc/msoffice.py +++ b/lib/hachoir_parser/misc/msoffice.py @@ -3,49 +3,40 @@ Parsers for the different streams and fragments found in an OLE2 file. Documents: - goffice source code + - Microsoft Office PowerPoint 97-2007 Binary File Format (.ppt) Specification + http://download.microsoft.com/download/0/B/E/0BE8BDD7-E5E8-422A-ABFD-4342ED7AD886/PowerPoint97-2007BinaryFileFormat(ppt)Specification.pdf Author: Robert Xiao, Victor Stinner -Creation: 2006-04-23 +Creation: 8 january 2005 """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.field import FieldSet, RootSeekableFieldSet, RawBytes -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.stream import StringInputStream -from lib.hachoir_parser.misc.msoffice_summary import SummaryFieldSet, CompObj -from lib.hachoir_parser.misc.word_doc import WordDocumentFieldSet +from hachoir_core.field import (SubFile, FieldSet, + UInt8, UInt16, Int32, UInt32, Enum, String, CString, + Bits, RawBytes) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.misc.ole2_util import OLE2FragmentParser, RawParser +from hachoir_core.stream import StringInputStream +from hachoir_parser.misc.msoffice_summary import Summary, CompObj +from hachoir_parser.misc.word_doc import WordDocumentParser, WordTableParser -PROPERTY_NAME = { - u"\5DocumentSummaryInformation": "doc_summary", - u"\5SummaryInformation": "summary", - u"WordDocument": "word_doc", -} - -class OfficeRootEntry(HachoirParser, RootSeekableFieldSet): - PARSER_TAGS = { - "description": 
"Microsoft Office document subfragments", - } - endian = LITTLE_ENDIAN - - def __init__(self, stream, **args): - RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) - HachoirParser.__init__(self, stream, **args) - - def validate(self): - return True +class RootEntry(OLE2FragmentParser): + ENDIAN_CHECK=False def createFields(self): for index, property in enumerate(self.ole2.properties): if index == 0: continue try: - name = PROPERTY_NAME[property["name"].value] + name,parser = PROPERTY_NAME[property["name"].value] except LookupError: name = property.name+"content" - for field in self.parseProperty(index, property, name): + parser = RawParser + for field in self.parseProperty(property, name, parser): yield field + def seekSBlock(self, block): + self.seekBit(block * self.ole2.ss_size) - def parseProperty(self, property_index, property, name_prefix): + def parseProperty(self, property, name_prefix, parser=RawParser): ole2 = self.ole2 if not property["size"].value: return @@ -55,49 +46,45 @@ class OfficeRootEntry(HachoirParser, RootSeekableFieldSet): first = None previous = None size = 0 - start = property["start"].value - chain = ole2.getChain(start, True) - blocksize = ole2.ss_size - desc_format = "Small blocks %s..%s (%s)" + fragment_group = None + chain = ole2.getChain(property["start"].value, ole2.ss_fat) while True: try: block = chain.next() contiguous = False - if not first: + if first is None: first = block contiguous = True - if previous and block == (previous+1): + if previous is not None and block == (previous+1): contiguous = True if contiguous: previous = block - size += blocksize + size += ole2.ss_size continue except StopIteration: block = None + if first is None: + break self.seekSBlock(first) - desc = desc_format % (first, previous, previous-first+1) - size = min(size, property["size"].value*8) - if name_prefix in ("summary", "doc_summary"): - yield SummaryFieldSet(self, name, desc, size=size) - elif name_prefix == 
"word_doc": - yield WordDocumentFieldSet(self, name, desc, size=size) - elif property_index == 1: - yield CompObj(self, "comp_obj", desc, size=size) - else: - yield RawBytes(self, name, size//8, desc) + desc = "Small blocks %s..%s (%s)" % (first, previous, previous-first+1) + desc += " of %s bytes" % (ole2.ss_size//8) + field = CustomFragment(self, name, size, parser, desc, fragment_group) + yield field + if not fragment_group: + fragment_group = field.group + fragment_group.args["datasize"] = property["size"].value + fragment_group.args["ole2name"] = property["name"].value if block is None: break first = block previous = block - size = ole2.sector_size - - def seekSBlock(self, block): - self.seekBit(block * self.ole2.ss_size) + size = ole2.ss_size class FragmentGroup: def __init__(self, parser): self.items = [] self.parser = parser + self.args = {} def add(self, item): self.items.append(item) @@ -110,8 +97,8 @@ class FragmentGroup: data = "".join(data) # FIXME: Use smarter code to send arguments - args = {"ole2": self.items[0].root} - tags = {"class": self.parser, "args": args} + self.args["ole2"] = self.items[0].root + tags = {"class": self.parser, "args": self.args} tags = tags.iteritems() return StringInputStream(data, "", tags=tags) @@ -129,3 +116,660 @@ class CustomFragment(FieldSet): def _createInputStream(self, **args): return self.group.createInputStream() +class Pictures(OLE2FragmentParser): + class Picture(FieldSet): + def createFields(self): + yield RawBytes(self, "identifier", 4, "some kind of marker (A0461DF0)") + yield UInt32(self, "size") + yield RawBytes(self, "unknown[]", 16) + yield RawBytes(self, "unknown[]", 1) + yield SubFile(self, "image", self["size"].value-17, "Image Data") + ENDIAN_CHECK=False + + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newpic=Pictures.Picture(self, "picture[]") + yield newpic + pos+=newpic.size + +class PowerPointDocument(OLE2FragmentParser): + OBJ_TYPES={ 0:"Unknown", + 1000:"Document", + 
1001:"DocumentAtom", + 1002:"EndDocument", + 1003:"SlidePersist", + 1004:"SlideBase", + 1005:"SlideBaseAtom", + 1006:"Slide", + 1007:"SlideAtom", + 1008:"Notes", + 1009:"NotesAtom", + 1010:"Environment", + 1011:"SlidePersistAtom", + 1012:"Scheme", + 1013:"SchemeAtom", + 1014:"DocViewInfo", + 1015:"SSlideLayoutAtom", + 1016:"MainMaster", + 1017:"SSSlideInfoAtom", + 1018:"SlideViewInfo", + 1019:"GuideAtom", + 1020:"ViewInfo", + 1021:"ViewInfoAtom", + 1022:"SlideViewInfoAtom", + 1023:"VBAInfo", + 1024:"VBAInfoAtom", + 1025:"SSDocInfoAtom", + 1026:"Summary", + 1027:"Texture", + 1028:"VBASlideInfo", + 1029:"VBASlideInfoAtom", + 1030:"DocRoutingSlip", + 1031:"OutlineViewInfo", + 1032:"SorterViewInfo", + 1033:"ExObjList", + 1034:"ExObjListAtom", + 1035:"PPDrawingGroup", #FIXME: Office Art File Format Docu + 1036:"PPDrawing", #FIXME: Office Art File Format Docu + 1038:"Theme", + 1039:"ColorMapping", + 1040:"NamedShows", # don't know if container + 1041:"NamedShow", + 1042:"NamedShowSlides", # don't know if container + 1052:"OriginalMainMasterId", + 1053:"CompositeMasterId", + 1054:"RoundTripContentMasterInfo12", + 1055:"RoundTripShapeId12", + 1056:"RoundTripHFPlaceholder12", + 1058:"RoundTripContentMasterId12", + 1059:"RoundTripOArtTextStyles12", + 1060:"HeaderFooterDefaults12", + 1061:"DocFlags12", + 1062:"RoundTripShapeCheckSumForCustomLayouts12", + 1063:"RoundTripNotesMasterTextStyles12", + 1064:"RoundTripCustomTableStyles12", + 2000:"List", + 2005:"FontCollection", + 2017:"ListPlaceholder", + 2019:"BookmarkCollection", + 2020:"SoundCollection", + 2021:"SoundCollAtom", + 2022:"Sound", + 2023:"SoundData", + 2025:"BookmarkSeedAtom", + 2026:"GuideList", + 2028:"RunArray", + 2029:"RunArrayAtom", + 2030:"ArrayElementAtom", + 2031:"Int4ArrayAtom", + 2032:"ColorSchemeAtom", + 3008:"OEShape", + 3009:"ExObjRefAtom", + 3011:"OEPlaceholderAtom", + 3020:"GrColor", + 3024:"GPointAtom", + 3025:"GrectAtom", + 3031:"GRatioAtom", + 3032:"Gscaling", + 3034:"GpointAtom", + 
3035:"OEShapeAtom", + 3037:"OEPlaceholderNewPlaceholderId12", + 3998:"OutlineTextRefAtom", + 3999:"TextHeaderAtom", + 4000:"TextCharsAtom", + 4001:"StyleTextPropAtom", + 4002:"BaseTextPropAtom", + 4003:"TxMasterStyleAtom", + 4004:"TxCFStyleAtom", + 4005:"TxPFStyleAtom", + 4006:"TextRulerAtom", + 4007:"TextBookmarkAtom", + 4008:"TextBytesAtom", + 4009:"TxSIStyleAtom", + 4010:"TextSpecInfoAtom", + 4011:"DefaultRulerAtom", + 4023:"FontEntityAtom", + 4024:"FontEmbeddedData", + 4025:"TypeFace", + 4026:"CString", + 4027:"ExternalObject", + 4033:"MetaFile", + 4034:"ExOleObj", + 4035:"ExOleObjAtom", + 4036:"ExPlainLinkAtom", + 4037:"CorePict", + 4038:"CorePictAtom", + 4039:"ExPlainAtom", + 4040:"SrKinsoku", + 4041:"HandOut", + 4044:"ExEmbed", + 4045:"ExEmbedAtom", + 4046:"ExLink", + 4047:"ExLinkAtom_old", + 4048:"BookmarkEntityAtom", + 4049:"ExLinkAtom", + 4050:"SrKinsokuAtom", + 4051:"ExHyperlinkAtom", + 4053:"ExPlain", + 4054:"ExPlainLink", + 4055:"ExHyperlink", + 4056:"SlideNumberMCAtom", + 4057:"HeadersFooters", + 4058:"HeadersFootersAtom", + 4062:"RecolorEntryAtom", + 4063:"TxInteractiveInfoAtom", + 4065:"EmFormatAtom", + 4066:"CharFormatAtom", + 4067:"ParaFormatAtom", + 4068:"MasterText", + 4071:"RecolorInfoAtom", + 4073:"ExQuickTime", + 4074:"ExQuickTimeMovie", + 4075:"ExQuickTimeMovieData", + 4076:"ExSubscription", + 4077:"ExSubscriptionSection", + 4078:"ExControl", + 4080:"SlideListWithText", + 4081:"AnimationInfoAtom", + 4082:"InteractiveInfo", + 4083:"InteractiveInfoAtom", + 4084:"SlideList", + 4085:"UserEditAtom", + 4086:"CurrentUserAtom", + 4087:"DateTimeMCAtom", + 4088:"GenericDateMCAtom", + 4090:"FooterMCAtom", + 4091:"ExControlAtom", + 4100:"ExMediaAtom", + 4101:"ExVideo", + 4102:"ExAviMovie", + 4103:"ExMCIMovie", + 4109:"ExMIDIAudio", + 4110:"ExCDAudio", + 4111:"ExWAVAudioEmbedded", + 4112:"ExWAVAudioLink", + 4113:"ExOleObjStg", + 4114:"ExCDAudioAtom", + 4115:"ExWAVAudioEmbeddedAtom", + 4116:"AnimationInfoAtom", + 4117:"RTFDateTimeMCAtom", + 
5000:"ProgTags", # don't know if container + 5001:"ProgStringTag", + 5002:"ProgBinaryTag", + 5003:"BinaryTagData", + 6000:"PrintOptions", + 6001:"PersistPtrFullBlock", # don't know if container + 6002:"PersistPtrIncrementalBlock", # don't know if container + 10000:"RulerIndentAtom", + 10001:"GScalingAtom", + 10002:"GRColorAtom", + 10003:"GLPointAtom", + 10004:"GlineAtom", + 11019:"AnimationAtom12", + 11021:"AnimationHashAtom12", + 14100:"SlideSyncInfo12", + 14101:"SlideSyncInfoAtom12", + 0xf000:"EscherDggContainer", # Drawing Group Container + 0xf006:"EscherDgg", + 0xf016:"EscherCLSID", + 0xf00b:"EscherOPT", + 0xf001:"EscherBStoreContainer", + 0xf007:"EscherBSE", + 0xf018:"EscherBlip_START", # Blip types are between + 0xf117:"EscherBlip_END", # these two values + 0xf002:"EscherDgContainer", # Drawing Container + 0xf008:"EscherDg", + 0xf118:"EscherRegroupItems", + 0xf120:"EscherColorScheme", # bug in docs + 0xf003:"EscherSpgrContainer", + 0xf004:"EscherSpContainer", + 0xf009:"EscherSpgr", + 0xf00a:"EscherSp", + 0xf00c:"EscherTextbox", + 0xf00d:"EscherClientTextbox", + 0xf00e:"EscherAnchor", + 0xf00f:"EscherChildAnchor", + 0xf010:"EscherClientAnchor", + 0xf011:"EscherClientData", + 0xf005:"EscherSolverContainer", + 0xf012:"EscherConnectorRule", # bug in docs + 0xf013:"EscherAlignRule", + 0xf014:"EscherArcRule", + 0xf015:"EscherClientRule", + 0xf017:"EscherCalloutRule", + 0xf119:"EscherSelection", + 0xf11a:"EscherColorMRU", + 0xf11d:"EscherDeletedPspl", # bug in docs + 0xf11e:"EscherSplitMenuColors", + 0xf11f:"EscherOleObject", + 0xf122:"EscherUserDefined"} + class CurrentUserAtom(FieldSet): + def createFields(self): + yield UInt32(self, "size") + yield textHandler(UInt32(self, "magic", "0xe391c05f for normal PPT, 0xf3d1c4df for encrypted PPT"), hexadecimal) + yield UInt32(self, "offsetToCurrentEdit", "Offset in main stream to current edit field") + yield UInt16(self, "lenUserName", "Length of user name") + yield UInt16(self, "docFileVersion", "1012 for PP97+") + 
yield UInt8(self, "majorVersion", "3 for PP97+") + yield UInt8(self, "minorVersion", "0 for PP97+") + yield UInt16(self, "unknown") + yield String(self, "userName", self["lenUserName"].value, "ANSI version of the username") + yield UInt32(self, "relVersion", "Release version: 8 for regular PPT file, 9 for multiple-master PPT file") + + class PowerPointObject(FieldSet): + def createFields(self): + yield Bits(self, "version", 4) + yield Bits(self, "instance", 12) + yield Enum(UInt16(self, "type"),PowerPointDocument.OBJ_TYPES) + yield UInt32(self, "length") + self._size = self["length"].value * 8 + 64 + obj_type = self["type"].display + obj_len = self["length"].value + # type 1064 (RoundTripCustomTableStyles12) may appear to be a container, but it is not. + if self["version"].value==0xF and self["type"].value != 1064: + while (self.current_size)//8 < obj_len+8: + yield PowerPointDocument.PowerPointObject(self, "object[]") + elif obj_len: + if obj_type=="FontEntityAtom": + yield String(self, "data", obj_len, charset="UTF-16-LE", truncate="\0", strip="\0") + elif obj_type=="TextCharsAtom": + yield String(self, "data", obj_len, charset="UTF-16-LE") + elif obj_type=="TextBytesAtom": + yield String(self, "data", obj_len, charset="ASCII") + elif hasattr(PowerPointDocument, obj_type): + field = getattr(PowerPointDocument, obj_type)(self, "data") + field._size = obj_len * 8 + yield field + else: + yield RawBytes(self, "data", obj_len) + def createDescription(self): + if self["version"].value==0xF: + return "PowerPoint Object Container; instance %s, type %s"%(self["instance"].value,self["type"].display) + return "PowerPoint Object; version %s, instance %s, type %s"%(self["version"].value,self["instance"].value,self["type"].display) + ENDIAN_CHECK=False + OS_CHECK=False + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newobj=PowerPointDocument.PowerPointObject(self, "object[]") + yield newobj + pos+=newobj.size + +class CurrentUser(OLE2FragmentParser): + def 
createFields(self): + yield PowerPointDocument.PowerPointObject(self, "current_user") + if self.current_size < self.size: + yield String(self, "unicode_name", self["current_user/data/lenUserName"].value * 2, charset="UTF-16-LE") + + +class ExcelWorkbook(OLE2FragmentParser): + BIFF_TYPES={0x000:"DIMENSIONS_v0", + 0x200:"DIMENSIONS_v2", + 0x001:"BLANK_v0", + 0x201:"BLANK_v2", + 0x002:"INTEGER", + 0x003:"NUMBER_v0", + 0x203:"NUMBER_v2", + 0x004:"LABEL_v0", + 0x204:"LABEL_v2", + 0x005:"BOOLERR_v0", + 0x205:"BOOLERR_v2", + 0x006:"FORMULA_v0", + 0x206:"FORMULA_v2", + 0x406:"FORMULA_v4", + 0x007:"STRING_v0", + 0x207:"STRING_v2", + 0x008:"ROW_v0", + 0x208:"ROW_v2", + 0x009:"BOF_v0", + 0x209:"BOF_v2", + 0x409:"BOF_v4", + 0x809:"BOF_v8", + 0x00a:"EOF", + 0x00b:"INDEX_v0", + 0x20b:"INDEX_v2", + 0x00c:"CALCCOUNT", + 0x00d:"CALCMODE", + 0x00e:"PRECISION", + 0x00f:"REFMODE", + 0x010:"DELTA", + 0x011:"ITERATION", + 0x012:"PROTECT", + 0x013:"PASSWORD", + 0x014:"HEADER", + 0x015:"FOOTER", + 0x016:"EXTERNCOUNT", + 0x017:"EXTERNSHEET", + 0x018:"NAME_v0", + 0x218:"NAME_v2", + 0x019:"WINDOWPROTECT", + 0x01a:"VERTICALPAGEBREAKS", + 0x01b:"HORIZONTALPAGEBREAKS", + 0x01c:"NOTE", + 0x01d:"SELECTION", + 0x01e:"FORMAT_v0", + 0x41e:"FORMAT_v4", + 0x01f:"FORMATCOUNT", # Undocumented + 0x020:"COLUMNDEFAULT", # Undocumented + 0x021:"ARRAY_v0", + 0x221:"ARRAY_v2", + 0x022:"1904", + 0x023:"EXTERNNAME_v0", + 0x223:"EXTERNNAME_v2", + 0x024:"COLWIDTH", # Undocumented + 0x025:"DEFAULTROWHEIGHT_v0", + 0x225:"DEFAULTROWHEIGHT_v2", + 0x026:"LEFT_MARGIN", + 0x027:"RIGHT_MARGIN", + 0x028:"TOP_MARGIN", + 0x029:"BOTTOM_MARGIN", + 0x02a:"PRINTHEADERS", + 0x02b:"PRINTGRIDLINES", + 0x02f:"FILEPASS", + 0x031:"FONT_v0", + 0x231:"FONT_v2", + 0x032:"FONTCOUNT", # Undocumented + 0x033:"PRINTSIZE", # Undocumented + 0x036:"TABLE_v0", + 0x236:"TABLE_v2", + 0x037:"TABLE2", # OOo has docs + 0x038:"WNDESK", # Undocumented + 0x039:"ZOOM", # Undocumented + 0x03a:"BEGINPREF", # Undocumented + 0x03b:"ENDPREF", # Undocumented 
+ 0x03c:"CONTINUE", + 0x03d:"WINDOW1", + 0x03e:"WINDOW2_v0", + 0x23e:"WINDOW2_v2", + 0x03f:"PANE_V2", # Undocumented + 0x040:"BACKUP", + 0x041:"PANE", + 0x042:"CODEPAGE", + 0x043:"XF_OLD_v0", + 0x243:"XF_OLD_v2", + 0x443:"XF_OLD_v4", + 0x044:"XF_INDEX", + 0x045:"FONT_COLOR", + 0x04d:"PLS", + 0x050:"DCON", + 0x051:"DCONREF", + 0x052:"DCONNAME", + 0x055:"DEFCOLWIDTH", + 0x059:"XCT", + 0x05a:"CRN", + 0x05b:"FILESHARING", + 0x05c:"WRITEACCESS", + 0x05d:"OBJ", + 0x05e:"UNCALCED", + 0x05f:"SAVERECALC", + 0x060:"TEMPLATE", + 0x061:"INTL", # Undocumented + 0x862:"TAB_COLOR", # Undocumented, OO calls it SHEETLAYOUT + 0x063:"OBJPROTECT", + 0x07d:"COLINFO", + 0x27e:"RK", # Odd that there is no 0x7e + 0x07f:"IMDATA", + 0x080:"GUTS", + 0x081:"WSBOOL", + 0x082:"GRIDSET", + 0x083:"HCENTER", + 0x084:"VCENTER", + 0x085:"BOUNDSHEET", + 0x086:"WRITEPROT", + 0x087:"ADDIN", + 0x088:"EDG", + 0x089:"PUB", + 0x08c:"COUNTRY", + 0x08d:"HIDEOBJ", + 0x08e:"BUNDLESOFFSET", # Undocumented + 0x08f:"BUNDLEHEADER", # Undocumented + 0x090:"SORT", + 0x091:"SUB", + 0x092:"PALETTE", + 0x293:"STYLE", # Odd that there is no 0x93 + 0x094:"LHRECORD", + 0x095:"LHNGRAPH", + 0x096:"SOUND", + 0x097:"SYNC", # Undocumented + 0x098:"LPR", + 0x099:"STANDARDWIDTH", + 0x09a:"FNGROUPNAME", + 0x09b:"FILTERMODE", + 0x09c:"FNGROUPCOUNT", + 0x09d:"AUTOFILTERINFO", + 0x09e:"AUTOFILTER", + 0x0a0:"SCL", + 0x0a1:"SETUP", + 0x0a4:"TOOLBARVER", # Undocumented + 0x0a9:"COORDLIST", + 0x0ab:"GCW", + 0x0ae:"SCENMAN", + 0x0af:"SCENARIO", + 0x0b0:"SXVIEW", + 0x0b1:"SXVD", + 0x0b2:"SXVI", + 0x0b3:"SXSI", # Undocumented + 0x0b4:"SXIVD", + 0x0b5:"SXLI", + 0x0b6:"SXPI", + 0x0b7:"FACENUM", # Undocumented + 0x0b8:"DOCROUTE", + 0x0b9:"RECIPNAME", + 0x0ba:"SSLIST", # Undocumented + 0x0bb:"MASKIMDATA", # Undocumented + 0x4bc:"SHRFMLA", + 0x0bd:"MULRK", + 0x0be:"MULBLANK", + 0x0bf:"TOOLBARHDR", # Undocumented + 0x0c0:"TOOLBAREND", # Undocumented + 0x0c1:"MMS", + 0x0c2:"ADDMENU", + 0x0c3:"DELMENU", + 0x0c4:"TIPHISTORY", # Undocumented + 
0x0c5:"SXDI", + 0x0c6:"SXDB", + 0x0c7:"SXFDB", # guessed + 0x0c8:"SXDDB", # guessed + 0x0c9:"SXNUM", # guessed + 0x0ca:"SXBOOL", # guessed + 0x0cb:"SXERR", # guessed + 0x0cc:"SXINT", # guessed + 0x0cd:"SXSTRING", + 0x0ce:"SXDTR", # guessed + 0x0cf:"SXNIL", # guessed + 0x0d0:"SXTBL", + 0x0d1:"SXTBRGIITM", + 0x0d2:"SXTBPG", + 0x0d3:"OBPROJ", + 0x0d5:"SXIDSTM", + 0x0d6:"RSTRING", + 0x0d7:"DBCELL", + 0x0d8:"SXNUMGROUP", # from OO : numerical grouping in pivot cache field + 0x0da:"BOOKBOOL", + 0x0dc:"PARAMQRY", # DUPLICATE dc + 0x0dc:"SXEXT", # DUPLICATE dc + 0x0dd:"SCENPROTECT", + 0x0de:"OLESIZE", + 0x0df:"UDDESC", + 0x0e0:"XF", + 0x0e1:"INTERFACEHDR", + 0x0e2:"INTERFACEEND", + 0x0e3:"SXVS", + 0x0e5:"MERGECELLS", # guessed + 0x0e9:"BG_PIC", # Undocumented + 0x0ea:"TABIDCONF", + 0x0eb:"MS_O_DRAWING_GROUP", + 0x0ec:"MS_O_DRAWING", + 0x0ed:"MS_O_DRAWING_SELECTION", + 0x0ef:"PHONETIC", # semi-Undocumented + 0x0f0:"SXRULE", + 0x0f1:"SXEX", + 0x0f2:"SXFILT", + 0x0f6:"SXNAME", + 0x0f7:"SXSELECT", + 0x0f8:"SXPAIR", + 0x0f9:"SXFMLA", + 0x0fb:"SXFORMAT", + 0x0fc:"SST", + 0x0fd:"LABELSST", + 0x0ff:"EXTSST", + 0x100:"SXVDEX", + 0x103:"SXFORMULA", + 0x122:"SXDBEX", + 0x137:"CHTRINSERT", + 0x138:"CHTRINFO", + 0x13B:"CHTRCELLCONTENT", + 0x13d:"TABID", + 0x140:"CHTRMOVERANGE", + 0x14D:"CHTRINSERTTAB", + 0x15F:"LABELRANGES", + 0x160:"USESELFS", + 0x161:"DSF", + 0x162:"XL5MODIFY", + 0x196:"CHTRHEADER", + 0x1a5:"FILESHARING2", + 0x1a9:"USERDBVIEW", + 0x1aa:"USERSVIEWBEGIN", + 0x1ab:"USERSVIEWEND", + 0x1ad:"QSI", + 0x1ae:"SUPBOOK", + 0x1af:"PROT4REV", + 0x1b0:"CONDFMT", + 0x1b1:"CF", + 0x1b2:"DVAL", + 0x1b5:"DCONBIN", + 0x1b6:"TXO", + 0x1b7:"REFRESHALL", + 0x1b8:"HLINK", + 0x1ba:"CODENAME", # TYPO in MS Docs + 0x1bb:"SXFDBTYPE", + 0x1bc:"PROT4REVPASS", + 0x1be:"DV", + 0x1c0:"XL9FILE", + 0x1c1:"RECALCID", + 0x800:"LINK_TIP", # follows an hlink + 0x802:"UNKNOWN_802", # OO exports it but has not name or docs + 0x803:"WQSETT", # OO named it and can export it, but does not include it in the 
docs + 0x804:"WQTABLES", # OO named it and can export it, but does not include it in the docs + 0x805:"UNKNOWN_805", # No name or docs, seems related to web query see #153260 for sample + 0x810:"PIVOT_AUTOFORMAT", # Seems to contain pivot table autoformat indicies, plus ?? + 0x864:"UNKNOWN_864", # seems related to pivot tables + 0x867:"SHEETPROTECTION", # OO named it, and has docs + 0x868:"RANGEPROTECTION", # OO named it, no docs yet + + 0x1001:"CHART_units", + 0x1002:"CHART_chart", + 0x1003:"CHART_series", + 0x1006:"CHART_dataformat", + 0x1007:"CHART_lineformat", + 0x1009:"CHART_markerformat", + 0x100a:"CHART_areaformat", + 0x100b:"CHART_pieformat", + 0x100c:"CHART_attachedlabel", + 0x100d:"CHART_seriestext", + 0x1014:"CHART_chartformat", + 0x1015:"CHART_legend", + 0x1016:"CHART_serieslist", + 0x1017:"CHART_bar", + 0x1018:"CHART_line", + 0x1019:"CHART_pie", + 0x101a:"CHART_area", + 0x101b:"CHART_scatter", + 0x101c:"CHART_chartline", + 0x101d:"CHART_axis", + 0x101e:"CHART_tick", + 0x101f:"CHART_valuerange", + 0x1020:"CHART_catserrange", + 0x1021:"CHART_axislineformat", + 0x1022:"CHART_chartformatlink", + 0x1024:"CHART_defaulttext", + 0x1025:"CHART_text", + 0x1026:"CHART_fontx", + 0x1027:"CHART_objectlink", + 0x1032:"CHART_frame", + 0x1033:"CHART_begin", + 0x1034:"CHART_end", + 0x1035:"CHART_plotarea", + 0x103a:"CHART_3d", + 0x103c:"CHART_picf", + 0x103d:"CHART_dropbar", + 0x103e:"CHART_radar", + 0x103f:"CHART_surf", + 0x1040:"CHART_radararea", + 0x1041:"CHART_axisparent", + 0x1043:"CHART_legendxn", + 0x1044:"CHART_shtprops", + 0x1045:"CHART_sertocrt", + 0x1046:"CHART_axesused", + 0x1048:"CHART_sbaseref", + 0x104a:"CHART_serparent", + 0x104b:"CHART_serauxtrend", + 0x104e:"CHART_ifmt", + 0x104f:"CHART_pos", + 0x1050:"CHART_alruns", + 0x1051:"CHART_ai", + 0x105b:"CHART_serauxerrbar", + 0x105c:"CHART_clrtclient", # Undocumented + 0x105d:"CHART_serfmt", + 0x105f:"CHART_3dbarshape", # Undocumented + 0x1060:"CHART_fbi", + 0x1061:"CHART_boppop", + 0x1062:"CHART_axcext", + 
0x1063:"CHART_dat", + 0x1064:"CHART_plotgrowth", + 0x1065:"CHART_siindex", + 0x1066:"CHART_gelframe", + 0x1067:"CHART_boppopcustom",} + class BIFF(FieldSet): + def createFields(self): + yield Enum(UInt16(self, "type"),ExcelWorkbook.BIFF_TYPES) + yield UInt16(self, "length") + if self["length"].value: + yield RawBytes(self, "data", self["length"].value) + def createDescription(self): + return "Excel BIFF; type %s"%self["type"].display + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newobj=ExcelWorkbook.BIFF(self, "BIFF[]") + yield newobj + pos+=newobj.size + +class ThumbsCatalog(OLE2FragmentParser): + class ThumbsEntry(FieldSet): + def createFields(self): + yield UInt32(self, "size") + yield UInt32(self, "index") + yield Bits(self, "flags", 8) + yield RawBytes(self, "unknown[]", 5) + yield UInt16(self, "unknown[]") + yield CString(self, "name", charset="UTF-16-LE") + if self.current_size // 8 != self['size'].value: + yield RawBytes(self, "padding", self['size'].value - self.current_size // 8) + def createDescription(self): + return "Thumbnail entry for %s"%self["name"].display + + def createFields(self): + yield UInt16(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt32(self, "count") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + for i in xrange(self['count'].value): + yield ThumbsCatalog.ThumbsEntry(self, "entry[]") + +PROPERTY_NAME = { + u"Root Entry": ("root",RootEntry), + u"\5DocumentSummaryInformation": ("doc_summary",Summary), + u"\5SummaryInformation": ("summary",Summary), + u"\1CompObj": ("compobj",CompObj), + u"Pictures": ("pictures",Pictures), + u"PowerPoint Document": ("powerpointdoc",PowerPointDocument), + u"Current User": ("current_user",CurrentUser), + u"Workbook": ("workbook",ExcelWorkbook), + u"Catalog": ("catalog",ThumbsCatalog), + u"WordDocument": ("word_doc",WordDocumentParser), + u"0Table": ("table0",WordTableParser), + u"1Table": ("table1",WordTableParser), +} diff --git 
a/lib/hachoir_parser/misc/msoffice_summary.py b/lib/hachoir_parser/misc/msoffice_summary.py index 4199934f..e3ded4cf 100644 --- a/lib/hachoir_parser/misc/msoffice_summary.py +++ b/lib/hachoir_parser/misc/msoffice_summary.py @@ -7,18 +7,19 @@ Documents - Apache POI (HPSF Internals): http://poi.apache.org/hpsf/internals.html """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.field import (FieldSet, ParserError, - RootSeekableFieldSet, SeekableFieldSet, +from hachoir_core.endian import BIG_ENDIAN,LITTLE_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_core.field import (FieldSet, ParserError, + SeekableFieldSet, Bit, Bits, NullBits, UInt8, UInt16, UInt32, TimestampWin64, TimedeltaWin64, Enum, - Bytes, RawBytes, NullBytes, String, + Bytes, RawBytes, NullBytes, PaddingBits, String, Int8, Int32, Float32, Float64, PascalString32) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.tools import createDict -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.common.win32 import GUID, PascalStringWin32, CODEPAGE_CHARSET -from lib.hachoir_parser.image.bmp import BmpHeader, parseImageData +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.tools import createDict, paddingSize +from hachoir_parser.common.win32 import GUID, PascalStringWin32, CODEPAGE_CHARSET +from hachoir_parser.image.bmp import BmpHeader, parseImageData +from hachoir_parser.misc.ole2_util import OLE2FragmentParser MAX_SECTION_COUNT = 100 @@ -165,10 +166,37 @@ class Thumbnail(FieldSet): yield RawBytes(self, "data", size) class PropertyContent(FieldSet): + class NullHandler(FieldSet): + def createFields(self): + yield UInt32(self, "unknown[]") + yield PascalString32(self, "data") + def createValue(self): + return self["data"].value + class BlobHandler(FieldSet): + def createFields(self): + self.osconfig = self.parent.osconfig + yield 
UInt32(self, "size") + yield UInt32(self, "count") + for i in range(self["count"].value): + yield PropertyContent(self, "item[]") + n=paddingSize(self.current_size,32) + if n: yield PaddingBits(self, "padding[]", n) + class WidePascalString32(FieldSet): + ''' uses number of characters instead of number of bytes ''' + def __init__(self,parent,name,charset='ASCII'): + FieldSet.__init__(self,parent,name) + self.charset=charset + def createFields(self): + yield UInt32(self, "length", "Length of this string") + yield String(self, "data", self["length"].value*2, charset=self.charset) + def createValue(self): + return self["data"].value + def createDisplay(self): + return 'u'+self["data"].display TYPE_LPSTR = 30 TYPE_INFO = { 0: ("EMPTY", None), - 1: ("NULL", None), + 1: ("NULL", NullHandler), 2: ("UInt16", UInt16), 3: ("UInt32", UInt32), 4: ("Float32", Float32), @@ -197,9 +225,9 @@ class PropertyContent(FieldSet): 28: ("CARRAY", None), 29: ("USERDEFINED", None), 30: ("LPSTR", PascalString32), - 31: ("LPWSTR", PascalString32), + 31: ("LPWSTR", WidePascalString32), 64: ("FILETIME", TimestampWin64), - 65: ("BLOB", None), + 65: ("BLOB", BlobHandler), 66: ("STREAM", None), 67: ("STORAGE", None), 68: ("STREAMED_OBJECT", None), @@ -223,8 +251,13 @@ class PropertyContent(FieldSet): kw = {} try: handler = self.TYPE_INFO[tag][1] - if handler == PascalString32: - osconfig = self.osconfig + if handler in (self.WidePascalString32,PascalString32): + cur = self + while not hasattr(cur,'osconfig'): + cur=cur.parent + if cur is None: + raise LookupError('Cannot find osconfig') + osconfig = cur.osconfig if tag == self.TYPE_LPSTR: kw["charset"] = osconfig.charset else: @@ -235,9 +268,10 @@ class PropertyContent(FieldSet): except LookupError: handler = None if not handler: - raise ParserError("OLE2: Unable to parse property of type %s" \ + self.warning("OLE2: Unable to parse property of type %s" \ % self["type"].display) - if self["is_vector"].value: + # raise ParserError( + elif 
self["is_vector"].value: yield UInt32(self, "count") for index in xrange(self["count"].value): yield handler(self, "item[]", **kw) @@ -276,20 +310,16 @@ class SummaryIndex(FieldSet): yield String(self, "name", 16) yield UInt32(self, "offset") -class BaseSummary: - endian = LITTLE_ENDIAN +class Summary(OLE2FragmentParser): + ENDIAN_CHECK=True - def __init__(self): - if self["endian"].value == "\xFF\xFE": - self.endian = BIG_ENDIAN - elif self["endian"].value == "\xFE\xFF": - self.endian = LITTLE_ENDIAN - else: - raise ParserError("OLE2: Invalid endian value") - self.osconfig = OSConfig(self["os_type"].value == OS_MAC) + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) + #self.osconfig = OSConfig(self["os_type"].value == OS_MAC) + self.osconfig = OSConfig(self.endian == BIG_ENDIAN) def createFields(self): - yield Bytes(self, "endian", 2, "Endian (0xFF 0xFE for Intel)") + yield Bytes(self, "endian", 2, "Endian (\\xfe\\xff for little endian)") yield UInt16(self, "format", "Format (0)") yield UInt8(self, "os_version") yield UInt8(self, "os_revision") @@ -313,35 +343,20 @@ class BaseSummary: if 0 < size: yield NullBytes(self, "end_padding", size) -class SummaryParser(BaseSummary, HachoirParser, RootSeekableFieldSet): - PARSER_TAGS = { - "description": "Microsoft Office summary", - } +class CompObj(OLE2FragmentParser): + ENDIAN_CHECK=True - def __init__(self, stream, **kw): - RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) - HachoirParser.__init__(self, stream, **kw) - BaseSummary.__init__(self) - - def validate(self): - return True - -class SummaryFieldSet(BaseSummary, FieldSet): - def __init__(self, parent, name, description=None, size=None): - FieldSet.__init__(self, parent, name, description=description, size=size) - BaseSummary.__init__(self) - -class CompObj(FieldSet): - OS_VERSION = { - 0x0a03: "Windows 3.1", - } + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, 
stream, **args) + self.osconfig = OSConfig(self["os"].value == OS_MAC) + def createFields(self): # Header yield UInt16(self, "version", "Version (=1)") - yield textHandler(UInt16(self, "endian", "Endian (0xFF 0xFE for Intel)"), hexadecimal) + yield Bytes(self, "endian", 2, "Endian (\\xfe\\xff for little endian)") yield UInt8(self, "os_version") yield UInt8(self, "os_revision") - yield Enum(UInt16(self, "os_type"), OS_NAME) + yield Enum(UInt16(self, "os"), OS_NAME) yield Int32(self, "unused", "(=-1)") yield GUID(self, "clsid") @@ -349,12 +364,12 @@ class CompObj(FieldSet): yield PascalString32(self, "user_type", strip="\0") # Clipboard format - if self["os_type"].value == OS_MAC: + if self["os"].value == OS_MAC: yield Int32(self, "unused[]", "(=-2)") yield String(self, "clipboard_format", 4) else: yield PascalString32(self, "clipboard_format", strip="\0") - if self.current_size == self.size: + if self._current_size // 8 == self.datasize: return #-- OLE 2.01 --- @@ -362,7 +377,7 @@ class CompObj(FieldSet): # Program ID yield PascalString32(self, "prog_id", strip="\0") - if self["os_type"].value != OS_MAC: + if self["os"].value != OS_MAC: # Magic number yield textHandler(UInt32(self, "magic", "Magic number (0x71B239F4)"), hexadecimal) @@ -371,7 +386,8 @@ class CompObj(FieldSet): yield PascalStringWin32(self, "clipboard_format_unicode", strip="\0") yield PascalStringWin32(self, "prog_id_unicode", strip="\0") - size = (self.size - self.current_size) // 8 + size = self.datasize - (self._current_size // 8) # _current_size because current_size returns _current_max_size if size: yield NullBytes(self, "end_padding", size) + if self.datasize """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( Field, FieldSet, ParserError, GenericVector, UInt8, UInt16, UInt32, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import 
textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal MAGIC = "%PDF-" ENDMAGIC = "%%EOF" diff --git a/lib/hachoir_parser/misc/pifv.py b/lib/hachoir_parser/misc/pifv.py index e60e651e..d9474733 100644 --- a/lib/hachoir_parser/misc/pifv.py +++ b/lib/hachoir_parser/misc/pifv.py @@ -5,13 +5,13 @@ Author: Alexandre Boeglin Creation date: 08 jul 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt24, UInt32, UInt64, Enum, CString, String, PaddingBytes, RawBytes, NullBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import paddingSize, humanFilesize -from lib.hachoir_parser.common.win32 import GUID +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import paddingSize, humanFilesize +from hachoir_parser.common.win32 import GUID EFI_SECTION_COMPRESSION = 0x1 EFI_SECTION_GUID_DEFINED = 0x2 diff --git a/lib/hachoir_parser/misc/torrent.py b/lib/hachoir_parser/misc/torrent.py index 62653271..0c32a785 100644 --- a/lib/hachoir_parser/misc/torrent.py +++ b/lib/hachoir_parser/misc/torrent.py @@ -7,11 +7,11 @@ Status: To statufy Author: Christophe Gisquet """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import makePrintable, timestampUNIX, humanFilesize +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import makePrintable, timestampUNIX, humanFilesize # Maximum number of bytes for string length MAX_STRING_LENGTH = 6 # length in 0..999999 @@ -128,7 +128,7 @@ class DictionaryItem(FieldSet): # Map first chunk byte => type TAGS = {'d': Dictionary, 
'i': Integer, 'l': List} -for index in xrange(1, 9+1): +for index in xrange(0, 9+1): TAGS[str(index)] = TorrentString # Create an entry diff --git a/lib/hachoir_parser/misc/ttf.py b/lib/hachoir_parser/misc/ttf.py index 3a698ad1..f1024aab 100644 --- a/lib/hachoir_parser/misc/ttf.py +++ b/lib/hachoir_parser/misc/ttf.py @@ -10,14 +10,14 @@ Author: Victor Stinner Creation date: 2007-02-08 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt16, UInt32, Bit, Bits, PaddingBits, NullBytes, String, RawBytes, Bytes, Enum, TimestampMac32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler MAX_NAME_COUNT = 300 MIN_NB_TABLE = 3 diff --git a/lib/hachoir_parser/misc/word_2.py b/lib/hachoir_parser/misc/word_2.py new file mode 100644 index 00000000..aec727b1 --- /dev/null +++ b/lib/hachoir_parser/misc/word_2.py @@ -0,0 +1,168 @@ +""" +Documents: + +* "Microsoft Word for Windows 2.0 Binary Format" + http://www.wotsit.org/download.asp?f=word2&sc=275927573 +""" + +from hachoir_core.field import (FieldSet, Enum, + Bit, Bits, + UInt8, Int16, UInt16, UInt32, Int32, + NullBytes, Bytes, RawBytes, PascalString16, + DateTimeMSDOS32, TimeDateMSDOS32) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.misc.ole2_util import OLE2FragmentParser +from hachoir_core.tools import paddingSize +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID +TIMESTAMP = DateTimeMSDOS32 + +class FC_CB(FieldSet): + def createFields(self): + yield Int32(self, "fc", "File Offset") + yield UInt16(self, "cb", "Byte Count") + def createValue(self): + return (self['fc'].value,self['cb'].value) + +class FIB(FieldSet): + def createFields(self): + yield 
UInt16(self, "wIdent", "Magic Number") + yield UInt16(self, "nFib", "File Information Block (FIB) Version") + yield UInt16(self, "nProduct", "Product Version") + yield Enum(UInt16(self, "lid", "Language ID"), LANGUAGE_ID) + yield Int16(self, "pnNext") + + yield Bit(self, "fDot", "Is the document a document template?") + yield Bit(self, "fGlsy", "Is the document a glossary?") + yield Bit(self, "fComplex", "Is the document in Complex format?") + yield Bit(self, "fHasPic", "Does the document have embedded images?") + yield Bits(self, "cQuickSaves", 4, "Number of times the document was quick-saved") + yield Bit(self, "fEncrypted", "Is the document encrypted?") + yield Bits(self, "reserved[]", 7) + + yield UInt16(self, "nFibBack") + yield UInt32(self, "reserved[]") + yield NullBytes(self, "rgwSpare", 6) + + yield UInt32(self, "fcMin", "File offset of first text character") + yield UInt32(self, "fcMax", "File offset of last text character + 1") + yield Int32(self, "cbMax", "File offset of last byte + 1") + yield NullBytes(self, "fcSpare", 16) + + yield UInt32(self, "ccpText", "Length of main document text stream") + yield Int32(self, "ccpFtn", "Length of footnote subdocument text stream") + yield Int32(self, "ccpHdr", "Length of header subdocument text stream") + yield Int32(self, "ccpMcr", "Length of macro subdocument text stream") + yield Int32(self, "ccpAtn", "Length of annotation subdocument text stream") + yield NullBytes(self, "ccpSpare", 16) + + yield FC_CB(self, "StshfOrig", "Original STSH allocation") + yield FC_CB(self, "Stshf", "Current STSH allocation") + yield FC_CB(self, "PlcffndRef", "Footnote reference PLC") + yield FC_CB(self, "PlcffndTxt", "Footnote text PLC") + yield FC_CB(self, "PlcfandRef", "Annotation reference PLC") + yield FC_CB(self, "PlcfandTxt", "Annotation text PLC") + yield FC_CB(self, "Plcfsed", "Section descriptor PLC") + yield FC_CB(self, "Plcfpgd", "Page descriptor PLC") + yield FC_CB(self, "Plcfphe", "Paragraph heights PLC") + yield 
FC_CB(self, "Sttbfglsy", "Glossary string table") + yield FC_CB(self, "Plcfglsy", "Glossary PLC") + yield FC_CB(self, "Plcfhdd", "Header PLC") + yield FC_CB(self, "PlcfbteChpx", "Character property bin table PLC") + yield FC_CB(self, "PlcfbtePapx", "Paragraph property bin table PLC") + yield FC_CB(self, "Plcfsea", "Private Use PLC") + yield FC_CB(self, "Sttbfffn") + yield FC_CB(self, "PlcffldMom") + yield FC_CB(self, "PlcffldHdr") + yield FC_CB(self, "PlcffldFtn") + yield FC_CB(self, "PlcffldAtn") + yield FC_CB(self, "PlcffldMcr") + yield FC_CB(self, "Sttbfbkmk") + yield FC_CB(self, "Plcfbkf") + yield FC_CB(self, "Plcfbkl") + yield FC_CB(self, "Cmds") + yield FC_CB(self, "Plcmcr") + yield FC_CB(self, "Sttbfmcr") + yield FC_CB(self, "PrDrvr", "Printer Driver information") + yield FC_CB(self, "PrEnvPort", "Printer environment for Portrait mode") + yield FC_CB(self, "PrEnvLand", "Printer environment for Landscape mode") + yield FC_CB(self, "Wss", "Window Save State") + yield FC_CB(self, "Dop", "Document Property data") + yield FC_CB(self, "SttbfAssoc") + yield FC_CB(self, "Clx", "'Complex' file format data") + yield FC_CB(self, "PlcfpgdFtn", "Footnote page descriptor PLC") + yield FC_CB(self, "AutosaveSource", "Original filename for Autosave purposes") + yield FC_CB(self, "Spare5") + yield FC_CB(self, "Spare6") + + yield Int16(self, "wSpare4") + yield UInt16(self, "pnChpFirst") + yield UInt16(self, "pnPapFirst") + yield UInt16(self, "cpnBteChp", "Count of CHPX FKPs recorded in file") + yield UInt16(self, "cpnBtePap", "Count of PAPX FKPs recorded in file") + +class SEPX(FieldSet): + def createFields(self): + yield UInt8(self, "size") + self._size=(self['size'].value+1)*8 + yield RawBytes(self, "raw[]", self['size'].value) + +class SEPXGroup(FieldSet): + def __init__(self, parent, name, size, description=None): + FieldSet.__init__(self, parent, name, description=description) + self._size=size*8 + def createFields(self): + while self.current_size < self.size: + 
next=self.stream.readBytes(self.absolute_address+self.current_size,1) + if next=='\x00': + padding = paddingSize((self.absolute_address + self.current_size)//8, 512) + if padding: + yield NullBytes(self, "padding[]", padding) + if self.current_size >= self.size: break + yield SEPX(self, "sepx[]") + +class Word2DocumentParser(OLE2FragmentParser): + MAGIC='\xdb\xa5' # 42459 + PARSER_TAGS = { + "id": "word_v2_document", + "min_size": 8, + "magic": ((MAGIC, 0),), + "file_ext": ("doc",), + "description": "Microsoft Office Word Version 2.0 document", + } + endian = LITTLE_ENDIAN + + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) + + def validate(self): + if self.stream.readBytes(0,2) != self.MAGIC: + return "Invalid magic." + if self['FIB/nFib'].value not in (45,): + return "Unknown FIB version." + return True + + def createFields(self): + yield FIB(self, "FIB", "File Information Block") + + padding = (self['FIB/fcMin'].value - self.current_size//8) + if padding: + yield NullBytes(self, "padding[]", padding) + if self['FIB/ccpText'].value: + yield Bytes(self, "text", self['FIB/ccpText'].value) + if self['FIB/ccpFtn'].value: + yield Bytes(self, "text_footnote", self['FIB/ccpFtn'].value) + if self['FIB/ccpHdr'].value: + yield Bytes(self, "text_header", self['FIB/ccpHdr'].value) + if self['FIB/ccpMcr'].value: + yield Bytes(self, "text_macro", self['FIB/ccpMcr'].value) + if self['FIB/ccpAtn'].value: + yield Bytes(self, "text_annotation", self['FIB/ccpAtn'].value) + + padding = (self['FIB/fcMax'].value - self.current_size//8) + if padding: + yield RawBytes(self, "padding[]", padding) + + sepx_size = (self['FIB/pnChpFirst'].value*512 - self.current_size//8) + if sepx_size: + yield SEPXGroup(self, "sepx", sepx_size) + diff --git a/lib/hachoir_parser/misc/word_doc.py b/lib/hachoir_parser/misc/word_doc.py index 8c9df549..36929d83 100644 --- a/lib/hachoir_parser/misc/word_doc.py +++ b/lib/hachoir_parser/misc/word_doc.py @@ -11,289 +11,417 
@@ Documents: section. Revised Dec 21 1998, added missing Document Properties (section). """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, Enum, Bit, Bits, UInt8, Int16, UInt16, UInt32, Int32, - NullBytes, RawBytes, PascalString16, - DateTimeMSDOS32) -from lib.hachoir_core.endian import LITTLE_ENDIAN + NullBytes, Bytes, RawBytes, PascalString8, PascalString16, CString, String, + TimestampMac32, TimestampWin64) +from hachoir_core.text_handler import displayHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser import guessParser +from hachoir_parser.misc.ole2_util import OLE2FragmentParser +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID -TIMESTAMP = DateTimeMSDOS32 - -class BaseWordDocument: +CREATOR_ID={0x6A62: "Microsoft Word"} +class ShortArray(FieldSet): def createFields(self): - yield UInt16(self, "wIdent", 2) - yield UInt16(self, "nFib") - yield UInt16(self, "nProduct") - yield UInt16(self, "lid") + yield UInt16(self, "csw", "Count of fields in the array of shorts") + self._size = self['csw'].value*16+16 + yield Enum(UInt16(self, "wMagicCreated", "File creator ID"), CREATOR_ID) + yield Enum(UInt16(self, "wMagicRevised", "File last modifier ID"), CREATOR_ID) + yield UInt16(self, "wMagicCreatePrivate") + yield UInt16(self, "wMagicCreatedPrivate") + yield UInt16(self, "pnFbpChpFirst_W6") + yield UInt16(self, "pnChpFirst_W6") + yield UInt16(self, "cpnBteChp_W6") + yield UInt16(self, "pnFbpPapFirst_W6") + yield UInt16(self, "pnPapFirst_W6") + yield UInt16(self, "cpnBtePap_W6") + yield UInt16(self, "pnFbpLvcFirst_W6") + yield UInt16(self, "pnLvcFirst_W6") + yield UInt16(self, "cpnBteLvc_W6") + yield Enum(UInt16(self, "lidFE", "Language ID if a Far East version of Word was used"), LANGUAGE_ID) + while self.current_size < self.size: + yield Int16(self, "unknown[]") + +def buildDateHandler(v): + md,y=divmod(v,100) + m,d=divmod(md,100) + if y < 60: 
y=2000+y + else: y=1900+y + return "%04i-%02i-%02i"%(y,m,d) + +class LongArray(FieldSet): + def createFields(self): + yield UInt16(self, "clw", "Count of fields in the array of longs") + self._size = self['clw'].value*32+16 + yield Int32(self, "cbMax", "Stream offset of last byte + 1") + yield displayHandler(UInt32(self, "lProductCreated", "Date when the creator program was built"),buildDateHandler) + yield displayHandler(UInt32(self, "lProductRevised", "Date when the last modifier program was built"),buildDateHandler) + + yield UInt32(self, "ccpText", "Length of main document text stream") + yield Int32(self, "ccpFtn", "Length of footnote subdocument text stream") + yield Int32(self, "ccpHdr", "Length of header subdocument text stream") + yield Int32(self, "ccpMcr", "Length of macro subdocument text stream") + yield Int32(self, "ccpAtn", "Length of annotation subdocument text stream") + yield Int32(self, "ccpEdn", "Length of endnote subdocument text stream") + yield Int32(self, "ccpTxbx", "Length of textbox subdocument text stream") + yield Int32(self, "ccpHdrTxbx", "Length of header textbox subdocument text stream") + yield Int32(self, "pnFbpChpFirst", "Start of CHPX (Character Property) sector chain (sector = 512-byte 'page')") + yield Int32(self, "pnChpFirst", "First CHPX sector") + yield Int32(self, "cpnBteChp", "Number of CHPX sectors in the file") + yield Int32(self, "pnFbpPapFirst", "Start of PAPX (Paragraph Property) sector chain") + yield Int32(self, "pnPapFirst", "First PAPX sector") + yield Int32(self, "cpnBtePap", "Number of PAPX sectors in the file") + yield Int32(self, "pnFbpLvcFirst", "Start of LVC sector chain") + yield Int32(self, "pnLvcFirst", "First LVC sector") + yield Int32(self, "cpnBteLvc", "Number of LVC sectors in the file") + yield Int32(self, "fcIslandFirst") + yield Int32(self, "fcIslandLim") + while self.current_size < self.size: + yield Int32(self, "unknown[]") + +class FCLCB(FieldSet): + static_size=64 + def createFields(self): + 
yield Int32(self, "fc", "Table Stream Offset") + yield UInt32(self, "lcb", "Byte Count") + def createValue(self): + return (self['fc'].value,self['lcb'].value) + +class FCLCBArray(FieldSet): + def createFields(self): + yield UInt16(self, "cfclcb", "Count of fields in the array of FC/LCB pairs") + self._size = self['cfclcb'].value*64+16 + + yield FCLCB(self, "StshfOrig", "Original STSH allocation") + yield FCLCB(self, "Stshf", "Current STSH allocation") + yield FCLCB(self, "PlcffndRef", "Footnote reference (FRD) PLC") + yield FCLCB(self, "PlcffndTxt", "Footnote text PLC") + yield FCLCB(self, "PlcfandRef", "Annotation reference (ATRD) PLC") + yield FCLCB(self, "PlcfandTxt", "Annotation text PLC") + yield FCLCB(self, "Plcfsed", "Section descriptor (SED) PLC") + yield FCLCB(self, "Plcpad", "No longer used; used to be Plcfpgd (Page descriptor PLC)") + yield FCLCB(self, "Plcfphe", "Paragraph heights (PHE) PLC (only for Complex files)") + yield FCLCB(self, "Sttbfglsy", "Glossary string table") + yield FCLCB(self, "Plcfglsy", "Glossary PLC") + yield FCLCB(self, "Plcfhdd", "Header (HDD) PLC") + yield FCLCB(self, "PlcfbteChpx", "Character property bin table PLC") + yield FCLCB(self, "PlcfbtePapx", "Paragraph property bin table PLC") + yield FCLCB(self, "Plcfsea", "Private Use PLC") + yield FCLCB(self, "Sttbfffn", "Font information STTB") + yield FCLCB(self, "PlcffldMom", "Main document field position (FLD) PLC") + yield FCLCB(self, "PlcffldHdr", "Header subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldFtn", "Footnote subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldAtn", "Annotation subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldMcr", "No longer used") + yield FCLCB(self, "Sttbfbkmk", "Bookmark names STTB") + yield FCLCB(self, "Plcfbkf", "Bookmark begin position (BKF) PLC") + yield FCLCB(self, "Plcfbkl", "Bookmark end position (BKL) PLC") + yield FCLCB(self, "Cmds", "Macro commands") + yield FCLCB(self, "Plcmcr", "No 
longer used") + yield FCLCB(self, "Sttbfmcr", "No longer used") + yield FCLCB(self, "PrDrvr", "Printer Driver information") + yield FCLCB(self, "PrEnvPort", "Printer environment for Portrait mode") + yield FCLCB(self, "PrEnvLand", "Printer environment for Landscape mode") + yield FCLCB(self, "Wss", "Window Save State") + yield FCLCB(self, "Dop", "Document Property data") + yield FCLCB(self, "SttbfAssoc", "Associated strings STTB") + yield FCLCB(self, "Clx", "Complex file information") + yield FCLCB(self, "PlcfpgdFtn", "Not used") + yield FCLCB(self, "AutosaveSource", "Original filename for Autosave purposes") + yield FCLCB(self, "GrpXstAtnOwners", "String Group for Annotation Owner Names") + yield FCLCB(self, "SttbfAtnbkmk", "Annotation subdocument bookmark names STTB") + yield FCLCB(self, "PlcdoaMom", "No longer used") + yield FCLCB(self, "PlcdoaHdr", "No longer used") + yield FCLCB(self, "PlcspaMom", "Main document File Shape (FSPA) PLC") + yield FCLCB(self, "PlcspaHdr", "Header subdocument FSPA PLC") + yield FCLCB(self, "PlcfAtnbkf", "Annotation subdocument bookmark begin position (BKF) PLC") + yield FCLCB(self, "PlcfAtnbkl", "Annotation subdocument bookmark end position (BKL) PLC") + yield FCLCB(self, "Pms", "Print Merge State") + yield FCLCB(self, "FormFldSttbs", "Form field values STTB") + yield FCLCB(self, "PlcfendRef", "Endnote Reference (FRD) PLC") + yield FCLCB(self, "PlcfendTxt", "Endnote Text PLC") + yield FCLCB(self, "PlcffldEdn", "Endnote subdocument field position (FLD) PLC)") + yield FCLCB(self, "PlcfpgdEdn", "not used") + yield FCLCB(self, "DggInfo", "Office Art Object Table Data") + yield FCLCB(self, "SttbfRMark", "Editor Author Abbreviations STTB") + yield FCLCB(self, "SttbCaption", "Caption Title STTB") + yield FCLCB(self, "SttbAutoCaption", "Auto Caption Title STTB") + yield FCLCB(self, "Plcfwkb", "WKB PLC") + yield FCLCB(self, "Plcfspl", "Spell Check State PLC") + yield FCLCB(self, "PlcftxbxTxt", "Text Box Text PLC") + yield FCLCB(self, 
"PlcffldTxbx", "Text Box Reference (FLD) PLC") + yield FCLCB(self, "PlcfhdrtxbxTxt", "Header Text Box Text PLC") + yield FCLCB(self, "PlcffldHdrTxbx", "Header Text Box Reference (FLD) PLC") + yield FCLCB(self, "StwUser", "Macro User storage") + yield FCLCB(self, "Sttbttmbd", "Embedded TrueType Font Data") + yield FCLCB(self, "Unused") + yield FCLCB(self, "PgdMother", "Main text page descriptors PLF") + yield FCLCB(self, "BkdMother", "Main text break descriptors PLF") + yield FCLCB(self, "PgdFtn", "Footnote text page descriptors PLF") + yield FCLCB(self, "BkdFtn", "Footnote text break descriptors PLF") + yield FCLCB(self, "PgdEdn", "Endnote text page descriptors PLF") + yield FCLCB(self, "BkdEdn", "Endnote text break descriptors PLF") + yield FCLCB(self, "SttbfIntlFld", "Field keywords STTB") + yield FCLCB(self, "RouteSlip", "Mailer Routing Slip") + yield FCLCB(self, "SttbSavedBy", "STTB of names of users who have saved the document") + yield FCLCB(self, "SttbFnm", "STTB of filenames of documents referenced by this one") + yield FCLCB(self, "PlcfLst", "List Format information PLC") + yield FCLCB(self, "PlfLfo", "List Format Override information PLC") + yield FCLCB(self, "PlcftxbxBkd", "Main document textbox break table (BKD) PLC") + yield FCLCB(self, "PlcftxbxHdrBkd", "Header subdocument textbox break table (BKD) PLC") + yield FCLCB(self, "DocUndo", "Undo/Versioning data") + yield FCLCB(self, "Rgbuse", "Undo/Versioning data") + yield FCLCB(self, "Usp", "Undo/Versioning data") + yield FCLCB(self, "Uskf", "Undo/Versioning data") + yield FCLCB(self, "PlcupcRgbuse", "Undo/Versioning data") + yield FCLCB(self, "PlcupcUsp", "Undo/Versioning data") + yield FCLCB(self, "SttbGlsyStyle", "Glossary entry style names STTB") + yield FCLCB(self, "Plgosl", "Grammar options PL") + yield FCLCB(self, "Plcocx", "OCX data PLC") + yield FCLCB(self, "PlcfbteLvc", "Character property bin table PLC") + if self['../fMac'].value: + yield TimestampMac32(self, "ftModified", "Date last 
modified") + yield Int32(self, "padding[]") + else: + yield TimestampWin64(self, "ftModified", "Date last modified") + yield FCLCB(self, "Plcflvc", "LVC PLC") + yield FCLCB(self, "Plcasumy", "Autosummary PLC") + yield FCLCB(self, "Plcfgram", "Grammar check PLC") + yield FCLCB(self, "SttbListNames", "List names STTB") + yield FCLCB(self, "SttbfUssr", "Undo/Versioning data") + while self.current_size < self.size: + yield FCLCB(self, "unknown[]") + +class FIB(FieldSet): + def createFields(self): + yield UInt16(self, "wIdent", "Magic Number") + yield UInt16(self, "nFib", "File Information Block (FIB) Version") + yield UInt16(self, "nProduct", "Product Version") + yield Enum(UInt16(self, "lid", "Language ID"), LANGUAGE_ID) yield Int16(self, "pnNext") - yield Bit(self, "fDot") - yield Bit(self, "fGlsy") - yield Bit(self, "fComplex") - yield Bit(self, "fHasPic") - yield Bits(self, "cQuickSaves", 4) - yield Bit(self, "fEncrypted") - yield Bit(self, "fWhichTblStm") - yield Bit(self, "fReadOnlyRecommanded") - yield Bit(self, "fWriteReservation") - yield Bit(self, "fExtChar") + yield Bit(self, "fDot", "Is the document a document template?") + yield Bit(self, "fGlsy", "Is the document a glossary?") + yield Bit(self, "fComplex", "Is the document in Complex format?") + yield Bit(self, "fHasPic", "Does the document have embedded images?") + yield Bits(self, "cQuickSaves", 4, "Number of times the document was quick-saved") + yield Bit(self, "fEncrypted", "Is the document encrypted?") + yield Bits(self, "fWhichTblStm", 1, "Which table stream (0Table or 1Table) to use") + yield Bit(self, "fReadOnlyRecommended", "Should the file be opened read-only?") + yield Bit(self, "fWriteReservation", "Is the file write-reserved?") + yield Bit(self, "fExtChar", "Does the file use an extended character set?") yield Bit(self, "fLoadOverride") - yield Bit(self, "fFarEeast") + yield Bit(self, "fFarEast") yield Bit(self, "fCrypto") - yield UInt16(self, "nFibBack") - yield UInt32(self, "lKey") - yield 
UInt8(self, "envr") + yield UInt16(self, "nFibBack", "Document is backwards compatible down to this FIB version") + yield UInt32(self, "lKey", "File encryption key (only if fEncrypted)") + yield Enum(UInt8(self, "envr", "Document creation environment"), {0:'Word for Windows',1:'Macintosh Word'}) - yield Bit(self, "fMac") + yield Bit(self, "fMac", "Was this file last saved on a Mac?") yield Bit(self, "fEmptySpecial") yield Bit(self, "fLoadOverridePage") yield Bit(self, "fFutureSavedUndo") yield Bit(self, "fWord97Save") yield Bits(self, "fSpare0", 3) + CHARSET={0:'Windows ANSI',256:'Macintosh'} + yield Enum(UInt16(self, "chse", "Character set for document text"),CHARSET) + yield Enum(UInt16(self, "chsTables", "Character set for internal table text"),CHARSET) + yield UInt32(self, "fcMin", "File offset for the first character of text") + yield UInt32(self, "fcMax", "File offset for the last character of text + 1") - yield UInt16(self, "chse") - yield UInt16(self, "chsTables") - yield UInt32(self, "fcMin") - yield UInt32(self, "fcMac") + yield ShortArray(self, "array1", "Array of shorts") + yield LongArray(self, "array2", "Array of longs") + yield FCLCBArray(self, "array3", "Array of File Offset/Byte Count (FC/LCB) pairs") - yield PascalString16(self, "file_creator", strip="\0") +def getRootParser(ole2): + return guessParser(ole2["root[0]"].getSubIStream()) - yield NullBytes(self, "reserved[]", 12) +def getOLE2Parser(ole2, path): + name = path+"[0]" + if name in ole2: + fragment = ole2[name] + else: + fragment = getRootParser(ole2)[name] + return guessParser(fragment.getSubIStream()) - yield Int16(self, "lidFE") - yield UInt16(self, "clw") - yield Int32(self, "cbMac") - yield UInt32(self, "lProductCreated") - yield TIMESTAMP(self, "lProductRevised") - - yield UInt32(self, "ccpText") - yield Int32(self, "ccpFtn") - yield Int32(self, "ccpHdr") - yield Int32(self, "ccpMcr") - yield Int32(self, "ccpAtn") - yield Int32(self, "ccpEdn") - yield Int32(self, "ccpTxbx") - yield 
Int32(self, "ccpHdrTxbx") - yield Int32(self, "pnFbpChpFirst") - yield Int32(self, "pnChpFirst") - yield Int32(self, "cpnBteChp") - yield Int32(self, "pnFbpPapFirst") - yield Int32(self, "pnPapFirst") - yield Int32(self, "cpnBtePap") - yield Int32(self, "pnFbpLvcFirst") - yield Int32(self, "pnLvcFirst") - yield Int32(self, "cpnBteLvc") - yield Int32(self, "fcIslandFirst") - yield Int32(self, "fcIslandLim") - yield UInt16(self, "cfclcb") - yield Int32(self, "fcStshfOrig") - yield UInt32(self, "lcbStshfOrig") - yield Int32(self, "fcStshf") - yield UInt32(self, "lcbStshf") - - yield Int32(self, "fcPlcffndRef") - yield UInt32(self, "lcbPlcffndRef") - yield Int32(self, "fcPlcffndTxt") - yield UInt32(self, "lcbPlcffndTxt") - yield Int32(self, "fcPlcfandRef") - yield UInt32(self, "lcbPlcfandRef") - yield Int32(self, "fcPlcfandTxt") - yield UInt32(self, "lcbPlcfandTxt") - yield Int32(self, "fcPlcfsed") - yield UInt32(self, "lcbPlcfsed") - yield Int32(self, "fcPlcpad") - yield UInt32(self, "lcbPlcpad") - yield Int32(self, "fcPlcfphe") - yield UInt32(self, "lcbPlcfphe") - yield Int32(self, "fcSttbfglsy") - yield UInt32(self, "lcbSttbfglsy") - yield Int32(self, "fcPlcfglsy") - yield UInt32(self, "lcbPlcfglsy") - yield Int32(self, "fcPlcfhdd") - yield UInt32(self, "lcbPlcfhdd") - yield Int32(self, "fcPlcfbteChpx") - yield UInt32(self, "lcbPlcfbteChpx") - yield Int32(self, "fcPlcfbtePapx") - yield UInt32(self, "lcbPlcfbtePapx") - yield Int32(self, "fcPlcfsea") - yield UInt32(self, "lcbPlcfsea") - yield Int32(self, "fcSttbfffn") - yield UInt32(self, "lcbSttbfffn") - yield Int32(self, "fcPlcffldMom") - yield UInt32(self, "lcbPlcffldMom") - yield Int32(self, "fcPlcffldHdr") - yield UInt32(self, "lcbPlcffldHdr") - yield Int32(self, "fcPlcffldFtn") - yield UInt32(self, "lcbPlcffldFtn") - yield Int32(self, "fcPlcffldAtn") - yield UInt32(self, "lcbPlcffldAtn") - yield Int32(self, "fcPlcffldMcr") - yield UInt32(self, "lcbPlcffldMcr") - yield Int32(self, "fcSttbfbkmk") - yield 
UInt32(self, "lcbSttbfbkmk") - yield Int32(self, "fcPlcfbkf") - yield UInt32(self, "lcbPlcfbkf") - yield Int32(self, "fcPlcfbkl") - yield UInt32(self, "lcbPlcfbkl") - yield Int32(self, "fcCmds") - yield UInt32(self, "lcbCmds") - yield Int32(self, "fcPlcmcr") - yield UInt32(self, "lcbPlcmcr") - yield Int32(self, "fcSttbfmcr") - yield UInt32(self, "lcbSttbfmcr") - yield Int32(self, "fcPrDrvr") - yield UInt32(self, "lcbPrDrvr") - yield Int32(self, "fcPrEnvPort") - yield UInt32(self, "lcbPrEnvPort") - yield Int32(self, "fcPrEnvLand") - yield UInt32(self, "lcbPrEnvLand") - yield Int32(self, "fcWss") - yield UInt32(self, "lcbWss") - yield Int32(self, "fcDop") - yield UInt32(self, "lcbDop") - yield Int32(self, "fcSttbfAssoc") - yield UInt32(self, "lcbSttbfAssoc") - yield Int32(self, "fcClx") - yield UInt32(self, "lcbClx") - yield Int32(self, "fcPlcfpgdFtn") - yield UInt32(self, "lcbPlcfpgdFtn") - yield Int32(self, "fcAutosaveSource") - yield UInt32(self, "lcbAutosaveSource") - yield Int32(self, "fcGrpXstAtnOwners") - yield UInt32(self, "lcbGrpXstAtnOwners") - yield Int32(self, "fcSttbfAtnbkmk") - yield UInt32(self, "lcbSttbfAtnbkmk") - yield Int32(self, "fcPlcdoaMom") - yield UInt32(self, "lcbPlcdoaMom") - yield Int32(self, "fcPlcdoaHdr") - yield UInt32(self, "lcbPlcdoaHdr") - yield Int32(self, "fcPlcspaMom") - yield UInt32(self, "lcbPlcspaMom") - yield Int32(self, "fcPlcspaHdr") - yield UInt32(self, "lcbPlcspaHdr") - yield Int32(self, "fcPlcfAtnbkf") - yield UInt32(self, "lcbPlcfAtnbkf") - yield Int32(self, "fcPlcfAtnbkl") - yield UInt32(self, "lcbPlcfAtnbkl") - yield Int32(self, "fcPms") - yield UInt32(self, "lcbPms") - yield Int32(self, "fcFormFldSttbs") - yield UInt32(self, "lcbFormFldSttbs") - yield Int32(self, "fcPlcfendRef") - yield UInt32(self, "lcbPlcfendRef") - yield Int32(self, "fcPlcfendTxt") - yield UInt32(self, "lcbPlcfendTxt") - yield Int32(self, "fcPlcffldEdn") - yield UInt32(self, "lcbPlcffldEdn") - yield Int32(self, "fcPlcfpgdEdn") - yield UInt32(self, 
"lcbPlcfpgdEdn") - yield Int32(self, "fcDggInfo") - yield UInt32(self, "lcbDggInfo") - yield Int32(self, "fcSttbfRMark") - yield UInt32(self, "lcbSttbfRMark") - yield Int32(self, "fcSttbCaption") - yield UInt32(self, "lcbSttbCaption") - yield Int32(self, "fcSttbAutoCaption") - yield UInt32(self, "lcbSttbAutoCaption") - yield Int32(self, "fcPlcfwkb") - yield UInt32(self, "lcbPlcfwkb") - yield Int32(self, "fcPlcfspl") - yield UInt32(self, "lcbPlcfspl") - yield Int32(self, "fcPlcftxbxTxt") - yield UInt32(self, "lcbPlcftxbxTxt") - yield Int32(self, "fcPlcffldTxbx") - yield UInt32(self, "lcbPlcffldTxbx") - yield Int32(self, "fcPlcfhdrtxbxTxt") - yield UInt32(self, "lcbPlcfhdrtxbxTxt") - yield Int32(self, "fcPlcffldHdrTxbx") - yield UInt32(self, "lcbPlcffldHdrTxbx") - yield Int32(self, "fcStwUser") - yield UInt32(self, "lcbStwUser") - yield Int32(self, "fcSttbttmbd") - yield UInt32(self, "cbSttbttmbd") - yield Int32(self, "fcUnused") - yield UInt32(self, "lcbUnused") - yield Int32(self, "fcPgdMother") - yield UInt32(self, "lcbPgdMother") - yield Int32(self, "fcBkdMother") - yield UInt32(self, "lcbBkdMother") - yield Int32(self, "fcPgdFtn") - yield UInt32(self, "lcbPgdFtn") - yield Int32(self, "fcBkdFtn") - yield UInt32(self, "lcbBkdFtn") - yield Int32(self, "fcPgdEdn") - yield UInt32(self, "lcbPgdEdn") - yield Int32(self, "fcBkdEdn") - yield UInt32(self, "lcbBkdEdn") - yield Int32(self, "fcSttbfIntlFld") - yield UInt32(self, "lcbSttbfIntlFld") - yield Int32(self, "fcRouteSlip") - yield UInt32(self, "lcbRouteSlip") - yield Int32(self, "fcSttbSavedBy") - yield UInt32(self, "lcbSttbSavedBy") - yield Int32(self, "fcSttbFnm") - yield UInt32(self, "lcbSttbFnm") - yield Int32(self, "fcPlcfLst") - yield UInt32(self, "lcbPlcfLst") - yield Int32(self, "fcPlfLfo") - yield UInt32(self, "lcbPlfLfo") - yield Int32(self, "fcPlcftxbxBkd") - yield UInt32(self, "lcbPlcftxbxBkd") - yield Int32(self, "fcPlcftxbxHdrBkd") - yield UInt32(self, "lcbPlcftxbxHdrBkd") - yield Int32(self, 
"fcDocUndo") - yield UInt32(self, "lcbDocUndo") - yield Int32(self, "fcRgbuse") - yield UInt32(self, "lcbRgbuse") - yield Int32(self, "fcUsp") - yield UInt32(self, "lcbUsp") - yield Int32(self, "fcUskf") - yield UInt32(self, "lcbUskf") - yield Int32(self, "fcPlcupcRgbuse") - yield UInt32(self, "lcbPlcupcRgbuse") - yield Int32(self, "fcPlcupcUsp") - yield UInt32(self, "lcbPlcupcUsp") - yield Int32(self, "fcSttbGlsyStyle") - yield UInt32(self, "lcbSttbGlsyStyle") - yield Int32(self, "fcPlgosl") - yield UInt32(self, "lcbPlgosl") - yield Int32(self, "fcPlcocx") - yield UInt32(self, "lcbPlcocx") - yield Int32(self, "fcPlcfbteLvc") - yield UInt32(self, "lcbPlcfbteLvc") - yield TIMESTAMP(self, "ftModified") - yield Int32(self, "fcPlcflvc") - yield UInt32(self, "lcbPlcflvc") - yield Int32(self, "fcPlcasumy") - yield UInt32(self, "lcbPlcasumy") - yield Int32(self, "fcPlcfgram") - yield UInt32(self, "lcbPlcfgram") - yield Int32(self, "fcSttbListNames") - yield UInt32(self, "lcbSttbListNames") - yield Int32(self, "fcSttbfUssr") - yield UInt32(self, "lcbSttbfUssr") - - tail = (self.size - self.current_size) // 8 - if tail: - yield RawBytes(self, "tail", tail) - -class WordDocumentFieldSet(BaseWordDocument, FieldSet): - pass - -class WordDocumentParser(BaseWordDocument, Parser): +class WordDocumentParser(OLE2FragmentParser): + MAGIC='\xec\xa5' # 42476 PARSER_TAGS = { "id": "word_document", "min_size": 8, + "magic": ((MAGIC, 0),), "description": "Microsoft Office Word document", } endian = LITTLE_ENDIAN - def __init__(self, stream, **kw): - Parser.__init__(self, stream, **kw) + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) def validate(self): + if self.stream.readBytes(0,2) != self.MAGIC: + return "Invalid magic." + if self['FIB/nFib'].value not in (192,193): + return "Unknown FIB version." 
return True + def createFields(self): + yield FIB(self, "FIB", "File Information Block") + table = getOLE2Parser(self.ole2, "table"+str(self["FIB/fWhichTblStm"].value)) + + padding = (self['FIB/fcMin'].value - self.current_size//8) + if padding: + yield NullBytes(self, "padding[]", padding) + + # Guess whether the file uses UTF16 encoding. + is_unicode = False + if self['FIB/array2/ccpText'].value*2 == self['FIB/fcMax'].value - self['FIB/fcMin'].value: + is_unicode = True + for fieldname, textname in [('Text','text'),('Ftn','text_footnote'), + ('Hdr','text_header'),('Mcr','text_macro'),('Atn','text_annotation'), + ('Edn','text_endnote'),('Txbx','text_textbox'),('HdrTxbx','text_header_textbox')]: + size = self['FIB/array2/ccp'+fieldname].value + if size: + if is_unicode: + yield String(self, textname, size*2, charset="UTF-16-LE") + else: + yield Bytes(self, textname, size) + + padding = (self['FIB/fcMax'].value - self.current_size//8) + if padding: + yield RawBytes(self, "padding[]", padding) + +class WidePascalString16(String): + def __init__(self, parent, name, description=None, + strip=None, nbytes=None, truncate=None): + Bytes.__init__(self, parent, name, 1, description) + + self._format = "WidePascalString16" + self._strip = strip + self._truncate = truncate + self._character_size = 2 + self._charset = "UTF-16-LE" + self._content_offset = 2 + self._content_size = self._character_size * self._parent.stream.readBits( + self.absolute_address, self._content_offset*8, self._parent.endian) + self._size = (self._content_size + self.content_offset) * 8 + +class TableParsers(object): + class Bte(FieldSet): + 'Bin Table Entry' + static_size = 32 + def createFields(self): + yield Bits(self, "pn", 22, "Referenced page number") + yield Bits(self, "unused", 10) + + def createValue(self): + return self["pn"].value + + class Ffn(FieldSet): + 'Font Family Name' + def createFields(self): + yield UInt8(self, "size", "Total length of this FFN in bytes, minus 1") + self._size = 
self["size"].value * 8 + 8 + yield Bits(self, "prq", 2, "Pitch request") + yield Bit(self, "fTrueType", "Is font a TrueType font?") + yield Bits(self, "reserved[]", 1) + yield Bits(self, "ff", 3, "Font Family ID") + yield Bits(self, "reserved[]", 1) + yield UInt16(self, "wWeight", "Base weight of font") + yield UInt8(self, "chs", "Character set identifier") + yield UInt8(self, "ixchSzAlt", "Index into name to the name of the alternate font") + yield RawBytes(self, "panose", 10) + yield RawBytes(self, "fs", 24, "Font Signature") + yield CString(self, "name", charset="UTF-16-LE") + if self["ixchSzAlt"].value != 0: + yield CString(self, "nameAlt", charset="UTF-16-LE") + + def createValue(self): + return self["name"].value + + class Sttbf(FieldSet): + 'String Table stored in File' + SttbfAssocDESC = { + 0: "FileNext: unused", + 1: "Dot: filename of associated template", + 2: "Title: title of document", + 3: "Subject: subject of document", + 4: "KeyWords: keywords of document", + 5: "Comments: comments of document", + 6: "Author: author of document", + 7: "LastRevBy: name of person who last revised the document", + 8: "DataDoc: filename of data document", + 9: "HeaderDoc: filename of header document", + 10: "Criteria1: packed string used by print merge record selection", + 11: "Criteria2: packed string used by print merge record selection", + 12: "Criteria3: packed string used by print merge record selection", + 13: "Criteria4: packed string used by print merge record selection", + 14: "Criteria5: packed string used by print merge record selection", + 15: "Criteria6: packed string used by print merge record selection", + 16: "Criteria7: packed string used by print merge record selection", + 17: "Max: maximum number of strings in string table", + } + + def createFields(self): + if self.stream.readBytes(self.absolute_address, 2) == "\xff\xff": + yield Int16(self, "utf16_marker", "If this field is present, the Sttbf contains UTF-16 data.") + self.is_utf16 = True + else: + 
self.is_utf16 = False + yield UInt16(self, "count", "Number of strings in this Sttbf") + extra_data_field = UInt16(self, "extra_data_len", "Size of optional extra data after each string") + yield extra_data_field + extra_data_len = extra_data_field.value + for i in xrange(self["count"].value): + if self.name == "SttbfAssoc": + desc = self.SttbfAssocDESC.get(i, None) + else: + desc = None + if self.name == "Sttbfffn": + yield TableParsers.Ffn(self, "string[]", desc) + elif self.is_utf16: + yield WidePascalString16(self, "string[]", desc) + else: + yield PascalString8(self, "string[]", desc) + if extra_data_len: + yield RawBytes(self, "extra[]", extra_data_len) + + class Plcf(FieldSet): + 'Plex of CPs/FCs stored in file' + def createFields(self): + if self.size is None: + return + chunk_parser = None + size = None + if self.name.startswith("Plcfbte"): + chunk_parser = TableParsers.Bte + if not chunk_parser: + return + if size is None: + size = chunk_parser.static_size // 8 + n = (self.size / 8 - 4) / (4 + size) + for i in xrange(n+1): + yield UInt32(self, "cp_fc[]", "CP or FC value") + for i in xrange(n): + yield chunk_parser(self, "obj[]") + +class WordTableParser(OLE2FragmentParser): + def createFields(self): + word_doc = getOLE2Parser(self.ole2, "word_doc") + if word_doc["FIB/fWhichTblStm"].value != int(self.ole2name[0]): + yield RawBytes(self, "inactive_table", self.datasize) + return + for fclcb in word_doc["FIB/array3"]: + if not isinstance(fclcb, FCLCB): + continue + if fclcb["fc"].value < 0 or fclcb["lcb"].value <= 0: + continue + self.seekByte(fclcb["fc"].value, relative=False) + if fclcb.name.startswith("Sttb"): + yield TableParsers.Sttbf(self, fclcb.name, size=fclcb["lcb"].value * 8) + elif fclcb.name.startswith("Plc"): + yield TableParsers.Plcf(self, fclcb.name, size=fclcb["lcb"].value * 8) + else: + yield RawBytes(self, fclcb.name, fclcb["lcb"].value, fclcb.description) diff --git a/lib/hachoir_parser/network/__init__.py 
b/lib/hachoir_parser/network/__init__.py index 0781e00f..a7fe2473 100644 --- a/lib/hachoir_parser/network/__init__.py +++ b/lib/hachoir_parser/network/__init__.py @@ -1,2 +1,2 @@ -from lib.hachoir_parser.network.tcpdump import TcpdumpFile +from hachoir_parser.network.tcpdump import TcpdumpFile diff --git a/lib/hachoir_parser/network/common.py b/lib/hachoir_parser/network/common.py index bc049b6b..d6e9feaa 100644 --- a/lib/hachoir_parser/network/common.py +++ b/lib/hachoir_parser/network/common.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import FieldSet, Field, Bits -from lib.hachoir_core.bits import str2hex -from lib.hachoir_parser.network.ouid import REGISTERED_OUID -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.field import FieldSet, Field, Bits +from hachoir_core.bits import str2hex +from hachoir_parser.network.ouid import REGISTERED_OUID +from hachoir_core.endian import BIG_ENDIAN from socket import gethostbyaddr, herror as socket_host_error def ip2name(addr): diff --git a/lib/hachoir_parser/network/tcpdump.py b/lib/hachoir_parser/network/tcpdump.py index 1625c845..564e6189 100644 --- a/lib/hachoir_parser/network/tcpdump.py +++ b/lib/hachoir_parser/network/tcpdump.py @@ -12,16 +12,16 @@ Author: Victor Stinner Creation: 23 march 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Enum, Bytes, NullBytes, RawBytes, UInt8, UInt16, UInt32, Int32, TimestampUnix32, Bit, Bits, NullBits) -from lib.hachoir_core.endian import NETWORK_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.network.common import MAC48_Address, IPv4_Address, IPv6_Address +from hachoir_core.endian import NETWORK_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import 
humanDuration +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict +from hachoir_parser.network.common import MAC48_Address, IPv4_Address, IPv6_Address def diff(field): return humanDuration(field.value*1000) diff --git a/lib/hachoir_parser/parser.py b/lib/hachoir_parser/parser.py index 833e8a79..e8124158 100644 --- a/lib/hachoir_parser/parser.py +++ b/lib/hachoir_parser/parser.py @@ -1,8 +1,8 @@ -import lib.hachoir_core.config as config -from lib.hachoir_core.field import Parser as GenericParser -from lib.hachoir_core.error import HACHOIR_ERRORS, HachoirError, error -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.i18n import _ +import hachoir_core.config as config +from hachoir_core.field import Parser as GenericParser +from hachoir_core.error import HACHOIR_ERRORS, HachoirError, error +from hachoir_core.tools import makeUnicode +from hachoir_core.i18n import _ from inspect import getmro diff --git a/lib/hachoir_parser/parser_list.py b/lib/hachoir_parser/parser_list.py index 88692f3c..38071550 100644 --- a/lib/hachoir_parser/parser_list.py +++ b/lib/hachoir_parser/parser_list.py @@ -1,8 +1,8 @@ import re import types -from lib.hachoir_core.error import error -from lib.hachoir_core.i18n import _ -from lib.hachoir_parser import Parser, HachoirParser +from hachoir_core.error import error +from hachoir_core.i18n import _ +from hachoir_parser import Parser, HachoirParser import sys ### Parser list ################################################################ @@ -198,8 +198,7 @@ class HachoirParserList(ParserList): return self.parser_list todo = [] - from lib import hachoir_parser - module = hachoir_parser + module = __import__("hachoir_parser") for attrname in dir(module): attr = getattr(module, attrname) if isinstance(attr, types.ModuleType): diff --git a/lib/hachoir_parser/program/__init__.py b/lib/hachoir_parser/program/__init__.py index 5dba92d3..261eaf15 100644 --- 
a/lib/hachoir_parser/program/__init__.py +++ b/lib/hachoir_parser/program/__init__.py @@ -1,6 +1,7 @@ -from lib.hachoir_parser.program.elf import ElfFile -from lib.hachoir_parser.program.exe import ExeFile -from lib.hachoir_parser.program.python import PythonCompiledFile -from lib.hachoir_parser.program.java import JavaCompiledClassFile -from lib.hachoir_parser.program.prc import PRCFile +from hachoir_parser.program.elf import ElfFile +from hachoir_parser.program.exe import ExeFile +from hachoir_parser.program.python import PythonCompiledFile +from hachoir_parser.program.java import JavaCompiledClassFile +from hachoir_parser.program.prc import PRCFile +from hachoir_parser.program.nds import NdsFile diff --git a/lib/hachoir_parser/program/elf.py b/lib/hachoir_parser/program/elf.py index 7c66ca28..4ddd6511 100644 --- a/lib/hachoir_parser/program/elf.py +++ b/lib/hachoir_parser/program/elf.py @@ -1,44 +1,98 @@ """ ELF (Unix/BSD executable file format) parser. -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation date: 08 may 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, UInt32, Enum, - String, Bytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, ParserError, Bit, NullBits, RawBits, + UInt8, UInt16, UInt32, UInt64, Enum, + String, RawBytes, Bytes) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class ElfHeader(FieldSet): - static_size = 52*8 LITTLE_ENDIAN_ID = 1 BIG_ENDIAN_ID = 2 MACHINE_NAME = { + # e_machine, EM_ defines + 0: u"No machine", 1: u"AT&T WE 32100", 2: u"SPARC", 3: u"Intel 80386", 4: u"Motorola 68000", 5: u"Motorola 88000", + 6: u"Intel 80486", 7: u"Intel 80860", - 8: u"MIPS RS3000" + 8: u"MIPS I 
Architecture", + 9: u"Amdahl UTS on System/370", + 10: u"MIPS RS3000 Little-endian", + 11: u"IBM RS/6000 XXX reserved", + 15: u"Hewlett-Packard PA-RISC", + 16: u"NCube XXX reserved", + 17: u"Fujitsu VPP500", + 18: u"Enhanced instruction set SPARC", + 19: u"Intel 80960", + 20: u"PowerPC 32-bit", + 21: u"PowerPC 64-bit", + 36: u"NEC V800", + 37: u"Fujitsu FR20", + 38: u"TRW RH-32", + 39: u"Motorola RCE", + 40: u"Advanced RISC Machines (ARM)", + 41: u"DIGITAL Alpha", + 42: u"Hitachi Super-H", + 43: u"SPARC Version 9", + 44: u"Siemens Tricore", + 45: u"Argonaut RISC Core", + 46: u"Hitachi H8/300", + 47: u"Hitachi H8/300H", + 48: u"Hitachi H8S", + 49: u"Hitachi H8/500", + 50: u"Intel Merced (IA-64) Processor", + 51: u"Stanford MIPS-X", + 52: u"Motorola Coldfire", + 53: u"Motorola MC68HC12", + 62: u"Advanced Micro Devices x86-64", + 75: u"DIGITAL VAX", + 36902: u"used by NetBSD/alpha; obsolete", } CLASS_NAME = { + # e_ident[EI_CLASS], ELFCLASS defines 1: u"32 bits", 2: u"64 bits" } TYPE_NAME = { + # e_type, ET_ defines 0: u"No file type", 1: u"Relocatable file", 2: u"Executable file", 3: u"Shared object file", 4: u"Core file", 0xFF00: u"Processor-specific (0xFF00)", - 0xFFFF: u"Processor-specific (0xFFFF)" + 0xFFFF: u"Processor-specific (0xFFFF)", + } + OSABI_NAME = { + # e_ident[EI_OSABI], ELFOSABI_ defines + 0: u"UNIX System V ABI", + 1: u"HP-UX operating system", + 2: u"NetBSD", + 3: u"GNU/Linux", + 4: u"GNU/Hurd", + 5: u"86Open common IA32 ABI", + 6: u"Solaris", + 7: u"Monterey", + 8: u"IRIX", + 9: u"FreeBSD", + 10: u"TRU64 UNIX", + 11: u"Novell Modesto", + 12: u"OpenBSD", + 97: u"ARM", + 255: u"Standalone (embedded) application", } ENDIAN_NAME = { + # e_ident[EI_DATA], ELFDATA defines LITTLE_ENDIAN_ID: "Little endian", BIG_ENDIAN_ID: "Big endian", } @@ -46,23 +100,29 @@ class ElfHeader(FieldSet): def createFields(self): yield Bytes(self, "signature", 4, r'ELF signature ("\x7fELF")') yield Enum(UInt8(self, "class", "Class"), self.CLASS_NAME) + if self["class"].value 
== 1: + ElfLongWord = UInt32 + else: + ElfLongWord = UInt64 yield Enum(UInt8(self, "endian", "Endian"), self.ENDIAN_NAME) yield UInt8(self, "file_version", "File version") - yield String(self, "pad", 8, "Pad") - yield UInt8(self, "nb_ident", "Size of ident[]") + yield Enum(UInt8(self, "osabi_ident", "OS/syscall ABI identification"), self.OSABI_NAME) + yield UInt8(self, "abi_version", "syscall ABI version") + yield String(self, "pad", 7, "Pad") + yield Enum(UInt16(self, "type", "File type"), self.TYPE_NAME) yield Enum(UInt16(self, "machine", "Machine type"), self.MACHINE_NAME) yield UInt32(self, "version", "ELF format version") - yield UInt32(self, "entry", "Number of entries") - yield UInt32(self, "phoff", "Program header offset") - yield UInt32(self, "shoff", "Section header offset") - yield UInt32(self, "flags", "Flags") + yield textHandler(ElfLongWord(self, "entry", "Entry point"), hexadecimal) + yield ElfLongWord(self, "phoff", "Program header file offset") + yield ElfLongWord(self, "shoff", "Section header file offset") + yield UInt32(self, "flags", "Architecture-specific flags") yield UInt16(self, "ehsize", "Elf header size (this header)") yield UInt16(self, "phentsize", "Program header entry size") yield UInt16(self, "phnum", "Program header entry count") yield UInt16(self, "shentsize", "Section header entry size") - yield UInt16(self, "shnum", "Section header entre count") - yield UInt16(self, "shstrndx", "Section header strtab index") + yield UInt16(self, "shnum", "Section header entry count") + yield UInt16(self, "shstrndx", "Section header string table index") def isValid(self): if self["signature"].value != "\x7FELF": @@ -73,70 +133,154 @@ class ElfHeader(FieldSet): return "Unknown endian (%s)" % self["endian"].value return "" +class SectionFlags(FieldSet): + def createFields(self): + if self.root.endian == BIG_ENDIAN: + if self.root.is64bit: + yield RawBits(self, "reserved[]", 32) + yield RawBits(self, "processor_specific", 4, "Processor specific 
flags") + yield NullBits(self, "reserved[]", 17) + yield Bit(self, "is_tls", "Section contains TLS data?") + yield NullBits(self, "reserved[]", 7) + yield Bit(self, "is_exec", "Section contains executable instructions?") + yield Bit(self, "is_alloc", "Section occupies memory?") + yield Bit(self, "is_writable", "Section contains writable data?") + else: + yield Bit(self, "is_writable", "Section contains writable data?") + yield Bit(self, "is_alloc", "Section occupies memory?") + yield Bit(self, "is_exec", "Section contains executable instructions?") + yield NullBits(self, "reserved[]", 7) + yield Bit(self, "is_tls", "Section contains TLS data?") + yield RawBits(self, "processor_specific", 4, "Processor specific flags") + yield NullBits(self, "reserved[]", 17) + if self.root.is64bit: + yield RawBits(self, "reserved[]", 32) + +class SymbolStringTableOffset(UInt32): + def createDisplay(self): + section_index = self['/header/shstrndx'].value + section = self['/section['+str(section_index)+']'] + text = section.value[self.value:] + return text.split('\0',1)[0] + class SectionHeader32(FieldSet): static_size = 40*8 TYPE_NAME = { - 8: "BSS" + # sh_type, SHT_ defines + 0: "Inactive", + 1: "Program defined information", + 2: "Symbol table section", + 3: "String table section", + 4: "Relocation section with addends", + 5: "Symbol hash table section", + 6: "Dynamic section", + 7: "Note section", + 8: "Block started by symbol (BSS) or No space section", + 9: "Relocation section without addends", + 10:"Reserved - purpose unknown", + 11:"Dynamic symbol table section", } def createFields(self): - yield UInt32(self, "name", "Name") - yield Enum(UInt32(self, "type", "Type"), self.TYPE_NAME) - yield UInt32(self, "flags", "Flags") + yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)") + yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME) + yield SectionFlags(self, "flags", "Section flags") yield 
textHandler(UInt32(self, "VMA", "Virtual memory address"), hexadecimal) - yield textHandler(UInt32(self, "LMA", "Logical memory address (in file)"), hexadecimal) - yield textHandler(UInt32(self, "size", "Size"), hexadecimal) - yield UInt32(self, "link", "Link") - yield UInt32(self, "info", "Information") - yield UInt32(self, "addr_align", "Address alignment") - yield UInt32(self, "entry_size", "Entry size") + yield textHandler(UInt32(self, "LMA", "Logical memory address (offset in file)"), hexadecimal) + yield textHandler(UInt32(self, "size", "Section size (bytes)"), hexadecimal) + yield UInt32(self, "link", "Index of a related section") + yield UInt32(self, "info", "Type-dependent information") + yield UInt32(self, "addr_align", "Address alignment (bytes)") + yield UInt32(self, "entry_size", "Size of each entry in section") def createDescription(self): return "Section header (name: %s, type: %s)" % \ - (self["name"].value, self["type"].display) + (self["name"].display, self["type"].display) + +class SectionHeader64(SectionHeader32): + static_size = 64*8 + + def createFields(self): + yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)") + yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME) + yield SectionFlags(self, "flags", "Section flags") + yield textHandler(UInt64(self, "VMA", "Virtual memory address"), hexadecimal) + yield textHandler(UInt64(self, "LMA", "Logical memory address (offset in file)"), hexadecimal) + yield textHandler(UInt64(self, "size", "Section size (bytes)"), hexadecimal) + yield UInt32(self, "link", "Index of a related section") + yield UInt32(self, "info", "Type-dependent information") + yield UInt64(self, "addr_align", "Address alignment (bytes)") + yield UInt64(self, "entry_size", "Size of each entry in section") + +class ProgramFlags(FieldSet): + static_size = 32 + FLAGS = (('pf_r','readable'),('pf_w','writable'),('pf_x','executable')) + + def 
createFields(self): + if self.root.endian == BIG_ENDIAN: + yield NullBits(self, "padding[]", 29) + for fld, desc in self.FLAGS: + yield Bit(self, fld, "Segment is " + desc) + else: + for fld, desc in reversed(self.FLAGS): + yield Bit(self, fld, "Segment is " + desc) + yield NullBits(self, "padding[]", 29) + + def createDescription(self): + attribs=[] + for fld, desc in self.FLAGS: + if self[fld].value: + attribs.append(desc) + return 'Segment is '+', '.join(attribs) class ProgramHeader32(FieldSet): TYPE_NAME = { - 3: "Dynamic library" + # p_type, PT_ defines + 0: u"Unused program header table entry", + 1: u"Loadable program segment", + 2: u"Dynamic linking information", + 3: u"Program interpreter", + 4: u"Auxiliary information", + 5: u"Reserved, unspecified semantics", + 6: u"Entry for header table itself", + 7: u"Thread Local Storage segment", + 0x70000000: u"MIPS_REGINFO", } static_size = 32*8 def createFields(self): - yield Enum(UInt16(self, "type", "Type"), ProgramHeader32.TYPE_NAME) - yield UInt16(self, "flags", "Flags") + yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME) yield UInt32(self, "offset", "Offset") yield textHandler(UInt32(self, "vaddr", "V. address"), hexadecimal) yield textHandler(UInt32(self, "paddr", "P. 
address"), hexadecimal) yield UInt32(self, "file_size", "File size") yield UInt32(self, "mem_size", "Memory size") - yield UInt32(self, "align", "Alignment") - yield UInt32(self, "xxx", "???") + yield ProgramFlags(self, "flags") + yield UInt32(self, "align", "Alignment padding") def createDescription(self): return "Program Header (%s)" % self["type"].display -def sortSection(a, b): - return int(a["offset"] - b["offset"]) +class ProgramHeader64(ProgramHeader32): + static_size = 56*8 -#class Sections(FieldSet): -# def createFields?(self, stream, parent, sections): -# for section in sections: -# ofs = section["offset"] -# size = section["file_size"] -# if size != 0: -# sub = stream.createSub(ofs, size) -# #yield DeflateFilter(self, "section[]", sub, size, Section, "Section")) -# chunk = self.doRead("section[]", "Section", (Section,), {"stream": sub}) -# else: -# chunk = self.doRead("section[]", "Section", (FormatChunk, "string[0]")) -# chunk.description = "ELF section (in file: %s..%s)" % (ofs, ofs+size) + def createFields(self): + yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME) + yield ProgramFlags(self, "flags") + yield UInt64(self, "offset", "Offset") + yield textHandler(UInt64(self, "vaddr", "V. address"), hexadecimal) + yield textHandler(UInt64(self, "paddr", "P. 
address"), hexadecimal) + yield UInt64(self, "file_size", "File size") + yield UInt64(self, "mem_size", "Memory size") + yield UInt64(self, "align", "Alignment padding") -class ElfFile(Parser): +class ElfFile(HachoirParser, RootSeekableFieldSet): + MAGIC = "\x7FELF" PARSER_TAGS = { "id": "elf", "category": "program", "file_ext": ("so", ""), - "min_size": ElfHeader.static_size, # At least one program header + "min_size": 52*8, # At least one program header "mime": ( u"application/x-executable", u"application/x-object", @@ -148,7 +292,13 @@ class ElfFile(Parser): } endian = LITTLE_ENDIAN + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + def validate(self): + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: + return "Invalid magic" err = self["header"].isValid() if err: return err @@ -163,23 +313,27 @@ class ElfFile(Parser): # Parse header and program headers yield ElfHeader(self, "header", "Header") - for index in xrange(self["header/phnum"].value): - yield ProgramHeader32(self, "prg_header[]") + self.is64bit = (self["header/class"].value == 2) - if False: - raise ParserError("TODO: Parse sections...") - #sections = self.array("prg_header") - #size = self["header/shoff"].value - self.current_size//8 - #chunk = self.doRead("data", "Data", (DeflateFilter, stream, size, Sections, sections)) - #chunk.description = "Sections (use an evil hack to manage share same data on differents parts)" - #assert self.current_size//8 == self["header/shoff"].value - else: - raw = self.seekByte(self["header/shoff"].value, "raw[]", relative=False) - if raw: - yield raw + for index in xrange(self["header/phnum"].value): + if self.is64bit: + yield ProgramHeader64(self, "prg_header[]") + else: + yield ProgramHeader32(self, "prg_header[]") + + self.seekByte(self["header/shoff"].value, relative=False) for index in xrange(self["header/shnum"].value): - yield 
SectionHeader32(self, "section_header[]") + if self.is64bit: + yield SectionHeader64(self, "section_header[]") + else: + yield SectionHeader32(self, "section_header[]") + + for index in xrange(self["header/shnum"].value): + field = self["section_header["+str(index)+"]"] + if field['size'].value != 0: + self.seekByte(field['LMA'].value, relative=False) + yield RawBytes(self, "section["+str(index)+"]", field['size'].value) def createDescription(self): return "ELF Unix/BSD program/library: %s" % ( diff --git a/lib/hachoir_parser/program/exe.py b/lib/hachoir_parser/program/exe.py index 531b89fe..5a7bc727 100644 --- a/lib/hachoir_parser/program/exe.py +++ b/lib/hachoir_parser/program/exe.py @@ -9,15 +9,15 @@ Author: Victor Stinner Creation date: 2006-08-13 """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.field import (FieldSet, RootSeekableFieldSet, +from hachoir_parser import HachoirParser +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.field import (FieldSet, RootSeekableFieldSet, UInt16, UInt32, String, RawBytes, PaddingBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.program.exe_ne import NE_Header -from lib.hachoir_parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader -from lib.hachoir_parser.program.exe_res import PE_Resource, NE_VersionInfoNode +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.program.exe_ne import NE_Header +from hachoir_parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader +from hachoir_parser.program.exe_res import PE_Resource, NE_VersionInfoNode MAX_NB_SECTION = 50 diff --git a/lib/hachoir_parser/program/exe_ne.py b/lib/hachoir_parser/program/exe_ne.py index b4dbe395..cf62e885 100644 --- a/lib/hachoir_parser/program/exe_ne.py +++ b/lib/hachoir_parser/program/exe_ne.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import (FieldSet, +from 
hachoir_core.field import (FieldSet, Bit, UInt8, UInt16, UInt32, Bytes, PaddingBits, PaddingBytes, NullBits, NullBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class NE_Header(FieldSet): static_size = 64*8 diff --git a/lib/hachoir_parser/program/exe_pe.py b/lib/hachoir_parser/program/exe_pe.py index c8fa101c..d769e91d 100644 --- a/lib/hachoir_parser/program/exe_pe.py +++ b/lib/hachoir_parser/program/exe_pe.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, Bit, UInt8, UInt16, UInt32, TimestampUnix32, Bytes, String, Enum, PaddingBytes, PaddingBits, NullBytes, NullBits) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.error import HACHOIR_ERRORS class SectionHeader(FieldSet): static_size = 40 * 8 @@ -72,7 +72,7 @@ class SectionHeader(FieldSet): return "section_%s" % name except HACHOIR_ERRORS, err: self.warning(unicode(err)) - return "section[]" + return "section[]" class DataDirectory(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/program/exe_res.py b/lib/hachoir_parser/program/exe_res.py index 22b25d0e..850fcf01 100644 --- a/lib/hachoir_parser/program/exe_res.py +++ b/lib/hachoir_parser/program/exe_res.py @@ -9,15 +9,15 @@ Author: Victor Stinner Creation date: 2007-01-19 """ -from lib.hachoir_core.field import (FieldSet, ParserError, Enum, +from hachoir_core.field import (FieldSet, ParserError, Enum, Bit, Bits, SeekableFieldSet, UInt16, UInt32, TimestampUnix32, RawBytes, PaddingBytes, NullBytes, NullBits, CString, String) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.tools import createDict, 
paddingSize, alignValue, makePrintable -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_parser.common.win32 import BitmapInfoHeader +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.tools import createDict, paddingSize, alignValue, makePrintable +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_parser.common.win32 import BitmapInfoHeader MAX_DEPTH = 5 MAX_INDEX_PER_HEADER = 300 diff --git a/lib/hachoir_parser/program/java.py b/lib/hachoir_parser/program/java.py index 2e58552c..7329cbe0 100644 --- a/lib/hachoir_parser/program/java.py +++ b/lib/hachoir_parser/program/java.py @@ -59,15 +59,15 @@ TODO/FIXME: should update the length field of it's entry, etc. Sounds like a huge work. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( ParserError, FieldSet, StaticFieldSet, Enum, RawBytes, PascalString16, Float32, Float64, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, Bit, NullBits ) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import paddingSize +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import paddingSize ############################################################################### def parse_flags(flags, flags_dict, show_unknown_flags=True, separator=" "): diff --git a/lib/hachoir_parser/program/nds.py b/lib/hachoir_parser/program/nds.py new file mode 100644 index 00000000..bc6e5c44 --- /dev/null +++ b/lib/hachoir_parser/program/nds.py @@ -0,0 +1,359 @@ +""" +Nintendo DS .nds game file parser + +File format references: +- http://www.bottledlight.com/ds/index.php/FileFormats/NDSFormat +- http://imrannazar.com/The-Smallest-NDS-File +- http://darkfader.net/ds/files/ndstool.cpp +- 
http://crackerscrap.com/docs/dsromstructure.html +- http://nocash.emubase.de/gbatek.htm +""" + +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, + UInt8, UInt16, UInt32, UInt64, String, RawBytes, SubFile, FieldSet, NullBits, Bit, Bits, Bytes, + SeekableFieldSet, RootSeekableFieldSet) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN + + +""" +CRC16 Calculation + +Modified from: +http://www.mail-archive.com/python-list@python.org/msg47844.html + +Original License: +crc16.py by Bryan G. Olson, 2005 +This module is free software and may be used and +distributed under the same terms as Python itself. +""" +class CRC16: + _table = None + + def _initTable (self): + from array import array + + # CRC-16 poly: p(x) = x**16 + x**15 + x**2 + 1 + # top bit implicit, reflected + poly = 0xa001 + CRC16._table = array('H') + for byte in range(256): + crc = 0 + for bit in range(8): + if (byte ^ crc) & 1: + crc = (crc >> 1) ^ poly + else: + crc >>= 1 + byte >>= 1 + CRC16._table.append(crc) + + def checksum (self, string, value): + if CRC16._table is None: + self._initTable() + + for ch in string: + value = self._table[ord(ch) ^ (value & 0xff)] ^ (value >> 8) + return value + + +class Crc16(UInt16): + "16 bit field for calculating and comparing CRC-16 of specified string" + def __init__(self, parent, name, targetBytes): + UInt16.__init__(self, parent, name) + self.targetBytes = targetBytes + + def createDescription(self): + crc = CRC16().checksum(self.targetBytes, 0xffff) + if crc == self.value: + return "matches CRC of %d bytes" % len(self.targetBytes) + else: + return "mismatch (calculated CRC %d for %d bytes)" % (crc, len(self.targetBytes)) + + +class FileNameDirTable(FieldSet): + static_size = (4+2+2)*8 + def createFields(self): + yield UInt32(self, "entry_start") + yield UInt16(self, "entry_file_id") + yield UInt16(self, "parent_id") + + def createDescription(self): + return 
"first file id: %d; parent directory id: %d (%d)" % (self["entry_file_id"].value, self["parent_id"].value, self["parent_id"].value & 0xFFF) + +class FileNameEntry(FieldSet): + def createFields(self): + yield Bits(self, "name_len", 7) + yield Bit(self, "is_directory") + yield String(self, "name", self["name_len"].value) + if self["is_directory"].value: + yield UInt16(self, "dir_id") + + def createDescription(self): + s = "" + if self["is_directory"].value: + s = "[D] " + return s + self["name"].value + +class Directory(FieldSet): + def createFields(self): + while True: + fne = FileNameEntry(self, "entry[]") + if fne["name_len"].value == 0: + yield UInt8(self, "end_marker") + break + yield fne + + +class FileNameTable(SeekableFieldSet): + def createFields(self): + self.startOffset = self.absolute_address / 8 + + # parent_id of first FileNameDirTable contains number of directories: + dt = FileNameDirTable(self, "dir_table[]") + numDirs = dt["parent_id"].value + yield dt + + for i in range(1, numDirs): + yield FileNameDirTable(self, "dir_table[]") + + for i in range(0, numDirs): + dt = self["dir_table[%d]" % i] + offset = self.startOffset + dt["entry_start"].value + self.seekByte(offset, relative=False) + yield Directory(self, "directory[]") + + +class FATFileEntry(FieldSet): + static_size = 2*4*8 + def createFields(self): + yield UInt32(self, "start") + yield UInt32(self, "end") + + def createDescription(self): + return "start: %d; size: %d" % (self["start"].value, self["end"].value - self["start"].value) + +class FATContent(FieldSet): + def createFields(self): + num_entries = self.parent["header"]["fat_size"].value / 8 + for i in range(0, num_entries): + yield FATFileEntry(self, "entry[]") + + + +class BannerTile(FieldSet): + static_size = 32*8 + def createFields(self): + for y in range(8): + for x in range(8): + yield Bits(self, "pixel[%d,%d]" % (x,y), 4) + +class BannerIcon(FieldSet): + static_size = 16*32*8 + def createFields(self): + for y in range(4): + for x in 
range(4): + yield BannerTile(self, "tile[%d,%d]" % (x,y)) + +class NdsColor(FieldSet): + static_size = 16 + def createFields(self): + yield Bits(self, "red", 5) + yield Bits(self, "green", 5) + yield Bits(self, "blue", 5) + yield NullBits(self, "pad", 1) + + def createDescription(self): + return "#%02x%02x%02x" % (self["red"].value << 3, self["green"].value << 3, self["blue"].value << 3) + +class Banner(FieldSet): + static_size = 2112*8 + def createFields(self): + yield UInt16(self, "version") + # CRC of this structure, excluding first 32 bytes: + yield Crc16(self, "crc", self.stream.readBytes(self.absolute_address+(32*8), (2112-32))) + yield RawBytes(self, "reserved", 28) + yield BannerIcon(self, "icon_data") + for i in range(0, 16): + yield NdsColor(self, "palette_color[]") + yield String(self, "title_jp", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_en", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_fr", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_de", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_it", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_es", 256, charset="UTF-16-LE", truncate="\0") + + +class Overlay(FieldSet): + static_size = 8*4*8 + def createFields(self): + yield UInt32(self, "id") + yield textHandler(UInt32(self, "ram_address"), hexadecimal) + yield UInt32(self, "ram_size") + yield UInt32(self, "bss_size") + yield textHandler(UInt32(self, "init_start_address"), hexadecimal) + yield textHandler(UInt32(self, "init_end_address"), hexadecimal) + yield UInt32(self, "file_id") + yield RawBytes(self, "reserved[]", 4) + + def createDescription(self): + return "file #%d, %d (+%d) bytes to 0x%08x" % ( + self["file_id"].value, self["ram_size"].value, self["bss_size"].value, self["ram_address"].value) + + +class SecureArea(FieldSet): + static_size=2048*8 + def createFields(self): + yield textHandler(UInt64(self, "id"), hexadecimal) + if 
self["id"].value == 0xe7ffdeffe7ffdeff: # indicates that secure area is decrypted + yield Bytes(self, "fixed[]", 6) # always \xff\xde\xff\xe7\xff\xde + yield Crc16(self, "header_crc16", self.stream.readBytes(self.absolute_address+(16*8), 2048-16)) + yield RawBytes(self, "unknown[]", 2048-16-2) + yield Bytes(self, "fixed[]", 2) # always \0\0 + else: + yield RawBytes(self, "encrypted[]", 2048-8) + + +class DeviceSize(UInt8): + def createDescription(self): + return "%d Mbit" % ((2**(20+self.value)) / (1024*1024)) + +class Header(FieldSet): + def createFields(self): + yield String(self, "game_title", 12, truncate="\0") + yield String(self, "game_code", 4) + yield String(self, "maker_code", 2) + yield UInt8(self, "unit_code") + yield UInt8(self, "device_code") + + yield DeviceSize(self, "card_size") + yield String(self, "card_info", 9) + yield UInt8(self, "rom_version") + yield Bits(self, "unknown_flags[]", 2) + yield Bit(self, "autostart_flag") + yield Bits(self, "unknown_flags[]", 5) + + yield UInt32(self, "arm9_source", "ARM9 ROM offset") + yield textHandler(UInt32(self, "arm9_execute_addr", "ARM9 entry address"), hexadecimal) + yield textHandler(UInt32(self, "arm9_copy_to_addr", "ARM9 RAM address"), hexadecimal) + yield UInt32(self, "arm9_bin_size", "ARM9 code size") + + yield UInt32(self, "arm7_source", "ARM7 ROM offset") + yield textHandler(UInt32(self, "arm7_execute_addr", "ARM7 entry address"), hexadecimal) + yield textHandler(UInt32(self, "arm7_copy_to_addr", "ARM7 RAM address"), hexadecimal) + yield UInt32(self, "arm7_bin_size", "ARM7 code size") + + yield UInt32(self, "filename_table_offset") + yield UInt32(self, "filename_table_size") + yield UInt32(self, "fat_offset") + yield UInt32(self, "fat_size") + + yield UInt32(self, "arm9_overlay_src") + yield UInt32(self, "arm9_overlay_size") + yield UInt32(self, "arm7_overlay_src") + yield UInt32(self, "arm7_overlay_size") + + yield textHandler(UInt32(self, "ctl_read_flags"), hexadecimal) + yield 
textHandler(UInt32(self, "ctl_init_flags"), hexadecimal) + yield UInt32(self, "banner_offset") + yield Crc16(self, "secure_crc16", self.stream.readBytes(0x4000*8, 0x4000)) + yield UInt16(self, "rom_timeout") + + yield UInt32(self, "arm9_unk_addr") + yield UInt32(self, "arm7_unk_addr") + yield UInt64(self, "unenc_mode_magic") + + yield UInt32(self, "rom_size") + yield UInt32(self, "header_size") + + yield RawBytes(self, "unknown[]", 36) + yield String(self, "passme_autoboot_detect", 4) + yield RawBytes(self, "unknown[]", 16) + + yield RawBytes(self, "gba_logo", 156) + yield Crc16(self, "logo_crc16", self.stream.readBytes(0xc0*8, 156)) + yield Crc16(self, "header_crc16", self.stream.readBytes(0, 350)) + + yield UInt32(self, "debug_rom_offset") + yield UInt32(self, "debug_size") + yield textHandler(UInt32(self, "debug_ram_address"), hexadecimal) + + +class NdsFile(Parser, RootSeekableFieldSet): + PARSER_TAGS = { + "id": "nds_file", + "category": "program", + "file_ext": ("nds",), + "mime": (u"application/octet-stream",), + "min_size": 352 * 8, # just a minimal header + "description": "Nintendo DS game file", + } + + endian = LITTLE_ENDIAN + + def validate(self): + try: + header = self["header"] + except Exception, e: + return False + + return (self.stream.readBytes(0, 1) != "\0" + and (header["device_code"].value & 7) == 0 + and header["header_size"].value >= 352 + and header["card_size"].value < 15 # arbitrary limit at 32Gbit + and header["arm9_bin_size"].value > 0 and header["arm9_bin_size"].value <= 0x3bfe00 + and header["arm7_bin_size"].value > 0 and header["arm7_bin_size"].value <= 0x3bfe00 + and header["arm9_source"].value + header["arm9_bin_size"].value < self._size + and header["arm7_source"].value + header["arm7_bin_size"].value < self._size + and header["arm9_execute_addr"].value >= 0x02000000 and header["arm9_execute_addr"].value <= 0x023bfe00 + and header["arm9_copy_to_addr"].value >= 0x02000000 and header["arm9_copy_to_addr"].value <= 0x023bfe00 + and 
header["arm7_execute_addr"].value >= 0x02000000 and header["arm7_execute_addr"].value <= 0x03807e00 + and header["arm7_copy_to_addr"].value >= 0x02000000 and header["arm7_copy_to_addr"].value <= 0x03807e00 + ) + + def createFields(self): + # Header + yield Header(self, "header") + + # Secure Area + if self["header"]["arm9_source"].value >= 0x4000 and self["header"]["arm9_source"].value < 0x8000: + secStart = self["header"]["arm9_source"].value & 0xfffff000 + self.seekByte(secStart, relative=False) + yield SecureArea(self, "secure_area", size=0x8000-secStart) + + # ARM9 binary + self.seekByte(self["header"]["arm9_source"].value, relative=False) + yield RawBytes(self, "arm9_bin", self["header"]["arm9_bin_size"].value) + + # ARM7 binary + self.seekByte(self["header"]["arm7_source"].value, relative=False) + yield RawBytes(self, "arm7_bin", self["header"]["arm7_bin_size"].value) + + # File Name Table + if self["header"]["filename_table_size"].value > 0: + self.seekByte(self["header"]["filename_table_offset"].value, relative=False) + yield FileNameTable(self, "filename_table", size=self["header"]["filename_table_size"].value*8) + + # FAT + if self["header"]["fat_size"].value > 0: + self.seekByte(self["header"]["fat_offset"].value, relative=False) + yield FATContent(self, "fat_content", size=self["header"]["fat_size"].value*8) + + # banner + if self["header"]["banner_offset"].value > 0: + self.seekByte(self["header"]["banner_offset"].value, relative=False) + yield Banner(self, "banner") + + # ARM9 overlays + if self["header"]["arm9_overlay_src"].value > 0: + self.seekByte(self["header"]["arm9_overlay_src"].value, relative=False) + numOvls = self["header"]["arm9_overlay_size"].value / (8*4) + for i in range(numOvls): + yield Overlay(self, "arm9_overlay[]") + + # files + if self["header"]["fat_size"].value > 0: + for field in self["fat_content"]: + if field["end"].value > field["start"].value: + self.seekByte(field["start"].value, relative=False) + yield SubFile(self, 
"file[]", field["end"].value - field["start"].value) diff --git a/lib/hachoir_parser/program/prc.py b/lib/hachoir_parser/program/prc.py index 19db7885..f4db0254 100644 --- a/lib/hachoir_parser/program/prc.py +++ b/lib/hachoir_parser/program/prc.py @@ -5,11 +5,11 @@ Author: Sebastien Ponce Creation date: 29 october 2008 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, TimestampMac32, String, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN class PRCHeader(FieldSet): static_size = 78*8 diff --git a/lib/hachoir_parser/program/python.py b/lib/hachoir_parser/program/python.py index 5075f7f6..f408fb2d 100644 --- a/lib/hachoir_parser/program/python.py +++ b/lib/hachoir_parser/program/python.py @@ -11,14 +11,14 @@ Creation: 25 march 2005 DISASSEMBLE = False -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, UInt8, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, Int32, UInt32, Int64, ParserError, Float64, Enum, Character, Bytes, RawBytes, PascalString8, TimestampUnix32) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.i18n import ngettext +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.bits import long2raw +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.i18n import ngettext if DISASSEMBLE: from dis import dis @@ -268,6 +268,7 @@ class PythonCompiledFile(Parser): MAGIC = { # Python 1.x 20121: ("1.5", 0x1050000), + 50428: ("1.6", 0x1060000), # Python 2.x 50823: ("2.0", 0x2000000), @@ -286,6 +287,13 @@ class PythonCompiledFile(Parser): 62111: ("2.5b3", 0x2050000), 62121: ("2.5c1", 0x2050000), 62131: ("2.5c2", 0x2050000), + 
62151: ("2.6a0", 0x2070000), + 62161: ("2.6a1", 0x2070000), + 62171: ("2.7a0", 0x2070000), + 62181: ("2.7a0", 0x2070000), + 62191: ("2.7a0", 0x2070000), + 62201: ("2.7a0", 0x2070000), + 62211: ("2.7a0", 0x2070000), # Python 3.x 3000: ("3.0 (3000)", 0x3000000), @@ -295,14 +303,20 @@ class PythonCompiledFile(Parser): 3040: ("3.0 (3040)", 0x3000000), 3050: ("3.0 (3050)", 0x3000000), 3060: ("3.0 (3060)", 0x3000000), - 3070: ("3.0 (3070)", 0x3000000), - 3080: ("3.0 (3080)", 0x3000000), - 3090: ("3.0 (3090)", 0x3000000), - 3100: ("3.0 (3100)", 0x3000000), - 3102: ("3.0 (3102)", 0x3000000), - 3110: ("3.0a4", 0x3000000), - 3130: ("3.0a5", 0x3000000), - 3131: ("3.0a5 unicode", 0x3000000), + 3061: ("3.0 (3061)", 0x3000000), + 3071: ("3.0 (3071)", 0x3000000), + 3081: ("3.0 (3081)", 0x3000000), + 3091: ("3.0 (3091)", 0x3000000), + 3101: ("3.0 (3101)", 0x3000000), + 3103: ("3.0 (3103)", 0x3000000), + 3111: ("3.0a4", 0x3000000), + 3131: ("3.0a5", 0x3000000), + 3141: ("3.1a0", 0x3010000), + 3151: ("3.1a0", 0x3010000), + 3160: ("3.2a0", 0x3020000), + 3170: ("3.2a1", 0x3020000), + 3180: ("3.2a2", 0x3020000), + 3190: ("Python 3.3a0", 0x3030000), } # Dictionnary which associate the pyc signature (4-byte long string) diff --git a/lib/hachoir_parser/template.py b/lib/hachoir_parser/template.py index 0df480eb..2b75eb6e 100644 --- a/lib/hachoir_parser/template.py +++ b/lib/hachoir_parser/template.py @@ -13,17 +13,17 @@ Creation date: YYYY-mm-DD """ # TODO: Just keep what you need -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, UInt8, UInt16, UInt32, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class TODOFile(Parser): PARSER_TAGS = { "id": "TODO", "category": "TODO", # "archive", "audio", "container", ... 
"file_ext": ("TODO",), # TODO: Example ("bmp",) to parse the file "image.bmp" - "mime": (u"TODO"), # TODO: Example: "image/png" + "mime": (u"TODO",), # TODO: Example: "image/png" "min_size": 0, # TODO: Minimum file size (x bits, or x*8 in bytes) "description": "TODO", # TODO: Example: "A bitmap picture" } diff --git a/lib/hachoir_parser/version.py b/lib/hachoir_parser/version.py index 28d1e616..6571743e 100644 --- a/lib/hachoir_parser/version.py +++ b/lib/hachoir_parser/version.py @@ -1,4 +1,4 @@ -__version__ = "1.3.4" +__version__ = "1.3.5" PACKAGE = "hachoir-parser" WEBSITE = "http://bitbucket.org/haypo/hachoir/wiki/hachoir-parser" LICENSE = 'GNU GPL v2' diff --git a/lib/hachoir_parser/video/__init__.py b/lib/hachoir_parser/video/__init__.py index 1fabf92a..26f787e9 100644 --- a/lib/hachoir_parser/video/__init__.py +++ b/lib/hachoir_parser/video/__init__.py @@ -1,6 +1,6 @@ -from lib.hachoir_parser.video.asf import AsfFile -from lib.hachoir_parser.video.flv import FlvFile -from lib.hachoir_parser.video.mov import MovFile -from lib.hachoir_parser.video.mpeg_video import MPEGVideoFile -from lib.hachoir_parser.video.mpeg_ts import MPEG_TS +from hachoir_parser.video.asf import AsfFile +from hachoir_parser.video.flv import FlvFile +from hachoir_parser.video.mov import MovFile +from hachoir_parser.video.mpeg_video import MPEGVideoFile +from hachoir_parser.video.mpeg_ts import MPEG_TS diff --git a/lib/hachoir_parser/video/amf.py b/lib/hachoir_parser/video/amf.py index 963f3207..496c5c1d 100644 --- a/lib/hachoir_parser/video/amf.py +++ b/lib/hachoir_parser/video/amf.py @@ -10,9 +10,9 @@ Author: Victor Stinner Creation date: 4 november 2006 """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, PascalString16, Float64) -from lib.hachoir_core.tools import timestampUNIX +from hachoir_core.tools import timestampUNIX def parseUTF8(parent): yield PascalString16(parent, "value", 
charset="UTF-8") diff --git a/lib/hachoir_parser/video/asf.py b/lib/hachoir_parser/video/asf.py index 35711d09..39205ea6 100644 --- a/lib/hachoir_parser/video/asf.py +++ b/lib/hachoir_parser/video/asf.py @@ -10,20 +10,20 @@ Author: Victor Stinner Creation: 5 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt16, UInt32, UInt64, TimestampWin64, TimedeltaWin64, String, PascalString16, Enum, Bit, Bits, PaddingBits, PaddingBytes, NullBytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import ( +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import ( displayHandler, filesizeHandler) -from lib.hachoir_core.tools import humanBitRate +from hachoir_core.tools import humanBitRate from itertools import izip -from lib.hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name -from lib.hachoir_parser.common.win32 import BitmapInfoHeader, GUID +from hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name +from hachoir_parser.common.win32 import BitmapInfoHeader, GUID MAX_HEADER_SIZE = 100 * 1024 # bytes diff --git a/lib/hachoir_parser/video/flv.py b/lib/hachoir_parser/video/flv.py index c2723d9f..5edbe7ab 100644 --- a/lib/hachoir_parser/video/flv.py +++ b/lib/hachoir_parser/video/flv.py @@ -12,14 +12,14 @@ Author: Victor Stinner Creation date: 4 november 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt24, UInt32, NullBits, NullBytes, Bit, Bits, String, RawBytes, Enum) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.audio.mpeg_audio import Frame -from lib.hachoir_parser.video.amf import AMFObject -from lib.hachoir_core.tools import createDict +from 
hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.audio.mpeg_audio import Frame +from hachoir_parser.video.amf import AMFObject +from hachoir_core.tools import createDict SAMPLING_RATE = { 0: ( 5512, "5.5 kHz"), diff --git a/lib/hachoir_parser/video/mov.py b/lib/hachoir_parser/video/mov.py index 32a81d09..1ab6ac51 100644 --- a/lib/hachoir_parser/video/mov.py +++ b/lib/hachoir_parser/video/mov.py @@ -10,28 +10,48 @@ Documents: http://wiki.multimedia.cx/index.php?title=Apple_QuickTime - File type (ftyp): http://www.ftyps.com/ +- MPEG4 standard + http://neuron2.net/library/avc/c041828_ISO_IEC_14496-12_2005%28E%29.pdf -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation: 2 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, FieldSet, MissingField, - UInt8, Int16, UInt16, UInt32, TimestampMac32, - String, PascalString8, CString, - RawBytes, PaddingBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_parser.common.win32 import GUID +from hachoir_core.field import (ParserError, FieldSet, MissingField, + Enum, + Bit, NullBits, Bits, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, TimestampMac32, + String, PascalString8, PascalString16, CString, + RawBytes, NullBytes, PaddingBytes) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal -class QTFloat32(FieldSet): - static_size = 32 - def createFields(self): - yield Int16(self, "int_part") - yield UInt16(self, "float_part") - def createValue(self): - return self["int_part"].value + float(self["float_part"].value) / 65535 - def createDescription(self): - return str(self.value) +from hachoir_core.tools import MAC_TIMESTAMP_T0, timedelta +def timestampMac64(value): + if not isinstance(value, (float, int, long)): + raise TypeError("an integer or float is required") + return 
MAC_TIMESTAMP_T0 + timedelta(seconds=value) +from hachoir_core.field.timestamp import timestampFactory +TimestampMac64 = timestampFactory("TimestampMac64", timestampMac64, 64) + +def fixedFloatFactory(name, int_bits, float_bits, doc): + size = int_bits + float_bits + class Float(FieldSet): + static_size = size + __doc__ = doc + def createFields(self): + yield Bits(self, "int_part", int_bits) + yield Bits(self, "float_part", float_bits) + def createValue(self): + return self["int_part"].value + float(self["float_part"].value) / (1< 0: + yield RawBytes(self, "extra_data", size) + +class SampleDescription(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Total entries in table") + for i in xrange(self['count'].value): + yield SampleEntry(self, "sample_entry[]") + +class SyncSampleTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Number of sync samples") + for i in xrange(self['count'].value): + yield UInt32(self, "sample_number[]") + +class SampleSizeTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "uniform_size", description="Uniform size of each sample (0 if non-uniform)") + yield UInt32(self, "count", description="Number of samples") + if self['uniform_size'].value == 0: + for i in xrange(self['count'].value): + yield UInt32(self, "sample_size[]") + +class CompactSampleSizeTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield NullBits(self, "reserved[]", 24) + yield UInt8(self, "field_size", "Size of each entry in this table, in bits") + yield UInt32(self, "count", description="Number of samples") + bitsize = self['field_size'].value + for i in xrange(self['count'].value): + yield Bits(self, "sample_size[]", bitsize) + if 
self.current_size % 8 != 0: + yield NullBits(self, "padding[]", 8 - (self.current_size % 8)) + +class SampleToChunkTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Number of samples") + for i in xrange(self['count'].value): + yield UInt32(self, "first_chunk[]") + yield UInt32(self, "samples_per_chunk[]") + yield UInt32(self, "sample_description_index[]") + class Atom(FieldSet): tag_info = { - # TODO: Use dictionnary of dictionnary, like Matroska parser does - # "elst" is a child of "edts", but not of "moov" for example - "moov": (AtomList, "movie", "Movie"), - "trak": (AtomList, "track", "Track"), - "mdia": (AtomList, "media", "Media"), - "edts": (AtomList, "edts", ""), - "minf": (AtomList, "minf", ""), - "stbl": (AtomList, "stbl", ""), - "dinf": (AtomList, "dinf", ""), - "elst": (ELST, "edts", ""), - "tkhd": (TrackHeader, "track_hdr", "Track header"), - "hdlr": (HDLR, "hdlr", ""), - "mdhd": (MediaHeader, "media_hdr", "Media header"), - "load": (Load, "load", ""), - "mvhd": (MovieHeader, "movie_hdr", "Movie header"), - "ftyp": (FileType, "file_type", "File type"), + "ftyp": (FileType, "file_type", "File type and compatibility"), + # pdin: progressive download information + # pnot: movie preview (old QT spec) + "moov": (AtomList, "movie", "Container for all metadata"), + "mvhd": (MovieHeader, "movie_hdr", "Movie header, overall declarations"), + # clip: movie clipping (old QT spec) + # crgn: movie clipping region (old QT spec) + "trak": (AtomList, "track", "Container for an individual track or stream"), + "tkhd": (TrackHeader, "track_hdr", "Track header, overall information about the track"), + # matt: track matte (old QT spec) + # kmat: compressed matte (old QT spec) + "tref": (AtomList, "tref", "Track reference container"), + "hint": (TrackReferenceType, "hint", "Original media track(s) for this hint track"), + "cdsc": (TrackReferenceType, "cdsc", "Reference to 
track described by this track"), + "edts": (AtomList, "edts", "Edit list container"), + "elst": (EditList, "elst", "Edit list"), + "load": (Load, "load", "Track loading settings (old QT spec)"), + # imap: Track input map (old QT spec) + "mdia": (AtomList, "media", "Container for the media information in a track"), + "mdhd": (MediaHeader, "media_hdr", "Media header, overall information about the media"), + "hdlr": (Handler, "hdlr", "Handler, declares the media or metadata (handler) type"), + "minf": (AtomList, "minf", "Media information container"), + "vmhd": (VideoMediaHeader, "vmhd", "Video media header, overall information (video track only)"), + "smhd": (SoundMediaHeader, "smhd", "Sound media header, overall information (sound track only)"), + "hmhd": (HintMediaHeader, "hmhd", "Hint media header, overall information (hint track only)"), + # nmhd: Null media header, overall information (some tracks only) (unparsed) + "dinf": (AtomList, "dinf", "Data information, container"), + "dref": (DataReference, "dref", "Data reference, declares source(s) of media data in track"), + "url ": (DataEntryUrl, "url", "URL data reference"), + "urn ": (DataEntryUrn, "urn", "URN data reference"), + "stbl": (AtomList, "stbl", "Sample table, container for the time/space map"), + "stsd": (SampleDescription, "stsd", "Sample descriptions (codec types, initialization etc.)"), + "stts": (SampleDecodeTimeTable, "stts", "decoding time-to-sample delta table"), + "ctts": (SampleCompositionTimeTable, "ctts", "composition time-to-sample offset table"), + "stsc": (SampleToChunkTable, "stsc", "sample-to-chunk, partial data-offset information"), + "stsz": (SampleSizeTable, "stsz", "Sample size table (framing)"), + "stz2": (CompactSampleSizeTable, "stz2", "Compact sample size table (framing)"), + "stco": (ChunkOffsetTable, "stco", "Chunk offset, partial data-offset information"), + "co64": (ChunkOffsetTable64, "co64", "64-bit chunk offset"), + "stss": (SyncSampleTable, "stss", "Sync sample table 
(random access points)"), + # stsh: shadow sync sample table + # padb: sample padding bits + # stdp: sample degradation priority + # sdtp: independent and disposable samples + # sbgp: sample-to-group + # sgpd: sample group description + # subs: sub-sample information + # ctab color table (old QT spec) + # mvex: movie extends + # mehd: movie extends header + # trex: track extends defaults + # ipmc: IPMP control + "moof": (AtomList, "moof", "movie fragment"), + "mfhd": (MovieFragmentHeader, "mfhd", "movie fragment header"), + # traf: track fragment + # tfhd: track fragment header + # trun: track fragment run + # sdtp: independent and disposable samples + # sbgp: sample-to-group + # subs: sub-sample information + "mfra": (AtomList, "mfra", "movie fragment random access"), + "tfra": (TrackFragmentRandomAccess, "tfra", "track fragment random access"), + "mfro": (MovieFragmentRandomAccessOffset, "mfro", "movie fragment random access offset"), + # mdat: media data container + # free: free space (unparsed) + # skip: free space (unparsed) + "udta": (AtomList, "udta", "User data"), + "meta": (META, "meta", "File metadata"), + "keys": (KeyList, "keys", "Metadata keys"), + ## hdlr + ## dinf + ## dref: data reference, declares source(s) of metadata items + ## ipmc: IPMP control + # iloc: item location + # ipro: item protection + # sinf: protection scheme information + # frma: original format + # imif: IPMP information + # schm: scheme type + # schi: scheme information + # iinf: item information + # xml : XML container + # bxml: binary XML container + # pitm: primary item reference + ## other tags + "ilst": (ItemList, "ilst", "Item list"), + "trkn": (AtomList, "trkn", "Metadata: Track number"), + "disk": (AtomList, "disk", "Metadata: Disk number"), + "tmpo": (AtomList, "tempo", "Metadata: Tempo"), + "cpil": (AtomList, "cpil", "Metadata: Compilation"), + "gnre": (AtomList, "gnre", "Metadata: Genre"), + "\xa9cpy": (AtomList, "copyright", "Metadata: Copyright statement"), + 
"\xa9day": (AtomList, "date", "Metadata: Date of content creation"), + "\xa9dir": (AtomList, "director", "Metadata: Movie director"), + "\xa9ed1": (AtomList, "edit1", "Metadata: Edit date and description (1)"), + "\xa9ed2": (AtomList, "edit2", "Metadata: Edit date and description (2)"), + "\xa9ed3": (AtomList, "edit3", "Metadata: Edit date and description (3)"), + "\xa9ed4": (AtomList, "edit4", "Metadata: Edit date and description (4)"), + "\xa9ed5": (AtomList, "edit5", "Metadata: Edit date and description (5)"), + "\xa9ed6": (AtomList, "edit6", "Metadata: Edit date and description (6)"), + "\xa9ed7": (AtomList, "edit7", "Metadata: Edit date and description (7)"), + "\xa9ed8": (AtomList, "edit8", "Metadata: Edit date and description (8)"), + "\xa9ed9": (AtomList, "edit9", "Metadata: Edit date and description (9)"), + "\xa9fmt": (AtomList, "format", "Metadata: Movie format (CGI, digitized, etc.)"), + "\xa9inf": (AtomList, "info", "Metadata: Information about the movie"), + "\xa9prd": (AtomList, "producer", "Metadata: Movie producer"), + "\xa9prf": (AtomList, "performers", "Metadata: Performer names"), + "\xa9req": (AtomList, "requirements", "Metadata: Special hardware and software requirements"), + "\xa9src": (AtomList, "source", "Metadata: Credits for those who provided movie source content"), + "\xa9nam": (AtomList, "name", "Metadata: Name of song or video"), + "\xa9des": (AtomList, "description", "Metadata: File description"), + "\xa9cmt": (AtomList, "comment", "Metadata: General comment"), + "\xa9alb": (AtomList, "album", "Metadata: Album name"), + "\xa9gen": (AtomList, "genre", "Metadata: Custom genre"), + "\xa9ART": (AtomList, "artist", "Metadata: Artist name"), + "\xa9too": (AtomList, "encoder", "Metadata: Encoder"), + "\xa9wrt": (AtomList, "writer", "Metadata: Writer"), + "covr": (AtomList, "cover", "Metadata: Cover art"), + "----": (AtomList, "misc", "Metadata: Miscellaneous"), + "tags": (AtomList, "tags", "File tags"), + "tseg": (AtomList, "tseg", "tseg"), 
+ "chpl": (NeroChapters, "chpl", "Nero chapter data"), } tag_handler = [ item[0] for item in tag_info ] tag_desc = [ item[1] for item in tag_info ] def createFields(self): yield UInt32(self, "size") - yield String(self, "tag", 4) + yield RawBytes(self, "tag", 4) size = self["size"].value if size == 1: - raise ParserError("Extended size is not supported!") - #yield UInt64(self, "size64") - size = self["size64"].value + # 64-bit size + yield UInt64(self, "size64") + size = self["size64"].value - 16 elif size == 0: - #size = (self.root.size - self.root.current_size - self.current_size) / 8 + # Unbounded atom if self._size is None: - size = (self.parent.size - self.current_size) / 8 - 8 + size = (self.parent.size - self.parent.current_size) / 8 - 8 else: size = (self.size - self.current_size) / 8 else: size = size - 8 - if 0 < size: + if self['tag'].value == 'uuid': + yield GUID(self, "usertag") + tag = self["usertag"].value + size -= 16 + else: tag = self["tag"].value + if size > 0: if tag in self.tag_info: handler, name, desc = self.tag_info[tag] yield handler(self, name, desc, size=size*8) @@ -191,6 +820,8 @@ class Atom(FieldSet): yield RawBytes(self, "data", size) def createDescription(self): + if self["tag"].value == "uuid": + return "Atom: uuid: "+self["usertag"].value return "Atom: %s" % self["tag"].value class MovFile(Parser): @@ -207,12 +838,16 @@ class MovFile(Parser): # File type brand => MIME type 'mp41': u'video/mp4', 'mp42': u'video/mp4', + 'avc1': u'video/mp4', + 'isom': u'video/mp4', + 'iso2': u'video/mp4', } endian = BIG_ENDIAN def __init__(self, *args, **kw): Parser.__init__(self, *args, **kw) - self.is_mpeg4 = False + + is_mpeg4 = property(lambda self:self.mime_type==u'video/mp4') def validate(self): # TODO: Write better code, erk! 
def createFields(self):
    """
    Yield the fields of the transport stream until the end of the input.

    Each iteration searches for the sync byte (0x47) in a window of 204 bytes
    starting at the current position, then yields one "packet[]" field. When
    the sync byte is found beyond the current position, the bytes in between
    are first yielded as a raw "incomplete_packet[]" field.

    Raises ParserError if no sync byte is found inside the search window.
    """
    while not self.eof:
        # Positions are handled in bits here (window is 204*8, size is //8).
        start = self.current_size
        sync = self.stream.searchBytes("\x47", start, start + 204*8)
        if sync is None:
            raise ParserError("Unable to find synchronization byte")
        elif sync != start:
            # `sync` is an absolute position (the size below is computed as
            # sync - start), so testing its truthiness alone would take this
            # branch for every packet after the first and emit a zero-length
            # field when the packet is already aligned.
            yield RawBytes(self, "incomplete_packet[]", (sync - start)//8)
        yield Packet(self, "packet[]")
@staticmethod
def fileQuality(filename):
    """
    Determine a quality from the video height found in a media file's metadata.

    The file is opened with the Hachoir parser and the extracted metadata is
    asked for a 'height' value; if that lookup raises, the first metadata
    group whose header mentions "video" is asked instead.

    :param filename: path of the media file to inspect
    :return: Quality.SDTV, Quality.HDTV or Quality.FULLHDTV depending on the
        height found, otherwise Quality.UNKNOWN (path is not a file, no parser
        could be created, no height was found, or the height is too small)
    """
    from sickbeard import encodingKludge as ek
    if not ek.ek(os.path.isfile, filename):
        return Quality.UNKNOWN

    # imported lazily, only once there is a real file to inspect
    from hachoir_parser import createParser
    from hachoir_metadata import extractMetadata
    from hachoir_core.stream import InputStreamError

    parser = height = None
    try:
        parser = createParser(filename)
    except InputStreamError as e:
        logger.log('Hachoir can\'t parse file content quality because it found error: %s' % e.text, logger.WARNING)

    if parser:
        try:
            extract = extractMetadata(parser)
            if extract:
                try:
                    height = extract.get('height')
                except (AttributeError, ValueError):
                    # no top level height, fall back to the first video group
                    try:
                        for metadata in extract.iterGroups():
                            if re.search('(?i)video', metadata.header):
                                height = metadata.get('height')
                                break
                    except (AttributeError, ValueError):
                        pass
        finally:
            # always release the input stream, even if extraction raised
            parser.stream._input.close()

    if None is height:
        # nothing usable was found; do not rely on ordering None against ints
        return Quality.UNKNOWN

    def tolerance(value, percent):
        # lowest height still accepted as `value`, i.e. `percent` below it
        return int(round(value - (value * percent / 100.0)))

    if height >= tolerance(352, 5):
        if height <= tolerance(720, 2):
            return Quality.SDTV
        return (Quality.HDTV, Quality.FULLHDTV)[height >= tolerance(1080, 1)]
    return Quality.UNKNOWN
def makeEpFromFile(self, file):
    """
    Create or update the episode object(s) that a media file belongs to.

    The file name is parsed for season and episode numbers (season defaults
    to 1 when none is parsed). Every parsed episode is fetched, or created
    from the file, its location is pointed at the file and, when the file is
    new for that episode, its quality/status is re-evaluated from the name,
    then the file content, then a guess. Pending SQL of all episodes is
    written in one mass action.

    :param file: path of the media file
    :return: the episode object of the first parsed episode number (further
        episodes are attached to its relatedEps), or None if the path is not
        a file, cannot be parsed, or holds no episode number
    """
    if not ek.ek(os.path.isfile, file):
        logger.log(u'%s: Not a real file... %s' % (self.indexerid, file))
        return None

    logger.log(u'%s: Creating episode object from %s' % (self.indexerid, file), logger.DEBUG)

    try:
        my_parser = NameParser(showObj=self, try_indexers=True)
        parse_result = my_parser.parse(file)
    except InvalidNameException:
        logger.log(u'Unable to parse the filename %s into a valid episode' % file, logger.DEBUG)
        return None
    except InvalidShowException:
        logger.log(u'Unable to parse the filename %s into a valid show' % file, logger.DEBUG)
        return None

    if not len(parse_result.episode_numbers):
        logger.log(u'parse_result: %s' % parse_result)
        logger.log(u'No episode number found in %s, ignoring it' % file, logger.ERROR)
        return None

    # for now lets assume that any episode in the show dir belongs to that show
    season = parse_result.season_number if None is not parse_result.season_number else 1
    episodes = parse_result.episode_numbers
    root_ep = None

    sql_l = []
    for cur_ep_num in episodes:

        episode = int(cur_ep_num)

        logger.log(u'%s: %s parsed to %s %sx%s' % (self.indexerid, file, self.name, season, episode), logger.DEBUG)

        check_quality_again = False
        same_file = False
        cur_ep = self.getEpisode(season, episode)

        if None is cur_ep:
            try:
                cur_ep = self.getEpisode(season, episode, file)
            except exceptions.EpisodeNotFoundException:
                logger.log(u'%s: Unable to figure out what this file is, skipping' % self.indexerid, logger.ERROR)
                continue

        else:
            # if there is a new file associated with this ep then re-check the quality
            if cur_ep.location and ek.ek(os.path.normpath, cur_ep.location) != ek.ek(os.path.normpath, file):
                logger.log(
                    u'The old episode had a different file associated with it, re-checking the quality based on the new filename ' + file,
                    logger.DEBUG)
                check_quality_again = True

            with cur_ep.lock:
                old_size = cur_ep.file_size
                cur_ep.location = file
                # if the sizes are the same then it's probably the same file
                if old_size and cur_ep.file_size == old_size:
                    same_file = True
                else:
                    same_file = False

                cur_ep.checkForMetaFiles()

        if None is root_ep:
            root_ep = cur_ep
        else:
            if cur_ep not in root_ep.relatedEps:
                root_ep.relatedEps.append(cur_ep)

        # if it's a new file then
        if not same_file:
            cur_ep.release_name = ''

        # if a file was replaced, make some attempt at re-checking the quality unless it is known to be the same file
        if check_quality_again and not same_file:
            new_quality = Quality.nameQuality(file, self.is_anime)
            if Quality.UNKNOWN == new_quality:
                new_quality = Quality.fileQuality(file)
            logger.log(u'Since this file was renamed, file %s was checked and quality "%s" found'
                       % (file, Quality.qualityStrings[new_quality]), logger.DEBUG)
            if Quality.UNKNOWN != new_quality:
                cur_ep.status = Quality.compositeStatus(DOWNLOADED, new_quality)

        # check for status/quality changes as long as it's a new file
        elif not same_file and sickbeard.helpers.isMediaFile(file)\
                and cur_ep.status not in Quality.DOWNLOADED + [ARCHIVED, IGNORED]:

            old_status, old_quality = Quality.splitCompositeStatus(cur_ep.status)
            # quality from the name first, then the file content, then a guess
            new_quality = Quality.nameQuality(file, self.is_anime)
            if Quality.UNKNOWN == new_quality:
                new_quality = Quality.fileQuality(file)
                if Quality.UNKNOWN == new_quality:
                    new_quality = Quality.assumeQuality(file)

            new_status = None

            # if it was snatched and now exists then set the status correctly
            if SNATCHED == old_status and old_quality <= new_quality:
                logger.log(u'STATUS: this episode used to be snatched with quality %s but a file exists with quality %s so setting the status to DOWNLOADED'
                           % (Quality.qualityStrings[old_quality], Quality.qualityStrings[new_quality]), logger.DEBUG)
                new_status = DOWNLOADED

            # if it was snatched proper and we found a higher quality one then allow the status change
            elif SNATCHED_PROPER == old_status and old_quality < new_quality:
                logger.log(u'STATUS: this episode used to be snatched proper with quality %s but a file exists with quality %s so setting the status to DOWNLOADED'
                           % (Quality.qualityStrings[old_quality], Quality.qualityStrings[new_quality]), logger.DEBUG)
                new_status = DOWNLOADED

            elif old_status not in (SNATCHED, SNATCHED_PROPER):
                new_status = DOWNLOADED

            if None is not new_status:
                # Build the composite once from the scalar status. The log
                # line used to pass Quality.DOWNLOADED, which is the list of
                # composite statuses (see the `not in` test above), not a
                # status value.
                composite_status = Quality.compositeStatus(new_status, new_quality)
                with cur_ep.lock:
                    logger.log(u'STATUS: we have an associated file, so setting the status from %s to DOWNLOADED/%s'
                               % (cur_ep.status, composite_status), logger.DEBUG)
                    cur_ep.status = composite_status

        with cur_ep.lock:
            result = cur_ep.get_sql()
            if None is not result:
                sql_l.append(result)

    if 0 < len(sql_l):
        my_db = db.DBConnection()
        my_db.mass_action(sql_l)

    # creating metafiles on the root should be good enough
    if sickbeard.USE_FAILED_DOWNLOADS and root_ep is not None:
        with root_ep.lock:
            root_ep.createMetaFiles()

    return root_ep
logger.DEBUG) + self.status = status_quality # shouldn't get here probably else: - logger.log(u"6 Status changes from " + str(self.status) + " to " + str(UNKNOWN), logger.DEBUG) + logger.log(u"(2) Status changes from " + str(self.status) + " to " + str(UNKNOWN), logger.DEBUG) self.status = UNKNOWN def loadFromNFO(self, location): @@ -1837,11 +1832,10 @@ class TVEpisode(object): if self.location != "": - if self.status == UNKNOWN: - if sickbeard.helpers.isMediaFile(self.location): - logger.log(u"7 Status changes from " + str(self.status) + " to " + str( - Quality.statusFromName(self.location, anime=self.show.is_anime)), logger.DEBUG) - self.status = Quality.statusFromName(self.location, anime=self.show.is_anime) + if UNKNOWN == self.status and sickbeard.helpers.isMediaFile(self.location): + status_quality = Quality.statusFromNameOrFile(self.location, anime=self.show.is_anime) + logger.log(u'(3) Status changes from %s to %s' % (self.status, status_quality), logger.DEBUG) + self.status = status_quality nfoFile = sickbeard.helpers.replaceExtension(self.location, "nfo") logger.log(str(self.show.indexerid) + u": Using NFO name " + nfoFile, logger.DEBUG)