diff --git a/CHANGES.md b/CHANGES.md index 32f519e6..0fa591d1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ * Update Certifi to 2015.11.20.1 (385476b) * Update chardet packages 2.3.0 (26982c5) to 2.3.0 (d7fae98) * Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97) +* Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435) ### 0.11.0 (2016-01-10 22:30:00 UTC) diff --git a/lib/hachoir_metadata/jpeg.py b/lib/hachoir_metadata/jpeg.py index a112318f..9e951672 100644 --- a/lib/hachoir_metadata/jpeg.py +++ b/lib/hachoir_metadata/jpeg.py @@ -24,6 +24,19 @@ class JpegMetadata(RootMetadata): "FNumber": "camera_focal", "BrightnessValue": "camera_brightness", "MaxApertureValue": "camera_aperture", + "ISOSpeedRatings": "iso_speed_ratings", + "ExifVersion": "exif_version", + "DateTimeOriginal": "date_time_original", + "DateTimeDigitized": "date_time_digitized", + "CompressedBitsPerPixel": "compressed_bits_per_pixel", + "ShutterSpeedValue": "shutter_speed_value", + "ApertureValue": "aperture_value", + "ExposureBiasValue": "exposure_bias_value", + "FocalLength": "focal_length", + "FlashpixVersion": "flashpix_version", + "FocalPlaneXResolution": "focal_plane_x_resolution", + "FocalPlaneYResolution": "focal_plane_y_resolution", + "FocalLengthIn35mmFilm": "focal_length_in_35mm_film", # Generic metadatas "ImageDescription": "title", @@ -32,6 +45,7 @@ class JpegMetadata(RootMetadata): "PixelXDimension": "width", "PixelYDimension": "height", "UserComment": "comment", + "JPEGInterchangeFormatLength": "thumbnail_size", } IPTC_KEY = { diff --git a/lib/hachoir_metadata/metadata.py b/lib/hachoir_metadata/metadata.py index 37461c9d..dbdc411b 100644 --- a/lib/hachoir_metadata/metadata.py +++ b/lib/hachoir_metadata/metadata.py @@ -284,6 +284,10 @@ def extractMetadata(parser, quality=QUALITY_NORMAL): metadata.extract(parser) except HACHOIR_ERRORS, err: error("Error during metadata extraction: %s" % unicode(err)) + return None + except Exception, err: + error("Error during 
metadata extraction: %s" % unicode(err)) + return None if metadata: metadata.mime_type = parser.mime_type metadata.endian = endian_name[parser.endian] diff --git a/lib/hachoir_metadata/qt/__init__.py b/lib/hachoir_metadata/qt/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/hachoir_metadata/qt/dialog.ui b/lib/hachoir_metadata/qt/dialog.ui new file mode 100644 index 00000000..498a8dae --- /dev/null +++ b/lib/hachoir_metadata/qt/dialog.ui @@ -0,0 +1,64 @@ + + Form + + + + 0 + 0 + 441 + 412 + + + + hachoir-metadata + + + + + + + + Open + + + + + + + + 0 + 0 + + + + + + + + + + true + + + false + + + 0 + + + 0 + + + + + + + Quit + + + + + + + + diff --git a/lib/hachoir_metadata/register.py b/lib/hachoir_metadata/register.py index 3cbde86d..97dcb559 100644 --- a/lib/hachoir_metadata/register.py +++ b/lib/hachoir_metadata/register.py @@ -102,6 +102,23 @@ def registerAllItems(meta): meta.register(Data("bit_rate", 604, _("Bit rate"), text_handler=humanBitRate, filter=NumberFilter(1, MAX_BIT_RATE), type=(int, long, float))) meta.register(Data("aspect_ratio", 604, _("Aspect ratio"), type=(int, long, float))) + meta.register(Data("thumbnail_size", 604, _("Thumbnail size"), text_handler=humanFilesize, type=(int, long, float))) + + meta.register(Data("iso_speed_ratings", 800, _("ISO speed rating"))) + meta.register(Data("exif_version", 801, _("EXIF version"))) + meta.register(Data("date_time_original", 802, _("Date-time original"), text_handler=humanDatetime, + filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime)) + meta.register(Data("date_time_digitized", 803, _("Date-time digitized"), text_handler=humanDatetime, + filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime)) + meta.register(Data("compressed_bits_per_pixel", 804, _("Compressed bits per pixel"), type=(int, long, float))) + meta.register(Data("shutter_speed_value", 805, _("Shutter speed"), type=(int, long, float))) + meta.register(Data("aperture_value", 806, 
_("Aperture"))) + meta.register(Data("exposure_bias_value", 807, _("Exposure bias"))) + meta.register(Data("focal_length", 808, _("Focal length"))) + meta.register(Data("flashpix_version", 809, _("Flashpix version"))) + meta.register(Data("focal_plane_x_resolution", 810, _("Focal plane width"))) + meta.register(Data("focal_plane_y_resolution", 811, _("Focal plane height"), type=float)) + meta.register(Data("focal_length_in_35mm_film", 812, _("Focal length in 35mm film"))) meta.register(Data("os", 900, _("OS"), type=unicode)) meta.register(Data("producer", 901, _("Producer"), type=unicode)) diff --git a/lib/hachoir_parser/archive/__init__.py b/lib/hachoir_parser/archive/__init__.py index 46103c1a..d9d332b9 100644 --- a/lib/hachoir_parser/archive/__init__.py +++ b/lib/hachoir_parser/archive/__init__.py @@ -1,5 +1,6 @@ from hachoir_parser.archive.ace import AceFile from hachoir_parser.archive.ar import ArchiveFile +from hachoir_parser.archive.bomstore import BomFile from hachoir_parser.archive.bzip2_parser import Bzip2Parser from hachoir_parser.archive.cab import CabFile from hachoir_parser.archive.gzip_parser import GzipParser @@ -11,3 +12,4 @@ from hachoir_parser.archive.sevenzip import SevenZipParser from hachoir_parser.archive.mar import MarFile from hachoir_parser.archive.mozilla_ar import MozillaArchive from hachoir_parser.archive.zlib import ZlibData +from hachoir_parser.archive.prs_pak import PRSPakFile diff --git a/lib/hachoir_parser/archive/bomstore.py b/lib/hachoir_parser/archive/bomstore.py new file mode 100644 index 00000000..a8511501 --- /dev/null +++ b/lib/hachoir_parser/archive/bomstore.py @@ -0,0 +1,90 @@ +""" +Apple BOMStorage parser. + +Used for Assets.Bom files by Interface Builder, and for .bom files by Installer.app. 
class BomTrailer(FieldSet):
    """
    Object table stored at the trailer offset of a BOMStore file.

    Layout, as read by createFields():
     - num_spaces: total number of table slots, including blank ones
     - (num_objects + 1) entries of type BomTrailerEntry
     - zero padding for the remaining (blank) slots
     - num_trail, followed by that many BomTrailerEntry "trail" entries
    """
    def createFields(self):
        yield UInt32(self, "num_spaces", "Total number of entries, including blank entries")
        nobj = self['/num_objects'].value
        nspace = self['num_spaces'].value
        for i in xrange(nobj+1):
            yield BomTrailerEntry(self, "entry[]")

        # Slots that exist in the table but are not used by any object.
        # Only emit the padding field when there is something to pad: a
        # table that is exactly full (or a corrupt num_spaces smaller than
        # the entry count) would otherwise ask for a byte field with a zero
        # or negative length, which hachoir byte fields do not accept.
        blank_size = (nspace - nobj - 1) * (BomTrailerEntry.static_size // 8)
        if blank_size > 0:
            yield NullBytes(self, "blank_entries", blank_size)

        yield UInt32(self, "num_trail")
        ntrail = self['num_trail'].value
        for i in xrange(ntrail):
            yield BomTrailerEntry(self, "trail[]")

    def createDescription(self):
        return "Bom file trailer"
class PRSPakFile(Parser):
    """
    Parser for Parallel Realities Starfighter .pak archives.

    The file is a 4-byte "PACK" identifier followed by file entries
    (see FileEntry) until the end of the stream.
    """
    PARSER_TAGS = {
        "id": "prs_pak",
        "category": "archive",
        "file_ext": ("pak",),
        "mime": (u"application/octet-stream",),
        "min_size": 4*8, # just the identifier
        "magic": (('PACK', 0),),
        "description": "Parallel Realities Starfighter .pak archive",
    }

    endian = LITTLE_ENDIAN

    def validate(self):
        """
        Return True when the stream looks like a .pak archive, otherwise
        a short string explaining why it was rejected.
        """
        if self.stream.readBytes(0, 4) != 'PACK':
            return "Invalid magic"
        # The size field is an unsigned 32-bit integer, so testing it for
        # ">= 0" can never fail; only the filename is a useful sanity check.
        if not self["file[0]/filename"].value:
            return "First file entry has an empty filename"
        return True

    def createFields(self):
        yield String(self, "magic", 4)

        # all remaining data must be file entries:
        while self.current_size < self._size:
            yield FileEntry(self, "file[]")
"flags", "Archiver block flags") @@ -135,29 +140,57 @@ class FileFlags(FieldSet): yield Bit(self, "is_solid", "Information from previous files is used (solid flag)") # The 3 following lines are what blocks more staticity yield Enum(Bits(self, "dictionary_size", 3, "Dictionary size"), DICTIONARY_SIZE) - for bit in commonFlags(self): - yield bit yield Bit(self, "is_large", "file64 operations needed") yield Bit(self, "is_unicode", "Filename also encoded using Unicode") yield Bit(self, "has_salt", "Has salt for encryption") yield Bit(self, "uses_file_version", "File versioning is used") - yield Bit(self, "has_ext_time", "Extra time ??") + yield Bit(self, "has_ext_time", "Extra time info present") yield Bit(self, "has_ext_flags", "Extra flag ??") + for field in commonFlags: + yield field[0](self, *field[1:]) def fileFlags(s): yield FileFlags(s, "flags", "File block flags") +class ExtTimeFlags(FieldSet): + static_size = 16 + def createFields(self): + for name in ['arctime', 'atime', 'ctime', 'mtime']: + yield Bits(self, "%s_count" % name, 2, "Number of %s bytes" % name) + yield Bit(self, "%s_onesec" % name, "Add one second to the timestamp?") + yield Bit(self, "%s_present" % name, "Is %s extra time present?" 
class ExtTime(FieldSet):
    """
    Extended time block of a RAR file header.

    Starts with a 16-bit ExtTimeFlags field. For each of mtime, ctime,
    atime and arctime that is flagged as present, the block stores a DOS
    timestamp (except for mtime, whose base value is the "ftime" field of
    the enclosing header) followed by 0 to 3 bytes of sub-second precision.
    """
    def createFields(self):
        yield ExtTimeFlags(self, "time_flags")
        for name in ['mtime', 'ctime', 'atime', 'arctime']:
            if not self['time_flags/%s_present' % name].value:
                continue
            if name != 'mtime':
                yield TimeDateMSDOS32(self, "%s" % name, "%s DOS timestamp" % name)
            count = self['time_flags/%s_count' % name].value
            if count:
                yield Bits(self, "%s_remainder" % name, 8 * count, "%s extra precision time (in 100ns increments)" % name)

    def createDescription(self):
        """Summarise every present timestamp, including the extra precision."""
        out = 'Time extension'
        pieces = []
        for name in ['mtime', 'ctime', 'atime', 'arctime']:
            if not self['time_flags/%s_present' % name].value:
                continue

            if name == 'mtime':
                # mtime has no DOS timestamp of its own in this block
                basetime = self['../ftime'].value
            else:
                basetime = self['%s' % name].value

            delta = timedelta()
            if self['time_flags/%s_onesec' % name].value:
                delta += timedelta(seconds=1)

            remainder_name = '%s_remainder' % name
            if remainder_name in self:
                # The remainder is a 3-byte counter of 100ns ticks. When
                # fewer than 3 bytes are stored they are the most
                # significant ones (this mirrors how unrar rebuilds the
                # value), so shift them back into place before converting.
                count = self['time_flags/%s_count' % name].value
                ticks = self[remainder_name].value << (8 * (3 - count))
                delta += timedelta(microseconds=ticks / 10.0)

            pieces.append('%s=%s' % (name, basetime + delta))
        if pieces:
            out += ': ' + ', '.join(pieces)
        return out
def fileDescription(tag):
    """
    Build a description callback for RAR blocks that carry a file header.

    tag is the label placed in front of the description (for example
    "File entry"). The returned function takes the block and formats the
    label together with the display text of its "filename" and
    "compressed_size" fields.
    """
    def _fileDescription(s):
        filename = s["filename"].display
        packed_size = s["compressed_size"].display
        return "%s: %s (%s)" % (tag, filename, packed_size)
    return _fileDescription
commentBody), 0x76: ("av_info[]", "Extra information", None, avInfoHeader, avInfoBody), - 0x77: ("sub_block[]", "Stray subblock", None, newSubHeader, fileBody), + 0x77: ("sub_block[]", fileDescription("Subblock"), None, newSubHeader, fileBody), 0x78: ("recovery[]", "Recovery block", None, recoveryHeader, None), 0x79: ("signature", "Signature block", None, signatureHeader, None), - 0x7A: ("new_sub_block[]", "Stray new-format subblock", fileFlags, + 0x7A: ("sub_block[]", fileDescription("New-format subblock"), fileFlags, newSubHeader, fileBody), 0x7B: ("archive_end", "Archive end block", endFlags, None, None), } diff --git a/lib/hachoir_parser/archive/sevenzip.py b/lib/hachoir_parser/archive/sevenzip.py index 7a0148f5..a64cac9a 100644 --- a/lib/hachoir_parser/archive/sevenzip.py +++ b/lib/hachoir_parser/archive/sevenzip.py @@ -7,15 +7,27 @@ Informations: Author: Olivier SCHWAB Creation date: 6 december 2006 + +Updated by: Robert Xiao +Date: February 26 2011 """ from hachoir_parser import Parser from hachoir_core.field import (Field, FieldSet, ParserError, - GenericVector, - Enum, UInt8, UInt32, UInt64, - Bytes, RawBytes) + CompressedField, CString, + Enum, Bit, Bits, UInt8, UInt32, UInt64, + Bytes, RawBytes, TimestampWin64) +from hachoir_core.stream import StringInputStream from hachoir_core.endian import LITTLE_ENDIAN from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.tools import createDict, alignValue +from hachoir_parser.common.msdos import MSDOSFileAttr32 + +try: + from pylzma import decompress as lzmadecompress + has_lzma = True +except ImportError: + has_lzma = False class SZUInt64(Field): """ @@ -38,167 +50,258 @@ class SZUInt64(Field): self._size += 8 self.createValue = lambda: value -ID_END, ID_HEADER, ID_ARCHIVE_PROPS, ID_ADD_STREAM_INFO, ID_MAIN_STREAM_INFO, \ -ID_FILES_INFO, ID_PACK_INFO, ID_UNPACK_INFO, ID_SUBSTREAMS_INFO, ID_SIZE, \ -ID_CRC, ID_FOLDER, ID_CODERS_UNPACK_SIZE, ID_NUM_UNPACK_STREAMS, \ 
-ID_EMPTY_STREAM, ID_EMPTY_FILE, ID_ANTI, ID_NAME, ID_CREATION_TIME, \ -ID_LAST_ACCESS_TIME, ID_LAST_WRITE_TIME, ID_WIN_ATTR, ID_COMMENT, \ -ID_ENCODED_HEADER = xrange(24) +PROP_INFO = { + 0x00: ('kEnd', 'End-of-header marker'), -ID_INFO = { - ID_END : "End", - ID_HEADER : "Header embedding another one", - ID_ARCHIVE_PROPS : "Archive Properties", - ID_ADD_STREAM_INFO : "Additional Streams Info", - ID_MAIN_STREAM_INFO : "Main Streams Info", - ID_FILES_INFO : "Files Info", - ID_PACK_INFO : "Pack Info", - ID_UNPACK_INFO : "Unpack Info", - ID_SUBSTREAMS_INFO : "Substreams Info", - ID_SIZE : "Size", - ID_CRC : "CRC", - ID_FOLDER : "Folder", - ID_CODERS_UNPACK_SIZE: "Coders Unpacked size", - ID_NUM_UNPACK_STREAMS: "Number of Unpacked Streams", - ID_EMPTY_STREAM : "Empty Stream", - ID_EMPTY_FILE : "Empty File", - ID_ANTI : "Anti", - ID_NAME : "Name", - ID_CREATION_TIME : "Creation Time", - ID_LAST_ACCESS_TIME : "Last Access Time", - ID_LAST_WRITE_TIME : "Last Write Time", - ID_WIN_ATTR : "Win Attributes", - ID_COMMENT : "Comment", - ID_ENCODED_HEADER : "Header holding encoded data info", + 0x01: ('kHeader', 'Archive header'), + + 0x02: ('kArchiveProperties', 'Archive properties'), + + 0x03: ('kAdditionalStreamsInfo', 'AdditionalStreamsInfo'), + 0x04: ('kMainStreamsInfo', 'MainStreamsInfo'), + 0x05: ('kFilesInfo', 'FilesInfo'), + + 0x06: ('kPackInfo', 'PackInfo'), + 0x07: ('kUnPackInfo', 'UnPackInfo'), + 0x08: ('kSubStreamsInfo', 'SubStreamsInfo'), + + 0x09: ('kSize', 'Size'), + 0x0A: ('kCRC', 'CRC'), + + 0x0B: ('kFolder', 'Folder'), + + 0x0C: ('kCodersUnPackSize', 'CodersUnPackSize'), + 0x0D: ('kNumUnPackStream', 'NumUnPackStream'), + + 0x0E: ('kEmptyStream', 'EmptyStream'), + 0x0F: ('kEmptyFile', 'EmptyFile'), + 0x10: ('kAnti', 'Anti'), + + 0x11: ('kName', 'Name'), + 0x12: ('kCreationTime', 'CreationTime'), + 0x13: ('kLastAccessTime', 'LastAccessTime'), + 0x14: ('kLastWriteTime', 'LastWriteTime'), + 0x15: ('kWinAttributes', 'WinAttributes'), + 0x16: ('kComment', 
class ArchiveProperty(FieldSet):
    """
    One archive property: a property ID, a size, then "size" bytes of
    opaque data.
    """
    def createFields(self):
        yield PropID(self, "id")
        size = SZUInt64(self, "size")
        yield size
        # A property may legitimately carry no payload; a zero-length
        # RawBytes field is not valid, so only emit the data field when
        # there are bytes to cover.
        if size.value > 0:
            yield RawBytes(self, "data", size.value)

    def createDescription(self):
        return self['id'].display
class Digests(FieldSet):
    """
    Digest table: a property ID, a bit vector telling which items have a
    digest, then one 32-bit digest per flagged item.

    num_digests is the number of items covered by the bit vector.
    digest_desc optionally gives a label per item, used in the description
    of each digest field; by default items are labelled "stream <index>".
    """
    def __init__(self, parent, name, num_digests, digest_desc=None, desc=None):
        FieldSet.__init__(self, parent, name, desc)
        self.num_digests = num_digests
        labels = digest_desc
        if labels is None:
            labels = []
            for stream_index in xrange(num_digests):
                labels.append('stream %d' % stream_index)
        self.digest_desc = labels

    def createFields(self):
        yield PropID(self, "id")
        defined = SevenZipBitVector(self, "defined", self.num_digests, has_all_byte=True)
        yield defined
        # the vector's value is the list of item indexes that have a digest
        for item_index in defined.value:
            description = "Digest for %s" % self.digest_desc[item_index]
            digest = UInt32(self, "digest[]", description)
            yield textHandler(digest, hexadecimal)
# Coder method IDs (raw ID bytes) mapped to a human readable name.
# NOTE: "\8" and "\9" are not escape sequences in Python (octal digits stop
# at 7) and would produce a literal backslash followed by the digit, so the
# bytes 0x08 and 0x09 are spelled "\x08" and "\x09" here.
METHODS = {
    "\0": "Copy",
    "\3": "Delta",
    "\4": "x86_BCJ",
    "\5": "PowerPC",
    "\6": "IA64",
    "\7": "ARM_LE",
    "\x08": "ARMT_LE", # thumb
    "\x09": "SPARC",
    "\x21": "LZMA2",
    "\2\3\2": "Common-Swap-2",
    "\2\3\4": "Common-Swap-4",
    "\3\1\1": "7z-LZMA",
    "\3\3\1\3": "7z-Branch-x86-BCJ",
    "\3\3\1\x1b": "7z-Branch-x86-BCJ2",
    "\3\3\2\5": "7z-Branch-PowerPC-BE",
    "\3\3\3\1": "7z-Branch-Alpha-LE",
    "\3\3\4\1": "7z-Branch-IA64-LE",
    "\3\3\5\1": "7z-Branch-ARM-LE",
    "\3\3\6\5": "7z-Branch-M68-BE",
    "\3\3\7\1": "7z-Branch-ARMT-LE",
    "\3\3\x08\5": "7z-Branch-SPARC-BE",
    "\3\4\1": "7z-PPMD",
    "\3\x7f\1": "7z-Experimental",
    "\4\0": "Reserved",
    "\4\1\0": "Zip-Copy",
    "\4\1\1": "Zip-Shrink",
    "\4\1\6": "Zip-Implode",
    "\4\1\x08": "Zip-Deflate",
    "\4\1\x09": "Zip-Deflate64",
    "\4\1\x10": "Zip-BZip2",
    "\4\1\x14": "Zip-LZMA",
    "\4\1\x60": "Zip-JPEG",
    "\4\1\x61": "Zip-WavPack",
    "\4\1\x62": "Zip-PPMD",
    "\4\1\x63": "Zip-wzAES",
    "\4\2\2": "BZip2",
    "\4\3\1": "RAR-15",
    "\4\3\2": "RAR-20",
    "\4\3\3": "RAR-29",
    "\4\4\1": "Arj3",
    "\4\4\2": "Arj4",
    "\4\5": "Z",
    "\4\6": "LZH",
    "\4\7": "7z-Reserved",
    "\4\x08": "CAB",
    # the two NSIS methods need distinct IDs, otherwise the second entry
    # silently replaces the first one in the dictionary
    "\4\x09\1": "NSIS-Deflate",
    "\4\x09\2": "NSIS-BZip2",
    "\6\0": "Crypto-Reserved",
    "\6\1\x00": "Crypto-AES128-ECB",
    "\6\1\x01": "Crypto-AES128-CBC",
    "\6\1\x02": "Crypto-AES128-CFB",
    "\6\1\x03": "Crypto-AES128-OFB",
    "\6\1\x40": "Crypto-AES192-ECB",
    "\6\1\x41": "Crypto-AES192-CBC",
    "\6\1\x42": "Crypto-AES192-CFB",
    "\6\1\x43": "Crypto-AES192-OFB",
    "\6\1\x80": "Crypto-AES256-ECB",
    "\6\1\x81": "Crypto-AES256-CBC",
    "\6\1\x82": "Crypto-AES256-CFB",
    "\6\1\x83": "Crypto-AES256-OFB",
    "\6\1\xc0": "Crypto-AES-ECB",
    "\6\1\xc1": "Crypto-AES-CBC",
    "\6\1\xc2": "Crypto-AES-CFB",
    "\6\1\xc3": "Crypto-AES-OFB",
    "\6\7": "Crypto-Reserved",
    "\6\x0f": "Crypto-Reserved",
    "\6\xf0": "Crypto-Misc",
    "\6\xf1\1\1": "Crypto-Zip",
    "\6\xf1\3\2": "Crypto-RAR-Unknown",
    "\6\xf1\3\3": "Crypto-RAR-29", # AES128
    "\6\xf1\7\1": "Crypto-7z", # AES256
    "\7\0": "Hash-None",
    "\7\1": "Hash-CRC",
    "\7\2": "Hash-SHA1",
    "\7\3": "Hash-SHA256",
    "\7\4": "Hash-SHA384",
    "\7\5": "Hash-SHA512",
    "\7\xf0": "Hash-Misc",
    "\7\xf1\3\3": "Hash-RAR-29", # modified SHA1
    "\7\xf1\7\1": "Hash-7z", # SHA256
}
class Coder(FieldSet):
    """
    One coder (compression/filter/encryption method) of a folder.

    Starts with a flag byte: a 4-bit method ID size followed by four
    single-bit flags. It is followed by the method ID, the in/out stream
    counts when the coder is not "simple", and the coder properties when
    attributes are present.
    """
    def createFields(self):
        yield Bits(self, "id_size", 4)
        yield Bit(self, "is_not_simple", "If unset, stream setup is simple")
        yield Bit(self, "has_attribs", "Are there compression properties attached?")
        yield Bit(self, "unused[]")
        yield Bit(self, "is_not_last_method", "Are there more methods after this one in the alternative method list?")
        id_size = self['id_size'].value
        if id_size > 0:
            yield Enum(RawBytes(self, "id", id_size), METHODS)
        if self['is_not_simple'].value:
            yield SZUInt64(self, "num_stream_in")
            yield SZUInt64(self, "num_stream_out")
            self.info("Streams: IN=%u OUT=%u" % \
                (self["num_stream_in"].value, self["num_stream_out"].value))
        if self['has_attribs'].value:
            props_size = SZUInt64(self, "properties_size")
            yield props_size
            # The properties block may be declared with a size of zero; a
            # zero-length RawBytes field is not valid, so skip it then.
            if props_size.value > 0:
                yield RawBytes(self, "properties", props_size.value)

    def _get_num_streams(self, direction):
        """Return the number of 'in' or 'out' streams; simple coders have one of each."""
        if self['is_not_simple'].value:
            return self['num_stream_%s'%direction].value
        return 1
    in_streams = property(lambda self: self._get_num_streams('in'))
    out_streams = property(lambda self: self._get_num_streams('out'))
class Folder(FieldSet):
    """
    A folder: a list of coders plus the bind pairs and packed stream
    indexes derived from the coders' stream counts.

    The in_streams and out_streams properties give the stream totals
    summed over the first coder of every coder list; reading them forces
    the folder to be parsed.
    """
    def createFields(self):
        yield SZUInt64(self, "num_coders")
        coder_count = self["num_coders"].value
        self.info("Folder: %u codecs" % coder_count)

        # Coder info: accumulate the stream counts of each list's first coder
        total_in = 0
        total_out = 0
        for unused_index in xrange(coder_count):
            alternatives = CoderList(self, "coders[]")
            yield alternatives
            primary = alternatives['coder[0]']
            total_in += primary.in_streams
            total_out += primary.out_streams
        self._in_streams = total_in
        self._out_streams = total_out

        # Bind pairs: one fewer than the number of output streams
        self.info("out streams: %u" % total_out)
        for unused_index in xrange(total_out - 1):
            yield BindPairInfo(self, "bind_pair[]")

        # Packed streams
        # @todo: Actually find mapping
        packed_count = total_in - total_out + 1
        if packed_count <= 1:
            # a single packed stream is implicit and has no index stored
            return
        for unused_index in xrange(packed_count):
            yield SZUInt64(self, "pack_stream[]")

    def _get_num_streams(self, direction):
        # iterate over all fields so that createFields() has run and the
        # _in_streams / _out_streams totals are set
        list(self)
        return getattr(self, '_%s_streams' % direction)

    @property
    def in_streams(self):
        return self._get_num_streams('in')

    @property
    def out_streams(self):
        return self._get_num_streams('out')
self.stream.readBits(addr, 8, LITTLE_ENDIAN) - if uid == ID_END: - yield Enum(UInt8(self, "end_marker"), ID_INFO) + uid = ReadNextByte(self) + if uid == kEnd: + yield PropID(self, "end_marker") break - elif uid == ID_CRC: - yield HashDigest(self, "hash_digest", num) + elif uid == kCRC: + yield Digests(self, "digests", num) else: - yield SkippedData(self, "skip_data") + raise ParserError("Unexpected ID (%i)" % uid) class SubStreamInfo(FieldSet): def createFields(self): - yield Enum(UInt8(self, "id"), ID_INFO) - raise ParserError("SubStreamInfo not implemented yet") - -class EncodedHeader(FieldSet): - def createFields(self): - yield Enum(UInt8(self, "id"), ID_INFO) + yield PropID(self, "id") + num_folders = self['../unpack_info/num_folders'].value + num_unpackstreams = [1]*num_folders while not self.eof: - addr = self.absolute_address+self.current_size - uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN) - if uid == ID_END: - yield Enum(UInt8(self, "end_marker"), ID_INFO) + uid = ReadNextByte(self) + if uid == kEnd: + yield PropID(self, "end_marker") break - elif uid == ID_PACK_INFO: - yield PackInfo(self, "pack_info", ID_INFO[ID_PACK_INFO]) - elif uid == ID_UNPACK_INFO: - yield UnpackInfo(self, "unpack_info", ID_INFO[ID_UNPACK_INFO]) - elif uid == ID_SUBSTREAMS_INFO: - yield SubStreamInfo(self, "substreams_info", ID_INFO[ID_SUBSTREAMS_INFO]) + elif uid == kNumUnPackStream: + yield PropID(self, "num_unpackstream_marker") + for i in xrange(num_folders): + field = SZUInt64(self, "num_unpackstreams[]") + yield field + num_unpackstreams[i] = field.value + elif uid == kSize: + yield PropID(self, "size_marker") + for i in xrange(num_folders): + # The last substream's size is the stream size minus the other substreams. 
+ for j in xrange(num_unpackstreams[i]-1): + yield SZUInt64(self, "unpack_size[%d][%d]"%(i,j)) + elif uid == kCRC: + digests = [] + for i in xrange(num_folders): + if num_unpackstreams[i] == 1 and 'digests' in self['../unpack_info']: + continue + for j in xrange(num_unpackstreams[i]): + digests.append('folder %i, stream %i'%(i, j)) + yield Digests(self, "digests", len(digests), digests) else: - self.info("Unexpected ID (%i)" % uid) - break + raise ParserError("Unexpected ID (%i)" % uid) -class IDHeader(FieldSet): +class StreamsInfo(FieldSet): def createFields(self): - yield Enum(UInt8(self, "id"), ID_INFO) - ParserError("IDHeader not implemented") + yield PropID(self, "id") + while not self.eof: + uid = ReadNextByte(self) + if uid == kEnd: + yield PropID(self, "end") + break + elif uid == kPackInfo: + yield PackInfo(self, "pack_info", PROP_DESC[uid]) + elif uid == kUnPackInfo: + yield UnpackInfo(self, "unpack_info", PROP_DESC[uid]) + elif uid == kSubStreamsInfo: + yield SubStreamInfo(self, "substreams_info", PROP_DESC[uid]) + else: + raise ParserError("Unexpected ID (%i)" % uid) + +class EncodedHeader(StreamsInfo): + pass + +class EmptyStreamProperty(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "size") + yield SevenZipBitVector(self, "vec", self['../num_files'].value) + def createValue(self): + return self['vec'].value + def createDisplay(self): + return self['vec'].display + +class EmptyFileProperty(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "size") + empty_streams = self['../empty_streams/vec'].value + yield SevenZipBitVector(self, "vec", len(empty_streams)) + def createValue(self): + empty_streams = self['../empty_streams/vec'].value + return [empty_streams[i] for i in self['vec'].value] + def createDisplay(self): + return ','.join(str(i) for i in self.value) + +class FileTimeProperty(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "size") 
+ definearr = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True) + yield definearr + yield UInt8(self, "is_external") + if self['is_external'].value: + yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream") + else: + for index in definearr.value: + yield TimestampWin64(self, "timestamp[%d]"%index) + +class FileNames(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "size") + yield UInt8(self, "is_external") + if self['is_external'].value: + yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream") + else: + for index in xrange(self['../num_files'].value): + yield CString(self, "name[%d]"%index, charset="UTF-16-LE") + +class FileAttributes(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "size") + definearr = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True) + yield definearr + yield UInt8(self, "is_external") + if self['is_external'].value: + yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream") + else: + for index in definearr.value: + yield MSDOSFileAttr32(self, "attributes[%d]"%index) + +class FilesInfo(FieldSet): + def createFields(self): + yield PropID(self, "id") + yield SZUInt64(self, "num_files") + while not self.eof: + uid = ReadNextByte(self) + if uid == kEnd: + yield PropID(self, "end_marker") + break + elif uid == kEmptyStream: + yield EmptyStreamProperty(self, "empty_streams") + elif uid == kEmptyFile: + yield EmptyFileProperty(self, "empty_files") + elif uid == kAnti: + yield EmptyFileProperty(self, "anti_files") + elif uid == kCreationTime: + yield FileTimeProperty(self, "creation_time") + elif uid == kLastAccessTime: + yield FileTimeProperty(self, "access_time") + elif uid == kLastWriteTime: + yield FileTimeProperty(self, "modified_time") + elif uid == kName: + yield FileNames(self, "filenames") + elif uid == 
kWinAttributes: + yield FileAttributes(self, "attributes") + else: + yield ArchiveProperty(self, "prop[]") + +class Header(FieldSet): + def createFields(self): + yield PropID(self, "id") + while not self.eof: + uid = ReadNextByte(self) + if uid == kEnd: + yield PropID(self, "end") + break + elif uid == kArchiveProperties: + yield ArchiveProperties(self, "props", PROP_DESC[uid]) + elif uid == kAdditionalStreamsInfo: + yield StreamsInfo(self, "additional_streams", PROP_DESC[uid]) + elif uid == kMainStreamsInfo: + yield StreamsInfo(self, "main_streams", PROP_DESC[uid]) + elif uid == kFilesInfo: + yield FilesInfo(self, "files_info", PROP_DESC[uid]) + else: + raise ParserError("Unexpected ID %u" % uid) class NextHeader(FieldSet): def __init__(self, parent, name, desc="Next header"): FieldSet.__init__(self, parent, name, desc) self._size = 8*self["/signature/start_hdr/next_hdr_size"].value - # Less work, as much interpretable information as the other - # version... what an obnoxious format - def createFields2(self): - yield Enum(UInt8(self, "header_type"), ID_INFO) - yield RawBytes(self, "header_data", self._size-1) def createFields(self): - uid = self.stream.readBits(self.absolute_address, 8, LITTLE_ENDIAN) - if uid == ID_HEADER: - yield IDHeader(self, "header", ID_INFO[ID_HEADER]) - elif uid == ID_ENCODED_HEADER: - yield EncodedHeader(self, "encoded_hdr", ID_INFO[ID_ENCODED_HEADER]) - # Game Over: this is usually encoded using LZMA, not copy - # See SzReadAndDecodePackedStreams/SzDecode being called with the - # data position from "/next_hdr/encoded_hdr/pack_info/pack_pos" - # We should process further, yet we can't... 
+ uid = ReadNextByte(self) + if uid == kHeader: + yield Header(self, "header", PROP_DESC[uid]) + elif uid == kEncodedHeader: + yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid]) else: - ParserError("Unexpected ID %u" % uid) - size = self._size - self.current_size - if size > 0: - yield RawBytes(self, "next_hdr_data", size//8, "Next header's data") + raise ParserError("Unexpected ID %u" % uid) +class NextHeaderParser(Parser): + PARSER_TAGS = { + } + endian = LITTLE_ENDIAN + + def createFields(self): + uid = ReadNextByte(self) + if uid == kHeader: + yield Header(self, "header", PROP_DESC[uid]) + elif uid == kEncodedHeader: + yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid]) + else: + raise ParserError("Unexpected ID %u" % uid) + + def validate(self): + return True + +class CompressedData(Bytes): + def __init__(self, parent, name, length, decompressor, description=None, + parser=None, filename=None, mime_type=None, parser_class=None): + if filename: + if not isinstance(filename, unicode): + filename = makePrintable(filename, "ISO-8859-1") + if not description: + description = 'File "%s" (%s)' % (filename, humanFilesize(length)) + Bytes.__init__(self, parent, name, length, description) + self.setupInputStream(decompressor, parser, filename, mime_type, parser_class) + + def setupInputStream(self, decompressor, parser, filename, mime_type, parser_class): + def createInputStream(cis, **args): + tags = args.setdefault("tags",[]) + if parser_class: + tags.append(( "class", parser_class )) + if parser is not None: + tags.append(( "id", parser.PARSER_TAGS["id"] )) + if mime_type: + tags.append(( "mime", mime_type )) + if filename: + tags.append(( "filename", filename )) + print args + return StringInputStream(decompressor(self.value), **args) + self.setSubIStream(createInputStream) + +def get_header_decompressor(self): + unpack_info = self['/next_hdr/encoded_hdr/unpack_info'] + assert unpack_info['num_folders'].value == 1 + coder = 
unpack_info['folder[0]/coders[0]/coder[0]'] + method = METHODS[coder['id'].value] + if method == 'Copy': + return lambda data: data + elif method == '7z-LZMA': + props = coder['properties'].value + length = unpack_info['unpack_size[0][0]'].value + return lambda data: lzmadecompress(props+data, maxlength=length) + +def get_header_field(self, name, size, description=None): + decompressor = get_header_decompressor(self) + if decompressor is None: + return RawBytes(self, name, size, description=description) + return CompressedData(self, name, size, decompressor, description=description, parser_class=NextHeaderParser) + class Body(FieldSet): def __init__(self, parent, name, desc="Body data"): FieldSet.__init__(self, parent, name, desc) self._size = 8*self["/signature/start_hdr/next_hdr_offset"].value def createFields(self): - if "encoded_hdr" in self["/next_hdr/"]: + if "encoded_hdr" in self["/next_hdr"]: pack_size = sum([s.value for s in self.array("/next_hdr/encoded_hdr/pack_info/pack_size")]) body_size = self["/next_hdr/encoded_hdr/pack_info/pack_pos"].value - yield RawBytes(self, "compressed_data", body_size, "Compressed data") + if body_size: + yield RawBytes(self, "compressed_data", body_size, "Compressed data") # Here we could check if copy method was used to "compress" it, # but this never happens, so just output "compressed file info" - yield RawBytes(self, "compressed_file_info", pack_size, + yield get_header_field(self, "compressed_file_info", pack_size, "Compressed file information") size = (self._size//8) - pack_size - body_size if size > 0: @@ -372,13 +656,14 @@ class SignatureHeader(FieldSet): yield StartHeader(self, "start_hdr", "Start header") class SevenZipParser(Parser): + MAGIC = "7z\xbc\xaf\x27\x1c" PARSER_TAGS = { "id": "7zip", "category": "archive", "file_ext": ("7z",), "mime": (u"application/x-7z-compressed",), "min_size": 32*8, - "magic": (("7z\xbc\xaf\x27\x1c", 0),), + "magic": ((MAGIC, 0),), "description": "Compressed archive in 7z format" } 
endian = LITTLE_ENDIAN @@ -389,13 +674,12 @@ class SevenZipParser(Parser): yield NextHeader(self, "next_hdr") def validate(self): - if self.stream.readBytes(0,6) != "7z\xbc\xaf'\x1c": + if self.stream.readBytes(0,len(self.MAGIC)) != self.MAGIC: return "Invalid signature" return True def createContentSize(self): - size = self["/signature/start_hdr/next_hdr_offset"].value - size += self["/signature/start_hdr/next_hdr_size"].value - size += 12 # Signature size - size += 20 # Start header size - return size*8 + size = self["/signature/start_hdr/next_hdr_offset"].value*8 + size += self["/signature/start_hdr/next_hdr_size"].value*8 + size += SignatureHeader.static_size + return size diff --git a/lib/hachoir_parser/archive/zip.py b/lib/hachoir_parser/archive/zip.py index 8271ac93..7d256d6e 100644 --- a/lib/hachoir_parser/archive/zip.py +++ b/lib/hachoir_parser/archive/zip.py @@ -329,6 +329,9 @@ class ZipFile(Parser): u"application/x-jar": "jar", u"application/java-archive": "jar", + # Android APK + u"application/vnd.android.package-archive": "apk", + # OpenOffice 1.0 u"application/vnd.sun.xml.calc": "sxc", u"application/vnd.sun.xml.draw": "sxd", diff --git a/lib/hachoir_parser/audio/__init__.py b/lib/hachoir_parser/audio/__init__.py index 1cc33a23..a0b48c11 100644 --- a/lib/hachoir_parser/audio/__init__.py +++ b/lib/hachoir_parser/audio/__init__.py @@ -1,6 +1,7 @@ from hachoir_parser.audio.aiff import AiffFile from hachoir_parser.audio.au import AuFile from hachoir_parser.audio.itunesdb import ITunesDBFile +from hachoir_parser.audio.ipod_playcounts import PlayCountFile from hachoir_parser.audio.midi import MidiFile from hachoir_parser.audio.mpeg_audio import MpegAudioFile from hachoir_parser.audio.real_audio import RealAudioFile diff --git a/lib/hachoir_parser/audio/ipod_playcounts.py b/lib/hachoir_parser/audio/ipod_playcounts.py new file mode 100644 index 00000000..c7e6919c --- /dev/null +++ b/lib/hachoir_parser/audio/ipod_playcounts.py @@ -0,0 +1,60 @@ +""" +iPod Play 
Count parser. + +Documentation: +- http://ipl.derpapst.org/wiki/ITunesDB/Play_Counts_File + (formerly known as http://ipodlinux.org) + +Author: m42i +Creation date: 01 March 2014 +""" + +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, + UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32, + String, Float32, NullBytes, Enum, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanDuration +from hachoir_core.text_handler import displayHandler, filesizeHandler + +class PlayCountFile(Parser): + PARSER_TAGS = { + "id": "playcounts", + "category": "audio", + "min_size": 44*8, + "magic": (('mhdp',0),), + "description": "iPod Play Counts file" + } + + endian = LITTLE_ENDIAN + + def validate(self): + return self.stream.readBytes(0, 4) == 'mhdp' + + def createFields(self): + yield String(self, "header_id", 4, "Play Count Header Markup (\"mhdp\")", charset="ISO-8859-1") + yield UInt32(self, "header_length", "Header Length") + yield UInt32(self, "entry_length", "Single Entry Length") + yield UInt32(self, "entry_number", "Number of Songs on iPod") + padding = self.seekByte(self["header_length"].value, "header padding") + if padding: + yield padding + + for i in xrange(self["entry_number"].value): + yield PlayCountEntry(self, "track[]") + + +class PlayCountEntry(FieldSet): + def __init__(self, *args, **kw): + FieldSet.__init__(self, *args, **kw) + self._size = 28*8 + + def createFields(self): + yield UInt32(self, "play_count", "Playcount since last sync") + yield TimestampMac32(self, "last_played", "Time of the last play of the track") + yield UInt32(self, "audio_bookmark", "Last position in milliseconds") + yield UInt32(self, "rating", "Rating in steps of 20 up to 100") + yield UInt32(self, "unknown", "unknown") + yield UInt32(self, "skip_count", "Number of skips since last sync") + yield TimestampMac32(self, "last_skipped", "Time of the last skip") + diff --git a/lib/hachoir_parser/audio/itunesdb.py 
b/lib/hachoir_parser/audio/itunesdb.py index a70d9cb0..9390cbcd 100644 --- a/lib/hachoir_parser/audio/itunesdb.py +++ b/lib/hachoir_parser/audio/itunesdb.py @@ -2,7 +2,7 @@ iPod iTunesDB parser. Documentation: -- http://ipodlinux.org/ITunesDB +- http://ipl.derpapst.org/wiki/ITunesDB/iTunesDB_File Author: Romain HERAULT Creation date: 19 august 2006 @@ -71,13 +71,27 @@ class DataObject(FieldSet): 19:"Show (for TV Shows only)", 20:"Episode", 21:"TV Network", + 22:"Album-Artist", + 23:"Artist for Sorting", + 24:"List of keywords pretaining track", + 25:"Locale for TV show(?)", + 27:"Title for Sorting", + 28:"Album for Sorting", + 29:"Album-Artist for Sorting", + 30:"Composer for Sorting", + 31:"Show for Sorting", + # 32:"Unknown binary field for video tracks", 50:"Smart Playlist Data", 51:"Smart Playlist Rules", 52:"Library Playlist Index", - 100:"Column info", + 53:"Library Playlist Index letter in jump table", + 100:"Ccolumn Sizing Info as well as an order indicator in playlists.", + 102:"For iPhone", 200:"Album name (for album descriptions)", 201:"Album artist (for album descriptions)", - 202:"Album sort artist (for album descriptions)" + 202:"Album sort artist (for album descriptions)", + 203:"Podcast URL in Album List", + 204:"TV Show in Album List" } mhod52_sort_index_type_name={ @@ -97,15 +111,7 @@ class DataObject(FieldSet): yield UInt32(self, "header_length", "Header Length") yield UInt32(self, "entry_length", "Entry Length") yield Enum(UInt32(self, "type", "type"),self.type_name) - if(self["type"].value<15) or (self["type"].value >= 200): - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "position", "Position") - yield UInt32(self, "length", "String Length in bytes") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE") - elif (self["type"].value<17): + if (self["type"].value == 15) or (self["type"].value == 16): yield 
UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") yield String(self, "string", self._size/8-self["header_length"].value, "String Data", charset="UTF-8") @@ -121,6 +127,14 @@ class DataObject(FieldSet): yield padding for i in xrange(self["entry_count"].value): yield UInt32(self, "index["+str(i)+"]", "Index of the "+str(i)+"nth mhit") + elif(self["type"].value<15) or (self["type"].value>17) or (self["type"].value >= 200): + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "position", "Position") + yield UInt32(self, "length", "String Length in bytes") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE") else: padding = self.seekByte(self["header_length"].value, "header padding") if padding: @@ -178,8 +192,8 @@ class TrackItem(FieldSet): yield UInt32(self, "stop_time", "Stop playing at, in milliseconds") yield UInt32(self, "soundcheck", "SoundCheck preamp") yield UInt32(self, "playcount_1", "Play count of the track") - yield UInt32(self, "playcount_2", "Play count of the track (identical to playcount_1)") - yield UInt32(self, "last_played_time", "Time the song was last played") + yield UInt32(self, "playcount_2", "Play count of the track when last synced") + yield TimestampMac32(self, "last_played_time", "Time the song was last played") yield UInt32(self, "disc_number", "disc number in multi disc sets") yield UInt32(self, "total_discs", "Total number of discs in the disc set") yield UInt32(self, "userid", "User ID in the DRM scheme") diff --git a/lib/hachoir_parser/game/__init__.py b/lib/hachoir_parser/game/__init__.py index 1b6447b9..f43cf2bb 100644 --- a/lib/hachoir_parser/game/__init__.py +++ b/lib/hachoir_parser/game/__init__.py @@ -1,4 +1,5 @@ from hachoir_parser.game.zsnes import ZSNESFile from hachoir_parser.game.spider_man_video import SpiderManVideoFile from hachoir_parser.game.laf import LafFile -from 
hachoir_parser.game.blp import BLP1File, BLP2File \ No newline at end of file +from hachoir_parser.game.blp import BLP1File, BLP2File +from hachoir_parser.game.uasset import UAssetFile diff --git a/lib/hachoir_parser/game/uasset.py b/lib/hachoir_parser/game/uasset.py new file mode 100644 index 00000000..80cb0f6c --- /dev/null +++ b/lib/hachoir_parser/game/uasset.py @@ -0,0 +1,199 @@ +""" +Unreal 4 .uasset file parser + +Author: Robert Xiao +Creation date: 2015-01-17 +""" + +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, SeekableFieldSet, Int32, UInt32, + String, PascalString32, PaddingBytes, Bytes, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN + +class StringTable(FieldSet): + def __init__(self, parent, name, count, *args): + FieldSet.__init__(self, parent, name, *args) + self.count = count + + def createFields(self): + for i in xrange(self.count): + yield PascalString32(self, "string[]", strip='\0') + +def getObject(self, val): + if val == 0: + return None + elif val < 0: + return self['/header/refs/ref[%d]' % (-val-1)] + else: + return self['/header/assets/asset[%d]' % (val-1)] + + +class AssetHeader(FieldSet): + def createFields(self): + yield Int32(self, "type1") + yield Int32(self, "type2") + yield Int32(self, "parent") # 0 = no parent + yield Int32(self, "name_index") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "size") + yield Int32(self, "offset") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + + @property + def typeName(self): + return getObject(self, self["type1"].value).objectName + + @property + def objectName(self): + name_index = self['name_index'].value + return self['/header/strings/string[%d]' % name_index].value + + @property + def 
fullObjectName(self): + name = self.objectName + if self['parent'].value: + name = '%s.%s' % (getObject(self, self['parent'].value).fullObjectName, name) + return name + + def createValue(self): + return '' % ( + self.fullObjectName, self.typeName, self['size'].value) + + def createDescription(self): + return str([t.value for t in self.array('unk')]) + +class AssetTable(FieldSet): + def __init__(self, parent, name, count, *args): + FieldSet.__init__(self, parent, name, *args) + self.count = count + + def createFields(self): + for i in xrange(self.count): + yield AssetHeader(self, "asset[]") + +class ReferenceHeader(FieldSet): + def createFields(self): + yield Int32(self, "unk[]") + yield Int32(self, "unk[]") + yield Int32(self, "type_index") + yield Int32(self, "unk[]") + yield Int32(self, "parent") + yield Int32(self, "name_index") + yield Int32(self, "unk[]") + + @property + def typeName(self): + type_index = self['type_index'].value + return self['/header/strings/string[%d]' % type_index].value + + @property + def objectName(self): + name_index = self['name_index'].value + return self['/header/strings/string[%d]' % name_index].value + + @property + def fullObjectName(self): + name = self.objectName + if self['parent'].value: + name = '[%s].%s' % (getObject(self, self['parent'].value).fullObjectName, name) + return name + + def createValue(self): + return '' % (self.fullObjectName, self.typeName) + + def createDescription(self): + return str([t.value for t in self.array('unk')]) + +class ReferenceTable(FieldSet): + def __init__(self, parent, name, count, *args): + FieldSet.__init__(self, parent, name, *args) + self.count = count + + def createFields(self): + for i in xrange(self.count): + yield ReferenceHeader(self, "ref[]") + + + +class UAssetHeader(SeekableFieldSet): + def __init__(self, *args): + SeekableFieldSet.__init__(self, *args) + self._size = self["header_size"].value * 8 + + def createFields(self): + yield UInt32(self, "magic") + yield Int32(self, 
"version") + yield RawBytes(self, "unk[]", 16) + yield UInt32(self, "header_size") + yield PascalString32(self, "none", strip='\0') + yield RawBytes(self, "unk[]", 4) + + yield UInt32(self, "num_strings", "Number of strings in the header") + yield UInt32(self, "offset_strings", "Offset to string table within the header") + yield UInt32(self, "num_assets", "Number of assets described in the header") + yield UInt32(self, "offset_assets", "Offset to asset table within the header") + yield UInt32(self, "num_refs", "Number of references? described in the header") + yield UInt32(self, "offset_refs", "Offset to reference table within the header") + + yield UInt32(self, "offset_unk[]", "Offset to something") + yield UInt32(self, "unk[]") + yield UInt32(self, "offset_unk[]", "Offset to some other thing") + yield UInt32(self, "unk[]") + + yield RawBytes(self, "signature", 16, "Some kind of hash") + + yield UInt32(self, "unk[]") + yield UInt32(self, "num_assets2", "num_assets again") + assert self['num_assets'].value == self['num_assets2'].value + yield UInt32(self, "num_strings2", "num_strings again") + assert self['num_strings'].value == self['num_strings2'].value + yield RawBytes(self, "unk[]", 34) + yield UInt32(self, "unk[]") + yield UInt32(self, "size_unk", "Size of something") + yield RawBytes(self, "unk[]", 12) + + self.seekByte(self["offset_strings"].value) + yield StringTable(self, "strings", self["num_strings"].value) + + self.seekByte(self["offset_assets"].value) + yield AssetTable(self, "assets", self["num_assets"].value) + + self.seekByte(self["offset_refs"].value) + yield ReferenceTable(self, "refs", self["num_refs"].value) + +class Asset(FieldSet): + def createFields(self): + yield UInt32(self, "type") + +class UAssetFile(Parser): + MAGIC = "\xc1\x83\x2a\x9e" + PARSER_TAGS = { + "id": "uasset", + "category": "game", + "description": "Unreal .uasset file", + "min_size": 32, + "file_ext": (".uasset",), + "magic": ((MAGIC, 0),), + } + endian = LITTLE_ENDIAN + + 
def validate(self): + temp = self.stream.readBytes(0, 4) + if temp != self.MAGIC: + return "Wrong header" + return True + + def createFields(self): + yield UAssetHeader(self, "header") + for asset in self['/header/assets'].array('asset'): + self.seekByte(asset['offset'].value) + yield RawBytes(self, "asset[]", asset['size'].value, description="Data for asset %s" % asset.fullObjectName) diff --git a/lib/hachoir_parser/misc/__init__.py b/lib/hachoir_parser/misc/__init__.py index f1392015..3e796976 100644 --- a/lib/hachoir_parser/misc/__init__.py +++ b/lib/hachoir_parser/misc/__init__.py @@ -15,4 +15,5 @@ from hachoir_parser.misc.dsstore import DSStore from hachoir_parser.misc.word_doc import WordDocumentParser from hachoir_parser.misc.word_2 import Word2DocumentParser from hachoir_parser.misc.mstask import MSTaskFile +from hachoir_parser.misc.androidxml import AndroidXMLFile from hachoir_parser.misc.mapsforge_map import MapsforgeMapFile diff --git a/lib/hachoir_parser/misc/androidxml.py b/lib/hachoir_parser/misc/androidxml.py new file mode 100644 index 00000000..6a519efb --- /dev/null +++ b/lib/hachoir_parser/misc/androidxml.py @@ -0,0 +1,220 @@ +''' +AndroidManifest.xml parser + +References: +- http://code.google.com/p/androguard/source/browse/core/bytecodes/apk.py + +Author: Robert Xiao +Creation Date: May 29, 2011 +''' + +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, + String, Enum, GenericVector, + UInt8, UInt16, UInt32, Int32, + Float32, Bits,) +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.tools import createDict +from hachoir_core.endian import LITTLE_ENDIAN + + +class PascalCString16(FieldSet): + def createFields(self): + yield UInt16(self, "size") + self._size = (self['size'].value+2)*16 + yield String(self, "string", (self['size'].value+1)*2, strip='\0', charset="UTF-16-LE") + def createValue(self): + return self['string'].value + +class StringTable(FieldSet): + 
def createFields(self): + for field in self['../offsets']: + pad = self.seekByte(field.value) + if pad: + yield pad + yield PascalCString16(self, "string[]") + +def Top(self): + while not self.eof: + yield Chunk(self, "chunk[]") + +def StringChunk(self): + # TODO: styles + yield UInt32(self, "string_count") + yield UInt32(self, "style_count") + yield UInt32(self, "reserved[]") + yield UInt32(self, "string_offset") + yield UInt32(self, "style_offset") + yield GenericVector(self, "offsets", self['string_count'].value, UInt32, + description="Offsets for string table") + pad = self.seekByte(self['string_offset'].value) + if pad: + yield pad + yield StringTable(self, "table") + +def ResourceIDs(self): + while self._current_size < self._size: + yield textHandler(UInt32(self, "resource_id[]"), hexadecimal) + +def stringIndex(field): + if field.value == -1: + return '' + return field['/xml_file/string_table/table/string[%d]'%field.value].display + +def NamespaceTag(self): + yield UInt32(self, "lineno", "Line number from original XML file") + yield Int32(self, "unk[]", "Always -1") + yield textHandler(Int32(self, "prefix"), stringIndex) + yield textHandler(Int32(self, "uri"), stringIndex) +def NamespaceStartValue(self): + return "xmlns:%s='%s'"%(self['prefix'].display, self['uri'].display) +def NamespaceEndValue(self): + return "/%s"%self['prefix'].display + +def IntTextHandler(func): + return lambda *args, **kwargs: textHandler(Int32(*args, **kwargs), func) +def booleanText(field): + if field.value == 0: + return 'false' + return 'true' +class XMLUnitFloat(FieldSet): + static_size = 32 + UNIT_MAP = {} + RADIX_MAP = { + 0: 0, + 1: 7, + 2: 15, + 3: 23, + } + def createFields(self): + yield Enum(Bits(self, "unit", 4), self.UNIT_MAP) + yield Enum(Bits(self, "exponent", 2), self.RADIX_MAP) + yield Bits(self, "reserved[]", 2) + yield Bits(self, "mantissa", 24) + def createValue(self): + return float(self['mantissa'].value) >> self.RADIX_MAP[self['exponent'].value] + def 
createDisplay(self): + return '%f%s'%(self.value, self.UNIT_MAP.get(self['unit'].value, '')) +class XMLDimensionFloat(XMLUnitFloat): + UNIT_MAP = dict(enumerate(["px","dip","sp","pt","in","mm"])) +class XMLFractionFloat(XMLUnitFloat): + UNIT_MAP = {0: '%', 1: '%p'} +class XMLAttribute(FieldSet): + TYPE_INFO = { + 0: ('Null', IntTextHandler(lambda field: '')), + 1: ('Reference', IntTextHandler(lambda field: '@%08x'%field.value)), + 2: ('Attribute', IntTextHandler(lambda field: '?%08x'%field.value)), + 3: ('String', IntTextHandler(stringIndex)), + 4: ('Float', Float32), + 5: ('Dimension', XMLDimensionFloat), + 6: ('Fraction', XMLFractionFloat), + 16: ('Int_Dec', Int32), + 17: ('Int_Hex', IntTextHandler(hexadecimal)), + 18: ('Int_Boolean', IntTextHandler(booleanText)), + 28: ('Int_Color_Argb8', IntTextHandler(lambda field: '#%08x'%field.value)), + 29: ('Int_Color_Rgb8', IntTextHandler(lambda field: '#%08x'%field.value)), + 30: ('Int_Color_Argb4', IntTextHandler(lambda field: '#%08x'%field.value)), + 31: ('Int_Color_Rgb4', IntTextHandler(lambda field: '#%08x'%field.value)), + } + TYPE_NAME = createDict(TYPE_INFO, 0) + TYPE_FUNC = createDict(TYPE_INFO, 1) + static_size = 5*32 + def createFields(self): + yield textHandler(Int32(self, "ns"), stringIndex) + yield textHandler(Int32(self, "name"), stringIndex) + yield textHandler(Int32(self, "value_string"), stringIndex) + yield UInt16(self, "unk[]") + yield UInt8(self, "unk[]") + yield Enum(UInt8(self, "value_type"), self.TYPE_NAME) + func = self.TYPE_FUNC.get(self['value_type'].value, None) + if not func: + func = UInt32 + yield func(self, "value_data") + def createValue(self): + return (self['name'].display, self['value_data'].value) + def createDisplay(self): + return '%s="%s"'%(self['name'].display, self['value_data'].display) + +def TagStart(self): + yield UInt32(self, "lineno", "Line number from original XML file") + yield Int32(self, "unk[]", "Always -1") + yield textHandler(Int32(self, "ns"), stringIndex) + yield 
textHandler(Int32(self, "name"), stringIndex) + yield UInt32(self, "flags") + yield UInt16(self, "attrib_count") + yield UInt16(self, "attrib_id") + yield UInt16(self, "attrib_class") + yield UInt16(self, "attrib_style") + for i in xrange(self['attrib_count'].value): + yield XMLAttribute(self, "attrib[]") +def TagStartValue(self): + attrstr = ' '.join(attr.display for attr in self.array('attrib')) + if attrstr: attrstr = ' '+attrstr + if not self['ns'].display: + return '<%s%s>'%(self['name'].display, attrstr) + return "<%s:%s%s>"%(self['ns'].display, self['name'].display, attrstr) + +def TagEnd(self): + yield UInt32(self, "lineno", "Line number from original XML file") + yield Int32(self, "unk[]", "Always -1") + yield textHandler(Int32(self, "ns"), stringIndex) + yield textHandler(Int32(self, "name"), stringIndex) +def TagEndValue(self): + if not self['ns'].display: + return ''%self['name'].display + return ""%(self['ns'].display, self['name'].display) + +def TextChunk(self): + # TODO + yield UInt32(self, "lineno", "Line number from original XML file") + yield Int32(self, "unk[]", "Always -1") + +class Chunk(FieldSet): + CHUNK_INFO = { + 0x0001: ("string_table", "String Table", StringChunk, None), + 0x0003: ("xml_file", "XML File", Top, None), + 0x0100: ("namespace_start[]", "Start Namespace", NamespaceTag, NamespaceStartValue), + 0x0101: ("namespace_end[]", "End Namespace", NamespaceTag, NamespaceEndValue), + 0x0102: ("tag_start[]", "Start Tag", TagStart, TagStartValue), + 0x0103: ("tag_end[]", "End Tag", TagEnd, TagEndValue), + 0x0104: ("text[]", "Text", TextChunk, None), + 0x0180: ("resource_ids", "Resource IDs", ResourceIDs, None), + } + CHUNK_DESC = createDict(CHUNK_INFO, 1) + def __init__(self, parent, name, description=None): + FieldSet.__init__(self, parent, name, description) + self._size = self['chunk_size'].value* 8 + type = self['type'].value + self.parse_func = None + if type in self.CHUNK_INFO: + self._name, self._description, self.parse_func, 
value_func = self.CHUNK_INFO[type] + if value_func: + self.createValue = lambda: value_func(self) + + def createFields(self): + yield Enum(UInt16(self, "type"), self.CHUNK_DESC) + yield UInt16(self, "header_size") + yield UInt32(self, "chunk_size") + if self.parse_func: + for field in self.parse_func(self): + yield field + +class AndroidXMLFile(Parser): + MAGIC = "\x03\x00\x08\x00" + PARSER_TAGS = { + "id": "axml", + "category": "misc", + "file_ext": ("xml",), + "min_size": 32*8, + "magic": ((MAGIC, 0),), + "description": "Android binary XML format", + } + endian = LITTLE_ENDIAN + + def validate(self): + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: + return "Invalid magic" + return True + + def createFields(self): + yield Chunk(self, "xml_file") diff --git a/lib/hachoir_parser/misc/mapsforge_map.py b/lib/hachoir_parser/misc/mapsforge_map.py index 4b99653a..156979a2 100644 --- a/lib/hachoir_parser/misc/mapsforge_map.py +++ b/lib/hachoir_parser/misc/mapsforge_map.py @@ -10,7 +10,7 @@ References: from hachoir_parser import Parser from hachoir_core.field import (ParserError, - Bit, Bits, UInt8, UInt16, UInt32, UInt64, String, RawBytes, + Bit, Bits, UInt8, UInt16, UInt32, Int32, UInt64, String, RawBytes, PaddingBits, PaddingBytes, Enum, Field, FieldSet, SeekableFieldSet, RootSeekableFieldSet) from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN @@ -140,6 +140,11 @@ class TileHeader(FieldSet): class POIData(FieldSet): def createFields(self): + if self["/have_debug"].value: + yield String(self, "signature", 32) + if not self['signature'].value.startswith("***POIStart"): + raise ValueError + yield IntVbe(self, "lat_diff") yield IntVbe(self, "lon_diff") yield Bits(self, "layer", 4) @@ -179,6 +184,11 @@ class SubTileBitmap(FieldSet): class WayProperties(FieldSet): def createFields(self): + if self["/have_debug"].value: + yield String(self, "signature", 32) + if not self['signature'].value.startswith("---WayStart"): + raise ValueError + yield UIntVbe(self, 
"way_data_size") # WayProperties is split into an outer and an inner field, to allow specifying data size for inner part: @@ -251,6 +261,11 @@ class TileData(FieldSet): self.zoomIntervalCfg = zoomIntervalCfg def createFields(self): + if self["/have_debug"].value: + yield String(self, "signature", 32) + if not self['signature'].value.startswith("###TileStart"): + raise ValueError + yield TileHeader(self, "tile_header", self.zoomIntervalCfg) numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1 @@ -272,6 +287,11 @@ class ZoomSubFile(SeekableFieldSet): self.zoomIntervalCfg = zoomIntervalCfg def createFields(self): + if self["/have_debug"].value: + yield String(self, "signature", 16) + if self['signature'].value != "+++IndexStart+++": + raise ValueError + indexEntries = [] numTiles = None i = 0 @@ -284,13 +304,24 @@ class ZoomSubFile(SeekableFieldSet): if numTiles is None: # calculate number of tiles (TODO: better calc this from map bounding box) firstOffset = self["tile_index_entry[0]"]["offset"].value + if self["/have_debug"].value: + firstOffset -= 16 numTiles = firstOffset / 5 if i >= numTiles: break - for indexEntry in indexEntries: - self.seekByte(indexEntry["offset"].value, relative=True) - yield TileData(self, "tile_data[]", zoomIntervalCfg=self.zoomIntervalCfg) + for i, indexEntry in enumerate(indexEntries): + offset = indexEntry["offset"].value + self.seekByte(offset, relative=True) + if i != len(indexEntries) - 1: + next_offset = indexEntries[i + 1]["offset"].value + size = (next_offset - offset) * 8 + else: + size = self.size - offset * 8 + if size == 0: + # hachoir doesn't support empty field. 
+ continue + yield TileData(self, "tile_data[%d]" % i, zoomIntervalCfg=self.zoomIntervalCfg, size=size) @@ -314,10 +345,10 @@ class MapsforgeMapFile(Parser, RootSeekableFieldSet): yield UInt32(self, "file_version") yield UInt64(self, "file_size") yield UInt64(self, "creation_date") - yield UInt32(self, "min_lat") - yield UInt32(self, "min_lon") - yield UInt32(self, "max_lat") - yield UInt32(self, "max_lon") + yield Int32(self, "min_lat") + yield Int32(self, "min_lon") + yield Int32(self, "max_lat") + yield Int32(self, "max_lon") yield UInt16(self, "tile_size") yield VbeString(self, "projection") diff --git a/lib/hachoir_parser/program/__init__.py b/lib/hachoir_parser/program/__init__.py index 261eaf15..321baf25 100644 --- a/lib/hachoir_parser/program/__init__.py +++ b/lib/hachoir_parser/program/__init__.py @@ -1,7 +1,9 @@ from hachoir_parser.program.elf import ElfFile from hachoir_parser.program.exe import ExeFile +from hachoir_parser.program.macho import MachoFile, MachoFatFile from hachoir_parser.program.python import PythonCompiledFile from hachoir_parser.program.java import JavaCompiledClassFile from hachoir_parser.program.prc import PRCFile from hachoir_parser.program.nds import NdsFile - +from hachoir_parser.program.dex import DexFile +from hachoir_parser.program.java_serialized import JavaSerializedFile diff --git a/lib/hachoir_parser/program/dex.py b/lib/hachoir_parser/program/dex.py new file mode 100644 index 00000000..67ab2f18 --- /dev/null +++ b/lib/hachoir_parser/program/dex.py @@ -0,0 +1,238 @@ +''' +Dalvik Executable (dex) parser. 
+ +References: +- http://www.dalvikvm.com/ +- http://code.google.com/p/androguard/source/browse/core/bytecodes/dvm.py +- http://androguard.googlecode.com/hg/specs/dalvik/dex-format.html + +Author: Robert Xiao +Creation Date: May 29, 2011 +''' + +from hachoir_parser import HachoirParser +from hachoir_core.field import (SeekableFieldSet, RootSeekableFieldSet, FieldSet, ParserError, + String, RawBytes, GenericVector, + UInt8, UInt16, UInt32, NullBits, Bit) +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.program.java import eat_descriptor + +class DexHeader(FieldSet): + def createFields(self): + yield String(self, "magic", 4) + yield String(self, "version", 4, strip='\0') + yield textHandler(UInt32(self, "checksum"), hexadecimal) + yield RawBytes(self, "signature", 20, description="SHA1 sum over all subsequent data") + yield filesizeHandler(UInt32(self, "filesize")) + yield UInt32(self, "size", description="Header size") + self._size = self['size'].value*8 + yield textHandler(UInt32(self, "endian"), hexadecimal) + yield UInt32(self, "link_count") + yield UInt32(self, "link_offset") + yield UInt32(self, "map_offset", description="offset to map footer") + yield UInt32(self, "string_count", description="number of entries in string table") + yield UInt32(self, "string_offset", description="offset to string table") + yield UInt32(self, "type_desc_count", description="number of entries in type descriptor table") + yield UInt32(self, "type_desc_offset", description="offset to type descriptor table") + yield UInt32(self, "meth_desc_count", description="number of entries in method descriptor table") + yield UInt32(self, "meth_desc_offset", description="offset to method descriptor table") + yield UInt32(self, "field_count", description="number of entries in field table") + yield UInt32(self, "field_offset", description="offset to field table") + yield UInt32(self, "method_count", 
description="number of entries in method table") + yield UInt32(self, "method_offset", description="offset to method table") + yield UInt32(self, "class_count", description="number of entries in class table") + yield UInt32(self, "class_offset", description="offset to class table") + yield UInt32(self, "data_size", description="size of data region") + yield UInt32(self, "data_offset", description="offset to data region") + +def stringIndex(field): + return field['/string_table/item[%d]'%field.value].display + +def classDisplay(field): + disp, tail = eat_descriptor(stringIndex(field)) + return disp + +def classIndex(field): + return field['/type_desc_table/item[%d]'%field.value].display + +# modified from java.py +code_to_type_name = { + 'B': "byte", + 'C': "char", + 'D': "double", + 'F': "float", + 'I': "int", + 'J': "long", + 'L': "object", + 'S': "short", + 'Z': "boolean", +} + +def argumentDisplay(field): + # parse "shorty" descriptors (these start with the return code, which is redundant) + text = stringIndex(field)[1:] + return [code_to_type_name.get(c,c) for c in text] + +def signatureIndex(field): + return field['/meth_desc_table/item[%d]'%field.value].display + +class PascalCString(FieldSet): + def createFields(self): + yield UInt8(self, "size") + self._size = (self['size'].value+2)*8 + yield String(self, "string", self['size'].value+1, strip='\0') + def createValue(self): + return self['string'].value + +class StringTable(SeekableFieldSet): + def createFields(self): + for item in self['/string_offsets'].array('item'): + self.seekByte(item.value, relative=False) + yield PascalCString(self, "item[]") + +class TypeDescriptorEntry(FieldSet): + static_size = 32 + def createFields(self): + yield textHandler(UInt32(self, "desc", description="Type descriptor"), classDisplay) + def createValue(self): + return (self['desc'].value,) + def createDisplay(self): + return self['desc'].display + +class MethodDescriptorEntry(FieldSet): + static_size = 96 + def 
createFields(self): + yield textHandler(UInt32(self, "args", description="Argument type"), argumentDisplay) + yield textHandler(UInt32(self, "return", description="Return type"), classIndex) + yield UInt32(self, "param_offset", "Offset to parameter detail list") + def createValue(self): + return (self['args'].value, self['return'].value) + def createDisplay(self): + return "%s (%s)"%(self['return'].display, ', '.join(self['args'].display)) + +class FieldEntry(FieldSet): + static_size = 64 + def createFields(self): + yield textHandler(UInt16(self, "class", description="Class containing this field"), classIndex) + yield textHandler(UInt16(self, "type", description="Field type"), classIndex) + yield textHandler(UInt32(self, "name", description="Field name"), stringIndex) + def createValue(self): + return (self['class'].value, self['type'].value, self['name'].value) + def createDisplay(self): + return "%s %s.%s"%(self['type'].display, self['class'].display, self['name'].display) + +class MethodEntry(FieldSet): + static_size = 64 + def createFields(self): + yield textHandler(UInt16(self, "class", description="Class containing this method"), classIndex) + yield textHandler(UInt16(self, "sig", description="Method signature"), signatureIndex) + yield textHandler(UInt32(self, "name", description="Method name"), stringIndex) + def createValue(self): + return (self['class'].value, self['sig'].value, self['name'].value) + def createDisplay(self): + sig = self['/meth_desc_table/item[%d]'%self['sig'].value] + return "%s %s.%s(%s)"%(sig['return'].display, self['class'].display, self['name'].display, ', '.join(sig['args'].display)) + +class AccessFlags(FieldSet): + static_size = 32 + def createFields(self): + yield Bit(self, "public") + yield Bit(self, "private") + yield Bit(self, "protected") + yield Bit(self, "static") + yield Bit(self, "final") + yield Bit(self, "synchronized") + yield Bit(self, "volatile") + yield Bit(self, "transient") + yield Bit(self, "native") + yield 
Bit(self, "interface") + yield Bit(self, "abstract") + yield Bit(self, "strictfp") + yield Bit(self, "synthetic") + yield Bit(self, "annotation") + yield Bit(self, "enum") + yield NullBits(self, "reserved[]", 1) + yield Bit(self, "constructor") + yield NullBits(self, "reserved[]", 15) + def createValue(self): + return tuple(f for f in self if f.value is True) + def createDisplay(self): + return ' '.join(f.name for f in self if f.value is True) + +class ClassEntry(FieldSet): + static_size = 8*32 + def createFields(self): + yield textHandler(UInt32(self, "class", description="Class being described"), classIndex) + yield AccessFlags(self, "flags") + yield textHandler(UInt32(self, "superclass", description="Superclass"), classIndex) + yield UInt32(self, "interfaces_offset", description="Offset to interface list") + yield textHandler(UInt32(self, "filename", description="Filename"), stringIndex) + yield UInt32(self, "annotations_offset") + yield UInt32(self, "class_data_offset") + yield UInt32(self, "static_values_offset") + def createValue(self): + return tuple(f.value for f in self) + def createDisplay(self): + disp = self['flags'].display + if not self['flags/interface'].value: + if disp: + disp += ' ' + disp += 'class' + disp += ' '+self['class'].display + if self['superclass'].display != 'java.lang.Object': + disp += ' extends '+self['superclass'].display + return disp + +class DexFile(HachoirParser, RootSeekableFieldSet): + MAGIC = "dex\n" + PARSER_TAGS = { + "id": "dex", + "category": "program", + "file_ext": ("dex",), + "min_size": 80*8, + "magic": ((MAGIC, 0),), + "description": "Dalvik VM Executable", + } + endian = LITTLE_ENDIAN + + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + + def validate(self): + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: + return "Invalid magic" + if self['header/version'].value != '035': + 
return "Unknown version" + return True + + def createFields(self): + yield DexHeader(self, "header") + + self.seekByte(self['header/string_offset'].value) + yield GenericVector(self, "string_offsets", self['header/string_count'].value, UInt32, + description="Offsets for string table") + self.seekByte(self['string_offsets/item[0]'].value) + yield StringTable(self, "string_table", + description="String table") + + self.seekByte(self['header/type_desc_offset'].value) + yield GenericVector(self, "type_desc_table", self['header/type_desc_count'].value, TypeDescriptorEntry, + description="Type descriptor table") + + self.seekByte(self['header/meth_desc_offset'].value) + yield GenericVector(self, "meth_desc_table", self['header/meth_desc_count'].value, MethodDescriptorEntry, + description="Method descriptor table") + + self.seekByte(self['header/field_offset'].value) + yield GenericVector(self, "field_table", self['header/field_count'].value, FieldEntry, + description="Field definition table") + + self.seekByte(self['header/method_offset'].value) + yield GenericVector(self, "method_table", self['header/method_count'].value, MethodEntry, + description="Method definition table") + + self.seekByte(self['header/class_offset'].value) + yield GenericVector(self, "class_table", self['header/class_count'].value, ClassEntry, + description="Class definition table") diff --git a/lib/hachoir_parser/program/exe.py b/lib/hachoir_parser/program/exe.py index 5a7bc727..3752b4c2 100644 --- a/lib/hachoir_parser/program/exe.py +++ b/lib/hachoir_parser/program/exe.py @@ -60,7 +60,7 @@ class ExeFile(HachoirParser, RootSeekableFieldSet): PARSER_TAGS = { "id": "exe", "category": "program", - "file_ext": ("exe", "dll", "ocx"), + "file_ext": ("exe", "dll", "ocx", "pyd", "scr"), "mime": (u"application/x-dosexec",), "min_size": 64*8, #"magic": (("MZ", 0),), diff --git a/lib/hachoir_parser/program/java.py b/lib/hachoir_parser/program/java.py index 7329cbe0..c1f17c4d 100644 --- 
a/lib/hachoir_parser/program/java.py +++ b/lib/hachoir_parser/program/java.py @@ -732,6 +732,14 @@ class FieldInfo(FieldSet): yield FieldArray(self, "attributes", AttributeInfo, self["attributes_count"].value) + def createDescription(self): + bits = [] + for mod in ['transient', 'protected', 'private', 'public', 'static', 'final', 'volatile']: + if self[mod].value: + bits.append(mod) + bits.append(parse_field_descriptor(str(self['descriptor_index'].get_cp_entry()))) + bits.append(str(self['name_index'].get_cp_entry())) + return ' '.join(bits) ############################################################################### # method_info { @@ -766,6 +774,15 @@ class MethodInfo(FieldSet): yield FieldArray(self, "attributes", AttributeInfo, self["attributes_count"].value) + def createDescription(self): + bits = [] + for mod in ['strict', 'static', 'native', 'synchronized', 'protected', 'private', 'public', 'final', 'abstract']: + if self[mod].value: + bits.append(mod) + name = str(self['name_index'].get_cp_entry()) + meth = str(self['descriptor_index'].get_cp_entry()) + bits.append(parse_method_descriptor(meth, name)) + return ' '.join(bits) ############################################################################### # attribute_info { @@ -954,6 +971,18 @@ class InnerClassesEntry(StaticFieldSet): (Bit, "public"), ) + def createDescription(self): + bits = [] + for mod in ['super', 'static', 'protected', 'private', 'public', 'abstract', 'final', 'interface']: + if self[mod].value: + bits.append(mod) + if not self['interface'].value: + bits.append('class') + + name = str(self['inner_class_info_index'].get_cp_entry()) + bits.append(name) + return ' '.join(bits) + class LineNumberTableEntry(StaticFieldSet): format = ( (UInt16, "start_pc"), diff --git a/lib/hachoir_parser/program/java_serialized.py b/lib/hachoir_parser/program/java_serialized.py new file mode 100644 index 00000000..5e7742ab --- /dev/null +++ b/lib/hachoir_parser/program/java_serialized.py @@ -0,0 +1,372 @@ 
+''' +Java Object Serialization Stream parser. + +References: +- http://docs.oracle.com/javase/7/docs/platform/serialization/spec/protocol.html +- http://www.javaworld.com/article/2072752/the-java-serialization-algorithm-revealed.html + +Author: Robert Xiao +Creation Date: Jun 18, 2015 +''' + +from hachoir_parser import Parser +from hachoir_core.field import ( + ParserError, FieldSet, StaticFieldSet, + Enum, RawBytes, String, PascalString16, Float32, Float64, + Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, + Bit, NullBits) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import paddingSize + +from .java import parse_field_descriptor + +class LongString(FieldSet): + def createFields(self): + yield Int64(self, "length") + yield String(self, "value", charset="UTF-8") + def createDescription(self): + return self['value'].description + def createValue(self): + return self['value'].value + +class UTF16Character(UInt16): + def createDisplay(self): + return repr(unichr(self.value)) + +class JavaBool(UInt8): + def createValue(self): + val = UInt8.createValue(self) + return (val != 0) + +class SerializedNull(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + def createValue(self): + return None + def createDisplay(self): + return 'null' + +class SerializedReference(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield Int32(self, "handle") + + @property + def referent(self): + return self.root.handles[self['handle'].value] + + def createValue(self): + return self.referent.value + + def createDisplay(self): + return "-> " + str(self.referent.display) + +class FieldDesc(FieldSet): + def createFields(self): + yield String(self, "typecode", 1) + yield PascalString16(self, "fieldName", charset="UTF-8") + if self['typecode'].value in ('[', 'L'): + yield SerializedContent(self, "className") + + @property + def 
typeDescriptor(self): + typecode = self['typecode'].value + if typecode in ('[', 'L'): + return self['className'].value + else: + return typecode + + @property + def typeName(self): + return parse_field_descriptor(self.typeDescriptor) + + @property + def fieldName(self): + return self['fieldName'].value + + def createValue(self): + return (self.typeDescriptor, self.fieldName) + + def createDisplay(self): + return '%s %s' % (self.typeName, self.fieldName) + +class ClassAnnotation(FieldSet): + def createFields(self): + # TODO + yield Enum(UInt8(self, "endBlockData"), TYPECODE_NAMES) + +class SerializedClassDesc(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield PascalString16(self, "className", charset="UTF-8") + yield Int64(self, "serialVersionUID") + self.root.newHandle(self) + yield NullBits(self, "classDescFlags_reserved", 3) + yield Bit(self, "classDescFlags_enum", "Is the class an Enum?") + yield Bit(self, "classDescFlags_block_data", "Was the externalizable's block data written using stream version 2?") + yield Bit(self, "classDescFlags_externalizable", "Does the class implement java.io.Externalizable?") + yield Bit(self, "classDescFlags_serializable", "Does the class implement java.io.Serializable?") + yield Bit(self, "classDescFlags_write_method", "Does the class have a writeObject method?") + yield Int16(self, "fieldDesc_count") + for i in xrange(self['fieldDesc_count'].value): + yield FieldDesc(self, "fieldDesc[]") + yield ClassAnnotation(self, "classAnnotation") + yield SerializedContent(self, "superClassDesc") + + @property + def className(self): + return self['className'].value + +class ObjectValue(FieldSet): + def gen_values(self, classDesc): + if isinstance(classDesc, SerializedReference): + classDesc = classDesc.referent + if isinstance(classDesc, SerializedNull): + return + # TODO: proxy class desc + + for field in self.gen_values(classDesc['superClassDesc']): + yield field + + for fieldDesc in 
classDesc.array('fieldDesc'): + tc = fieldDesc['typecode'].value + klass = VALUE_CLASS_MAP[tc] + field = klass(self, "field[]", description="%s.%s" % (classDesc.className, fieldDesc.fieldName)) + field.fieldName = fieldDesc.fieldName + yield field + + def createFields(self): + for field in self.gen_values(self.parent.classDesc): + yield field + +class SerializedObject(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield SerializedContent(self, "classDesc") + self.root.newHandle(self) + + yield ObjectValue(self, "value") + + @property + def classDesc(self): + classDesc = self['classDesc'] + if isinstance(classDesc, SerializedReference): + classDesc = classDesc.referent + return classDesc + + def createValue(self): + return tuple(field.value for field in self['value'].array('field')) + + def createDisplay(self): + out = [] + for field in self['value'].array('field'): + if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString): + # Avoid recursive references + out.append('%s=#' % (field.fieldName, field.referent.classDesc.className)) + else: + out.append('%s=%s' % (field.fieldName, field.display)) + return '%s(%s)' % (self.classDesc.className, ', '.join(out)) + +class SerializedString(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + self.root.newHandle(self) + yield PascalString16(self, "value", charset="UTF-8") + def createValue(self): + return self['value'].value + def createDisplay(self): + return self['value'].display + +class SerializedArray(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield SerializedContent(self, "classDesc") + self.root.newHandle(self) + + yield Int32(self, "size") + klass = VALUE_CLASS_MAP[self.classDesc.className[1]] # className is [ + for i in xrange(self['size'].value): + yield klass(self, "value[]") + + @property + def classDesc(self): + classDesc = self['classDesc'] 
+ if isinstance(classDesc, SerializedReference): + classDesc = classDesc.referent + return classDesc + + def createValue(self): + return [v.value for v in self.array('value')] + + def createDisplay(self): + out = [] + for field in self.array('value'): + if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString): + # Avoid recursive references + out.append('#' % (field.referent.classDesc.className,)) + else: + out.append('%s' % (field.display,)) + return '[%s]' % ', '.join(out) + +class SerializedClass(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield SerializedContent(self, "classDesc") + self.root.newHandle(self) + +class BlockData(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + # TODO + +class StreamReset(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + self.root.resetHandles() + +class BlockDataLong(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + # TODO + +class SerializedException(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + self.root.resetHandles() + yield SerializableObject(self, "object") + self.root.resetHandles() + +class SerializedLongString(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + self.root.newHandle(self) + yield LongString(self, "value") + def createValue(self): + return self['value'].value + +class SerializedProxyClassDesc(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + # TODO + +class SerializedEnum(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES) + yield SerializedContent(self, "classDesc") + self.root.newHandle(self) + yield SerializedContent(self, "enumConstantName") + + @property + def classDesc(self): + classDesc = self['classDesc'] + if 
class JavaSerializedFile(Parser):
    """Root parser for a Java object serialization stream.

    The stream is a 16-bit magic, a 16-bit version, then content records
    until end of file.  The parser also owns the handle table that content
    records register themselves in (see newHandle / resetHandles).
    """
    endian = BIG_ENDIAN

    MAGIC = 0xaced
    KNOWN_VERSIONS = (5,)

    PARSER_TAGS = {
        "id": "java_serialized",
        "category": "program",
        "file_ext": ("ser",),
        "mime": (u"application/java-serialized-object",),
        "min_size": 4*4,
        "magic": (("\xac\xed", 0),),
        "description": "Serialized Java object",
    }

    def validate(self):
        """Return True for a recognised stream, else an error message."""
        if self["magic"].value != self.MAGIC:
            return "Wrong magic signature!"
        # The version field is only read once the magic has matched.
        version = self["version"].value
        if version in self.KNOWN_VERSIONS:
            return True
        return "Unknown version (%d)" % version

    def createDescription(self):
        """Return a one-line description including the stream version."""
        version = self["version"].value
        return "Serialized Java object, version %s" % version

    def resetHandles(self):
        """Forget all registered handles and restart numbering at 0x7E0000."""
        self.nextHandleNum = 0x7E0000
        self.handles = {}

    def newHandle(self, obj):
        """Register obj under the next free handle number."""
        handle = self.nextHandleNum
        self.handles[handle] = obj
        self.nextHandleNum = handle + 1

    def createFields(self):
        """Yield the stream header, then content records until end of file."""
        self.resetHandles()

        magic = UInt16(self, "magic", "Java serialized object signature")
        yield textHandler(magic, hexadecimal)
        yield UInt16(self, "version", "Stream version")

        # SerializedContent picks the record class from the next typecode byte.
        while True:
            if self.eof:
                break
            yield SerializedContent(self, "object[]")
u"Fujitsu FR20", + 38: u"TRW RH-32", + 39: u"Motorola RCE", + 40: u"Advanced RISC Machines (ARM)", + 41: u"DIGITAL Alpha", + 42: u"Hitachi Super-H", + 43: u"SPARC Version 9", + 44: u"Siemens Tricore", + 45: u"Argonaut RISC Core", + 46: u"Hitachi H8/300", + 47: u"Hitachi H8/300H", + 48: u"Hitachi H8S", + 49: u"Hitachi H8/500", + 50: u"Intel Merced (IA-64) Processor", + 51: u"Stanford MIPS-X", + 52: u"Motorola Coldfire", + 53: u"Motorola MC68HC12", + 62: u"Advanced Micro Devices x86-64", + 75: u"DIGITAL VAX", + 36902: u"used by NetBSD/alpha; obsolete", + } + CLASS_NAME = { + # e_ident[EI_CLASS], ELFCLASS defines + 1: u"32 bits", + 2: u"64 bits" + } + TYPE_NAME = { + # e_type, ET_ defines + 0: u"No file type", + 1: u"Relocatable file", + 2: u"Executable file", + 3: u"Shared object file", + 4: u"Core file", + 0xFF00: u"Processor-specific (0xFF00)", + 0xFFFF: u"Processor-specific (0xFFFF)", + } + OSABI_NAME = { + # e_ident[EI_OSABI], ELFOSABI_ defines + 0: u"UNIX System V ABI", + 1: u"HP-UX operating system", + 2: u"NetBSD", + 3: u"GNU/Linux", + 4: u"GNU/Hurd", + 5: u"86Open common IA32 ABI", + 6: u"Solaris", + 7: u"Monterey", + 8: u"IRIX", + 9: u"FreeBSD", + 10: u"TRU64 UNIX", + 11: u"Novell Modesto", + 12: u"OpenBSD", + 97: u"ARM", + 255: u"Standalone (embedded) application", + } + ENDIAN_NAME = { + # e_ident[EI_DATA], ELFDATA defines + LITTLE_ENDIAN_ID: "Little endian", + BIG_ENDIAN_ID: "Big endian", + } + + def createFields(self): + yield Bytes(self, "signature", 4, r'ELF signature ("\x7fELF")') + yield Enum(UInt8(self, "class", "Class"), self.CLASS_NAME) + if self["class"].value == 1: + ElfLongWord = UInt32 + else: + ElfLongWord = UInt64 + yield Enum(UInt8(self, "endian", "Endian"), self.ENDIAN_NAME) + yield UInt8(self, "file_version", "File version") + yield Enum(UInt8(self, "osabi_ident", "OS/syscall ABI identification"), self.OSABI_NAME) + yield UInt8(self, "abi_version", "syscall ABI version") + yield String(self, "pad", 7, "Pad") + + yield Enum(UInt16(self, 
class SectionFlags(FieldSet):
    """Bit-level view of a section header's flags word.

    The word is 32 bits wide, plus 32 extra reserved bits when the root
    parser reports a 64-bit file.  Fields are emitted most-significant bit
    first for big-endian files and least-significant bit first otherwise,
    so the little-endian sequence must be the exact mirror of the
    big-endian one.
    """
    def createFields(self):
        if self.root.endian == BIG_ENDIAN:
            # High bits first: [reserved 32] | proc 4 | reserved 17 | tls |
            # reserved 7 | exec | alloc | writable
            if self.root.is64bit:
                yield RawBits(self, "reserved[]", 32)
            yield RawBits(self, "processor_specific", 4, "Processor specific flags")
            yield NullBits(self, "reserved[]", 17)
            yield Bit(self, "is_tls", "Section contains TLS data?")
            yield NullBits(self, "reserved[]", 7)
            yield Bit(self, "is_exec", "Section contains executable instructions?")
            yield Bit(self, "is_alloc", "Section occupies memory?")
            yield Bit(self, "is_writable", "Section contains writable data?")
        else:
            # Low bits first: the mirror image of the branch above.
            yield Bit(self, "is_writable", "Section contains writable data?")
            yield Bit(self, "is_alloc", "Section occupies memory?")
            yield Bit(self, "is_exec", "Section contains executable instructions?")
            yield NullBits(self, "reserved[]", 7)
            yield Bit(self, "is_tls", "Section contains TLS data?")
            # Fix: the 17 reserved bits sit between the TLS bit and the
            # processor-specific nibble (the top four bits of the low word).
            # They were previously emitted after the nibble, which placed
            # "processor_specific" on bits 11-14 instead of 28-31.
            yield NullBits(self, "reserved[]", 17)
            yield RawBits(self, "processor_specific", 4, "Processor specific flags")
            if self.root.is64bit:
                yield RawBits(self, "reserved[]", 32)
class ProgramFlags(FieldSet):
    """32-bit segment permission flags: 29 padding bits plus r/w/x bits.

    Big-endian files carry the padding first and then pf_r, pf_w, pf_x;
    otherwise the order is pf_x, pf_w, pf_r followed by the padding.
    """
    static_size = 32
    FLAGS = (('pf_r','readable'),('pf_w','writable'),('pf_x','executable'))

    def createFields(self):
        big_endian = (self.root.endian == BIG_ENDIAN)
        if big_endian:
            yield NullBits(self, "padding[]", 29)
            ordered = self.FLAGS
        else:
            ordered = self.FLAGS[::-1]

        for name, meaning in ordered:
            yield Bit(self, name, "Segment is %s" % meaning)

        if not big_endian:
            yield NullBits(self, "padding[]", 29)

    def createDescription(self):
        """List the permissions whose bit is set, in FLAGS order."""
        enabled = [meaning for name, meaning in self.FLAGS if self[name].value]
        return 'Segment is %s' % ', '.join(enabled)
class ProgramHeader64(ProgramHeader32):
    """64-bit program header entry (56 bytes).

    Differs from ProgramHeader32 in that the flags word comes right after
    the type and every address/size field is 64 bits wide.
    """
    static_size = 56*8

    def createFields(self):
        segment_type = UInt32(self, "type", "Segment type")
        yield Enum(segment_type, ProgramHeader32.TYPE_NAME)
        yield ProgramFlags(self, "flags")
        yield UInt64(self, "offset", "Offset")

        # Addresses are displayed in hexadecimal.
        for name, label in (("vaddr", "V. address"), ("paddr", "P. address")):
            yield textHandler(UInt64(self, name, label), hexadecimal)

        # Remaining plain 64-bit counters.
        plain_fields = (
            ("file_size", "File size"),
            ("mem_size", "Memory size"),
            ("align", "Alignment padding"),
        )
        for name, label in plain_fields:
            yield UInt64(self, name, label)
class MachoFileBase(RootSeekableFieldSet):
    """Field set for a single Mach-O image.

    Used as the base of the standalone MachoFile parser and also yielded
    directly by MachoFatFile for each architecture slice, which is why the
    magic is read at this field set's own absolute address rather than at
    offset 0 of the stream.
    """
    # magic bytes -> (is64bit, endian)
    MAGICS = {"\xfe\xed\xfa\xce": (0, BIG_ENDIAN), # 32-bit big endian
              "\xce\xfa\xed\xfe": (0, LITTLE_ENDIAN), # 32-bit little endian
              "\xfe\xed\xfa\xcf": (1, BIG_ENDIAN), # 64-bit big endian
              "\xcf\xfa\xed\xfe": (1, LITTLE_ENDIAN), # 64-bit little endian
             }

    def createFields(self):
        """Yield the Mach-O header followed by its load commands.

        Raises ParserError when the first four bytes are not a known
        Mach-O magic (previously an unhandled KeyError).
        """
        baseaddr = self.absolute_address
        # Choose size and endianness based on magic
        magic = self.stream.readBytes(baseaddr, 4)
        try:
            self.is64bit, self.endian = self.MAGICS[magic]
        except KeyError:
            raise ParserError("Invalid Mach-O magic: %r" % magic)

        yield MachoHeader(self, "header", "Header")
        for _ in xrange(self['header/ncmds'].value):
            yield MachoLoadCommand(self, "load_command[]")
        # The ELF header/section parsing that used to follow a bare `return`
        # here was unreachable and has been removed.

    def createDescription(self):
        return "Mach-O program/library: %s" % (self["header/cputype"].display)
+ endian = BIG_ENDIAN + + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + + def validate(self): + if self.stream.readBytes(0, 4) not in (self.MAGIC_LE, self.MAGIC_BE): + return "Invalid magic" + if self['header/nfat_arch'].value >= 16: + # This helps to distinguish mach-o from java. + return "Too many architectures" + return True + + def createFields(self): + # Choose the right endian based on file magic + if self.stream.readBytes(0, 4) == self.MAGIC_LE: + self.endian = LITTLE_ENDIAN + else: + self.endian = BIG_ENDIAN + + # Parse header and program headers + yield MachoFatHeader(self, "header", "Header") + for arch in self['header'].array('arch'): + self.seekByte(arch['offset'].value) + yield MachoFileBase(self, 'file[]', self.stream, None, arch['size'].value * 8) diff --git a/lib/hachoir_parser/video/__init__.py b/lib/hachoir_parser/video/__init__.py index 26f787e9..0989f6e7 100644 --- a/lib/hachoir_parser/video/__init__.py +++ b/lib/hachoir_parser/video/__init__.py @@ -3,4 +3,4 @@ from hachoir_parser.video.flv import FlvFile from hachoir_parser.video.mov import MovFile from hachoir_parser.video.mpeg_video import MPEGVideoFile from hachoir_parser.video.mpeg_ts import MPEG_TS - +from hachoir_parser.video.avchd import AVCHDINDX, AVCHDMOBJ, AVCHDMPLS, AVCHDCLPI diff --git a/lib/hachoir_parser/video/avchd.py b/lib/hachoir_parser/video/avchd.py new file mode 100644 index 00000000..9f8c855c --- /dev/null +++ b/lib/hachoir_parser/video/avchd.py @@ -0,0 +1,433 @@ +""" +Parser for AVCHD/Blu-ray formats + +Notice: This parser is based off reverse-engineering efforts. +It is NOT based on official specifications, and is subject to change as +more information becomes available. There's a lot of guesswork here, so if you find +that something disagrees with an official specification, please change it. 
+ +Notice: This parser has NOT been tested on Blu-ray disc data, only on files +taken from AVCHD camcorders. + +Author: Robert Xiao +Creation: December 30, 2010 + +References: +- Wikipedia: http://en.wikipedia.org/wiki/AVCHD +- European patent EP1821310: http://www.freepatentsonline.com/EP1821310.html +""" + +""" +File structure: +Root (/PRIVATE/AVCHD, /AVCHD, /, etc.) + AVCHDTN/: (AVCHD only) + THUMB.TDT: Thumbnail Data: stored as a series of 16KiB pages, where each thumbnail starts on a page boundary + THUMB.TID: Thumbnail Index (TIDX), unknown format + BDMV/: + INDEX.BDM|index.bdmv: Bluray Disc Metadata (INDX): Clip index file + MOVIEOBJ.BDM|MovieObject.bdmv: Bluray Disc Metadata (MOBJ): Clip description file + AUXDATA/: (Optional, Blu-ray only) + sound.bdmv: Sound(s) associated with HDMV Interactive Graphic streams applications + ?????.otf: Font(s) associated with Text subtitle applications + BACKUP/: (Optional) + [Copies of *.bdmv, CLIPINF/* and PLAYLIST/*] + CLIPINF/: + ?????.CPI/?????.clpi: Clip information (HDMV) + PLAYLIST/: + ?????.MPL/?????.mpls: Movie Playlist information (MPLS) + STREAM/: + ?????.MTS|?????.m2ts: BDAV MPEG-2 Transport Stream (video file) + SSIF/: (Blu-ray 3D only) + ?????.ssif: Stereoscopic Interleaved file + IISVPL/: (Optional?, AVCHD only?) + ?????.VPL: Virtual Playlist? 
def fromhex(field):
    """Read field.value as packed decimal digits.

    The value is rendered in hexadecimal and that text is then parsed as a
    base-10 number, e.g. 0x59 -> 59.  A value containing a hex digit above
    9 makes int() raise ValueError.
    """
    hex_text = '%x' % field.value
    return int(hex_text, 10)
class AVCHDMPLS_StreamEntry(FieldSet):
    """Stream entry: a size byte, an entry type and type-specific data.

    The size byte does not count itself, so the whole entry occupies
    size + 1 bytes.
    """
    ENTRYTYPE = {1:'PlayItem on disc',
                 2:'SubPath on disc',
                 3:'PlayItem in local storage',
                 4:'SubPath in local storage'}

    def createFields(self):
        yield UInt8(self, "size")
        entry_bytes = self['size'].value + 1
        self._size = entry_bytes * 8
        yield Enum(UInt8(self, "type"), self.ENTRYTYPE)

        if self['type'].value not in (1,3):
            # Types 2 and 4 (SubPath).  The patent says:
            #     ref_to_SubPath_id
            #     ref_to_SubClip_entry_id
            #     ref_to_Stream_PID_of_subClip
            # Sizes aren't given, though, so the format cannot be determined
            # without a sample; nothing more is parsed.
            return

        pid = UInt16(self, "pid", "PID of item in clip stream m2ts file")
        yield textHandler(pid, hexadecimal)
class AVCHDMPLS_PlayItem(FieldSet):
    """Play item: a 32-bit size, per-kind stream counts, then the streams.

    The size field does not count itself, so the item occupies size + 4
    bytes.  Streams are stored grouped by kind, in the same order as the
    count fields: video, audio, subtitle, interactive graphics.
    """
    # Prefix shared by each "<kind>_count" field and its "<kind>[]" entries.
    STREAM_KINDS = ("video", "audio", "subtitle", "ig")

    def createFields(self):
        yield UInt32(self, "size")
        self._size = (self['size'].value+4)*8
        yield UInt16(self, "unknown[]")
        yield UInt8(self, "video_count", "Number of video stream entries")
        # Fix: this description used to say "video" (copy/paste slip).
        yield UInt8(self, "audio_count", "Number of audio stream entries")
        yield UInt8(self, "subtitle_count", "Number of presentation graphics/text subtitle entries")
        yield UInt8(self, "ig_count", "Number of interactive graphics entries")
        yield RawBytes(self, "unknown[]", 8)
        for kind in self.STREAM_KINDS:
            for _ in xrange(self[kind + "_count"].value):
                yield AVCHDMPLS_Stream(self, kind + "[]")
class AVCHDChunkWithHeader(FieldSet):
    """Extension chunk: fixed header, 4-byte magic, then magic-specific chunks.

    The leading size field does not count itself (the chunk occupies
    size + 4 bytes).  The magic selects one of the generator functions in
    TYPES, which yields the nested chunks.
    """
    TYPES = {'IDEX': AVCHDIDEX,
             'PLEX': AVCHDPLEX,
             'CLEX': AVCHDCLEX,}

    def createFields(self):
        """Yield the header fields, then the chunks chosen by the magic.

        Raises ParserError if the two size fields disagree or if the magic
        is not listed in TYPES.
        """
        # Imported here because this module's top-level import list does not
        # include ParserError.
        from hachoir_core.field import ParserError

        yield UInt32(self, "size")
        self._size = (self['size'].value+4)*8
        # The descriptions below look like the values these fields are
        # expected to hold -- TODO confirm.
        yield UInt32(self, "unknown[]", "24")
        yield UInt32(self, "unknown[]", "1")
        yield UInt32(self, "unknown[]", "0x10000100")
        yield UInt32(self, "unknown[]", "24")
        yield UInt32(self, "size2")
        # This is validation of file content, so it must not be an assert:
        # asserts vanish under "python -O" and would let a bad chunk through.
        size, size2 = self['size'].value, self['size2'].value
        if size != size2 + 20:
            raise ParserError(
                "Inconsistent chunk sizes (size=%s, size2=%s)" % (size, size2))
        yield Bytes(self, "magic", 4)
        yield RawBytes(self, "unknown[]", 36)
        magic = self['magic'].value
        try:
            create_chunks = self.TYPES[magic]
        except KeyError:
            raise ParserError("Unknown extension chunk type: %r" % magic)
        for field in create_chunks(self):
            yield field
class AVCHDMOBJ(HachoirParser, RootSeekableFieldSet):
    """Parser for MOVIEOBJ.BDM files (magic "MOBJ0")."""
    endian = BIG_ENDIAN
    MAGIC = "MOBJ0"
    # NOTE(review): the Mach-O parser added in the same change writes its
    # min_size as a byte count times 8; the 8 below looks like a plain byte
    # count (the 8-byte "MOBJ0?00" header) -- confirm the intended unit.
    PARSER_TAGS = {
        "id": "bdmv_mobj",
        "category": "video",
        "file_ext": ("bdm","bdmv"),
        "magic": ((MAGIC, 0),),
        "min_size": 8, # MOBJ0?00
        "description": "MOVIEOBJ.BDM",
    }

    def __init__(self, stream, **args):
        total_size = stream.askSize(self)
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, total_size)
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True if the stream starts with MAGIC, else an error text."""
        expected = self.MAGIC
        if self.stream.readBytes(0, len(expected)) == expected:
            return True
        return "Invalid magic"

    def createFields(self):
        yield Bytes(self, "filetype", 4, "File type (MOBJ)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")
        yield RawBytes(self, "unknown[]", 32)
        yield UInt32(self, "size")
        yield UInt32(self, "unknown[]")
        yield UInt16(self, "count")
        yield textHandler(UInt32(self, "unknown_id"), hexadecimal)
        # One chunk fewer than "count" is read.
        # NOTE(review): confirm that skipping one entry is intentional.
        chunk_total = self['count'].value - 1
        for _ in xrange(chunk_total):
            yield AVCHDMOBJ_Chunk(self, "movie_object[]")
class AVCHDCLPI(HachoirParser, RootSeekableFieldSet):
    """Parser for clip information files (magic "HDMV0").

    After the 8-byte type/version header come five 32-bit offsets; each one
    is the byte position the parser seeks to before reading the matching
    chunk.
    """
    endian = BIG_ENDIAN
    MAGIC = "HDMV0"
    PARSER_TAGS = {
        "id": "bdmv_clpi",
        "category": "video",
        "file_ext": ("cpi","clpi"),
        "magic": ((MAGIC, 0),),
        "min_size": 8, # HDMV0?00
        "description": "HDMV",
    }

    def __init__(self, stream, **args):
        total_size = stream.askSize(self)
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, total_size)
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True if the stream starts with MAGIC, else an error text."""
        expected = self.MAGIC
        if self.stream.readBytes(0, len(expected)) == expected:
            return True
        return "Invalid magic"

    def createFields(self):
        yield Bytes(self, "filetype", 4, "File type (HDMV)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")

        # Chunk class found at offset[0] .. offset[4], in that order.
        chunk_classes = (
            AVCHDGenericChunk,
            AVCHDCLPI_1,
            AVCHDGenericChunk,
            AVCHDGenericChunk,
            AVCHDChunkWithHeader,
        )
        for _ in chunk_classes:
            yield UInt32(self, "offset[]")
        for index, chunk_class in enumerate(chunk_classes):
            self.seekByte(self['offset[%d]' % index].value)
            yield chunk_class(self, "chunk[]")
class AdaptationField(FieldSet):
    """Adaptation field of an MPEG transport stream packet.

    Layout: a one-byte length, then (only when the length is non-zero) a
    byte of flags, the optional PCR / OPCR / splice countdown announced by
    those flags, and finally whatever remains of the ``length`` bytes as a
    single raw "stuffing" field.

    The private data and extension announced by the "private_data" and
    "has_extension" flags are not decoded; their bytes end up in the
    trailing "stuffing" field.
    """

    def createFields(self):
        """Yield the fields of the adaptation field.

        Raises ParserError when the declared length is too small to hold
        the optional fields announced by the flags.
        """
        yield UInt8(self, "length")
        length = self["length"].value
        if not length:
            # A zero length means the field consists of the length byte
            # only: there is no flag byte, and reading one would swallow
            # the first byte of what follows in the packet.
            return

        yield Bit(self, "discontinuity_indicator")
        yield Bit(self, "random_access_indicator")
        yield Bit(self, "es_prio_indicator")
        yield Bit(self, "has_pcr")
        yield Bit(self, "has_opcr")
        yield Bit(self, "has_splice_point")
        yield Bit(self, "private_data")
        yield Bit(self, "has_extension")

        # PCR and OPCR are 48 bits each: 33-bit base, 6 reserved bits,
        # 9-bit extension. The reserved bits must be consumed, otherwise
        # the extension is decoded from the wrong bit positions.
        if self["has_pcr"].value:
            yield Bits(self, "pcr_base", 33)
            yield Bits(self, "pcr_reserved", 6)
            yield Bits(self, "pcr_ext", 9)

        if self["has_opcr"].value:
            yield Bits(self, "opcr_base", 33)
            yield Bits(self, "opcr_reserved", 6)
            yield Bits(self, "opcr_ext", 9)

        if self["has_splice_point"].value:
            yield Bits(self, "splice_countdown", 8)

        # "length" counts the bytes after the length byte itself, so the
        # whole field is (length + 1) bytes long.
        stuff_len = (length + 1) * 8 - self.current_size
        if stuff_len < 0:
            raise ParserError(
                "MPEG-2 TS: adaptation field length (%u) is too small "
                "for the fields announced by its flags" % length)
        if stuff_len:
            yield RawBits(self, "stuffing", stuff_len)
Packet(FieldSet): yield Bit(self, "has_adaptation") yield Bit(self, "has_payload") yield Bits(self, "counter", 4) - yield RawBytes(self, "payload", 184) + + if self["has_adaptation"].value: + yield AdaptationField(self, "adaptation_field") + if self["has_payload"].value: + yield RawBytes(self, "payload", 188-(self.current_size/8)) if self["has_error"].value: yield RawBytes(self, "error_correction", 16) @@ -54,6 +86,8 @@ class Packet(FieldSet): text = "Packet: PID %s" % self["pid"].display if self["payload_unit_start"].value: text += ", start of payload" + if self["has_adaptation"].value: + text += ", with adaptation field" return text def isValid(self): @@ -96,7 +130,7 @@ class MPEG_TS(Parser): sync = self.stream.searchBytes("\x47", self.current_size, self.current_size+204*8) if sync is None: raise ParserError("Unable to find synchronization byte") - elif sync: + elif sync-self.current_size: yield RawBytes(self, "incomplete_packet[]", (sync-self.current_size)//8) yield Packet(self, "packet[]")