diff --git a/CHANGES.md b/CHANGES.md
index 32f519e6..0fa591d1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -12,6 +12,7 @@
* Update Certifi to 2015.11.20.1 (385476b)
* Update chardet packages 2.3.0 (26982c5) to 2.3.0 (d7fae98)
* Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97)
+* Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435)
### 0.11.0 (2016-01-10 22:30:00 UTC)
diff --git a/lib/hachoir_metadata/jpeg.py b/lib/hachoir_metadata/jpeg.py
index a112318f..9e951672 100644
--- a/lib/hachoir_metadata/jpeg.py
+++ b/lib/hachoir_metadata/jpeg.py
@@ -24,6 +24,19 @@ class JpegMetadata(RootMetadata):
"FNumber": "camera_focal",
"BrightnessValue": "camera_brightness",
"MaxApertureValue": "camera_aperture",
+ "ISOSpeedRatings": "iso_speed_ratings",
+ "ExifVersion": "exif_version",
+ "DateTimeOriginal": "date_time_original",
+ "DateTimeDigitized": "date_time_digitized",
+ "CompressedBitsPerPixel": "compressed_bits_per_pixel",
+ "ShutterSpeedValue": "shutter_speed_value",
+ "ApertureValue": "aperture_value",
+ "ExposureBiasValue": "exposure_bias_value",
+ "FocalLength": "focal_length",
+ "FlashpixVersion": "flashpix_version",
+ "FocalPlaneXResolution": "focal_plane_x_resolution",
+ "FocalPlaneYResolution": "focal_plane_y_resolution",
+ "FocalLengthIn35mmFilm": "focal_length_in_35mm_film",
# Generic metadatas
"ImageDescription": "title",
@@ -32,6 +45,7 @@ class JpegMetadata(RootMetadata):
"PixelXDimension": "width",
"PixelYDimension": "height",
"UserComment": "comment",
+ "JPEGInterchangeFormatLength": "thumbnail_size",
}
IPTC_KEY = {
diff --git a/lib/hachoir_metadata/metadata.py b/lib/hachoir_metadata/metadata.py
index 37461c9d..dbdc411b 100644
--- a/lib/hachoir_metadata/metadata.py
+++ b/lib/hachoir_metadata/metadata.py
@@ -284,6 +284,10 @@ def extractMetadata(parser, quality=QUALITY_NORMAL):
metadata.extract(parser)
except HACHOIR_ERRORS, err:
error("Error during metadata extraction: %s" % unicode(err))
+ return None
+ except Exception, err:
+ error("Error during metadata extraction: %s" % unicode(err))
+ return None
if metadata:
metadata.mime_type = parser.mime_type
metadata.endian = endian_name[parser.endian]
diff --git a/lib/hachoir_metadata/qt/__init__.py b/lib/hachoir_metadata/qt/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/lib/hachoir_metadata/qt/dialog.ui b/lib/hachoir_metadata/qt/dialog.ui
new file mode 100644
index 00000000..498a8dae
--- /dev/null
+++ b/lib/hachoir_metadata/qt/dialog.ui
@@ -0,0 +1,64 @@
+
+ Form
+
+
+
+ 0
+ 0
+ 441
+ 412
+
+
+
+ hachoir-metadata
+
+
+ -
+
+
-
+
+
+ Open
+
+
+
+ -
+
+
+
+ 0
+ 0
+
+
+
+
+
+
+ -
+
+
+ true
+
+
+ false
+
+
+ 0
+
+
+ 0
+
+
+
+ -
+
+
+ Quit
+
+
+
+
+
+
+
+
diff --git a/lib/hachoir_metadata/register.py b/lib/hachoir_metadata/register.py
index 3cbde86d..97dcb559 100644
--- a/lib/hachoir_metadata/register.py
+++ b/lib/hachoir_metadata/register.py
@@ -102,6 +102,23 @@ def registerAllItems(meta):
meta.register(Data("bit_rate", 604, _("Bit rate"), text_handler=humanBitRate,
filter=NumberFilter(1, MAX_BIT_RATE), type=(int, long, float)))
meta.register(Data("aspect_ratio", 604, _("Aspect ratio"), type=(int, long, float)))
+ meta.register(Data("thumbnail_size", 604, _("Thumbnail size"), text_handler=humanFilesize, type=(int, long, float)))
+
+ meta.register(Data("iso_speed_ratings", 800, _("ISO speed rating")))
+ meta.register(Data("exif_version", 801, _("EXIF version")))
+ meta.register(Data("date_time_original", 802, _("Date-time original"), text_handler=humanDatetime,
+ filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime))
+ meta.register(Data("date_time_digitized", 803, _("Date-time digitized"), text_handler=humanDatetime,
+ filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime))
+ meta.register(Data("compressed_bits_per_pixel", 804, _("Compressed bits per pixel"), type=(int, long, float)))
+ meta.register(Data("shutter_speed_value", 805, _("Shutter speed"), type=(int, long, float)))
+ meta.register(Data("aperture_value", 806, _("Aperture")))
+ meta.register(Data("exposure_bias_value", 807, _("Exposure bias")))
+ meta.register(Data("focal_length", 808, _("Focal length")))
+ meta.register(Data("flashpix_version", 809, _("Flashpix version")))
+ meta.register(Data("focal_plane_x_resolution", 810, _("Focal plane width")))
+ meta.register(Data("focal_plane_y_resolution", 811, _("Focal plane height"), type=float))
+ meta.register(Data("focal_length_in_35mm_film", 812, _("Focal length in 35mm film")))
meta.register(Data("os", 900, _("OS"), type=unicode))
meta.register(Data("producer", 901, _("Producer"), type=unicode))
diff --git a/lib/hachoir_parser/archive/__init__.py b/lib/hachoir_parser/archive/__init__.py
index 46103c1a..d9d332b9 100644
--- a/lib/hachoir_parser/archive/__init__.py
+++ b/lib/hachoir_parser/archive/__init__.py
@@ -1,5 +1,6 @@
from hachoir_parser.archive.ace import AceFile
from hachoir_parser.archive.ar import ArchiveFile
+from hachoir_parser.archive.bomstore import BomFile
from hachoir_parser.archive.bzip2_parser import Bzip2Parser
from hachoir_parser.archive.cab import CabFile
from hachoir_parser.archive.gzip_parser import GzipParser
@@ -11,3 +12,4 @@ from hachoir_parser.archive.sevenzip import SevenZipParser
from hachoir_parser.archive.mar import MarFile
from hachoir_parser.archive.mozilla_ar import MozillaArchive
from hachoir_parser.archive.zlib import ZlibData
+from hachoir_parser.archive.prs_pak import PRSPakFile
diff --git a/lib/hachoir_parser/archive/bomstore.py b/lib/hachoir_parser/archive/bomstore.py
new file mode 100644
index 00000000..a8511501
--- /dev/null
+++ b/lib/hachoir_parser/archive/bomstore.py
@@ -0,0 +1,90 @@
+"""
+Apple BOMStorage parser.
+
+Used for Assets.Bom files by Interface Builder, and for .bom files by Installer.app.
+
+Documents:
+
+Author: Robert Xiao
+Created: 2015-05-14
+"""
+
+from hachoir_parser import HachoirParser
+from hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum,
+Bits, GenericInteger, Float32, Float64, UInt8, UInt32, UInt64, Bytes, NullBytes, RawBytes, String)
+from hachoir_core.endian import BIG_ENDIAN
+from hachoir_core.text_handler import displayHandler
+from hachoir_core.tools import humanDatetime
+from datetime import datetime, timedelta
+
+class BomTrailerEntry(FieldSet):
+ static_size = 64 # bits
+ def createFields(self):
+ yield UInt32(self, "offset")
+ yield UInt32(self, "size")
+ def createDescription(self):
+ return "Object at offset %d, size %d" % (self['offset'].value, self['size'].value)
+
+class BomTrailer(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "num_spaces", "Total number of entries, including blank entries")
+ nobj = self['/num_objects'].value
+ nspace = self['num_spaces'].value
+ for i in xrange(nobj+1):
+ yield BomTrailerEntry(self, "entry[]")
+ yield NullBytes(self, "blank_entries", (nspace - nobj - 1) * (BomTrailerEntry.static_size / 8))
+ yield UInt32(self, "num_trail")
+ ntrail = self['num_trail'].value
+ for i in xrange(ntrail):
+ yield BomTrailerEntry(self, "trail[]")
+
+ def createDescription(self):
+ return "Bom file trailer"
+
+class BomFile(HachoirParser, RootSeekableFieldSet):
+ endian = BIG_ENDIAN
+ MAGIC = "BOMStore"
+ PARSER_TAGS = {
+ "id": "bom_store",
+ "category": "archive",
+ "file_ext": ("bom","car"),
+ "magic": ((MAGIC, 0),),
+ "min_size": 32, # 32-byte header
+ "description": "Apple bill-of-materials file",
+ }
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Bytes(self, "magic", 8, "File magic (BOMStore)")
+ yield UInt32(self, "version") # ?
+ yield UInt32(self, "num_objects")
+ yield UInt32(self, "trailer_offset")
+ yield UInt32(self, "trailer_size")
+ yield UInt32(self, "header_offset")
+ yield UInt32(self, "header_size")
+
+ yield RawBytes(self, "object[]", 512-32, "Null object (size 0, offset 0)") # null object
+
+ self.seekByte(self['trailer_offset'].value)
+ yield BomTrailer(self, "trailer")
+
+ self.seekByte(self['header_offset'].value)
+ yield RawBytes(self, "header", self['header_size'].value)
+
+ for entry in self['trailer'].array('entry'):
+ if entry['size'].value == 0:
+ continue
+ self.seekByte(entry['offset'].value)
+ yield RawBytes(self, "object[]", entry['size'].value)
+
+ for entry in self['trailer'].array('trail'):
+ self.seekByte(entry['offset'].value)
+ yield RawBytes(self, "trail[]", entry['size'].value)
diff --git a/lib/hachoir_parser/archive/prs_pak.py b/lib/hachoir_parser/archive/prs_pak.py
new file mode 100644
index 00000000..85afd136
--- /dev/null
+++ b/lib/hachoir_parser/archive/prs_pak.py
@@ -0,0 +1,48 @@
+"""
+Parallel Realities Starfighter .pak file parser
+
+See http://www.parallelrealities.co.uk/projects/starfighter.php
+or svn://svn.debian.org/svn/pkg-games/packages/trunk/starfighter/
+
+Author: Oliver Gerlich
+"""
+
+from hachoir_parser import Parser
+from hachoir_core.field import (ParserError,
+ UInt32, String, SubFile, FieldSet)
+from hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_core.text_handler import filesizeHandler
+
+class FileEntry(FieldSet):
+ def createFields(self):
+ yield String(self, "filename", 56, truncate="\0")
+ yield filesizeHandler(UInt32(self, "size"))
+ yield SubFile(self, "data", self["size"].value, filename=self["filename"].value)
+
+ def createDescription(self):
+ return self["filename"].value
+
+class PRSPakFile(Parser):
+ PARSER_TAGS = {
+ "id": "prs_pak",
+ "category": "archive",
+ "file_ext": ("pak",),
+ "mime": (u"application/octet-stream",),
+ "min_size": 4*8, # just the identifier
+ "magic": (('PACK', 0),),
+ "description": "Parallel Realities Starfighter .pak archive",
+ }
+
+ endian = LITTLE_ENDIAN
+
+ def validate(self):
+ return (self.stream.readBytes(0, 4) == 'PACK'
+ and self["file[0]/size"].value >= 0
+ and len(self["file[0]/filename"].value) > 0)
+
+ def createFields(self):
+ yield String(self, "magic", 4)
+
+ # all remaining data must be file entries:
+ while self.current_size < self._size:
+ yield FileEntry(self, "file[]")
diff --git a/lib/hachoir_parser/archive/rar.py b/lib/hachoir_parser/archive/rar.py
index 2be5887c..cf924162 100644
--- a/lib/hachoir_parser/archive/rar.py
+++ b/lib/hachoir_parser/archive/rar.py
@@ -14,6 +14,7 @@ from hachoir_core.field import (StaticFieldSet, FieldSet,
from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_parser.common.msdos import MSDOSFileAttr32
+from datetime import timedelta
MAX_FILESIZE = 1000 * 1024 * 1024
@@ -63,9 +64,13 @@ def formatRARVersion(field):
"""
return "%u.%u" % divmod(field.value, 10)
-def commonFlags(s):
- yield Bit(s, "has_added_size", "Additional field indicating additional size")
- yield Bit(s, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
+def markerFlags(s):
+ yield UInt16(s, "flags", "Marker flags, always 0x1a21")
+
+commonFlags = (
+ (Bit, "is_ignorable", "Old versions of RAR should ignore this block when copying data"),
+ (Bit, "has_added_size", "Additional field indicating additional size"),
+)
class ArchiveFlags(StaticFieldSet):
format = (
@@ -79,8 +84,8 @@ class ArchiveFlags(StaticFieldSet):
(Bit, "is_passworded", "Needs a password to be decrypted"),
(Bit, "is_first_vol", "Whether it is the first volume"),
(Bit, "is_encrypted", "Whether the encryption version is present"),
- (NullBits, "internal", 6, "Reserved for 'internal use'")
- )
+ (NullBits, "internal", 4, "Reserved for 'internal use'"),
+ ) + commonFlags
def archiveFlags(s):
yield ArchiveFlags(s, "flags", "Archiver block flags")
@@ -135,29 +140,57 @@ class FileFlags(FieldSet):
yield Bit(self, "is_solid", "Information from previous files is used (solid flag)")
# The 3 following lines are what blocks more staticity
yield Enum(Bits(self, "dictionary_size", 3, "Dictionary size"), DICTIONARY_SIZE)
- for bit in commonFlags(self):
- yield bit
yield Bit(self, "is_large", "file64 operations needed")
yield Bit(self, "is_unicode", "Filename also encoded using Unicode")
yield Bit(self, "has_salt", "Has salt for encryption")
yield Bit(self, "uses_file_version", "File versioning is used")
- yield Bit(self, "has_ext_time", "Extra time ??")
+ yield Bit(self, "has_ext_time", "Extra time info present")
yield Bit(self, "has_ext_flags", "Extra flag ??")
+ for field in commonFlags:
+ yield field[0](self, *field[1:])
def fileFlags(s):
yield FileFlags(s, "flags", "File block flags")
+class ExtTimeFlags(FieldSet):
+ static_size = 16
+ def createFields(self):
+ for name in ['arctime', 'atime', 'ctime', 'mtime']:
+ yield Bits(self, "%s_count" % name, 2, "Number of %s bytes" % name)
+ yield Bit(self, "%s_onesec" % name, "Add one second to the timestamp?")
+ yield Bit(self, "%s_present" % name, "Is %s extra time present?" % name)
+
class ExtTime(FieldSet):
def createFields(self):
- yield textHandler(UInt16(self, "time_flags", "Flags for extended time"), hexadecimal)
- flags = self["time_flags"].value
- for index in xrange(4):
- rmode = flags >> ((3-index)*4)
- if rmode & 8:
- if index:
- yield TimeDateMSDOS32(self, "dos_time[]", "DOS Time")
- if rmode & 3:
- yield RawBytes(self, "remainder[]", rmode & 3, "Time remainder")
+ yield ExtTimeFlags(self, "time_flags")
+ for name in ['mtime', 'ctime', 'atime', 'arctime']:
+ if self['time_flags/%s_present' % name].value:
+ if name != 'mtime':
+ yield TimeDateMSDOS32(self, "%s" % name, "%s DOS timestamp" % name)
+ count = self['time_flags/%s_count' % name].value
+ if count:
+ yield Bits(self, "%s_remainder" % name, 8 * count, "%s extra precision time (in 100ns increments)" % name)
+
+ def createDescription(self):
+ out = 'Time extension'
+ pieces = []
+ for name in ['mtime', 'ctime', 'atime', 'arctime']:
+ if not self['time_flags/%s_present' % name].value:
+ continue
+
+ if name == 'mtime':
+ basetime = self['../ftime'].value
+ else:
+ basetime = self['%s' % name].value
+ delta = timedelta()
+ if self['time_flags/%s_onesec' % name].value:
+ delta += timedelta(seconds=1)
+ if '%s_remainder'%name in self:
+ delta += timedelta(microseconds=self['%s_remainder' % name].value / 10.0)
+ pieces.append('%s=%s' % (name, basetime + delta))
+ if pieces:
+ out += ': ' + ', '.join(pieces)
+ return out
def specialHeader(s, is_file):
yield filesizeHandler(UInt32(s, "compressed_size", "Compressed size (bytes)"))
@@ -188,9 +221,9 @@ def specialHeader(s, is_file):
# Start additional fields from unrar - file only
if is_file:
if s["flags/has_salt"].value:
- yield textHandler(UInt8(s, "salt", "Salt"), hexadecimal)
+ yield RawBytes(s, "salt", 8, "Encryption salt to increase security")
if s["flags/has_ext_time"].value:
- yield ExtTime(s, "extra_time", "Extra time info")
+ yield ExtTime(s, "extra_time")
def fileHeader(s):
return specialHeader(s, True)
@@ -203,9 +236,11 @@ def fileBody(s):
if size > 0:
yield RawBytes(s, "compressed_data", size, "File compressed data")
-def fileDescription(s):
- return "File entry: %s (%s)" % \
- (s["filename"].display, s["compressed_size"].display)
+def fileDescription(tag):
+ def _fileDescription(s):
+ return "%s: %s (%s)" % \
+ (tag, s["filename"].display, s["compressed_size"].display)
+ return _fileDescription
def newSubHeader(s):
return specialHeader(s, False)
@@ -216,36 +251,31 @@ class EndFlags(StaticFieldSet):
(Bit, "has_data_crc", "Whether a CRC value is present"),
(Bit, "rev_space"),
(Bit, "has_vol_number", "Whether the volume number is present"),
- (Bits, "unused[]", 4),
- (Bit, "has_added_size", "Additional field indicating additional size"),
- (Bit, "is_ignorable", "Old versions of RAR should ignore this block when copying data"),
- (Bits, "unused[]", 6),
- )
+ (NullBits, "unused[]", 10),
+ ) + commonFlags
def endFlags(s):
yield EndFlags(s, "flags", "End block flags")
-class BlockFlags(FieldSet):
+class BlockFlags(StaticFieldSet):
static_size = 16
- def createFields(self):
- yield textHandler(Bits(self, "unused[]", 8, "Unused flag bits"), hexadecimal)
- yield Bit(self, "has_added_size", "Additional field indicating additional size")
- yield Bit(self, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
- yield Bits(self, "unused[]", 6)
+ format = (
+ (NullBits, "unused[]", 14),
+ ) + commonFlags
class Block(FieldSet):
BLOCK_INFO = {
# None means 'use default function'
- 0x72: ("marker", "Archive header", None, None, None),
+ 0x72: ("marker", "File format marker", markerFlags, None, None),
0x73: ("archive_start", "Archive info", archiveFlags, archiveHeader, None),
- 0x74: ("file[]", fileDescription, fileFlags, fileHeader, fileBody),
- 0x75: ("comment[]", "Stray comment", None, commentHeader, commentBody),
+ 0x74: ("file[]", fileDescription("File entry"), fileFlags, fileHeader, fileBody),
+ 0x75: ("comment[]", "Comment", None, commentHeader, commentBody),
0x76: ("av_info[]", "Extra information", None, avInfoHeader, avInfoBody),
- 0x77: ("sub_block[]", "Stray subblock", None, newSubHeader, fileBody),
+ 0x77: ("sub_block[]", fileDescription("Subblock"), None, newSubHeader, fileBody),
0x78: ("recovery[]", "Recovery block", None, recoveryHeader, None),
0x79: ("signature", "Signature block", None, signatureHeader, None),
- 0x7A: ("new_sub_block[]", "Stray new-format subblock", fileFlags,
+ 0x7A: ("sub_block[]", fileDescription("New-format subblock"), fileFlags,
newSubHeader, fileBody),
0x7B: ("archive_end", "Archive end block", endFlags, None, None),
}
diff --git a/lib/hachoir_parser/archive/sevenzip.py b/lib/hachoir_parser/archive/sevenzip.py
index 7a0148f5..a64cac9a 100644
--- a/lib/hachoir_parser/archive/sevenzip.py
+++ b/lib/hachoir_parser/archive/sevenzip.py
@@ -7,15 +7,27 @@ Informations:
Author: Olivier SCHWAB
Creation date: 6 december 2006
+
+Updated by: Robert Xiao
+Date: February 26 2011
"""
from hachoir_parser import Parser
from hachoir_core.field import (Field, FieldSet, ParserError,
- GenericVector,
- Enum, UInt8, UInt32, UInt64,
- Bytes, RawBytes)
+ CompressedField, CString,
+ Enum, Bit, Bits, UInt8, UInt32, UInt64,
+ Bytes, RawBytes, TimestampWin64)
+from hachoir_core.stream import StringInputStream
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
+from hachoir_core.tools import createDict, alignValue
+from hachoir_parser.common.msdos import MSDOSFileAttr32
+
+try:
+ from pylzma import decompress as lzmadecompress
+ has_lzma = True
+except ImportError:
+ has_lzma = False
class SZUInt64(Field):
"""
@@ -38,167 +50,258 @@ class SZUInt64(Field):
self._size += 8
self.createValue = lambda: value
-ID_END, ID_HEADER, ID_ARCHIVE_PROPS, ID_ADD_STREAM_INFO, ID_MAIN_STREAM_INFO, \
-ID_FILES_INFO, ID_PACK_INFO, ID_UNPACK_INFO, ID_SUBSTREAMS_INFO, ID_SIZE, \
-ID_CRC, ID_FOLDER, ID_CODERS_UNPACK_SIZE, ID_NUM_UNPACK_STREAMS, \
-ID_EMPTY_STREAM, ID_EMPTY_FILE, ID_ANTI, ID_NAME, ID_CREATION_TIME, \
-ID_LAST_ACCESS_TIME, ID_LAST_WRITE_TIME, ID_WIN_ATTR, ID_COMMENT, \
-ID_ENCODED_HEADER = xrange(24)
+PROP_INFO = {
+ 0x00: ('kEnd', 'End-of-header marker'),
-ID_INFO = {
- ID_END : "End",
- ID_HEADER : "Header embedding another one",
- ID_ARCHIVE_PROPS : "Archive Properties",
- ID_ADD_STREAM_INFO : "Additional Streams Info",
- ID_MAIN_STREAM_INFO : "Main Streams Info",
- ID_FILES_INFO : "Files Info",
- ID_PACK_INFO : "Pack Info",
- ID_UNPACK_INFO : "Unpack Info",
- ID_SUBSTREAMS_INFO : "Substreams Info",
- ID_SIZE : "Size",
- ID_CRC : "CRC",
- ID_FOLDER : "Folder",
- ID_CODERS_UNPACK_SIZE: "Coders Unpacked size",
- ID_NUM_UNPACK_STREAMS: "Number of Unpacked Streams",
- ID_EMPTY_STREAM : "Empty Stream",
- ID_EMPTY_FILE : "Empty File",
- ID_ANTI : "Anti",
- ID_NAME : "Name",
- ID_CREATION_TIME : "Creation Time",
- ID_LAST_ACCESS_TIME : "Last Access Time",
- ID_LAST_WRITE_TIME : "Last Write Time",
- ID_WIN_ATTR : "Win Attributes",
- ID_COMMENT : "Comment",
- ID_ENCODED_HEADER : "Header holding encoded data info",
+ 0x01: ('kHeader', 'Archive header'),
+
+ 0x02: ('kArchiveProperties', 'Archive properties'),
+
+ 0x03: ('kAdditionalStreamsInfo', 'AdditionalStreamsInfo'),
+ 0x04: ('kMainStreamsInfo', 'MainStreamsInfo'),
+ 0x05: ('kFilesInfo', 'FilesInfo'),
+
+ 0x06: ('kPackInfo', 'PackInfo'),
+ 0x07: ('kUnPackInfo', 'UnPackInfo'),
+ 0x08: ('kSubStreamsInfo', 'SubStreamsInfo'),
+
+ 0x09: ('kSize', 'Size'),
+ 0x0A: ('kCRC', 'CRC'),
+
+ 0x0B: ('kFolder', 'Folder'),
+
+ 0x0C: ('kCodersUnPackSize', 'CodersUnPackSize'),
+ 0x0D: ('kNumUnPackStream', 'NumUnPackStream'),
+
+ 0x0E: ('kEmptyStream', 'EmptyStream'),
+ 0x0F: ('kEmptyFile', 'EmptyFile'),
+ 0x10: ('kAnti', 'Anti'),
+
+ 0x11: ('kName', 'Name'),
+ 0x12: ('kCreationTime', 'CreationTime'),
+ 0x13: ('kLastAccessTime', 'LastAccessTime'),
+ 0x14: ('kLastWriteTime', 'LastWriteTime'),
+ 0x15: ('kWinAttributes', 'WinAttributes'),
+ 0x16: ('kComment', 'Comment'),
+
+ 0x17: ('kEncodedHeader', 'Encoded archive header'),
}
+PROP_IDS = createDict(PROP_INFO, 0)
+PROP_DESC = createDict(PROP_INFO, 1)
+# create k* constants
+for k in PROP_IDS:
+ globals()[PROP_IDS[k]] = k
-class SkippedData(FieldSet):
+def ReadNextByte(self):
+ return self.stream.readBits(self.absolute_address + self.current_size, 8, self.endian)
+
+def PropID(self, name):
+ return Enum(UInt8(self, name), PROP_IDS)
+
+class SevenZipBitVector(FieldSet):
+ def __init__(self, parent, name, num, has_all_byte=False, **args):
+ FieldSet.__init__(self, parent, name, **args)
+ self.has_all_byte=has_all_byte
+ self.num = num
def createFields(self):
- yield Enum(UInt8(self, "id[]"), ID_INFO)
+ if self.has_all_byte:
+ yield Enum(UInt8(self, "all_defined"), {0:'False', 1:'True'})
+ if self['all_defined'].value:
+ return
+ nbytes = alignValue(self.num, 8)//8
+ ctr = 0
+ for i in xrange(nbytes):
+ for j in reversed(xrange(8)):
+ yield Bit(self, "bit[%d]"%(ctr+j))
+ ctr += 8
+ def isAllDefined(self):
+ return self.has_all_byte and self['all_defined'].value
+ def isDefined(self, index):
+ if self.isAllDefined():
+ return True
+ return self['bit[%d]'%index].value
+ def createValue(self):
+ if self.isAllDefined():
+ return range(self.num)
+ return [i for i in xrange(self.num) if self['bit[%d]'%i].value]
+ def createDisplay(self):
+ if self.isAllDefined():
+ return 'all'
+ return ','.join(str(i) for i in self.value)
+
+class ArchiveProperty(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
size = SZUInt64(self, "size")
yield size
- if size.value > 0:
- yield RawBytes(self, "data", size.value)
+ yield RawBytes(self, "data", size.value)
+ def createDescription(self):
+ return self['id'].display
-def waitForID(s, wait_id, wait_name="waited_id[]"):
- while not s.eof:
- addr = s.absolute_address+s.current_size
- uid = s.stream.readBits(addr, 8, LITTLE_ENDIAN)
- if uid == wait_id:
- yield Enum(UInt8(s, wait_name), ID_INFO)
- s.info("Found ID %s (%u)" % (ID_INFO[uid], uid))
- return
- s.info("Skipping ID %u!=%u" % (uid, wait_id))
- yield SkippedData(s, "skipped_id[]", "%u != %u" % (uid, wait_id))
+class ArchiveProperties(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ while not self.eof:
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end_marker")
+ break
+ yield ArchiveProperty(self, "prop[]")
-class HashDigest(FieldSet):
- def __init__(self, parent, name, num_digests, desc=None):
+class Digests(FieldSet):
+ def __init__(self, parent, name, num_digests, digest_desc=None, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.num_digests = num_digests
+ if digest_desc is None:
+ self.digest_desc = ['stream %d'%i for i in xrange(num_digests)]
+ else:
+ self.digest_desc = digest_desc
def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
- bytes = self.stream.readBytes(self.absolute_address, self.num_digests)
- if self.num_digests > 0:
- yield GenericVector(self, "defined[]", self.num_digests, UInt8, "bool")
- for index in xrange(self.num_digests):
- if bytes[index]:
- yield textHandler(UInt32(self, "hash[]",
- "Hash for digest %u" % index), hexadecimal)
+ yield PropID(self, "id")
+ definearr = SevenZipBitVector(self, "defined", self.num_digests, has_all_byte=True)
+ yield definearr
+ for index in definearr.value:
+ yield textHandler(UInt32(self, "digest[]",
+ "Digest for %s" % self.digest_desc[index]), hexadecimal)
class PackInfo(FieldSet):
def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
- # Very important, helps determine where the data is
- yield SZUInt64(self, "pack_pos", "Position of the packs")
- num = SZUInt64(self, "num_pack_streams")
+ yield PropID(self, "id")
+
+ yield SZUInt64(self, "pack_pos", "File offset to the packed data")
+ num = SZUInt64(self, "num_pack_streams", "Number of packed streams")
yield num
- num = num.value
-
- for field in waitForID(self, ID_SIZE, "size_marker"):
- yield field
-
- for size in xrange(num):
- yield SZUInt64(self, "pack_size[]")
while not self.eof:
- addr = self.absolute_address+self.current_size
- uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
- if uid == ID_END:
- yield Enum(UInt8(self, "end_marker"), ID_INFO)
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end_marker")
break
- elif uid == ID_CRC:
- yield HashDigest(self, "hash_digest", size)
+ elif uid == kSize:
+ yield PropID(self, "size_marker")
+ for index in xrange(num.value):
+ yield SZUInt64(self, "pack_size[]")
+ elif uid == kCRC:
+ yield Digests(self, "digests", num.value)
else:
- yield SkippedData(self, "skipped_data")
+ raise ParserError("Unexpected ID (%i)" % uid)
-def lzmaParams(value):
- param = value.value
- remainder = param / 9
- # Literal coder context bits
- lc = param % 9
- # Position state bits
- pb = remainder / 5
- # Literal coder position bits
- lp = remainder % 5
- return "lc=%u pb=%u lp=%u" % (lc, lp, pb)
+METHODS = {
+ "\0": "Copy",
+ "\3": "Delta",
+ "\4": "x86_BCJ",
+ "\5": "PowerPC",
+ "\6": "IA64",
+ "\7": "ARM_LE",
+ "\8": "ARMT_LE", # thumb
+ "\9": "SPARC",
+ "\x21": "LZMA2",
+ "\2\3\2": "Common-Swap-2",
+ "\2\3\4": "Common-Swap-4",
+ "\3\1\1": "7z-LZMA",
+ "\3\3\1\3": "7z-Branch-x86-BCJ",
+ "\3\3\1\x1b": "7z-Branch-x86-BCJ2",
+ "\3\3\2\5": "7z-Branch-PowerPC-BE",
+ "\3\3\3\1": "7z-Branch-Alpha-LE",
+ "\3\3\4\1": "7z-Branch-IA64-LE",
+ "\3\3\5\1": "7z-Branch-ARM-LE",
+ "\3\3\6\5": "7z-Branch-M68-BE",
+ "\3\3\7\1": "7z-Branch-ARMT-LE",
+ "\3\3\8\5": "7z-Branch-SPARC-BE",
+ "\3\4\1": "7z-PPMD",
+ "\3\x7f\1": "7z-Experimental",
+ "\4\0": "Reserved",
+ "\4\1\0": "Zip-Copy",
+ "\4\1\1": "Zip-Shrink",
+ "\4\1\6": "Zip-Implode",
+ "\4\1\x08": "Zip-Deflate",
+ "\4\1\x09": "Zip-Deflate64",
+ "\4\1\x10": "Zip-BZip2",
+ "\4\1\x14": "Zip-LZMA",
+ "\4\1\x60": "Zip-JPEG",
+ "\4\1\x61": "Zip-WavPack",
+ "\4\1\x62": "Zip-PPMD",
+ "\4\1\x63": "Zip-wzAES",
+ "\4\2\2": "BZip2",
+ "\4\3\1": "RAR-15",
+ "\4\3\2": "RAR-20",
+ "\4\3\3": "RAR-29",
+ "\4\4\1": "Arj3",
+ "\4\4\2": "Arj4",
+ "\4\5": "Z",
+ "\4\6": "LZH",
+ "\4\7": "7z-Reserved",
+ "\4\8": "CAB",
+ "\4\9\1": "NSIS-Deflate",
+ "\4\9\1": "NSIS-BZip2",
+ "\6\0": "Crypto-Reserved",
+ "\6\1\x00": "Crypto-AES128-ECB",
+ "\6\1\x01": "Crypto-AES128-CBC",
+ "\6\1\x02": "Crypto-AES128-CFB",
+ "\6\1\x03": "Crypto-AES128-OFB",
+ "\6\1\x40": "Crypto-AES192-ECB",
+ "\6\1\x41": "Crypto-AES192-CBC",
+ "\6\1\x42": "Crypto-AES192-CFB",
+ "\6\1\x43": "Crypto-AES192-OFB",
+ "\6\1\x80": "Crypto-AES256-ECB",
+ "\6\1\x81": "Crypto-AES256-CBC",
+ "\6\1\x82": "Crypto-AES256-CFB",
+ "\6\1\x83": "Crypto-AES256-OFB",
+ "\6\1\xc0": "Crypto-AES-ECB",
+ "\6\1\xc1": "Crypto-AES-CBC",
+ "\6\1\xc2": "Crypto-AES-CFB",
+ "\6\1\xc3": "Crypto-AES-OFB",
+ "\6\7": "Crypto-Reserved",
+ "\6\x0f": "Crypto-Reserved",
+ "\6\xf0": "Crypto-Misc",
+ "\6\xf1\1\1": "Crypto-Zip",
+ "\6\xf1\3\2": "Crypto-RAR-Unknown",
+ "\6\xf1\3\3": "Crypto-RAR-29", # AES128
+ "\6\xf1\7\1": "Crypto-7z", # AES256
+ "\7\0": "Hash-None",
+ "\7\1": "Hash-CRC",
+ "\7\2": "Hash-SHA1",
+ "\7\3": "Hash-SHA256",
+ "\7\4": "Hash-SHA384",
+ "\7\5": "Hash-SHA512",
+ "\7\xf0": "Hash-Misc",
+ "\7\xf1\3\3": "Hash-RAR-29", # modified SHA1
+ "\7\xf1\7\1": "Hash-7z", # SHA256
+}
-class CoderID(FieldSet):
- CODECS = {
- # Only 2 methods ... and what about PPMD ?
- "\0" : "copy",
- "\3\1\1": "lzma",
- }
+class Coder(FieldSet):
def createFields(self):
- byte = UInt8(self, "id_size")
- yield byte
- byte = byte.value
- self.info("ID=%u" % byte)
- size = byte & 0xF
+ yield Bits(self, "id_size", 4)
+ yield Bit(self, "is_not_simple", "If unset, stream setup is simple")
+ yield Bit(self, "has_attribs", "Are there compression properties attached?")
+ yield Bit(self, "unused[]")
+ yield Bit(self, "is_not_last_method", "Are there more methods after this one in the alternative method list?")
+ size = self['id_size'].value
if size > 0:
- name = self.stream.readBytes(self.absolute_address+self.current_size, size)
- if name in self.CODECS:
- name = self.CODECS[name]
- self.info("Codec is %s" % name)
- else:
- self.info("Undetermined codec %s" % name)
- name = "unknown"
- yield RawBytes(self, name, size)
- #yield textHandler(Bytes(self, "id", size), lambda: name)
- if byte & 0x10:
+ yield Enum(RawBytes(self, "id", size), METHODS)
+ if self['is_not_simple'].value:
yield SZUInt64(self, "num_stream_in")
yield SZUInt64(self, "num_stream_out")
self.info("Streams: IN=%u OUT=%u" % \
(self["num_stream_in"].value, self["num_stream_out"].value))
- if byte & 0x20:
- size = SZUInt64(self, "properties_size[]")
+ if self['has_attribs'].value:
+ size = SZUInt64(self, "properties_size")
yield size
- if size.value == 5:
- #LzmaDecodeProperties@LZMAStateDecode.c
- yield textHandler(UInt8(self, "parameters"), lzmaParams)
- yield filesizeHandler(UInt32(self, "dictionary_size"))
- elif size.value > 0:
- yield RawBytes(self, "properties[]", size.value)
+ yield RawBytes(self, "properties", size.value)
+ def _get_num_streams(self, direction):
+ if self['is_not_simple'].value:
+ return self['num_stream_%s'%direction].value
+ return 1
+ in_streams = property(lambda self: self._get_num_streams('in'))
+ out_streams = property(lambda self: self._get_num_streams('out'))
-class CoderInfo(FieldSet):
- def __init__(self, parent, name, desc=None):
- FieldSet.__init__(self, parent, name, desc)
- self.in_streams = 1
- self.out_streams = 1
+class CoderList(FieldSet):
def createFields(self):
- # The real ID
- addr = self.absolute_address + self.current_size
- b = self.parent.stream.readBits(addr, 8, LITTLE_ENDIAN)
- cid = CoderID(self, "coder_id")
- yield cid
- if b&0x10: # Work repeated, ...
- self.in_streams = cid["num_stream_in"].value
- self.out_streams = cid["num_stream_out"].value
-
- # Skip other IDs
- while b&0x80:
- addr = self.absolute_address + self.current_size
- b = self.parent.stream.readBits(addr, 8, LITTLE_ENDIAN)
- yield CoderID(self, "unused_codec_id[]")
+ while not self.eof:
+ field = Coder(self, "coder[]")
+ yield field
+ if not field['is_not_last_method'].value:
+ break
class BindPairInfo(FieldSet):
def createFields(self):
@@ -208,45 +311,46 @@ class BindPairInfo(FieldSet):
self.info("Indexes: IN=%u OUT=%u" % \
(self["in_index"].value, self["out_index"].value))
-class FolderItem(FieldSet):
- def __init__(self, parent, name, desc=None):
- FieldSet.__init__(self, parent, name, desc)
- self.in_streams = 0
- self.out_streams = 0
-
+class Folder(FieldSet):
def createFields(self):
yield SZUInt64(self, "num_coders")
num = self["num_coders"].value
self.info("Folder: %u codecs" % num)
- # Coders info
- for index in xrange(num):
- ci = CoderInfo(self, "coder_info[]")
- yield ci
- self.in_streams += ci.in_streams
- self.out_streams += ci.out_streams
+ in_streams = out_streams = 0
- # Bin pairs
- self.info("out streams: %u" % self.out_streams)
- for index in xrange(self.out_streams-1):
+ # Coder info
+ for index in xrange(num):
+ ci = CoderList(self, "coders[]")
+ yield ci
+ in_streams += ci['coder[0]'].in_streams
+ out_streams += ci['coder[0]'].out_streams
+ self._in_streams = in_streams
+ self._out_streams = out_streams
+
+ # Bind pairs
+ self.info("out streams: %u" % out_streams)
+ for index in xrange(out_streams-1):
yield BindPairInfo(self, "bind_pair[]")
# Packed streams
# @todo: Actually find mapping
- packed_streams = self.in_streams - self.out_streams + 1
- if packed_streams == 1:
- pass
- else:
+ packed_streams = in_streams - out_streams + 1
+ if packed_streams > 1:
for index in xrange(packed_streams):
yield SZUInt64(self, "pack_stream[]")
-
+ def _get_num_streams(self, direction):
+ list(self)
+ return getattr(self, '_'+direction+'_streams')
+ in_streams = property(lambda self: self._get_num_streams('in'))
+ out_streams = property(lambda self: self._get_num_streams('out'))
class UnpackInfo(FieldSet):
def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
- # Wait for synch
- for field in waitForID(self, ID_FOLDER, "folder_marker"):
- yield field
+ yield PropID(self, "id")
+
+ yield PropID(self, "folder_marker")
+ assert self['folder_marker'].value == kFolder
yield SZUInt64(self, "num_folders")
# Get generic info
@@ -254,97 +358,277 @@ class UnpackInfo(FieldSet):
self.info("%u folders" % num)
yield UInt8(self, "is_external")
- # Read folder items
- for folder_index in xrange(num):
- yield FolderItem(self, "folder_item[]")
+ if self['is_external'].value:
+ yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
+ else:
+ # Read folder items
+ for folder_index in xrange(num):
+ yield Folder(self, "folder[]")
- # Get unpack sizes for each coder of each folder
- for field in waitForID(self, ID_CODERS_UNPACK_SIZE, "coders_unpsize_marker"):
- yield field
+ yield PropID(self, "unpacksize_marker")
+ assert self['unpacksize_marker'].value == kCodersUnPackSize
for folder_index in xrange(num):
- folder_item = self["folder_item[%u]" % folder_index]
- for index in xrange(folder_item.out_streams):
- #yield UInt8(self, "unpack_size[]")
- yield SZUInt64(self, "unpack_size[]")
+ folder = self["folder[%u]" % folder_index]
+ for index in xrange(folder.out_streams):
+ yield SZUInt64(self, "unpack_size[%d][%d]"%(folder_index,index))
# Extract digests
while not self.eof:
- addr = self.absolute_address+self.current_size
- uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
- if uid == ID_END:
- yield Enum(UInt8(self, "end_marker"), ID_INFO)
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end_marker")
break
- elif uid == ID_CRC:
- yield HashDigest(self, "hash_digest", num)
+ elif uid == kCRC:
+ yield Digests(self, "digests", num)
else:
- yield SkippedData(self, "skip_data")
+ raise ParserError("Unexpected ID (%i)" % uid)
class SubStreamInfo(FieldSet):
def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
- raise ParserError("SubStreamInfo not implemented yet")
-
-class EncodedHeader(FieldSet):
- def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
+ yield PropID(self, "id")
+ num_folders = self['../unpack_info/num_folders'].value
+ num_unpackstreams = [1]*num_folders
while not self.eof:
- addr = self.absolute_address+self.current_size
- uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
- if uid == ID_END:
- yield Enum(UInt8(self, "end_marker"), ID_INFO)
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end_marker")
break
- elif uid == ID_PACK_INFO:
- yield PackInfo(self, "pack_info", ID_INFO[ID_PACK_INFO])
- elif uid == ID_UNPACK_INFO:
- yield UnpackInfo(self, "unpack_info", ID_INFO[ID_UNPACK_INFO])
- elif uid == ID_SUBSTREAMS_INFO:
- yield SubStreamInfo(self, "substreams_info", ID_INFO[ID_SUBSTREAMS_INFO])
+ elif uid == kNumUnPackStream:
+ yield PropID(self, "num_unpackstream_marker")
+ for i in xrange(num_folders):
+ field = SZUInt64(self, "num_unpackstreams[]")
+ yield field
+ num_unpackstreams[i] = field.value
+ elif uid == kSize:
+ yield PropID(self, "size_marker")
+ for i in xrange(num_folders):
+ # The last substream's size is the stream size minus the other substreams.
+ for j in xrange(num_unpackstreams[i]-1):
+ yield SZUInt64(self, "unpack_size[%d][%d]"%(i,j))
+ elif uid == kCRC:
+ digests = []
+ for i in xrange(num_folders):
+ if num_unpackstreams[i] == 1 and 'digests' in self['../unpack_info']:
+ continue
+ for j in xrange(num_unpackstreams[i]):
+ digests.append('folder %i, stream %i'%(i, j))
+ yield Digests(self, "digests", len(digests), digests)
else:
- self.info("Unexpected ID (%i)" % uid)
- break
+ raise ParserError("Unexpected ID (%i)" % uid)
-class IDHeader(FieldSet):
+class StreamsInfo(FieldSet):
def createFields(self):
- yield Enum(UInt8(self, "id"), ID_INFO)
- ParserError("IDHeader not implemented")
+ yield PropID(self, "id")
+ while not self.eof:
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end")
+ break
+ elif uid == kPackInfo:
+ yield PackInfo(self, "pack_info", PROP_DESC[uid])
+ elif uid == kUnPackInfo:
+ yield UnpackInfo(self, "unpack_info", PROP_DESC[uid])
+ elif uid == kSubStreamsInfo:
+ yield SubStreamInfo(self, "substreams_info", PROP_DESC[uid])
+ else:
+ raise ParserError("Unexpected ID (%i)" % uid)
+
+class EncodedHeader(StreamsInfo):
+ pass
+
+class EmptyStreamProperty(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "size")
+ yield SevenZipBitVector(self, "vec", self['../num_files'].value)
+ def createValue(self):
+ return self['vec'].value
+ def createDisplay(self):
+ return self['vec'].display
+
+class EmptyFileProperty(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "size")
+ empty_streams = self['../empty_streams/vec'].value
+ yield SevenZipBitVector(self, "vec", len(empty_streams))
+ def createValue(self):
+ empty_streams = self['../empty_streams/vec'].value
+ return [empty_streams[i] for i in self['vec'].value]
+ def createDisplay(self):
+ return ','.join(str(i) for i in self.value)
+
+class FileTimeProperty(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "size")
+ definearr = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True)
+ yield definearr
+ yield UInt8(self, "is_external")
+ if self['is_external'].value:
+ yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
+ else:
+ for index in definearr.value:
+ yield TimestampWin64(self, "timestamp[%d]"%index)
+
+class FileNames(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "size")
+ yield UInt8(self, "is_external")
+ if self['is_external'].value:
+ yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
+ else:
+ for index in xrange(self['../num_files'].value):
+ yield CString(self, "name[%d]"%index, charset="UTF-16-LE")
+
+class FileAttributes(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "size")
+ definearr = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True)
+ yield definearr
+ yield UInt8(self, "is_external")
+ if self['is_external'].value:
+ yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
+ else:
+ for index in definearr.value:
+ yield MSDOSFileAttr32(self, "attributes[%d]"%index)
+
+class FilesInfo(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ yield SZUInt64(self, "num_files")
+ while not self.eof:
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end_marker")
+ break
+ elif uid == kEmptyStream:
+ yield EmptyStreamProperty(self, "empty_streams")
+ elif uid == kEmptyFile:
+ yield EmptyFileProperty(self, "empty_files")
+ elif uid == kAnti:
+ yield EmptyFileProperty(self, "anti_files")
+ elif uid == kCreationTime:
+ yield FileTimeProperty(self, "creation_time")
+ elif uid == kLastAccessTime:
+ yield FileTimeProperty(self, "access_time")
+ elif uid == kLastWriteTime:
+ yield FileTimeProperty(self, "modified_time")
+ elif uid == kName:
+ yield FileNames(self, "filenames")
+ elif uid == kWinAttributes:
+ yield FileAttributes(self, "attributes")
+ else:
+ yield ArchiveProperty(self, "prop[]")
+
+class Header(FieldSet):
+ def createFields(self):
+ yield PropID(self, "id")
+ while not self.eof:
+ uid = ReadNextByte(self)
+ if uid == kEnd:
+ yield PropID(self, "end")
+ break
+ elif uid == kArchiveProperties:
+ yield ArchiveProperties(self, "props", PROP_DESC[uid])
+ elif uid == kAdditionalStreamsInfo:
+ yield StreamsInfo(self, "additional_streams", PROP_DESC[uid])
+ elif uid == kMainStreamsInfo:
+ yield StreamsInfo(self, "main_streams", PROP_DESC[uid])
+ elif uid == kFilesInfo:
+ yield FilesInfo(self, "files_info", PROP_DESC[uid])
+ else:
+ raise ParserError("Unexpected ID %u" % uid)
class NextHeader(FieldSet):
def __init__(self, parent, name, desc="Next header"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8*self["/signature/start_hdr/next_hdr_size"].value
- # Less work, as much interpretable information as the other
- # version... what an obnoxious format
- def createFields2(self):
- yield Enum(UInt8(self, "header_type"), ID_INFO)
- yield RawBytes(self, "header_data", self._size-1)
def createFields(self):
- uid = self.stream.readBits(self.absolute_address, 8, LITTLE_ENDIAN)
- if uid == ID_HEADER:
- yield IDHeader(self, "header", ID_INFO[ID_HEADER])
- elif uid == ID_ENCODED_HEADER:
- yield EncodedHeader(self, "encoded_hdr", ID_INFO[ID_ENCODED_HEADER])
- # Game Over: this is usually encoded using LZMA, not copy
- # See SzReadAndDecodePackedStreams/SzDecode being called with the
- # data position from "/next_hdr/encoded_hdr/pack_info/pack_pos"
- # We should process further, yet we can't...
+ uid = ReadNextByte(self)
+ if uid == kHeader:
+ yield Header(self, "header", PROP_DESC[uid])
+ elif uid == kEncodedHeader:
+ yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid])
else:
- ParserError("Unexpected ID %u" % uid)
- size = self._size - self.current_size
- if size > 0:
- yield RawBytes(self, "next_hdr_data", size//8, "Next header's data")
+ raise ParserError("Unexpected ID %u" % uid)
+class NextHeaderParser(Parser):
+ PARSER_TAGS = {
+ }
+ endian = LITTLE_ENDIAN
+
+ def createFields(self):
+ uid = ReadNextByte(self)
+ if uid == kHeader:
+ yield Header(self, "header", PROP_DESC[uid])
+ elif uid == kEncodedHeader:
+ yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid])
+ else:
+ raise ParserError("Unexpected ID %u" % uid)
+
+ def validate(self):
+ return True
+
+class CompressedData(Bytes):
+ def __init__(self, parent, name, length, decompressor, description=None,
+ parser=None, filename=None, mime_type=None, parser_class=None):
+ if filename:
+ if not isinstance(filename, unicode):
+ filename = makePrintable(filename, "ISO-8859-1")
+ if not description:
+ description = 'File "%s" (%s)' % (filename, humanFilesize(length))
+ Bytes.__init__(self, parent, name, length, description)
+ self.setupInputStream(decompressor, parser, filename, mime_type, parser_class)
+
+ def setupInputStream(self, decompressor, parser, filename, mime_type, parser_class):
+ def createInputStream(cis, **args):
+ tags = args.setdefault("tags",[])
+ if parser_class:
+ tags.append(( "class", parser_class ))
+ if parser is not None:
+ tags.append(( "id", parser.PARSER_TAGS["id"] ))
+ if mime_type:
+ tags.append(( "mime", mime_type ))
+ if filename:
+ tags.append(( "filename", filename ))
+ print args
+ return StringInputStream(decompressor(self.value), **args)
+ self.setSubIStream(createInputStream)
+
+def get_header_decompressor(self):
+ unpack_info = self['/next_hdr/encoded_hdr/unpack_info']
+ assert unpack_info['num_folders'].value == 1
+ coder = unpack_info['folder[0]/coders[0]/coder[0]']
+ method = METHODS[coder['id'].value]
+ if method == 'Copy':
+ return lambda data: data
+ elif method == '7z-LZMA':
+ props = coder['properties'].value
+ length = unpack_info['unpack_size[0][0]'].value
+ return lambda data: lzmadecompress(props+data, maxlength=length)
+
+def get_header_field(self, name, size, description=None):
+ decompressor = get_header_decompressor(self)
+ if decompressor is None:
+ return RawBytes(self, name, size, description=description)
+ return CompressedData(self, name, size, decompressor, description=description, parser_class=NextHeaderParser)
+
class Body(FieldSet):
def __init__(self, parent, name, desc="Body data"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8*self["/signature/start_hdr/next_hdr_offset"].value
def createFields(self):
- if "encoded_hdr" in self["/next_hdr/"]:
+ if "encoded_hdr" in self["/next_hdr"]:
pack_size = sum([s.value for s in self.array("/next_hdr/encoded_hdr/pack_info/pack_size")])
body_size = self["/next_hdr/encoded_hdr/pack_info/pack_pos"].value
- yield RawBytes(self, "compressed_data", body_size, "Compressed data")
+ if body_size:
+ yield RawBytes(self, "compressed_data", body_size, "Compressed data")
# Here we could check if copy method was used to "compress" it,
# but this never happens, so just output "compressed file info"
- yield RawBytes(self, "compressed_file_info", pack_size,
+ yield get_header_field(self, "compressed_file_info", pack_size,
"Compressed file information")
size = (self._size//8) - pack_size - body_size
if size > 0:
@@ -372,13 +656,14 @@ class SignatureHeader(FieldSet):
yield StartHeader(self, "start_hdr", "Start header")
class SevenZipParser(Parser):
+ MAGIC = "7z\xbc\xaf\x27\x1c"
PARSER_TAGS = {
"id": "7zip",
"category": "archive",
"file_ext": ("7z",),
"mime": (u"application/x-7z-compressed",),
"min_size": 32*8,
- "magic": (("7z\xbc\xaf\x27\x1c", 0),),
+ "magic": ((MAGIC, 0),),
"description": "Compressed archive in 7z format"
}
endian = LITTLE_ENDIAN
@@ -389,13 +674,12 @@ class SevenZipParser(Parser):
yield NextHeader(self, "next_hdr")
def validate(self):
- if self.stream.readBytes(0,6) != "7z\xbc\xaf'\x1c":
+ if self.stream.readBytes(0,len(self.MAGIC)) != self.MAGIC:
return "Invalid signature"
return True
def createContentSize(self):
- size = self["/signature/start_hdr/next_hdr_offset"].value
- size += self["/signature/start_hdr/next_hdr_size"].value
- size += 12 # Signature size
- size += 20 # Start header size
- return size*8
+ size = self["/signature/start_hdr/next_hdr_offset"].value*8
+ size += self["/signature/start_hdr/next_hdr_size"].value*8
+ size += SignatureHeader.static_size
+ return size
diff --git a/lib/hachoir_parser/archive/zip.py b/lib/hachoir_parser/archive/zip.py
index 8271ac93..7d256d6e 100644
--- a/lib/hachoir_parser/archive/zip.py
+++ b/lib/hachoir_parser/archive/zip.py
@@ -329,6 +329,9 @@ class ZipFile(Parser):
u"application/x-jar": "jar",
u"application/java-archive": "jar",
+ # Android APK
+ u"application/vnd.android.package-archive": "apk",
+
# OpenOffice 1.0
u"application/vnd.sun.xml.calc": "sxc",
u"application/vnd.sun.xml.draw": "sxd",
diff --git a/lib/hachoir_parser/audio/__init__.py b/lib/hachoir_parser/audio/__init__.py
index 1cc33a23..a0b48c11 100644
--- a/lib/hachoir_parser/audio/__init__.py
+++ b/lib/hachoir_parser/audio/__init__.py
@@ -1,6 +1,7 @@
from hachoir_parser.audio.aiff import AiffFile
from hachoir_parser.audio.au import AuFile
from hachoir_parser.audio.itunesdb import ITunesDBFile
+from hachoir_parser.audio.ipod_playcounts import PlayCountFile
from hachoir_parser.audio.midi import MidiFile
from hachoir_parser.audio.mpeg_audio import MpegAudioFile
from hachoir_parser.audio.real_audio import RealAudioFile
diff --git a/lib/hachoir_parser/audio/ipod_playcounts.py b/lib/hachoir_parser/audio/ipod_playcounts.py
new file mode 100644
index 00000000..c7e6919c
--- /dev/null
+++ b/lib/hachoir_parser/audio/ipod_playcounts.py
@@ -0,0 +1,60 @@
+"""
+iPod Play Count parser.
+
+Documentation:
+- http://ipl.derpapst.org/wiki/ITunesDB/Play_Counts_File
+ (formerly known as http://ipodlinux.org)
+
+Author: m42i
+Creation date: 01 March 2014
+"""
+
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet,
+ UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
+ String, Float32, NullBytes, Enum, RawBytes)
+from hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_core.tools import humanDuration
+from hachoir_core.text_handler import displayHandler, filesizeHandler
+
+class PlayCountFile(Parser):
+ PARSER_TAGS = {
+ "id": "playcounts",
+ "category": "audio",
+ "min_size": 44*8,
+ "magic": (('mhdp',0),),
+ "description": "iPod Play Counts file"
+ }
+
+ endian = LITTLE_ENDIAN
+
+ def validate(self):
+ return self.stream.readBytes(0, 4) == 'mhdp'
+
+ def createFields(self):
+ yield String(self, "header_id", 4, "Play Count Header Markup (\"mhdp\")", charset="ISO-8859-1")
+ yield UInt32(self, "header_length", "Header Length")
+ yield UInt32(self, "entry_length", "Single Entry Length")
+ yield UInt32(self, "entry_number", "Number of Songs on iPod")
+ padding = self.seekByte(self["header_length"].value, "header padding")
+ if padding:
+ yield padding
+
+ for i in xrange(self["entry_number"].value):
+ yield PlayCountEntry(self, "track[]")
+
+
+class PlayCountEntry(FieldSet):
+ def __init__(self, *args, **kw):
+ FieldSet.__init__(self, *args, **kw)
+ self._size = 28*8
+
+ def createFields(self):
+ yield UInt32(self, "play_count", "Playcount since last sync")
+ yield TimestampMac32(self, "last_played", "Time of the last play of the track")
+ yield UInt32(self, "audio_bookmark", "Last position in milliseconds")
+ yield UInt32(self, "rating", "Rating in steps of 20 up to 100")
+ yield UInt32(self, "unknown", "unknown")
+ yield UInt32(self, "skip_count", "Number of skips since last sync")
+ yield TimestampMac32(self, "last_skipped", "Time of the last skip")
+
diff --git a/lib/hachoir_parser/audio/itunesdb.py b/lib/hachoir_parser/audio/itunesdb.py
index a70d9cb0..9390cbcd 100644
--- a/lib/hachoir_parser/audio/itunesdb.py
+++ b/lib/hachoir_parser/audio/itunesdb.py
@@ -2,7 +2,7 @@
iPod iTunesDB parser.
Documentation:
-- http://ipodlinux.org/ITunesDB
+- http://ipl.derpapst.org/wiki/ITunesDB/iTunesDB_File
Author: Romain HERAULT
Creation date: 19 august 2006
@@ -71,13 +71,27 @@ class DataObject(FieldSet):
19:"Show (for TV Shows only)",
20:"Episode",
21:"TV Network",
+ 22:"Album-Artist",
+ 23:"Artist for Sorting",
+ 24:"List of keywords pertaining to track",
+ 25:"Locale for TV show(?)",
+ 27:"Title for Sorting",
+ 28:"Album for Sorting",
+ 29:"Album-Artist for Sorting",
+ 30:"Composer for Sorting",
+ 31:"Show for Sorting",
+ # 32:"Unknown binary field for video tracks",
50:"Smart Playlist Data",
51:"Smart Playlist Rules",
52:"Library Playlist Index",
- 100:"Column info",
+ 53:"Library Playlist Index letter in jump table",
+ 100:"Column Sizing Info as well as an order indicator in playlists.",
+ 102:"For iPhone",
200:"Album name (for album descriptions)",
201:"Album artist (for album descriptions)",
- 202:"Album sort artist (for album descriptions)"
+ 202:"Album sort artist (for album descriptions)",
+ 203:"Podcast URL in Album List",
+ 204:"TV Show in Album List"
}
mhod52_sort_index_type_name={
@@ -97,15 +111,7 @@ class DataObject(FieldSet):
yield UInt32(self, "header_length", "Header Length")
yield UInt32(self, "entry_length", "Entry Length")
yield Enum(UInt32(self, "type", "type"),self.type_name)
- if(self["type"].value<15) or (self["type"].value >= 200):
- yield UInt32(self, "unknown[]")
- yield UInt32(self, "unknown[]")
- yield UInt32(self, "position", "Position")
- yield UInt32(self, "length", "String Length in bytes")
- yield UInt32(self, "unknown[]")
- yield UInt32(self, "unknown[]")
- yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE")
- elif (self["type"].value<17):
+ if (self["type"].value == 15) or (self["type"].value == 16):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield String(self, "string", self._size/8-self["header_length"].value, "String Data", charset="UTF-8")
@@ -121,6 +127,14 @@ class DataObject(FieldSet):
yield padding
for i in xrange(self["entry_count"].value):
yield UInt32(self, "index["+str(i)+"]", "Index of the "+str(i)+"nth mhit")
+ elif(self["type"].value<15) or (self["type"].value>17) or (self["type"].value >= 200):
+ yield UInt32(self, "unknown[]")
+ yield UInt32(self, "unknown[]")
+ yield UInt32(self, "position", "Position")
+ yield UInt32(self, "length", "String Length in bytes")
+ yield UInt32(self, "unknown[]")
+ yield UInt32(self, "unknown[]")
+ yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE")
else:
padding = self.seekByte(self["header_length"].value, "header padding")
if padding:
@@ -178,8 +192,8 @@ class TrackItem(FieldSet):
yield UInt32(self, "stop_time", "Stop playing at, in milliseconds")
yield UInt32(self, "soundcheck", "SoundCheck preamp")
yield UInt32(self, "playcount_1", "Play count of the track")
- yield UInt32(self, "playcount_2", "Play count of the track (identical to playcount_1)")
- yield UInt32(self, "last_played_time", "Time the song was last played")
+ yield UInt32(self, "playcount_2", "Play count of the track when last synced")
+ yield TimestampMac32(self, "last_played_time", "Time the song was last played")
yield UInt32(self, "disc_number", "disc number in multi disc sets")
yield UInt32(self, "total_discs", "Total number of discs in the disc set")
yield UInt32(self, "userid", "User ID in the DRM scheme")
diff --git a/lib/hachoir_parser/game/__init__.py b/lib/hachoir_parser/game/__init__.py
index 1b6447b9..f43cf2bb 100644
--- a/lib/hachoir_parser/game/__init__.py
+++ b/lib/hachoir_parser/game/__init__.py
@@ -1,4 +1,5 @@
from hachoir_parser.game.zsnes import ZSNESFile
from hachoir_parser.game.spider_man_video import SpiderManVideoFile
from hachoir_parser.game.laf import LafFile
-from hachoir_parser.game.blp import BLP1File, BLP2File
\ No newline at end of file
+from hachoir_parser.game.blp import BLP1File, BLP2File
+from hachoir_parser.game.uasset import UAssetFile
diff --git a/lib/hachoir_parser/game/uasset.py b/lib/hachoir_parser/game/uasset.py
new file mode 100644
index 00000000..80cb0f6c
--- /dev/null
+++ b/lib/hachoir_parser/game/uasset.py
@@ -0,0 +1,199 @@
+"""
+Unreal 4 .uasset file parser
+
+Author: Robert Xiao
+Creation date: 2015-01-17
+"""
+
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet, StaticFieldSet, SeekableFieldSet, Int32, UInt32,
+ String, PascalString32, PaddingBytes, Bytes, RawBytes)
+from hachoir_core.endian import LITTLE_ENDIAN
+
+class StringTable(FieldSet):
+ def __init__(self, parent, name, count, *args):
+ FieldSet.__init__(self, parent, name, *args)
+ self.count = count
+
+ def createFields(self):
+ for i in xrange(self.count):
+ yield PascalString32(self, "string[]", strip='\0')
+
+def getObject(self, val):
+ if val == 0:
+ return None
+ elif val < 0:
+ return self['/header/refs/ref[%d]' % (-val-1)]
+ else:
+ return self['/header/assets/asset[%d]' % (val-1)]
+
+
+class AssetHeader(FieldSet):
+ def createFields(self):
+ yield Int32(self, "type1")
+ yield Int32(self, "type2")
+ yield Int32(self, "parent") # 0 = no parent
+ yield Int32(self, "name_index")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "size")
+ yield Int32(self, "offset")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+
+ @property
+ def typeName(self):
+ return getObject(self, self["type1"].value).objectName
+
+ @property
+ def objectName(self):
+ name_index = self['name_index'].value
+ return self['/header/strings/string[%d]' % name_index].value
+
+ @property
+ def fullObjectName(self):
+ name = self.objectName
+ if self['parent'].value:
+ name = '%s.%s' % (getObject(self, self['parent'].value).fullObjectName, name)
+ return name
+
+ def createValue(self):
+ return '<%s (%s), %d bytes>' % (
+ self.fullObjectName, self.typeName, self['size'].value)
+
+ def createDescription(self):
+ return str([t.value for t in self.array('unk')])
+
+class AssetTable(FieldSet):
+ def __init__(self, parent, name, count, *args):
+ FieldSet.__init__(self, parent, name, *args)
+ self.count = count
+
+ def createFields(self):
+ for i in xrange(self.count):
+ yield AssetHeader(self, "asset[]")
+
+class ReferenceHeader(FieldSet):
+ def createFields(self):
+ yield Int32(self, "unk[]")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "type_index")
+ yield Int32(self, "unk[]")
+ yield Int32(self, "parent")
+ yield Int32(self, "name_index")
+ yield Int32(self, "unk[]")
+
+ @property
+ def typeName(self):
+ type_index = self['type_index'].value
+ return self['/header/strings/string[%d]' % type_index].value
+
+ @property
+ def objectName(self):
+ name_index = self['name_index'].value
+ return self['/header/strings/string[%d]' % name_index].value
+
+ @property
+ def fullObjectName(self):
+ name = self.objectName
+ if self['parent'].value:
+ name = '[%s].%s' % (getObject(self, self['parent'].value).fullObjectName, name)
+ return name
+
+ def createValue(self):
+ return '<%s (%s)>' % (self.fullObjectName, self.typeName)
+
+ def createDescription(self):
+ return str([t.value for t in self.array('unk')])
+
+class ReferenceTable(FieldSet):
+ def __init__(self, parent, name, count, *args):
+ FieldSet.__init__(self, parent, name, *args)
+ self.count = count
+
+ def createFields(self):
+ for i in xrange(self.count):
+ yield ReferenceHeader(self, "ref[]")
+
+
+
+class UAssetHeader(SeekableFieldSet):
+ def __init__(self, *args):
+ SeekableFieldSet.__init__(self, *args)
+ self._size = self["header_size"].value * 8
+
+ def createFields(self):
+ yield UInt32(self, "magic")
+ yield Int32(self, "version")
+ yield RawBytes(self, "unk[]", 16)
+ yield UInt32(self, "header_size")
+ yield PascalString32(self, "none", strip='\0')
+ yield RawBytes(self, "unk[]", 4)
+
+ yield UInt32(self, "num_strings", "Number of strings in the header")
+ yield UInt32(self, "offset_strings", "Offset to string table within the header")
+ yield UInt32(self, "num_assets", "Number of assets described in the header")
+ yield UInt32(self, "offset_assets", "Offset to asset table within the header")
+ yield UInt32(self, "num_refs", "Number of references? described in the header")
+ yield UInt32(self, "offset_refs", "Offset to reference table within the header")
+
+ yield UInt32(self, "offset_unk[]", "Offset to something")
+ yield UInt32(self, "unk[]")
+ yield UInt32(self, "offset_unk[]", "Offset to some other thing")
+ yield UInt32(self, "unk[]")
+
+ yield RawBytes(self, "signature", 16, "Some kind of hash")
+
+ yield UInt32(self, "unk[]")
+ yield UInt32(self, "num_assets2", "num_assets again")
+ assert self['num_assets'].value == self['num_assets2'].value
+ yield UInt32(self, "num_strings2", "num_strings again")
+ assert self['num_strings'].value == self['num_strings2'].value
+ yield RawBytes(self, "unk[]", 34)
+ yield UInt32(self, "unk[]")
+ yield UInt32(self, "size_unk", "Size of something")
+ yield RawBytes(self, "unk[]", 12)
+
+ self.seekByte(self["offset_strings"].value)
+ yield StringTable(self, "strings", self["num_strings"].value)
+
+ self.seekByte(self["offset_assets"].value)
+ yield AssetTable(self, "assets", self["num_assets"].value)
+
+ self.seekByte(self["offset_refs"].value)
+ yield ReferenceTable(self, "refs", self["num_refs"].value)
+
+class Asset(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "type")
+
+class UAssetFile(Parser):
+ MAGIC = "\xc1\x83\x2a\x9e"
+ PARSER_TAGS = {
+ "id": "uasset",
+ "category": "game",
+ "description": "Unreal .uasset file",
+ "min_size": 32,
+ "file_ext": (".uasset",),
+ "magic": ((MAGIC, 0),),
+ }
+ endian = LITTLE_ENDIAN
+
+ def validate(self):
+ temp = self.stream.readBytes(0, 4)
+ if temp != self.MAGIC:
+ return "Wrong header"
+ return True
+
+ def createFields(self):
+ yield UAssetHeader(self, "header")
+ for asset in self['/header/assets'].array('asset'):
+ self.seekByte(asset['offset'].value)
+ yield RawBytes(self, "asset[]", asset['size'].value, description="Data for asset %s" % asset.fullObjectName)
diff --git a/lib/hachoir_parser/misc/__init__.py b/lib/hachoir_parser/misc/__init__.py
index f1392015..3e796976 100644
--- a/lib/hachoir_parser/misc/__init__.py
+++ b/lib/hachoir_parser/misc/__init__.py
@@ -15,4 +15,5 @@ from hachoir_parser.misc.dsstore import DSStore
from hachoir_parser.misc.word_doc import WordDocumentParser
from hachoir_parser.misc.word_2 import Word2DocumentParser
from hachoir_parser.misc.mstask import MSTaskFile
+from hachoir_parser.misc.androidxml import AndroidXMLFile
from hachoir_parser.misc.mapsforge_map import MapsforgeMapFile
diff --git a/lib/hachoir_parser/misc/androidxml.py b/lib/hachoir_parser/misc/androidxml.py
new file mode 100644
index 00000000..6a519efb
--- /dev/null
+++ b/lib/hachoir_parser/misc/androidxml.py
@@ -0,0 +1,220 @@
+'''
+AndroidManifest.xml parser
+
+References:
+- http://code.google.com/p/androguard/source/browse/core/bytecodes/apk.py
+
+Author: Robert Xiao
+Creation Date: May 29, 2011
+'''
+
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet, ParserError,
+ String, Enum, GenericVector,
+ UInt8, UInt16, UInt32, Int32,
+ Float32, Bits,)
+from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
+from hachoir_core.tools import createDict
+from hachoir_core.endian import LITTLE_ENDIAN
+
+
+class PascalCString16(FieldSet):
+ def createFields(self):
+ yield UInt16(self, "size")
+ self._size = (self['size'].value+2)*16
+ yield String(self, "string", (self['size'].value+1)*2, strip='\0', charset="UTF-16-LE")
+ def createValue(self):
+ return self['string'].value
+
+class StringTable(FieldSet):
+ def createFields(self):
+ for field in self['../offsets']:
+ pad = self.seekByte(field.value)
+ if pad:
+ yield pad
+ yield PascalCString16(self, "string[]")
+
+def Top(self):
+ while not self.eof:
+ yield Chunk(self, "chunk[]")
+
+def StringChunk(self):
+ # TODO: styles
+ yield UInt32(self, "string_count")
+ yield UInt32(self, "style_count")
+ yield UInt32(self, "reserved[]")
+ yield UInt32(self, "string_offset")
+ yield UInt32(self, "style_offset")
+ yield GenericVector(self, "offsets", self['string_count'].value, UInt32,
+ description="Offsets for string table")
+ pad = self.seekByte(self['string_offset'].value)
+ if pad:
+ yield pad
+ yield StringTable(self, "table")
+
+def ResourceIDs(self):
+ while self._current_size < self._size:
+ yield textHandler(UInt32(self, "resource_id[]"), hexadecimal)
+
+def stringIndex(field):
+ if field.value == -1:
+ return ''
+ return field['/xml_file/string_table/table/string[%d]'%field.value].display
+
+def NamespaceTag(self):
+ yield UInt32(self, "lineno", "Line number from original XML file")
+ yield Int32(self, "unk[]", "Always -1")
+ yield textHandler(Int32(self, "prefix"), stringIndex)
+ yield textHandler(Int32(self, "uri"), stringIndex)
+def NamespaceStartValue(self):
+ return "xmlns:%s='%s'"%(self['prefix'].display, self['uri'].display)
+def NamespaceEndValue(self):
+ return "/%s"%self['prefix'].display
+
+def IntTextHandler(func):
+ return lambda *args, **kwargs: textHandler(Int32(*args, **kwargs), func)
+def booleanText(field):
+ if field.value == 0:
+ return 'false'
+ return 'true'
+class XMLUnitFloat(FieldSet):
+ static_size = 32
+ UNIT_MAP = {}
+ RADIX_MAP = {
+ 0: 0,
+ 1: 7,
+ 2: 15,
+ 3: 23,
+ }
+ def createFields(self):
+ yield Enum(Bits(self, "unit", 4), self.UNIT_MAP)
+ yield Enum(Bits(self, "exponent", 2), self.RADIX_MAP)
+ yield Bits(self, "reserved[]", 2)
+ yield Bits(self, "mantissa", 24)
+ def createValue(self):
+ return float(self['mantissa'].value) >> self.RADIX_MAP[self['exponent'].value]
+ def createDisplay(self):
+ return '%f%s'%(self.value, self.UNIT_MAP.get(self['unit'].value, ''))
+class XMLDimensionFloat(XMLUnitFloat):
+ UNIT_MAP = dict(enumerate(["px","dip","sp","pt","in","mm"]))
+class XMLFractionFloat(XMLUnitFloat):
+ UNIT_MAP = {0: '%', 1: '%p'}
+class XMLAttribute(FieldSet):
+ TYPE_INFO = {
+ 0: ('Null', IntTextHandler(lambda field: '')),
+ 1: ('Reference', IntTextHandler(lambda field: '@%08x'%field.value)),
+ 2: ('Attribute', IntTextHandler(lambda field: '?%08x'%field.value)),
+ 3: ('String', IntTextHandler(stringIndex)),
+ 4: ('Float', Float32),
+ 5: ('Dimension', XMLDimensionFloat),
+ 6: ('Fraction', XMLFractionFloat),
+ 16: ('Int_Dec', Int32),
+ 17: ('Int_Hex', IntTextHandler(hexadecimal)),
+ 18: ('Int_Boolean', IntTextHandler(booleanText)),
+ 28: ('Int_Color_Argb8', IntTextHandler(lambda field: '#%08x'%field.value)),
+ 29: ('Int_Color_Rgb8', IntTextHandler(lambda field: '#%08x'%field.value)),
+ 30: ('Int_Color_Argb4', IntTextHandler(lambda field: '#%08x'%field.value)),
+ 31: ('Int_Color_Rgb4', IntTextHandler(lambda field: '#%08x'%field.value)),
+ }
+ TYPE_NAME = createDict(TYPE_INFO, 0)
+ TYPE_FUNC = createDict(TYPE_INFO, 1)
+ static_size = 5*32
+ def createFields(self):
+ yield textHandler(Int32(self, "ns"), stringIndex)
+ yield textHandler(Int32(self, "name"), stringIndex)
+ yield textHandler(Int32(self, "value_string"), stringIndex)
+ yield UInt16(self, "unk[]")
+ yield UInt8(self, "unk[]")
+ yield Enum(UInt8(self, "value_type"), self.TYPE_NAME)
+ func = self.TYPE_FUNC.get(self['value_type'].value, None)
+ if not func:
+ func = UInt32
+ yield func(self, "value_data")
+ def createValue(self):
+ return (self['name'].display, self['value_data'].value)
+ def createDisplay(self):
+ return '%s="%s"'%(self['name'].display, self['value_data'].display)
+
+def TagStart(self):
+ yield UInt32(self, "lineno", "Line number from original XML file")
+ yield Int32(self, "unk[]", "Always -1")
+ yield textHandler(Int32(self, "ns"), stringIndex)
+ yield textHandler(Int32(self, "name"), stringIndex)
+ yield UInt32(self, "flags")
+ yield UInt16(self, "attrib_count")
+ yield UInt16(self, "attrib_id")
+ yield UInt16(self, "attrib_class")
+ yield UInt16(self, "attrib_style")
+ for i in xrange(self['attrib_count'].value):
+ yield XMLAttribute(self, "attrib[]")
+def TagStartValue(self):
+ attrstr = ' '.join(attr.display for attr in self.array('attrib'))
+ if attrstr: attrstr = ' '+attrstr
+ if not self['ns'].display:
+ return '<%s%s>'%(self['name'].display, attrstr)
+ return "<%s:%s%s>"%(self['ns'].display, self['name'].display, attrstr)
+
+def TagEnd(self):
+ yield UInt32(self, "lineno", "Line number from original XML file")
+ yield Int32(self, "unk[]", "Always -1")
+ yield textHandler(Int32(self, "ns"), stringIndex)
+ yield textHandler(Int32(self, "name"), stringIndex)
+def TagEndValue(self):
+ if not self['ns'].display:
+ return '%s>'%self['name'].display
+ return "%s:%s>"%(self['ns'].display, self['name'].display)
+
+def TextChunk(self):
+ # TODO
+ yield UInt32(self, "lineno", "Line number from original XML file")
+ yield Int32(self, "unk[]", "Always -1")
+
+class Chunk(FieldSet):
+ CHUNK_INFO = {
+ 0x0001: ("string_table", "String Table", StringChunk, None),
+ 0x0003: ("xml_file", "XML File", Top, None),
+ 0x0100: ("namespace_start[]", "Start Namespace", NamespaceTag, NamespaceStartValue),
+ 0x0101: ("namespace_end[]", "End Namespace", NamespaceTag, NamespaceEndValue),
+ 0x0102: ("tag_start[]", "Start Tag", TagStart, TagStartValue),
+ 0x0103: ("tag_end[]", "End Tag", TagEnd, TagEndValue),
+ 0x0104: ("text[]", "Text", TextChunk, None),
+ 0x0180: ("resource_ids", "Resource IDs", ResourceIDs, None),
+ }
+ CHUNK_DESC = createDict(CHUNK_INFO, 1)
+ def __init__(self, parent, name, description=None):
+ FieldSet.__init__(self, parent, name, description)
+ self._size = self['chunk_size'].value* 8
+ type = self['type'].value
+ self.parse_func = None
+ if type in self.CHUNK_INFO:
+ self._name, self._description, self.parse_func, value_func = self.CHUNK_INFO[type]
+ if value_func:
+ self.createValue = lambda: value_func(self)
+
+ def createFields(self):
+ yield Enum(UInt16(self, "type"), self.CHUNK_DESC)
+ yield UInt16(self, "header_size")
+ yield UInt32(self, "chunk_size")
+ if self.parse_func:
+ for field in self.parse_func(self):
+ yield field
+
+class AndroidXMLFile(Parser):
+ MAGIC = "\x03\x00\x08\x00"
+ PARSER_TAGS = {
+ "id": "axml",
+ "category": "misc",
+ "file_ext": ("xml",),
+ "min_size": 32*8,
+ "magic": ((MAGIC, 0),),
+ "description": "Android binary XML format",
+ }
+ endian = LITTLE_ENDIAN
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Chunk(self, "xml_file")
diff --git a/lib/hachoir_parser/misc/mapsforge_map.py b/lib/hachoir_parser/misc/mapsforge_map.py
index 4b99653a..156979a2 100644
--- a/lib/hachoir_parser/misc/mapsforge_map.py
+++ b/lib/hachoir_parser/misc/mapsforge_map.py
@@ -10,7 +10,7 @@ References:
from hachoir_parser import Parser
from hachoir_core.field import (ParserError,
- Bit, Bits, UInt8, UInt16, UInt32, UInt64, String, RawBytes,
+ Bit, Bits, UInt8, UInt16, UInt32, Int32, UInt64, String, RawBytes,
PaddingBits, PaddingBytes,
Enum, Field, FieldSet, SeekableFieldSet, RootSeekableFieldSet)
from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
@@ -140,6 +140,11 @@ class TileHeader(FieldSet):
class POIData(FieldSet):
def createFields(self):
+ if self["/have_debug"].value:
+ yield String(self, "signature", 32)
+ if not self['signature'].value.startswith("***POIStart"):
+ raise ValueError
+
yield IntVbe(self, "lat_diff")
yield IntVbe(self, "lon_diff")
yield Bits(self, "layer", 4)
@@ -179,6 +184,11 @@ class SubTileBitmap(FieldSet):
class WayProperties(FieldSet):
def createFields(self):
+ if self["/have_debug"].value:
+ yield String(self, "signature", 32)
+ if not self['signature'].value.startswith("---WayStart"):
+ raise ValueError
+
yield UIntVbe(self, "way_data_size")
# WayProperties is split into an outer and an inner field, to allow specifying data size for inner part:
@@ -251,6 +261,11 @@ class TileData(FieldSet):
self.zoomIntervalCfg = zoomIntervalCfg
def createFields(self):
+ if self["/have_debug"].value:
+ yield String(self, "signature", 32)
+ if not self['signature'].value.startswith("###TileStart"):
+ raise ValueError
+
yield TileHeader(self, "tile_header", self.zoomIntervalCfg)
numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1
@@ -272,6 +287,11 @@ class ZoomSubFile(SeekableFieldSet):
self.zoomIntervalCfg = zoomIntervalCfg
def createFields(self):
+ if self["/have_debug"].value:
+ yield String(self, "signature", 16)
+ if self['signature'].value != "+++IndexStart+++":
+ raise ValueError
+
indexEntries = []
numTiles = None
i = 0
@@ -284,13 +304,24 @@ class ZoomSubFile(SeekableFieldSet):
if numTiles is None:
# calculate number of tiles (TODO: better calc this from map bounding box)
firstOffset = self["tile_index_entry[0]"]["offset"].value
+ if self["/have_debug"].value:
+ firstOffset -= 16
numTiles = firstOffset / 5
if i >= numTiles:
break
- for indexEntry in indexEntries:
- self.seekByte(indexEntry["offset"].value, relative=True)
- yield TileData(self, "tile_data[]", zoomIntervalCfg=self.zoomIntervalCfg)
+ for i, indexEntry in enumerate(indexEntries):
+ offset = indexEntry["offset"].value
+ self.seekByte(offset, relative=True)
+ if i != len(indexEntries) - 1:
+ next_offset = indexEntries[i + 1]["offset"].value
+ size = (next_offset - offset) * 8
+ else:
+ size = self.size - offset * 8
+ if size == 0:
+ # hachoir doesn't support empty field.
+ continue
+ yield TileData(self, "tile_data[%d]" % i, zoomIntervalCfg=self.zoomIntervalCfg, size=size)
@@ -314,10 +345,10 @@ class MapsforgeMapFile(Parser, RootSeekableFieldSet):
yield UInt32(self, "file_version")
yield UInt64(self, "file_size")
yield UInt64(self, "creation_date")
- yield UInt32(self, "min_lat")
- yield UInt32(self, "min_lon")
- yield UInt32(self, "max_lat")
- yield UInt32(self, "max_lon")
+ yield Int32(self, "min_lat")
+ yield Int32(self, "min_lon")
+ yield Int32(self, "max_lat")
+ yield Int32(self, "max_lon")
yield UInt16(self, "tile_size")
yield VbeString(self, "projection")
diff --git a/lib/hachoir_parser/program/__init__.py b/lib/hachoir_parser/program/__init__.py
index 261eaf15..321baf25 100644
--- a/lib/hachoir_parser/program/__init__.py
+++ b/lib/hachoir_parser/program/__init__.py
@@ -1,7 +1,9 @@
from hachoir_parser.program.elf import ElfFile
from hachoir_parser.program.exe import ExeFile
+from hachoir_parser.program.macho import MachoFile, MachoFatFile
from hachoir_parser.program.python import PythonCompiledFile
from hachoir_parser.program.java import JavaCompiledClassFile
from hachoir_parser.program.prc import PRCFile
from hachoir_parser.program.nds import NdsFile
-
+from hachoir_parser.program.dex import DexFile
+from hachoir_parser.program.java_serialized import JavaSerializedFile
diff --git a/lib/hachoir_parser/program/dex.py b/lib/hachoir_parser/program/dex.py
new file mode 100644
index 00000000..67ab2f18
--- /dev/null
+++ b/lib/hachoir_parser/program/dex.py
@@ -0,0 +1,238 @@
+'''
+Dalvik Executable (dex) parser.
+
+References:
+- http://www.dalvikvm.com/
+- http://code.google.com/p/androguard/source/browse/core/bytecodes/dvm.py
+- http://androguard.googlecode.com/hg/specs/dalvik/dex-format.html
+
+Author: Robert Xiao
+Creation Date: May 29, 2011
+'''
+
+from hachoir_parser import HachoirParser
+from hachoir_core.field import (SeekableFieldSet, RootSeekableFieldSet, FieldSet, ParserError,
+ String, RawBytes, GenericVector,
+ UInt8, UInt16, UInt32, NullBits, Bit)
+from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
+from hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_parser.program.java import eat_descriptor
+
+class DexHeader(FieldSet):
+ def createFields(self):
+ yield String(self, "magic", 4)
+ yield String(self, "version", 4, strip='\0')
+ yield textHandler(UInt32(self, "checksum"), hexadecimal)
+ yield RawBytes(self, "signature", 20, description="SHA1 sum over all subsequent data")
+ yield filesizeHandler(UInt32(self, "filesize"))
+ yield UInt32(self, "size", description="Header size")
+ self._size = self['size'].value*8
+ yield textHandler(UInt32(self, "endian"), hexadecimal)
+ yield UInt32(self, "link_count")
+ yield UInt32(self, "link_offset")
+ yield UInt32(self, "map_offset", description="offset to map footer")
+ yield UInt32(self, "string_count", description="number of entries in string table")
+ yield UInt32(self, "string_offset", description="offset to string table")
+ yield UInt32(self, "type_desc_count", description="number of entries in type descriptor table")
+ yield UInt32(self, "type_desc_offset", description="offset to type descriptor table")
+ yield UInt32(self, "meth_desc_count", description="number of entries in method descriptor table")
+ yield UInt32(self, "meth_desc_offset", description="offset to method descriptor table")
+ yield UInt32(self, "field_count", description="number of entries in field table")
+ yield UInt32(self, "field_offset", description="offset to field table")
+ yield UInt32(self, "method_count", description="number of entries in method table")
+ yield UInt32(self, "method_offset", description="offset to method table")
+ yield UInt32(self, "class_count", description="number of entries in class table")
+ yield UInt32(self, "class_offset", description="offset to class table")
+ yield UInt32(self, "data_size", description="size of data region")
+ yield UInt32(self, "data_offset", description="offset to data region")
+
+def stringIndex(field):
+ return field['/string_table/item[%d]'%field.value].display
+
+def classDisplay(field):
+ disp, tail = eat_descriptor(stringIndex(field))
+ return disp
+
+def classIndex(field):
+ return field['/type_desc_table/item[%d]'%field.value].display
+
+# modified from java.py
+code_to_type_name = {
+ 'B': "byte",
+ 'C': "char",
+ 'D': "double",
+ 'F': "float",
+ 'I': "int",
+ 'J': "long",
+ 'L': "object",
+ 'S': "short",
+ 'Z': "boolean",
+}
+
+def argumentDisplay(field):
+ # parse "shorty" descriptors (these start with the return code, which is redundant)
+ text = stringIndex(field)[1:]
+ return [code_to_type_name.get(c,c) for c in text]
+
+def signatureIndex(field):
+ return field['/meth_desc_table/item[%d]'%field.value].display
+
+class PascalCString(FieldSet):
+ def createFields(self):
+ yield UInt8(self, "size")
+ self._size = (self['size'].value+2)*8
+ yield String(self, "string", self['size'].value+1, strip='\0')
+ def createValue(self):
+ return self['string'].value
+
+class StringTable(SeekableFieldSet):
+ def createFields(self):
+ for item in self['/string_offsets'].array('item'):
+ self.seekByte(item.value, relative=False)
+ yield PascalCString(self, "item[]")
+
+class TypeDescriptorEntry(FieldSet):
+ static_size = 32
+ def createFields(self):
+ yield textHandler(UInt32(self, "desc", description="Type descriptor"), classDisplay)
+ def createValue(self):
+ return (self['desc'].value,)
+ def createDisplay(self):
+ return self['desc'].display
+
+class MethodDescriptorEntry(FieldSet):
+ static_size = 96
+ def createFields(self):
+ yield textHandler(UInt32(self, "args", description="Argument type"), argumentDisplay)
+ yield textHandler(UInt32(self, "return", description="Return type"), classIndex)
+ yield UInt32(self, "param_offset", "Offset to parameter detail list")
+ def createValue(self):
+ return (self['args'].value, self['return'].value)
+ def createDisplay(self):
+ return "%s (%s)"%(self['return'].display, ', '.join(self['args'].display))
+
+class FieldEntry(FieldSet):
+ static_size = 64
+ def createFields(self):
+ yield textHandler(UInt16(self, "class", description="Class containing this field"), classIndex)
+ yield textHandler(UInt16(self, "type", description="Field type"), classIndex)
+ yield textHandler(UInt32(self, "name", description="Field name"), stringIndex)
+ def createValue(self):
+ return (self['class'].value, self['type'].value, self['name'].value)
+ def createDisplay(self):
+ return "%s %s.%s"%(self['type'].display, self['class'].display, self['name'].display)
+
+class MethodEntry(FieldSet):
+ static_size = 64
+ def createFields(self):
+ yield textHandler(UInt16(self, "class", description="Class containing this method"), classIndex)
+ yield textHandler(UInt16(self, "sig", description="Method signature"), signatureIndex)
+ yield textHandler(UInt32(self, "name", description="Method name"), stringIndex)
+ def createValue(self):
+ return (self['class'].value, self['sig'].value, self['name'].value)
+ def createDisplay(self):
+ sig = self['/meth_desc_table/item[%d]'%self['sig'].value]
+ return "%s %s.%s(%s)"%(sig['return'].display, self['class'].display, self['name'].display, ', '.join(sig['args'].display))
+
+class AccessFlags(FieldSet):
+ static_size = 32
+ def createFields(self):
+ yield Bit(self, "public")
+ yield Bit(self, "private")
+ yield Bit(self, "protected")
+ yield Bit(self, "static")
+ yield Bit(self, "final")
+ yield Bit(self, "synchronized")
+ yield Bit(self, "volatile")
+ yield Bit(self, "transient")
+ yield Bit(self, "native")
+ yield Bit(self, "interface")
+ yield Bit(self, "abstract")
+ yield Bit(self, "strictfp")
+ yield Bit(self, "synthetic")
+ yield Bit(self, "annotation")
+ yield Bit(self, "enum")
+ yield NullBits(self, "reserved[]", 1)
+ yield Bit(self, "constructor")
+ yield NullBits(self, "reserved[]", 15)
+ def createValue(self):
+ return tuple(f for f in self if f.value is True)
+ def createDisplay(self):
+ return ' '.join(f.name for f in self if f.value is True)
+
+class ClassEntry(FieldSet):
+ static_size = 8*32
+ def createFields(self):
+ yield textHandler(UInt32(self, "class", description="Class being described"), classIndex)
+ yield AccessFlags(self, "flags")
+ yield textHandler(UInt32(self, "superclass", description="Superclass"), classIndex)
+ yield UInt32(self, "interfaces_offset", description="Offset to interface list")
+ yield textHandler(UInt32(self, "filename", description="Filename"), stringIndex)
+ yield UInt32(self, "annotations_offset")
+ yield UInt32(self, "class_data_offset")
+ yield UInt32(self, "static_values_offset")
+ def createValue(self):
+ return tuple(f.value for f in self)
+ def createDisplay(self):
+ disp = self['flags'].display
+ if not self['flags/interface'].value:
+ if disp:
+ disp += ' '
+ disp += 'class'
+ disp += ' '+self['class'].display
+ if self['superclass'].display != 'java.lang.Object':
+ disp += ' extends '+self['superclass'].display
+ return disp
+
+class DexFile(HachoirParser, RootSeekableFieldSet):
+ MAGIC = "dex\n"
+ PARSER_TAGS = {
+ "id": "dex",
+ "category": "program",
+ "file_ext": ("dex",),
+ "min_size": 80*8,
+ "magic": ((MAGIC, 0),),
+ "description": "Dalvik VM Executable",
+ }
+ endian = LITTLE_ENDIAN
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ if self['header/version'].value != '035':
+ return "Unknown version"
+ return True
+
+ def createFields(self):
+ yield DexHeader(self, "header")
+
+ self.seekByte(self['header/string_offset'].value)
+ yield GenericVector(self, "string_offsets", self['header/string_count'].value, UInt32,
+ description="Offsets for string table")
+ self.seekByte(self['string_offsets/item[0]'].value)
+ yield StringTable(self, "string_table",
+ description="String table")
+
+ self.seekByte(self['header/type_desc_offset'].value)
+ yield GenericVector(self, "type_desc_table", self['header/type_desc_count'].value, TypeDescriptorEntry,
+ description="Type descriptor table")
+
+ self.seekByte(self['header/meth_desc_offset'].value)
+ yield GenericVector(self, "meth_desc_table", self['header/meth_desc_count'].value, MethodDescriptorEntry,
+ description="Method descriptor table")
+
+ self.seekByte(self['header/field_offset'].value)
+ yield GenericVector(self, "field_table", self['header/field_count'].value, FieldEntry,
+ description="Field definition table")
+
+ self.seekByte(self['header/method_offset'].value)
+ yield GenericVector(self, "method_table", self['header/method_count'].value, MethodEntry,
+ description="Method definition table")
+
+ self.seekByte(self['header/class_offset'].value)
+ yield GenericVector(self, "class_table", self['header/class_count'].value, ClassEntry,
+ description="Class definition table")
diff --git a/lib/hachoir_parser/program/exe.py b/lib/hachoir_parser/program/exe.py
index 5a7bc727..3752b4c2 100644
--- a/lib/hachoir_parser/program/exe.py
+++ b/lib/hachoir_parser/program/exe.py
@@ -60,7 +60,7 @@ class ExeFile(HachoirParser, RootSeekableFieldSet):
PARSER_TAGS = {
"id": "exe",
"category": "program",
- "file_ext": ("exe", "dll", "ocx"),
+ "file_ext": ("exe", "dll", "ocx", "pyd", "scr"),
"mime": (u"application/x-dosexec",),
"min_size": 64*8,
#"magic": (("MZ", 0),),
diff --git a/lib/hachoir_parser/program/java.py b/lib/hachoir_parser/program/java.py
index 7329cbe0..c1f17c4d 100644
--- a/lib/hachoir_parser/program/java.py
+++ b/lib/hachoir_parser/program/java.py
@@ -732,6 +732,14 @@ class FieldInfo(FieldSet):
yield FieldArray(self, "attributes", AttributeInfo,
self["attributes_count"].value)
+ def createDescription(self):
+ bits = []
+ for mod in ['transient', 'protected', 'private', 'public', 'static', 'final', 'volatile']:
+ if self[mod].value:
+ bits.append(mod)
+ bits.append(parse_field_descriptor(str(self['descriptor_index'].get_cp_entry())))
+ bits.append(str(self['name_index'].get_cp_entry()))
+ return ' '.join(bits)
###############################################################################
# method_info {
@@ -766,6 +774,15 @@ class MethodInfo(FieldSet):
yield FieldArray(self, "attributes", AttributeInfo,
self["attributes_count"].value)
+ def createDescription(self):
+ bits = []
+ for mod in ['strict', 'static', 'native', 'synchronized', 'protected', 'private', 'public', 'final', 'abstract']:
+ if self[mod].value:
+ bits.append(mod)
+ name = str(self['name_index'].get_cp_entry())
+ meth = str(self['descriptor_index'].get_cp_entry())
+ bits.append(parse_method_descriptor(meth, name))
+ return ' '.join(bits)
###############################################################################
# attribute_info {
@@ -954,6 +971,18 @@ class InnerClassesEntry(StaticFieldSet):
(Bit, "public"),
)
+ def createDescription(self):
+ bits = []
+ for mod in ['super', 'static', 'protected', 'private', 'public', 'abstract', 'final', 'interface']:
+ if self[mod].value:
+ bits.append(mod)
+ if not self['interface'].value:
+ bits.append('class')
+
+ name = str(self['inner_class_info_index'].get_cp_entry())
+ bits.append(name)
+ return ' '.join(bits)
+
class LineNumberTableEntry(StaticFieldSet):
format = (
(UInt16, "start_pc"),
diff --git a/lib/hachoir_parser/program/java_serialized.py b/lib/hachoir_parser/program/java_serialized.py
new file mode 100644
index 00000000..5e7742ab
--- /dev/null
+++ b/lib/hachoir_parser/program/java_serialized.py
@@ -0,0 +1,372 @@
+'''
+Java Object Serialization Stream parser.
+
+References:
+- http://docs.oracle.com/javase/7/docs/platform/serialization/spec/protocol.html
+- http://www.javaworld.com/article/2072752/the-java-serialization-algorithm-revealed.html
+
+Author: Robert Xiao
+Creation Date: Jun 18, 2015
+'''
+
+from hachoir_parser import Parser
+from hachoir_core.field import (
+ ParserError, FieldSet, StaticFieldSet,
+ Enum, RawBytes, String, PascalString16, Float32, Float64,
+ Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
+ Bit, NullBits)
+from hachoir_core.endian import BIG_ENDIAN
+from hachoir_core.text_handler import textHandler, hexadecimal
+from hachoir_core.tools import paddingSize
+
+from .java import parse_field_descriptor
+
+class LongString(FieldSet):
+ def createFields(self):
+ yield Int64(self, "length")
+        yield String(self, "value", self['length'].value, charset="UTF-8")
+ def createDescription(self):
+ return self['value'].description
+ def createValue(self):
+ return self['value'].value
+
+class UTF16Character(UInt16):
+ def createDisplay(self):
+ return repr(unichr(self.value))
+
+class JavaBool(UInt8):
+ def createValue(self):
+ val = UInt8.createValue(self)
+ return (val != 0)
+
+class SerializedNull(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ def createValue(self):
+ return None
+ def createDisplay(self):
+ return 'null'
+
+class SerializedReference(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield Int32(self, "handle")
+
+ @property
+ def referent(self):
+ return self.root.handles[self['handle'].value]
+
+ def createValue(self):
+ return self.referent.value
+
+ def createDisplay(self):
+ return "-> " + str(self.referent.display)
+
+class FieldDesc(FieldSet):
+ def createFields(self):
+ yield String(self, "typecode", 1)
+ yield PascalString16(self, "fieldName", charset="UTF-8")
+ if self['typecode'].value in ('[', 'L'):
+ yield SerializedContent(self, "className")
+
+ @property
+ def typeDescriptor(self):
+ typecode = self['typecode'].value
+ if typecode in ('[', 'L'):
+ return self['className'].value
+ else:
+ return typecode
+
+ @property
+ def typeName(self):
+ return parse_field_descriptor(self.typeDescriptor)
+
+ @property
+ def fieldName(self):
+ return self['fieldName'].value
+
+ def createValue(self):
+ return (self.typeDescriptor, self.fieldName)
+
+ def createDisplay(self):
+ return '%s %s' % (self.typeName, self.fieldName)
+
+class ClassAnnotation(FieldSet):
+ def createFields(self):
+ # TODO
+ yield Enum(UInt8(self, "endBlockData"), TYPECODE_NAMES)
+
+class SerializedClassDesc(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield PascalString16(self, "className", charset="UTF-8")
+ yield Int64(self, "serialVersionUID")
+ self.root.newHandle(self)
+ yield NullBits(self, "classDescFlags_reserved", 3)
+ yield Bit(self, "classDescFlags_enum", "Is the class an Enum?")
+ yield Bit(self, "classDescFlags_block_data", "Was the externalizable's block data written using stream version 2?")
+ yield Bit(self, "classDescFlags_externalizable", "Does the class implement java.io.Externalizable?")
+ yield Bit(self, "classDescFlags_serializable", "Does the class implement java.io.Serializable?")
+ yield Bit(self, "classDescFlags_write_method", "Does the class have a writeObject method?")
+ yield Int16(self, "fieldDesc_count")
+ for i in xrange(self['fieldDesc_count'].value):
+ yield FieldDesc(self, "fieldDesc[]")
+ yield ClassAnnotation(self, "classAnnotation")
+ yield SerializedContent(self, "superClassDesc")
+
+ @property
+ def className(self):
+ return self['className'].value
+
+class ObjectValue(FieldSet):
+ def gen_values(self, classDesc):
+ if isinstance(classDesc, SerializedReference):
+ classDesc = classDesc.referent
+ if isinstance(classDesc, SerializedNull):
+ return
+ # TODO: proxy class desc
+
+ for field in self.gen_values(classDesc['superClassDesc']):
+ yield field
+
+ for fieldDesc in classDesc.array('fieldDesc'):
+ tc = fieldDesc['typecode'].value
+ klass = VALUE_CLASS_MAP[tc]
+ field = klass(self, "field[]", description="%s.%s" % (classDesc.className, fieldDesc.fieldName))
+ field.fieldName = fieldDesc.fieldName
+ yield field
+
+ def createFields(self):
+ for field in self.gen_values(self.parent.classDesc):
+ yield field
+
+class SerializedObject(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield SerializedContent(self, "classDesc")
+ self.root.newHandle(self)
+
+ yield ObjectValue(self, "value")
+
+ @property
+ def classDesc(self):
+ classDesc = self['classDesc']
+ if isinstance(classDesc, SerializedReference):
+ classDesc = classDesc.referent
+ return classDesc
+
+ def createValue(self):
+ return tuple(field.value for field in self['value'].array('field'))
+
+ def createDisplay(self):
+ out = []
+ for field in self['value'].array('field'):
+ if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString):
+ # Avoid recursive references
+                out.append('%s=#%s' % (field.fieldName, field.referent.classDesc.className))
+ else:
+ out.append('%s=%s' % (field.fieldName, field.display))
+ return '%s(%s)' % (self.classDesc.className, ', '.join(out))
+
+class SerializedString(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ self.root.newHandle(self)
+ yield PascalString16(self, "value", charset="UTF-8")
+ def createValue(self):
+ return self['value'].value
+ def createDisplay(self):
+ return self['value'].display
+
+class SerializedArray(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield SerializedContent(self, "classDesc")
+ self.root.newHandle(self)
+
+ yield Int32(self, "size")
+ klass = VALUE_CLASS_MAP[self.classDesc.className[1]] # className is [
+ for i in xrange(self['size'].value):
+ yield klass(self, "value[]")
+
+ @property
+ def classDesc(self):
+ classDesc = self['classDesc']
+ if isinstance(classDesc, SerializedReference):
+ classDesc = classDesc.referent
+ return classDesc
+
+ def createValue(self):
+ return [v.value for v in self.array('value')]
+
+ def createDisplay(self):
+ out = []
+ for field in self.array('value'):
+ if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString):
+ # Avoid recursive references
+                out.append('#%s' % (field.referent.classDesc.className,))
+ else:
+ out.append('%s' % (field.display,))
+ return '[%s]' % ', '.join(out)
+
+class SerializedClass(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield SerializedContent(self, "classDesc")
+ self.root.newHandle(self)
+
+class BlockData(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ # TODO
+
+class StreamReset(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ self.root.resetHandles()
+
+class BlockDataLong(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ # TODO
+
+class SerializedException(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ self.root.resetHandles()
+        yield SerializedContent(self, "object")
+ self.root.resetHandles()
+
+class SerializedLongString(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ self.root.newHandle(self)
+ yield LongString(self, "value")
+ def createValue(self):
+ return self['value'].value
+
+class SerializedProxyClassDesc(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ # TODO
+
+class SerializedEnum(FieldSet):
+ def createFields(self):
+ yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
+ yield SerializedContent(self, "classDesc")
+ self.root.newHandle(self)
+ yield SerializedContent(self, "enumConstantName")
+
+ @property
+ def classDesc(self):
+ classDesc = self['classDesc']
+ if isinstance(classDesc, SerializedReference):
+ classDesc = classDesc.referent
+ return classDesc
+
+ def createValue(self):
+ return self['enumConstantName'].value
+
+ def createDisplay(self):
+ return '%s.%s' % (self.classDesc.className, self.value)
+
+TYPECODE_NAMES = {
+ 0x70: "NULL",
+ 0x71: "REFERENCE",
+ 0x72: "CLASSDESC",
+ 0x73: "OBJECT",
+ 0x74: "STRING",
+ 0x75: "ARRAY",
+ 0x76: "CLASS",
+ 0x77: "BLOCKDATA",
+ 0x78: "ENDBLOCKDATA",
+ 0x79: "RESET",
+ 0x7A: "BLOCKDATALONG",
+ 0x7B: "EXCEPTION",
+ 0x7C: "LONGSTRING",
+ 0x7D: "PROXYCLASSDESC",
+ 0x7E: "ENUM",
+}
+
+TYPECODE_TABLE = {
+ 0x70: SerializedNull,
+ 0x71: SerializedReference,
+ 0x72: SerializedClassDesc,
+ 0x73: SerializedObject,
+ 0x74: SerializedString,
+ 0x75: SerializedArray,
+ 0x76: SerializedClass,
+ 0x77: BlockData,
+# 0x78: EndBlockData,
+ 0x79: StreamReset,
+ 0x7a: BlockDataLong,
+ 0x7b: SerializedException,
+ 0x7c: SerializedLongString,
+ 0x7d: SerializedProxyClassDesc,
+ 0x7e: SerializedEnum,
+}
+
+def SerializedContent(parent, name, description=None):
+ tc = parent.stream.readBits(parent.absolute_address + parent.current_size, 8, parent.endian)
+ klass = TYPECODE_TABLE.get(tc, None)
+ if klass is None:
+ raise ParserError("Unknown typecode 0x%02x" % tc)
+ return klass(parent, name, description)
+
+VALUE_CLASS_MAP = {
+ 'B': Int8,
+ 'C': UTF16Character,
+ 'D': Float64,
+ 'F': Float32,
+ 'I': Int32,
+ 'J': Int64,
+ 'S': Int16,
+ 'Z': JavaBool,
+ '[': SerializedContent, # SerializedArray or reference
+ 'L': SerializedContent, # SerializedObject or reference
+}
+
+
+class JavaSerializedFile(Parser):
+ endian = BIG_ENDIAN
+
+ MAGIC = 0xaced
+ KNOWN_VERSIONS = (5,)
+
+ PARSER_TAGS = {
+ "id": "java_serialized",
+ "category": "program",
+ "file_ext": ("ser",),
+ "mime": (u"application/java-serialized-object",),
+ "min_size": 4*4,
+ "magic": (("\xac\xed", 0),),
+ "description": "Serialized Java object",
+ }
+
+ def validate(self):
+ if self["magic"].value != self.MAGIC:
+ return "Wrong magic signature!"
+ if self["version"].value not in self.KNOWN_VERSIONS:
+ return "Unknown version (%d)" % self["version"].value
+ return True
+
+ def createDescription(self):
+ return "Serialized Java object, version %s" % self["version"].value
+
+ def resetHandles(self):
+ self.handles = {}
+ self.nextHandleNum = 0x7E0000
+
+ def newHandle(self, obj):
+ self.handles[self.nextHandleNum] = obj
+ self.nextHandleNum += 1
+
+ def createFields(self):
+ self.resetHandles()
+
+ yield textHandler(UInt16(self, "magic", "Java serialized object signature"),
+ hexadecimal)
+ yield UInt16(self, "version", "Stream version")
+
+ while not self.eof:
+ yield SerializedContent(self, "object[]")
diff --git a/lib/hachoir_parser/program/macho.py b/lib/hachoir_parser/program/macho.py
new file mode 100644
index 00000000..3ec7e1c2
--- /dev/null
+++ b/lib/hachoir_parser/program/macho.py
@@ -0,0 +1,471 @@
+"""
+Mach-O (Mac OS X executable file format) parser.
+
+Author: Robert Xiao
+Creation date: February 11, 2015
+"""
+
+from hachoir_parser import HachoirParser
+from hachoir_core.field import (RootSeekableFieldSet, FieldSet, ParserError, Bit, NullBits, RawBits,
+ Int32, UInt8, UInt16, UInt32, UInt64, Enum,
+ String, RawBytes, Bytes)
+from hachoir_core.text_handler import textHandler, hexadecimal
+from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
+
+class ElfHeader(FieldSet):
+ LITTLE_ENDIAN_ID = 1
+ BIG_ENDIAN_ID = 2
+ MACHINE_NAME = {
+ # e_machine, EM_ defines
+ 0: u"No machine",
+ 1: u"AT&T WE 32100",
+ 2: u"SPARC",
+ 3: u"Intel 80386",
+ 4: u"Motorola 68000",
+ 5: u"Motorola 88000",
+ 6: u"Intel 80486",
+ 7: u"Intel 80860",
+ 8: u"MIPS I Architecture",
+ 9: u"Amdahl UTS on System/370",
+ 10: u"MIPS RS3000 Little-endian",
+ 11: u"IBM RS/6000 XXX reserved",
+ 15: u"Hewlett-Packard PA-RISC",
+ 16: u"NCube XXX reserved",
+ 17: u"Fujitsu VPP500",
+ 18: u"Enhanced instruction set SPARC",
+ 19: u"Intel 80960",
+ 20: u"PowerPC 32-bit",
+ 21: u"PowerPC 64-bit",
+ 36: u"NEC V800",
+ 37: u"Fujitsu FR20",
+ 38: u"TRW RH-32",
+ 39: u"Motorola RCE",
+ 40: u"Advanced RISC Machines (ARM)",
+ 41: u"DIGITAL Alpha",
+ 42: u"Hitachi Super-H",
+ 43: u"SPARC Version 9",
+ 44: u"Siemens Tricore",
+ 45: u"Argonaut RISC Core",
+ 46: u"Hitachi H8/300",
+ 47: u"Hitachi H8/300H",
+ 48: u"Hitachi H8S",
+ 49: u"Hitachi H8/500",
+ 50: u"Intel Merced (IA-64) Processor",
+ 51: u"Stanford MIPS-X",
+ 52: u"Motorola Coldfire",
+ 53: u"Motorola MC68HC12",
+ 62: u"Advanced Micro Devices x86-64",
+ 75: u"DIGITAL VAX",
+ 36902: u"used by NetBSD/alpha; obsolete",
+ }
+ CLASS_NAME = {
+ # e_ident[EI_CLASS], ELFCLASS defines
+ 1: u"32 bits",
+ 2: u"64 bits"
+ }
+ TYPE_NAME = {
+ # e_type, ET_ defines
+ 0: u"No file type",
+ 1: u"Relocatable file",
+ 2: u"Executable file",
+ 3: u"Shared object file",
+ 4: u"Core file",
+ 0xFF00: u"Processor-specific (0xFF00)",
+ 0xFFFF: u"Processor-specific (0xFFFF)",
+ }
+ OSABI_NAME = {
+ # e_ident[EI_OSABI], ELFOSABI_ defines
+ 0: u"UNIX System V ABI",
+ 1: u"HP-UX operating system",
+ 2: u"NetBSD",
+ 3: u"GNU/Linux",
+ 4: u"GNU/Hurd",
+ 5: u"86Open common IA32 ABI",
+ 6: u"Solaris",
+ 7: u"Monterey",
+ 8: u"IRIX",
+ 9: u"FreeBSD",
+ 10: u"TRU64 UNIX",
+ 11: u"Novell Modesto",
+ 12: u"OpenBSD",
+ 97: u"ARM",
+ 255: u"Standalone (embedded) application",
+ }
+ ENDIAN_NAME = {
+ # e_ident[EI_DATA], ELFDATA defines
+ LITTLE_ENDIAN_ID: "Little endian",
+ BIG_ENDIAN_ID: "Big endian",
+ }
+
+ def createFields(self):
+ yield Bytes(self, "signature", 4, r'ELF signature ("\x7fELF")')
+ yield Enum(UInt8(self, "class", "Class"), self.CLASS_NAME)
+ if self["class"].value == 1:
+ ElfLongWord = UInt32
+ else:
+ ElfLongWord = UInt64
+ yield Enum(UInt8(self, "endian", "Endian"), self.ENDIAN_NAME)
+ yield UInt8(self, "file_version", "File version")
+ yield Enum(UInt8(self, "osabi_ident", "OS/syscall ABI identification"), self.OSABI_NAME)
+ yield UInt8(self, "abi_version", "syscall ABI version")
+ yield String(self, "pad", 7, "Pad")
+
+ yield Enum(UInt16(self, "type", "File type"), self.TYPE_NAME)
+ yield Enum(UInt16(self, "machine", "Machine type"), self.MACHINE_NAME)
+ yield UInt32(self, "version", "ELF format version")
+ yield textHandler(ElfLongWord(self, "entry", "Entry point"), hexadecimal)
+ yield ElfLongWord(self, "phoff", "Program header file offset")
+ yield ElfLongWord(self, "shoff", "Section header file offset")
+ yield UInt32(self, "flags", "Architecture-specific flags")
+ yield UInt16(self, "ehsize", "Elf header size (this header)")
+ yield UInt16(self, "phentsize", "Program header entry size")
+ yield UInt16(self, "phnum", "Program header entry count")
+ yield UInt16(self, "shentsize", "Section header entry size")
+ yield UInt16(self, "shnum", "Section header entry count")
+ yield UInt16(self, "shstrndx", "Section header string table index")
+
+ def isValid(self):
+ if self["signature"].value != "\x7FELF":
+ return "Wrong ELF signature"
+ if self["class"].value not in self.CLASS_NAME:
+ return "Unknown class"
+ if self["endian"].value not in self.ENDIAN_NAME:
+ return "Unknown endian (%s)" % self["endian"].value
+ return ""
+
+class SectionFlags(FieldSet):
+ def createFields(self):
+ if self.root.endian == BIG_ENDIAN:
+ if self.root.is64bit:
+ yield RawBits(self, "reserved[]", 32)
+ yield RawBits(self, "processor_specific", 4, "Processor specific flags")
+ yield NullBits(self, "reserved[]", 17)
+ yield Bit(self, "is_tls", "Section contains TLS data?")
+ yield NullBits(self, "reserved[]", 7)
+ yield Bit(self, "is_exec", "Section contains executable instructions?")
+ yield Bit(self, "is_alloc", "Section occupies memory?")
+ yield Bit(self, "is_writable", "Section contains writable data?")
+ else:
+ yield Bit(self, "is_writable", "Section contains writable data?")
+ yield Bit(self, "is_alloc", "Section occupies memory?")
+ yield Bit(self, "is_exec", "Section contains executable instructions?")
+ yield NullBits(self, "reserved[]", 7)
+ yield Bit(self, "is_tls", "Section contains TLS data?")
+ yield RawBits(self, "processor_specific", 4, "Processor specific flags")
+ yield NullBits(self, "reserved[]", 17)
+ if self.root.is64bit:
+ yield RawBits(self, "reserved[]", 32)
+
+class SymbolStringTableOffset(UInt32):
+ def createDisplay(self):
+ section_index = self['/header/shstrndx'].value
+ section = self['/section['+str(section_index)+']']
+ text = section.value[self.value:]
+ return text.split('\0',1)[0]
+
+class SectionHeader32(FieldSet):
+ static_size = 40*8
+ TYPE_NAME = {
+ # sh_type, SHT_ defines
+ 0: "Inactive",
+ 1: "Program defined information",
+ 2: "Symbol table section",
+ 3: "String table section",
+ 4: "Relocation section with addends",
+ 5: "Symbol hash table section",
+ 6: "Dynamic section",
+ 7: "Note section",
+ 8: "Block started by symbol (BSS) or No space section",
+ 9: "Relocation section without addends",
+ 10:"Reserved - purpose unknown",
+ 11:"Dynamic symbol table section",
+ }
+
+ def createFields(self):
+ yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)")
+ yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME)
+ yield SectionFlags(self, "flags", "Section flags")
+ yield textHandler(UInt32(self, "VMA", "Virtual memory address"), hexadecimal)
+ yield textHandler(UInt32(self, "LMA", "Logical memory address (offset in file)"), hexadecimal)
+ yield textHandler(UInt32(self, "size", "Section size (bytes)"), hexadecimal)
+ yield UInt32(self, "link", "Index of a related section")
+ yield UInt32(self, "info", "Type-dependent information")
+ yield UInt32(self, "addr_align", "Address alignment (bytes)")
+ yield UInt32(self, "entry_size", "Size of each entry in section")
+
+ def createDescription(self):
+ return "Section header (name: %s, type: %s)" % \
+ (self["name"].display, self["type"].display)
+
+class SectionHeader64(SectionHeader32):
+ static_size = 64*8
+
+ def createFields(self):
+ yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)")
+ yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME)
+ yield SectionFlags(self, "flags", "Section flags")
+ yield textHandler(UInt64(self, "VMA", "Virtual memory address"), hexadecimal)
+ yield textHandler(UInt64(self, "LMA", "Logical memory address (offset in file)"), hexadecimal)
+ yield textHandler(UInt64(self, "size", "Section size (bytes)"), hexadecimal)
+ yield UInt32(self, "link", "Index of a related section")
+ yield UInt32(self, "info", "Type-dependent information")
+ yield UInt64(self, "addr_align", "Address alignment (bytes)")
+ yield UInt64(self, "entry_size", "Size of each entry in section")
+
+class ProgramFlags(FieldSet):
+ static_size = 32
+ FLAGS = (('pf_r','readable'),('pf_w','writable'),('pf_x','executable'))
+
+ def createFields(self):
+ if self.root.endian == BIG_ENDIAN:
+ yield NullBits(self, "padding[]", 29)
+ for fld, desc in self.FLAGS:
+ yield Bit(self, fld, "Segment is " + desc)
+ else:
+ for fld, desc in reversed(self.FLAGS):
+ yield Bit(self, fld, "Segment is " + desc)
+ yield NullBits(self, "padding[]", 29)
+
+ def createDescription(self):
+ attribs=[]
+ for fld, desc in self.FLAGS:
+ if self[fld].value:
+ attribs.append(desc)
+ return 'Segment is '+', '.join(attribs)
+
+class ProgramHeader32(FieldSet):
+ TYPE_NAME = {
+ # p_type, PT_ defines
+ 0: u"Unused program header table entry",
+ 1: u"Loadable program segment",
+ 2: u"Dynamic linking information",
+ 3: u"Program interpreter",
+ 4: u"Auxiliary information",
+ 5: u"Reserved, unspecified semantics",
+ 6: u"Entry for header table itself",
+ 7: u"Thread Local Storage segment",
+ 0x70000000: u"MIPS_REGINFO",
+ }
+ static_size = 32*8
+
+ def createFields(self):
+ yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME)
+ yield UInt32(self, "offset", "Offset")
+ yield textHandler(UInt32(self, "vaddr", "V. address"), hexadecimal)
+ yield textHandler(UInt32(self, "paddr", "P. address"), hexadecimal)
+ yield UInt32(self, "file_size", "File size")
+ yield UInt32(self, "mem_size", "Memory size")
+ yield ProgramFlags(self, "flags")
+ yield UInt32(self, "align", "Alignment padding")
+
+ def createDescription(self):
+ return "Program Header (%s)" % self["type"].display
+
+class ProgramHeader64(ProgramHeader32):
+ static_size = 56*8
+
+ def createFields(self):
+ yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME)
+ yield ProgramFlags(self, "flags")
+ yield UInt64(self, "offset", "Offset")
+ yield textHandler(UInt64(self, "vaddr", "V. address"), hexadecimal)
+ yield textHandler(UInt64(self, "paddr", "P. address"), hexadecimal)
+ yield UInt64(self, "file_size", "File size")
+ yield UInt64(self, "mem_size", "Memory size")
+ yield UInt64(self, "align", "Alignment padding")
+
+
+CPU_ARCH_ABI64 = 0x01000000
+CPU_TYPE = {
+ -1: 'Any',
+ 1: 'VAX',
+ 6: 'MC680x0',
+ 7: 'i386',
+ 7|CPU_ARCH_ABI64: 'x86_64',
+ 8: 'MIPS',
+ 10: 'MC98000',
+ 11: 'HPPA',
+ 12: 'ARM',
+ 12|CPU_ARCH_ABI64: 'ARM64',
+ 13: 'MC88000',
+ 14: 'SPARC',
+ 15: 'I860',
+ 16: 'Alpha',
+ 18: 'PowerPC',
+ 18|CPU_ARCH_ABI64: 'PowerPC64',
+}
+
+FILE_TYPE = {
+ 1: 'Relocatable object',
+ 2: 'Demand-paged executable',
+ 3: 'Fixed VM shared library',
+ 4: 'Core file',
+ 5: 'Preloaded executable',
+ 6: 'Dynamically bound shared library',
+ 7: 'Dynamic link editor',
+ 8: 'Dynamically bound bundle',
+ 9: 'Shared library stub for static linking only',
+ 10: 'Companion file with only debug sections',
+ 11: 'x86_64 kext',
+}
+
+MACHO_MAGICS = {
+ "\xfe\xed\xfa\xce": (0, BIG_ENDIAN), # 32-bit big endian
+ "\xce\xfa\xed\xfe": (0, LITTLE_ENDIAN), # 32-bit little endian
+ "\xfe\xed\xfa\xcf": (1, BIG_ENDIAN), # 64-bit big endian
+ "\xcf\xfa\xed\xfe": (1, LITTLE_ENDIAN), # 64-bit little endian
+}
+
+class MachoHeader(FieldSet):
+ def createFields(self):
+ yield Bytes(self, "magic", 4, "Mach-O signature")
+ yield Enum(Int32(self, "cputype"), CPU_TYPE)
+ yield Int32(self, "cpusubtype")
+ yield Enum(UInt32(self, "filetype"), FILE_TYPE)
+ yield UInt32(self, "ncmds")
+ yield UInt32(self, "sizeofcmds")
+ yield UInt32(self, "flags")
+ if self.parent.is64bit:
+ yield UInt32(self, "reserved")
+
+class MachoLoadCommand(FieldSet):
+ LOAD_COMMANDS = {
+ }
+
+ def createFields(self):
+ yield Enum(UInt32(self, "cmd"), self.LOAD_COMMANDS)
+ yield UInt32(self, "cmdsize")
+ self._size = self['cmdsize'].value * 8
+
+class MachoFileBase(RootSeekableFieldSet):
+ MAGICS = {"\xfe\xed\xfa\xce": (0, BIG_ENDIAN), # 32-bit big endian
+ "\xce\xfa\xed\xfe": (0, LITTLE_ENDIAN), # 32-bit little endian
+ "\xfe\xed\xfa\xcf": (1, BIG_ENDIAN), # 64-bit big endian
+ "\xcf\xfa\xed\xfe": (1, LITTLE_ENDIAN), # 64-bit little endian
+ }
+
+ def createFields(self):
+ baseaddr = self.absolute_address
+ # Choose size and endianness based on magic
+ magic = self.stream.readBytes(baseaddr, 4)
+ self.is64bit, self.endian = self.MAGICS[magic]
+
+ yield MachoHeader(self, "header", "Header")
+ for i in xrange(self['header/ncmds'].value):
+ yield MachoLoadCommand(self, "load_command[]")
+
+ return
+
+ # Parse header and program headers
+ yield ElfHeader(self, "header", "Header")
+ self.is64bit = (self["header/class"].value == 2)
+
+ for index in xrange(self["header/phnum"].value):
+ if self.is64bit:
+ yield ProgramHeader64(self, "prg_header[]")
+ else:
+ yield ProgramHeader32(self, "prg_header[]")
+
+ self.seekByte(self["header/shoff"].value, relative=False)
+
+ for index in xrange(self["header/shnum"].value):
+ if self.is64bit:
+ yield SectionHeader64(self, "section_header[]")
+ else:
+ yield SectionHeader32(self, "section_header[]")
+
+ for index in xrange(self["header/shnum"].value):
+ field = self["section_header["+str(index)+"]"]
+ if field['size'].value != 0:
+ self.seekByte(field['LMA'].value, relative=False)
+ yield RawBytes(self, "section["+str(index)+"]", field['size'].value)
+
+ def createDescription(self):
+ return "Mach-O program/library: %s" % (self["header/cputype"].display)
+
+class MachoFile(HachoirParser, MachoFileBase):
+ PARSER_TAGS = {
+ "id": "macho",
+ "category": "program",
+ "file_ext": ("dylib", "bundle", "o", ""),
+ "min_size": (28+56)*8, # Header + one segment load command
+ "mime": (
+ u"application/x-executable",
+ u"application/x-object",
+ u"application/x-sharedlib",
+ u"application/x-executable-file",
+ u"application/x-coredump"),
+ "magic": tuple((m,0) for m in MachoFileBase.MAGICS),
+ "description": "Mach-O program/library"
+ }
+ endian = BIG_ENDIAN
+
+ def __init__(self, stream, **args):
+ MachoFileBase.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, 4) not in self.MAGICS:
+ return "Invalid magic"
+ return True
+
+class MachoFatArch(FieldSet):
+ def createFields(self):
+ yield Enum(Int32(self, "cputype"), CPU_TYPE)
+ yield Int32(self, "cpusubtype")
+ yield textHandler(UInt32(self, "offset"), hexadecimal)
+ yield UInt32(self, "size")
+ yield UInt32(self, "align")
+ self['align'].createDescription = lambda: str(1 << self['align'].value)
+
+class MachoFatHeader(FieldSet):
+ def createFields(self):
+ yield Bytes(self, "magic", 4, "Mach-O signature")
+ yield UInt32(self, "nfat_arch", "Number of architectures in this fat file")
+ for i in xrange(self['nfat_arch'].value):
+ yield MachoFatArch(self, 'arch[]')
+
+class MachoFatFile(HachoirParser, RootSeekableFieldSet):
+ MAGIC_BE = "\xca\xfe\xba\xbe"
+ MAGIC_LE = "\xbe\xba\xfe\xca"
+
+ PARSER_TAGS = {
+ "id": "macho_fat",
+ "category": "program",
+ "file_ext": ("dylib", "bundle", ""),
+ "min_size": 4096*8 + MachoFile.PARSER_TAGS['min_size'], # One page + size for one arch
+ "mime": (
+ u"application/x-executable",
+ u"application/x-object",
+ u"application/x-sharedlib",
+ u"application/x-executable-file",
+ u"application/x-coredump"),
+ "magic": ((MAGIC_LE, 0), (MAGIC_BE, 0)),
+ "description": "Mach-O fat program/library"
+ }
+ endian = BIG_ENDIAN
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, 4) not in (self.MAGIC_LE, self.MAGIC_BE):
+ return "Invalid magic"
+ if self['header/nfat_arch'].value >= 16:
+ # This helps to distinguish mach-o from java.
+ return "Too many architectures"
+ return True
+
+ def createFields(self):
+ # Choose the right endian based on file magic
+ if self.stream.readBytes(0, 4) == self.MAGIC_LE:
+ self.endian = LITTLE_ENDIAN
+ else:
+ self.endian = BIG_ENDIAN
+
+ # Parse header and program headers
+ yield MachoFatHeader(self, "header", "Header")
+ for arch in self['header'].array('arch'):
+ self.seekByte(arch['offset'].value)
+ yield MachoFileBase(self, 'file[]', self.stream, None, arch['size'].value * 8)
diff --git a/lib/hachoir_parser/video/__init__.py b/lib/hachoir_parser/video/__init__.py
index 26f787e9..0989f6e7 100644
--- a/lib/hachoir_parser/video/__init__.py
+++ b/lib/hachoir_parser/video/__init__.py
@@ -3,4 +3,4 @@ from hachoir_parser.video.flv import FlvFile
from hachoir_parser.video.mov import MovFile
from hachoir_parser.video.mpeg_video import MPEGVideoFile
from hachoir_parser.video.mpeg_ts import MPEG_TS
-
+from hachoir_parser.video.avchd import AVCHDINDX, AVCHDMOBJ, AVCHDMPLS, AVCHDCLPI
diff --git a/lib/hachoir_parser/video/avchd.py b/lib/hachoir_parser/video/avchd.py
new file mode 100644
index 00000000..9f8c855c
--- /dev/null
+++ b/lib/hachoir_parser/video/avchd.py
@@ -0,0 +1,433 @@
+"""
+Parser for AVCHD/Blu-ray formats
+
+Notice: This parser is based off reverse-engineering efforts.
+It is NOT based on official specifications, and is subject to change as
+more information becomes available. There's a lot of guesswork here, so if you find
+that something disagrees with an official specification, please change it.
+
+Notice: This parser has NOT been tested on Blu-ray disc data, only on files
+taken from AVCHD camcorders.
+
+Author: Robert Xiao
+Creation: December 30, 2010
+
+References:
+- Wikipedia: http://en.wikipedia.org/wiki/AVCHD
+- European patent EP1821310: http://www.freepatentsonline.com/EP1821310.html
+"""
+
+"""
+File structure:
+Root (/PRIVATE/AVCHD, /AVCHD, /, etc.)
+ AVCHDTN/: (AVCHD only)
+ THUMB.TDT: Thumbnail Data: stored as a series of 16KiB pages, where each thumbnail starts on a page boundary
+ THUMB.TID: Thumbnail Index (TIDX), unknown format
+ BDMV/:
+ INDEX.BDM|index.bdmv: Bluray Disc Metadata (INDX): Clip index file
+ MOVIEOBJ.BDM|MovieObject.bdmv: Bluray Disc Metadata (MOBJ): Clip description file
+ AUXDATA/: (Optional, Blu-ray only)
+ sound.bdmv: Sound(s) associated with HDMV Interactive Graphic streams applications
+ ?????.otf: Font(s) associated with Text subtitle applications
+ BACKUP/: (Optional)
+ [Copies of *.bdmv, CLIPINF/* and PLAYLIST/*]
+ CLIPINF/:
+ ?????.CPI/?????.clpi: Clip information (HDMV)
+ PLAYLIST/:
+ ?????.MPL/?????.mpls: Movie Playlist information (MPLS)
+ STREAM/:
+ ?????.MTS|?????.m2ts: BDAV MPEG-2 Transport Stream (video file)
+ SSIF/: (Blu-ray 3D only)
+ ?????.ssif: Stereoscopic Interleaved file
+ IISVPL/: (Optional?, AVCHD only?)
+ ?????.VPL: Virtual Playlist? (MPLS)
+"""
+
+from hachoir_parser import HachoirParser
+from hachoir_core.field import (RootSeekableFieldSet, FieldSet,
+ RawBytes, Bytes, String, Bits, UInt8, UInt16, UInt32, PascalString8, Enum)
+from hachoir_core.endian import BIG_ENDIAN
+from hachoir_core.iso639 import ISO639_2
+from hachoir_core.text_handler import textHandler, hexadecimal
+from datetime import datetime
+
+def fromhex(field):
+ return int('%x'%field.value)
+
+class AVCHDTimestamp(FieldSet):
+ static_size = 8*8
+ def createFields(self):
+ yield textHandler(UInt8(self, "unknown", description="0x1E"), hexadecimal)
+ yield textHandler(UInt8(self, "century"), hexadecimal)
+ yield textHandler(UInt8(self, "year"), hexadecimal)
+ yield textHandler(UInt8(self, "month"), hexadecimal)
+ yield textHandler(UInt8(self, "day"), hexadecimal)
+ yield textHandler(UInt8(self, "hour"), hexadecimal)
+ yield textHandler(UInt8(self, "minute"), hexadecimal)
+ yield textHandler(UInt8(self, "second"), hexadecimal)
+
+ def createValue(self):
+ return datetime(fromhex(self['century'])*100 + fromhex(self['year']),
+ fromhex(self['month']), fromhex(self['day']),
+ fromhex(self['hour']), fromhex(self['minute']), fromhex(self['second']))
+
+class AVCHDGenericChunk(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield RawBytes(self, "raw[]", self['size'].value)
+
+class AVCHDINDX_0(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield RawBytes(self, "unknown[]", 22)
+ yield UInt32(self, "count")
+ for i in xrange(self['count'].value):
+ yield RawBytes(self, "data[]", 12)
+
+class AVCHDIDEX_0(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield RawBytes(self, "unknown[]", 40)
+ yield AVCHDTimestamp(self, "last_modified")
+ yield RawBytes(self, "unknown[]", self._size//8-52)
+
+class AVCHDMOBJ_Chunk(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "unknown[]")
+ yield UInt32(self, "index")
+ yield UInt32(self, "unknown[]")
+ yield textHandler(UInt32(self, "unknown_id"), hexadecimal)
+ yield UInt32(self, "unknown[]")
+ yield textHandler(UInt32(self, "playlist_id"), lambda field: '%05d'%field.value)
+ yield UInt32(self, "unknown[]")
+
+class AVCHDMPLS_StreamEntry(FieldSet):
+ ENTRYTYPE = {1:'PlayItem on disc',
+ 2:'SubPath on disc',
+ 3:'PlayItem in local storage',
+ 4:'SubPath in local storage'}
+ def createFields(self):
+ yield UInt8(self, "size")
+ self._size = (self['size'].value+1)*8
+ yield Enum(UInt8(self, "type"), self.ENTRYTYPE)
+ if self['type'].value in (1,3):
+ yield textHandler(UInt16(self, "pid", "PID of item in clip stream m2ts file"), hexadecimal)
+ else: # 2,4
+ '''
+ The patent says:
+ ref_to_SubPath_id
+ ref_to_SubClip_entry_id
+ ref_to_Stream_PID_of_subClip
+ Sizes aren't given, though, so I cannot determine the format without a sample.
+ '''
+ pass
+
+class AVCHDMPLS_StreamAttribs(FieldSet):
+ STREAMTYPE = {
+ 0x01: "V_MPEG1",
+ 0x02: "V_MPEG2",
+ 0x1B: "V_AVC",
+ 0xEA: "V_VC1",
+ 0x03: "A_MPEG1",
+ 0x04: "A_MPEG2",
+ 0x80: "A_LPCM",
+ 0x81: "A_AC3",
+ 0x84: "A_AC3_PLUS",
+ 0xA1: "A_AC3_PLUS_SEC",
+ 0x83: "A_TRUEHD",
+ 0x82: "A_DTS",
+ 0x85: "A_DTS-HD",
+ 0xA2: "A_DTS-HD_SEC",
+ 0x86: "A_DTS-MA",
+ 0x90: "S_PGS",
+ 0x91: "S_IGS",
+ 0x92: "T_SUBTITLE",
+ }
+ # Enumerations taken from "ClownBD's CLIPINF Editor". Values may not be accurate.
+ def createFields(self):
+ yield UInt8(self, "size")
+ self._size = (self['size'].value+1)*8
+ yield Enum(UInt8(self, "type"), self.STREAMTYPE)
+ if self['type'].display.startswith('V'): # Video
+ yield Enum(Bits(self, "resolution", 4), {1:'480i', 2:'576i', 3:'480p', 4:'1080i', 5:'720p', 6:'1080p', 7:'576p'})
+ yield Enum(Bits(self, "fps", 4), {1:'24/1.001', 2:'24', 3:'25', 4:'30/1.001', 6:'50', 7:'60/1.001'})
+ yield Enum(UInt8(self, "aspect_ratio"), {0x20:'4:3', 0x30:'16:9'})
+ elif self['type'].display.startswith('A'): # Audio
+ yield Enum(Bits(self, "channel_layout", 4), {1:'Mono', 3:'Stereo', 6:'Multi', 12:'Combi'})
+ yield Enum(Bits(self, "sample_rate", 4), {1:'48KHz', 4:'96KHz', 5:'192KHz', 12:'48-192KHz', 14:'48-96KHz'})
+ yield Enum(String(self, "language", 3), ISO639_2)
+ elif self['type'].display.startswith('T'): # Text subtitle
+ yield UInt8(self, "unknown[]")
+ yield Enum(String(self, "language", 3), ISO639_2)
+ elif self['type'].display.startswith('S'): # Graphics
+ yield Enum(String(self, "language", 3), ISO639_2)
+ else:
+ pass
+
+class AVCHDMPLS_Stream(FieldSet):
+ def createFields(self):
+ yield AVCHDMPLS_StreamEntry(self, "entry")
+ yield AVCHDMPLS_StreamAttribs(self, "attribs")
+
+class AVCHDMPLS_PlayItem(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield UInt16(self, "unknown[]")
+ yield UInt8(self, "video_count", "Number of video stream entries")
+        yield UInt8(self, "audio_count", "Number of audio stream entries")
+ yield UInt8(self, "subtitle_count", "Number of presentation graphics/text subtitle entries")
+ yield UInt8(self, "ig_count", "Number of interactive graphics entries")
+ yield RawBytes(self, "unknown[]", 8)
+ for i in xrange(self['video_count'].value):
+ yield AVCHDMPLS_Stream(self, "video[]")
+ for i in xrange(self['audio_count'].value):
+ yield AVCHDMPLS_Stream(self, "audio[]")
+ for i in xrange(self['subtitle_count'].value):
+ yield AVCHDMPLS_Stream(self, "subtitle[]")
+ for i in xrange(self['ig_count'].value):
+ yield AVCHDMPLS_Stream(self, "ig[]")
+
+class AVCHDMPLS_0_Chunk(FieldSet):
+ def createFields(self):
+ yield UInt16(self, "size")
+ self._size = (self['size'].value+2)*8
+ yield Bytes(self, "clip_id", 5)
+ yield Bytes(self, "clip_type", 4)
+ yield RawBytes(self, "unknown[]", 3)
+ yield UInt32(self, "clip_start_time[]", "clip start time (units unknown)")
+ yield UInt32(self, "clip_end_time[]", "clip end time (units unknown)")
+ yield RawBytes(self, "unknown[]", 10)
+ yield AVCHDMPLS_PlayItem(self, "playitem")
+
+class AVCHDMPLS_0(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield UInt32(self, "count")
+ yield UInt16(self, "unknown[]")
+ for i in xrange(self['count'].value):
+ yield AVCHDMPLS_0_Chunk(self, "chunk[]")
+
+class AVCHDMPLS_PlayItemMark(FieldSet):
+ def createFields(self):
+ yield UInt16(self, "unknown[]")
+ yield UInt16(self, "playitem_idx", "Index of the associated PlayItem")
+ yield UInt32(self, "mark_time", "Marker time in clip (units unknown)")
+ yield RawBytes(self, "unknown", 6)
+
+class AVCHDMPLS_1(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield UInt16(self, "count")
+ for i in xrange(self['count'].value):
+ yield AVCHDMPLS_PlayItemMark(self, "chunk[]")
+
+class AVCHDPLEX_1_Chunk(FieldSet):
+ static_size = 66*8
+ def createFields(self):
+ yield RawBytes(self, "unknown[]", 10)
+ yield AVCHDTimestamp(self, "date")
+ yield RawBytes(self, "unknown[]", 1)
+        yield PascalString8(self, "title")
+    def createValue(self):
+        return self['title'].value
+
+class AVCHDPLEX_0(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield RawBytes(self, "unknown[]", 10)
+ yield AVCHDTimestamp(self, "last_modified")
+ yield RawBytes(self, "unknown[]", 2)
+ yield PascalString8(self, "date")
+
+class AVCHDPLEX_1(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield UInt16(self, "count")
+ for i in xrange(self['count'].value):
+ yield AVCHDPLEX_1_Chunk(self, "chunk[]")
+
+class AVCHDCLPI_1(FieldSet):
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield RawBytes(self, "unknown[]", 10)
+ yield textHandler(UInt16(self, "video_pid", "PID of video data in stream file"), hexadecimal)
+ yield AVCHDMPLS_StreamAttribs(self, "video_attribs")
+ yield textHandler(UInt16(self, "audio_pid", "PID of audio data in stream file"), hexadecimal)
+ yield AVCHDMPLS_StreamAttribs(self, "audio_attribs")
+
+def AVCHDIDEX(self):
+ yield AVCHDIDEX_0(self, "chunk[]")
+ yield AVCHDGenericChunk(self, "chunk[]")
+
+def AVCHDPLEX(self):
+ yield AVCHDPLEX_0(self, "chunk[]")
+ yield AVCHDPLEX_1(self, "chunk[]")
+ yield AVCHDGenericChunk(self, "chunk[]")
+
+def AVCHDCLEX(self):
+ yield AVCHDGenericChunk(self, "chunk[]")
+ yield AVCHDGenericChunk(self, "chunk[]")
+
+class AVCHDChunkWithHeader(FieldSet):
+ TYPES = {'IDEX': AVCHDIDEX,
+ 'PLEX': AVCHDPLEX,
+ 'CLEX': AVCHDCLEX,}
+ def createFields(self):
+ yield UInt32(self, "size")
+ self._size = (self['size'].value+4)*8
+ yield UInt32(self, "unknown[]", "24")
+ yield UInt32(self, "unknown[]", "1")
+ yield UInt32(self, "unknown[]", "0x10000100")
+ yield UInt32(self, "unknown[]", "24")
+ yield UInt32(self, "size2")
+ assert self['size'].value == self['size2'].value+20
+ yield Bytes(self, "magic", 4)
+ yield RawBytes(self, "unknown[]", 36)
+ for field in self.TYPES[self['magic'].value](self):
+ yield field
+
+class AVCHDINDX(HachoirParser, RootSeekableFieldSet):
+ endian = BIG_ENDIAN
+ MAGIC = "INDX0"
+ PARSER_TAGS = {
+ "id": "bdmv_index",
+ "category": "video",
+ "file_ext": ("bdm","bdmv"),
+ "magic": ((MAGIC, 0),),
+ "min_size": 8, # INDX0?00
+ "description": "INDEX.BDM",
+ }
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Bytes(self, "filetype", 4, "File type (INDX)")
+ yield Bytes(self, "fileversion", 4, "File version (0?00)")
+ yield UInt32(self, "offset[0]")
+ yield UInt32(self, "offset[1]")
+ self.seekByte(self['offset[0]'].value)
+ yield AVCHDINDX_0(self, "chunk[]")
+ self.seekByte(self['offset[1]'].value)
+ yield AVCHDChunkWithHeader(self, "chunk[]")
+
+class AVCHDMOBJ(HachoirParser, RootSeekableFieldSet):
+ endian = BIG_ENDIAN
+ MAGIC = "MOBJ0"
+ PARSER_TAGS = {
+ "id": "bdmv_mobj",
+ "category": "video",
+ "file_ext": ("bdm","bdmv"),
+ "magic": ((MAGIC, 0),),
+ "min_size": 8, # MOBJ0?00
+ "description": "MOVIEOBJ.BDM",
+ }
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Bytes(self, "filetype", 4, "File type (MOBJ)")
+ yield Bytes(self, "fileversion", 4, "File version (0?00)")
+ yield RawBytes(self, "unknown[]", 32)
+ yield UInt32(self, "size")
+ yield UInt32(self, "unknown[]")
+ yield UInt16(self, "count")
+ yield textHandler(UInt32(self, "unknown_id"), hexadecimal)
+ for i in xrange(1, self['count'].value):
+ yield AVCHDMOBJ_Chunk(self, "movie_object[]")
+
+class AVCHDMPLS(HachoirParser, RootSeekableFieldSet):
+ endian = BIG_ENDIAN
+ MAGIC = "MPLS0"
+ PARSER_TAGS = {
+ "id": "bdmv_mpls",
+ "category": "video",
+ "file_ext": ("mpl","mpls","vpl"),
+ "magic": ((MAGIC, 0),),
+ "min_size": 8, # MPLS0?00
+ "description": "MPLS",
+ }
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Bytes(self, "filetype", 4, "File type (MPLS)")
+ yield Bytes(self, "fileversion", 4, "File version (0?00)")
+ yield UInt32(self, "offset[0]")
+ yield UInt32(self, "offset[1]")
+ yield UInt32(self, "offset[2]")
+ self.seekByte(self['offset[0]'].value)
+ yield AVCHDMPLS_0(self, "chunk[]")
+ self.seekByte(self['offset[1]'].value)
+ yield AVCHDMPLS_1(self, "chunk[]")
+ self.seekByte(self['offset[2]'].value)
+ yield AVCHDChunkWithHeader(self, "chunk[]")
+
+class AVCHDCLPI(HachoirParser, RootSeekableFieldSet):
+ endian = BIG_ENDIAN
+ MAGIC = "HDMV0"
+ PARSER_TAGS = {
+ "id": "bdmv_clpi",
+ "category": "video",
+ "file_ext": ("cpi","clpi"),
+ "magic": ((MAGIC, 0),),
+ "min_size": 8, # HDMV0?00
+ "description": "HDMV",
+ }
+
+ def __init__(self, stream, **args):
+ RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
+ HachoirParser.__init__(self, stream, **args)
+
+ def validate(self):
+ if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
+ return "Invalid magic"
+ return True
+
+ def createFields(self):
+ yield Bytes(self, "filetype", 4, "File type (HDMV)")
+ yield Bytes(self, "fileversion", 4, "File version (0?00)")
+ yield UInt32(self, "offset[]")
+ yield UInt32(self, "offset[]")
+ yield UInt32(self, "offset[]")
+ yield UInt32(self, "offset[]")
+ yield UInt32(self, "offset[]")
+ self.seekByte(self['offset[0]'].value)
+ yield AVCHDGenericChunk(self, "chunk[]")
+ self.seekByte(self['offset[1]'].value)
+ yield AVCHDCLPI_1(self, "chunk[]")
+ self.seekByte(self['offset[2]'].value)
+ yield AVCHDGenericChunk(self, "chunk[]")
+ self.seekByte(self['offset[3]'].value)
+ yield AVCHDGenericChunk(self, "chunk[]")
+ self.seekByte(self['offset[4]'].value)
+ yield AVCHDChunkWithHeader(self, "chunk[]")
diff --git a/lib/hachoir_parser/video/mov.py b/lib/hachoir_parser/video/mov.py
index 1ab6ac51..84b8d862 100644
--- a/lib/hachoir_parser/video/mov.py
+++ b/lib/hachoir_parser/video/mov.py
@@ -718,7 +718,7 @@ class Atom(FieldSet):
# ipmc: IPMP control
"moof": (AtomList, "moof", "movie fragment"),
"mfhd": (MovieFragmentHeader, "mfhd", "movie fragment header"),
- # traf: track fragment
+ "traf": (AtomList, "traf", "track fragment"),
# tfhd: track fragment header
# trun: track fragment run
# sdtp: independent and disposable samples
diff --git a/lib/hachoir_parser/video/mpeg_ts.py b/lib/hachoir_parser/video/mpeg_ts.py
index ed8724a3..bf2066af 100644
--- a/lib/hachoir_parser/video/mpeg_ts.py
+++ b/lib/hachoir_parser/video/mpeg_ts.py
@@ -11,10 +11,38 @@ Creation date: 13 january 2007
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, ParserError, MissingField,
- UInt8, Enum, Bit, Bits, RawBytes)
+ UInt8, Enum, Bit, Bits, RawBytes, RawBits)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
+class AdaptationField(FieldSet):
+ def createFields(self):
+ yield UInt8(self, "length")
+
+ yield Bit(self, "discontinuity_indicator")
+ yield Bit(self, "random_access_indicator")
+ yield Bit(self, "es_prio_indicator")
+ yield Bit(self, "has_pcr")
+ yield Bit(self, "has_opcr")
+ yield Bit(self, "has_splice_point")
+ yield Bit(self, "private_data")
+ yield Bit(self, "has_extension")
+
+ if self['has_pcr'].value:
+ yield Bits(self, "pcr_base", 33)
+ yield Bits(self, "pcr_ext", 9)
+
+ if self['has_opcr'].value:
+ yield Bits(self, "opcr_base", 33)
+ yield Bits(self, "opcr_ext", 9)
+
+ if self['has_splice_point'].value:
+ yield Bits(self, "splice_countdown", 8)
+
+ stuff_len = ((self['length'].value+1)*8) - self.current_size
+ if self['length'].value and stuff_len:
+ yield RawBits(self, 'stuffing', stuff_len)
+
class Packet(FieldSet):
def __init__(self, *args):
FieldSet.__init__(self, *args)
@@ -46,7 +74,11 @@ class Packet(FieldSet):
yield Bit(self, "has_adaptation")
yield Bit(self, "has_payload")
yield Bits(self, "counter", 4)
- yield RawBytes(self, "payload", 184)
+
+ if self["has_adaptation"].value:
+ yield AdaptationField(self, "adaptation_field")
+ if self["has_payload"].value:
+ yield RawBytes(self, "payload", 188-(self.current_size/8))
if self["has_error"].value:
yield RawBytes(self, "error_correction", 16)
@@ -54,6 +86,8 @@ class Packet(FieldSet):
text = "Packet: PID %s" % self["pid"].display
if self["payload_unit_start"].value:
text += ", start of payload"
+ if self["has_adaptation"].value:
+ text += ", with adaptation field"
return text
def isValid(self):
@@ -96,7 +130,7 @@ class MPEG_TS(Parser):
sync = self.stream.searchBytes("\x47", self.current_size, self.current_size+204*8)
if sync is None:
raise ParserError("Unable to find synchronization byte")
- elif sync:
+ elif sync-self.current_size:
yield RawBytes(self, "incomplete_packet[]", (sync-self.current_size)//8)
yield Packet(self, "packet[]")