Merge pull request #618 from JackDandy/feature/UpdateHachoir

Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435).
This commit is contained in:
JackDandy 2016-01-12 03:24:03 +00:00
commit 589d2544dd
30 changed files with 2955 additions and 292 deletions

View file

@ -12,6 +12,7 @@
* Update Certifi to 2015.11.20.1 (385476b)
* Update chardet packages 2.3.0 (26982c5) to 2.3.0 (d7fae98)
* Update dateutil library 2.4.2 (083f666) to 2.4.2 (d4baf97)
* Update Hachoir library 1.3.4 (r1383) to 1.3.4 (r1435)
### 0.11.0 (2016-01-10 22:30:00 UTC)

View file

@ -24,6 +24,19 @@ class JpegMetadata(RootMetadata):
"FNumber": "camera_focal",
"BrightnessValue": "camera_brightness",
"MaxApertureValue": "camera_aperture",
"ISOSpeedRatings": "iso_speed_ratings",
"ExifVersion": "exif_version",
"DateTimeOriginal": "date_time_original",
"DateTimeDigitized": "date_time_digitized",
"CompressedBitsPerPixel": "compressed_bits_per_pixel",
"ShutterSpeedValue": "shutter_speed_value",
"ApertureValue": "aperture_value",
"ExposureBiasValue": "exposure_bias_value",
"FocalLength": "focal_length",
"FlashpixVersion": "flashpix_version",
"FocalPlaneXResolution": "focal_plane_x_resolution",
"FocalPlaneYResolution": "focal_plane_y_resolution",
"FocalLengthIn35mmFilm": "focal_length_in_35mm_film",
# Generic metadatas
"ImageDescription": "title",
@ -32,6 +45,7 @@ class JpegMetadata(RootMetadata):
"PixelXDimension": "width",
"PixelYDimension": "height",
"UserComment": "comment",
"JPEGInterchangeFormatLength": "thumbnail_size",
}
IPTC_KEY = {

View file

@ -284,6 +284,10 @@ def extractMetadata(parser, quality=QUALITY_NORMAL):
metadata.extract(parser)
except HACHOIR_ERRORS, err:
error("Error during metadata extraction: %s" % unicode(err))
return None
except Exception, err:
error("Error during metadata extraction: %s" % unicode(err))
return None
if metadata:
metadata.mime_type = parser.mime_type
metadata.endian = endian_name[parser.endian]

View file

View file

@ -0,0 +1,64 @@
<ui version="4.0" >
<class>Form</class>
<widget class="QWidget" name="Form" >
<property name="geometry" >
<rect>
<x>0</x>
<y>0</y>
<width>441</width>
<height>412</height>
</rect>
</property>
<property name="windowTitle" >
<string>hachoir-metadata</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout" >
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2" >
<item>
<widget class="QPushButton" name="open_button" >
<property name="text" >
<string>Open</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="files_combo" >
<property name="sizePolicy" >
<sizepolicy vsizetype="Fixed" hsizetype="Expanding" >
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QTableWidget" name="metadata_table" >
<property name="alternatingRowColors" >
<bool>true</bool>
</property>
<property name="showGrid" >
<bool>false</bool>
</property>
<property name="rowCount" >
<number>0</number>
</property>
<property name="columnCount" >
<number>0</number>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="quit_button" >
<property name="text" >
<string>Quit</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

View file

@ -102,6 +102,23 @@ def registerAllItems(meta):
meta.register(Data("bit_rate", 604, _("Bit rate"), text_handler=humanBitRate,
filter=NumberFilter(1, MAX_BIT_RATE), type=(int, long, float)))
meta.register(Data("aspect_ratio", 604, _("Aspect ratio"), type=(int, long, float)))
meta.register(Data("thumbnail_size", 604, _("Thumbnail size"), text_handler=humanFilesize, type=(int, long, float)))
meta.register(Data("iso_speed_ratings", 800, _("ISO speed rating")))
meta.register(Data("exif_version", 801, _("EXIF version")))
meta.register(Data("date_time_original", 802, _("Date-time original"), text_handler=humanDatetime,
filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime))
meta.register(Data("date_time_digitized", 803, _("Date-time digitized"), text_handler=humanDatetime,
filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime))
meta.register(Data("compressed_bits_per_pixel", 804, _("Compressed bits per pixel"), type=(int, long, float)))
meta.register(Data("shutter_speed_value", 805, _("Shutter speed"), type=(int, long, float)))
meta.register(Data("aperture_value", 806, _("Aperture")))
meta.register(Data("exposure_bias_value", 807, _("Exposure bias")))
meta.register(Data("focal_length", 808, _("Focal length")))
meta.register(Data("flashpix_version", 809, _("Flashpix version")))
meta.register(Data("focal_plane_x_resolution", 810, _("Focal plane width")))
meta.register(Data("focal_plane_y_resolution", 811, _("Focal plane height"), type=float))
meta.register(Data("focal_length_in_35mm_film", 812, _("Focal length in 35mm film")))
meta.register(Data("os", 900, _("OS"), type=unicode))
meta.register(Data("producer", 901, _("Producer"), type=unicode))

View file

@ -1,5 +1,6 @@
from hachoir_parser.archive.ace import AceFile
from hachoir_parser.archive.ar import ArchiveFile
from hachoir_parser.archive.bomstore import BomFile
from hachoir_parser.archive.bzip2_parser import Bzip2Parser
from hachoir_parser.archive.cab import CabFile
from hachoir_parser.archive.gzip_parser import GzipParser
@ -11,3 +12,4 @@ from hachoir_parser.archive.sevenzip import SevenZipParser
from hachoir_parser.archive.mar import MarFile
from hachoir_parser.archive.mozilla_ar import MozillaArchive
from hachoir_parser.archive.zlib import ZlibData
from hachoir_parser.archive.prs_pak import PRSPakFile

View file

@ -0,0 +1,90 @@
"""
Apple BOMStorage parser.
Used for Assets.Bom files by Interface Builder, and for .bom files by Installer.app.
Documents:
Author: Robert Xiao
Created: 2015-05-14
"""
from hachoir_parser import HachoirParser
from hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum,
Bits, GenericInteger, Float32, Float64, UInt8, UInt32, UInt64, Bytes, NullBytes, RawBytes, String)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import displayHandler
from hachoir_core.tools import humanDatetime
from datetime import datetime, timedelta
class BomTrailerEntry(FieldSet):
    """One record of the BOM trailer table: a 32-bit offset followed by a 32-bit size."""
    static_size = 64 # bits

    def createFields(self):
        # Two consecutive unsigned 32-bit integers (2 * 32 = static_size).
        for field_name in ("offset", "size"):
            yield UInt32(self, field_name)

    def createDescription(self):
        location = self['offset'].value
        length = self['size'].value
        return "Object at offset %d, size %d" % (location, length)
class BomTrailer(FieldSet):
    """
    Trailer of a BOMStore file: the object table followed by the "trail" table.

    Layout produced by createFields():
      - num_spaces: total number of table slots, including blank ones
      - entry[]: (num_objects + 1) BomTrailerEntry records
      - blank_entries: zero padding for the remaining unused slots (if any)
      - num_trail: number of trail records
      - trail[]: num_trail BomTrailerEntry records
    """
    def createFields(self):
        yield UInt32(self, "num_spaces", "Total number of entries, including blank entries")
        # num_objects lives in the root header, hence the absolute path.
        nobj = self['/num_objects'].value
        nspace = self['num_spaces'].value
        for _ in xrange(nobj + 1):
            yield BomTrailerEntry(self, "entry[]")
        # Unused slots are stored as null bytes.  When the table is exactly
        # full there is nothing to emit: a zero-length byte field is not a
        # valid field, so skip it instead of aborting the parse.  A negative
        # count (corrupt header) is still passed through so that it is
        # reported as an error exactly as before.
        blank_bytes = (nspace - nobj - 1) * (BomTrailerEntry.static_size // 8)
        if blank_bytes != 0:
            yield NullBytes(self, "blank_entries", blank_bytes)
        yield UInt32(self, "num_trail")
        ntrail = self['num_trail'].value
        for _ in xrange(ntrail):
            yield BomTrailerEntry(self, "trail[]")

    def createDescription(self):
        return "Bom file trailer"
class BomFile(HachoirParser, RootSeekableFieldSet):
    """
    Root parser for Apple BOMStorage ("BOMStore") files.

    The file starts with a fixed header (magic, version, object count and the
    offset/size of the trailer and of the "header" blob).  The trailer holds a
    table of (offset, size) records; each non-empty record is exposed as a raw
    byte field after seeking to its offset.
    """
    endian = BIG_ENDIAN
    MAGIC = "BOMStore"
    PARSER_TAGS = {
        "id": "bom_store",
        "category": "archive",
        "file_ext": ("bom","car"),
        "magic": ((MAGIC, 0),),
        # min_size is expressed in bits, so the 32-byte header is 32*8.
        "min_size": 32*8, # 32-byte header
        "description": "Apple bill-of-materials file",
    }

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the stream starts with the BOMStore magic, else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        return True

    def createFields(self):
        yield Bytes(self, "magic", 8, "File magic (BOMStore)")
        yield UInt32(self, "version") # ?
        yield UInt32(self, "num_objects")
        yield UInt32(self, "trailer_offset")
        yield UInt32(self, "trailer_size")
        yield UInt32(self, "header_offset")
        yield UInt32(self, "header_size")

        yield RawBytes(self, "object[]", 512-32, "Null object (size 0, offset 0)") # null object

        self.seekByte(self['trailer_offset'].value)
        yield BomTrailer(self, "trailer")

        # A zero-length byte field is not valid, so empty regions are skipped
        # everywhere below (header, objects and trail records alike).
        self.seekByte(self['header_offset'].value)
        header_size = self['header_size'].value
        if header_size != 0:
            yield RawBytes(self, "header", header_size)

        for entry in self['trailer'].array('entry'):
            if entry['size'].value == 0:
                continue
            self.seekByte(entry['offset'].value)
            yield RawBytes(self, "object[]", entry['size'].value)

        for entry in self['trailer'].array('trail'):
            if entry['size'].value == 0:
                continue
            self.seekByte(entry['offset'].value)
            yield RawBytes(self, "trail[]", entry['size'].value)

View file

@ -0,0 +1,48 @@
"""
Parallel Realities Starfighter .pak file parser
See http://www.parallelrealities.co.uk/projects/starfighter.php
or svn://svn.debian.org/svn/pkg-games/packages/trunk/starfighter/
Author: Oliver Gerlich
"""
from hachoir_parser import Parser
from hachoir_core.field import (ParserError,
UInt32, String, SubFile, FieldSet)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import filesizeHandler
class FileEntry(FieldSet):
    """
    One archive member: a 56-byte NUL-truncated file name, a 32-bit size and
    then the file contents (exposed as a sub-file named after the entry).
    """
    def createFields(self):
        yield String(self, "filename", 56, truncate="\0")
        yield filesizeHandler(UInt32(self, "size"))
        # An empty member has no data bytes; a zero-length byte field is not
        # valid, so only emit the sub-file when there is something to read.
        size = self["size"].value
        if size > 0:
            yield SubFile(self, "data", size, filename=self["filename"].value)

    def createDescription(self):
        return self["filename"].value
class PRSPakFile(Parser):
    """
    Parser for Parallel Realities Starfighter .pak archives: a 4-byte "PACK"
    identifier followed by file entries until the end of the stream.
    """
    PARSER_TAGS = {
        "id": "prs_pak",
        "category": "archive",
        "file_ext": ("pak",),
        "mime": (u"application/octet-stream",),
        "min_size": 4*8, # just the identifier
        "magic": (('PACK', 0),),
        "description": "Parallel Realities Starfighter .pak archive",
    }

    endian = LITTLE_ENDIAN

    def validate(self):
        # Checks run in order and stop at the first failure: identifier,
        # then the first entry's size, then a non-empty first file name.
        if self.stream.readBytes(0, 4) != 'PACK':
            return False
        if not (self["file[0]/size"].value >= 0):
            return False
        return len(self["file[0]/filename"].value) > 0

    def createFields(self):
        yield String(self, "magic", 4)

        # Everything after the identifier is a sequence of file entries.
        while self._size > self.current_size:
            yield FileEntry(self, "file[]")

View file

@ -14,6 +14,7 @@ from hachoir_core.field import (StaticFieldSet, FieldSet,
from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_parser.common.msdos import MSDOSFileAttr32
from datetime import timedelta
MAX_FILESIZE = 1000 * 1024 * 1024
@ -63,9 +64,13 @@ def formatRARVersion(field):
"""
return "%u.%u" % divmod(field.value, 10)
def commonFlags(s):
yield Bit(s, "has_added_size", "Additional field indicating additional size")
yield Bit(s, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
def markerFlags(s):
yield UInt16(s, "flags", "Marker flags, always 0x1a21")
commonFlags = (
(Bit, "is_ignorable", "Old versions of RAR should ignore this block when copying data"),
(Bit, "has_added_size", "Additional field indicating additional size"),
)
class ArchiveFlags(StaticFieldSet):
format = (
@ -79,8 +84,8 @@ class ArchiveFlags(StaticFieldSet):
(Bit, "is_passworded", "Needs a password to be decrypted"),
(Bit, "is_first_vol", "Whether it is the first volume"),
(Bit, "is_encrypted", "Whether the encryption version is present"),
(NullBits, "internal", 6, "Reserved for 'internal use'")
)
(NullBits, "internal", 4, "Reserved for 'internal use'"),
) + commonFlags
def archiveFlags(s):
yield ArchiveFlags(s, "flags", "Archiver block flags")
@ -135,29 +140,57 @@ class FileFlags(FieldSet):
yield Bit(self, "is_solid", "Information from previous files is used (solid flag)")
# The 3 following lines are what blocks more staticity
yield Enum(Bits(self, "dictionary_size", 3, "Dictionary size"), DICTIONARY_SIZE)
for bit in commonFlags(self):
yield bit
yield Bit(self, "is_large", "file64 operations needed")
yield Bit(self, "is_unicode", "Filename also encoded using Unicode")
yield Bit(self, "has_salt", "Has salt for encryption")
yield Bit(self, "uses_file_version", "File versioning is used")
yield Bit(self, "has_ext_time", "Extra time ??")
yield Bit(self, "has_ext_time", "Extra time info present")
yield Bit(self, "has_ext_flags", "Extra flag ??")
for field in commonFlags:
yield field[0](self, *field[1:])
def fileFlags(s):
yield FileFlags(s, "flags", "File block flags")
class ExtTimeFlags(FieldSet):
    """
    16-bit flag word of the extended-time record: for each of the four
    timestamps a 2-bit byte count, a "one second" bit and a "present" bit.
    """
    static_size = 16

    def createFields(self):
        # 4 timestamps * (2 + 1 + 1) bits = 16 bits.
        for stamp in ('arctime', 'atime', 'ctime', 'mtime'):
            yield Bits(self, "%s_count" % stamp, 2, "Number of %s bytes" % stamp)
            yield Bit(self, "%s_onesec" % stamp, "Add one second to the timestamp?")
            yield Bit(self, "%s_present" % stamp, "Is %s extra time present?" % stamp)
class ExtTime(FieldSet):
def createFields(self):
yield textHandler(UInt16(self, "time_flags", "Flags for extended time"), hexadecimal)
flags = self["time_flags"].value
for index in xrange(4):
rmode = flags >> ((3-index)*4)
if rmode & 8:
if index:
yield TimeDateMSDOS32(self, "dos_time[]", "DOS Time")
if rmode & 3:
yield RawBytes(self, "remainder[]", rmode & 3, "Time remainder")
yield ExtTimeFlags(self, "time_flags")
for name in ['mtime', 'ctime', 'atime', 'arctime']:
if self['time_flags/%s_present' % name].value:
if name != 'mtime':
yield TimeDateMSDOS32(self, "%s" % name, "%s DOS timestamp" % name)
count = self['time_flags/%s_count' % name].value
if count:
yield Bits(self, "%s_remainder" % name, 8 * count, "%s extra precision time (in 100ns increments)" % name)
def createDescription(self):
out = 'Time extension'
pieces = []
for name in ['mtime', 'ctime', 'atime', 'arctime']:
if not self['time_flags/%s_present' % name].value:
continue
if name == 'mtime':
basetime = self['../ftime'].value
else:
basetime = self['%s' % name].value
delta = timedelta()
if self['time_flags/%s_onesec' % name].value:
delta += timedelta(seconds=1)
if '%s_remainder'%name in self:
delta += timedelta(microseconds=self['%s_remainder' % name].value / 10.0)
pieces.append('%s=%s' % (name, basetime + delta))
if pieces:
out += ': ' + ', '.join(pieces)
return out
def specialHeader(s, is_file):
yield filesizeHandler(UInt32(s, "compressed_size", "Compressed size (bytes)"))
@ -188,9 +221,9 @@ def specialHeader(s, is_file):
# Start additional fields from unrar - file only
if is_file:
if s["flags/has_salt"].value:
yield textHandler(UInt8(s, "salt", "Salt"), hexadecimal)
yield RawBytes(s, "salt", 8, "Encryption salt to increase security")
if s["flags/has_ext_time"].value:
yield ExtTime(s, "extra_time", "Extra time info")
yield ExtTime(s, "extra_time")
def fileHeader(s):
return specialHeader(s, True)
@ -203,9 +236,11 @@ def fileBody(s):
if size > 0:
yield RawBytes(s, "compressed_data", size, "File compressed data")
def fileDescription(s):
return "File entry: %s (%s)" % \
(s["filename"].display, s["compressed_size"].display)
def fileDescription(tag):
def _fileDescription(s):
return "%s: %s (%s)" % \
(tag, s["filename"].display, s["compressed_size"].display)
return _fileDescription
def newSubHeader(s):
return specialHeader(s, False)
@ -216,36 +251,31 @@ class EndFlags(StaticFieldSet):
(Bit, "has_data_crc", "Whether a CRC value is present"),
(Bit, "rev_space"),
(Bit, "has_vol_number", "Whether the volume number is present"),
(Bits, "unused[]", 4),
(Bit, "has_added_size", "Additional field indicating additional size"),
(Bit, "is_ignorable", "Old versions of RAR should ignore this block when copying data"),
(Bits, "unused[]", 6),
)
(NullBits, "unused[]", 10),
) + commonFlags
def endFlags(s):
yield EndFlags(s, "flags", "End block flags")
class BlockFlags(FieldSet):
class BlockFlags(StaticFieldSet):
static_size = 16
def createFields(self):
yield textHandler(Bits(self, "unused[]", 8, "Unused flag bits"), hexadecimal)
yield Bit(self, "has_added_size", "Additional field indicating additional size")
yield Bit(self, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
yield Bits(self, "unused[]", 6)
format = (
(NullBits, "unused[]", 14),
) + commonFlags
class Block(FieldSet):
BLOCK_INFO = {
# None means 'use default function'
0x72: ("marker", "Archive header", None, None, None),
0x72: ("marker", "File format marker", markerFlags, None, None),
0x73: ("archive_start", "Archive info", archiveFlags, archiveHeader, None),
0x74: ("file[]", fileDescription, fileFlags, fileHeader, fileBody),
0x75: ("comment[]", "Stray comment", None, commentHeader, commentBody),
0x74: ("file[]", fileDescription("File entry"), fileFlags, fileHeader, fileBody),
0x75: ("comment[]", "Comment", None, commentHeader, commentBody),
0x76: ("av_info[]", "Extra information", None, avInfoHeader, avInfoBody),
0x77: ("sub_block[]", "Stray subblock", None, newSubHeader, fileBody),
0x77: ("sub_block[]", fileDescription("Subblock"), None, newSubHeader, fileBody),
0x78: ("recovery[]", "Recovery block", None, recoveryHeader, None),
0x79: ("signature", "Signature block", None, signatureHeader, None),
0x7A: ("new_sub_block[]", "Stray new-format subblock", fileFlags,
0x7A: ("sub_block[]", fileDescription("New-format subblock"), fileFlags,
newSubHeader, fileBody),
0x7B: ("archive_end", "Archive end block", endFlags, None, None),
}

View file

@ -7,15 +7,27 @@ Informations:
Author: Olivier SCHWAB
Creation date: 6 december 2006
Updated by: Robert Xiao
Date: February 26 2011
"""
from hachoir_parser import Parser
from hachoir_core.field import (Field, FieldSet, ParserError,
GenericVector,
Enum, UInt8, UInt32, UInt64,
Bytes, RawBytes)
CompressedField, CString,
Enum, Bit, Bits, UInt8, UInt32, UInt64,
Bytes, RawBytes, TimestampWin64)
from hachoir_core.stream import StringInputStream
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir_core.tools import createDict, alignValue
from hachoir_parser.common.msdos import MSDOSFileAttr32
try:
from pylzma import decompress as lzmadecompress
has_lzma = True
except ImportError:
has_lzma = False
class SZUInt64(Field):
"""
@ -38,167 +50,258 @@ class SZUInt64(Field):
self._size += 8
self.createValue = lambda: value
ID_END, ID_HEADER, ID_ARCHIVE_PROPS, ID_ADD_STREAM_INFO, ID_MAIN_STREAM_INFO, \
ID_FILES_INFO, ID_PACK_INFO, ID_UNPACK_INFO, ID_SUBSTREAMS_INFO, ID_SIZE, \
ID_CRC, ID_FOLDER, ID_CODERS_UNPACK_SIZE, ID_NUM_UNPACK_STREAMS, \
ID_EMPTY_STREAM, ID_EMPTY_FILE, ID_ANTI, ID_NAME, ID_CREATION_TIME, \
ID_LAST_ACCESS_TIME, ID_LAST_WRITE_TIME, ID_WIN_ATTR, ID_COMMENT, \
ID_ENCODED_HEADER = xrange(24)
PROP_INFO = {
0x00: ('kEnd', 'End-of-header marker'),
ID_INFO = {
ID_END : "End",
ID_HEADER : "Header embedding another one",
ID_ARCHIVE_PROPS : "Archive Properties",
ID_ADD_STREAM_INFO : "Additional Streams Info",
ID_MAIN_STREAM_INFO : "Main Streams Info",
ID_FILES_INFO : "Files Info",
ID_PACK_INFO : "Pack Info",
ID_UNPACK_INFO : "Unpack Info",
ID_SUBSTREAMS_INFO : "Substreams Info",
ID_SIZE : "Size",
ID_CRC : "CRC",
ID_FOLDER : "Folder",
ID_CODERS_UNPACK_SIZE: "Coders Unpacked size",
ID_NUM_UNPACK_STREAMS: "Number of Unpacked Streams",
ID_EMPTY_STREAM : "Empty Stream",
ID_EMPTY_FILE : "Empty File",
ID_ANTI : "Anti",
ID_NAME : "Name",
ID_CREATION_TIME : "Creation Time",
ID_LAST_ACCESS_TIME : "Last Access Time",
ID_LAST_WRITE_TIME : "Last Write Time",
ID_WIN_ATTR : "Win Attributes",
ID_COMMENT : "Comment",
ID_ENCODED_HEADER : "Header holding encoded data info",
0x01: ('kHeader', 'Archive header'),
0x02: ('kArchiveProperties', 'Archive properties'),
0x03: ('kAdditionalStreamsInfo', 'AdditionalStreamsInfo'),
0x04: ('kMainStreamsInfo', 'MainStreamsInfo'),
0x05: ('kFilesInfo', 'FilesInfo'),
0x06: ('kPackInfo', 'PackInfo'),
0x07: ('kUnPackInfo', 'UnPackInfo'),
0x08: ('kSubStreamsInfo', 'SubStreamsInfo'),
0x09: ('kSize', 'Size'),
0x0A: ('kCRC', 'CRC'),
0x0B: ('kFolder', 'Folder'),
0x0C: ('kCodersUnPackSize', 'CodersUnPackSize'),
0x0D: ('kNumUnPackStream', 'NumUnPackStream'),
0x0E: ('kEmptyStream', 'EmptyStream'),
0x0F: ('kEmptyFile', 'EmptyFile'),
0x10: ('kAnti', 'Anti'),
0x11: ('kName', 'Name'),
0x12: ('kCreationTime', 'CreationTime'),
0x13: ('kLastAccessTime', 'LastAccessTime'),
0x14: ('kLastWriteTime', 'LastWriteTime'),
0x15: ('kWinAttributes', 'WinAttributes'),
0x16: ('kComment', 'Comment'),
0x17: ('kEncodedHeader', 'Encoded archive header'),
}
PROP_IDS = createDict(PROP_INFO, 0)
PROP_DESC = createDict(PROP_INFO, 1)
# create k* constants
for k in PROP_IDS:
globals()[PROP_IDS[k]] = k
class SkippedData(FieldSet):
def ReadNextByte(self):
return self.stream.readBits(self.absolute_address + self.current_size, 8, self.endian)
def PropID(self, name):
return Enum(UInt8(self, name), PROP_IDS)
class SevenZipBitVector(FieldSet):
def __init__(self, parent, name, num, has_all_byte=False, **args):
FieldSet.__init__(self, parent, name, **args)
self.has_all_byte=has_all_byte
self.num = num
def createFields(self):
yield Enum(UInt8(self, "id[]"), ID_INFO)
if self.has_all_byte:
yield Enum(UInt8(self, "all_defined"), {0:'False', 1:'True'})
if self['all_defined'].value:
return
nbytes = alignValue(self.num, 8)//8
ctr = 0
for i in xrange(nbytes):
for j in reversed(xrange(8)):
yield Bit(self, "bit[%d]"%(ctr+j))
ctr += 8
def isAllDefined(self):
return self.has_all_byte and self['all_defined'].value
def isDefined(self, index):
if self.isAllDefined():
return True
return self['bit[%d]'%index].value
def createValue(self):
if self.isAllDefined():
return range(self.num)
return [i for i in xrange(self.num) if self['bit[%d]'%i].value]
def createDisplay(self):
if self.isAllDefined():
return 'all'
return ','.join(str(i) for i in self.value)
class ArchiveProperty(FieldSet):
def createFields(self):
yield PropID(self, "id")
size = SZUInt64(self, "size")
yield size
if size.value > 0:
yield RawBytes(self, "data", size.value)
yield RawBytes(self, "data", size.value)
def createDescription(self):
return self['id'].display
def waitForID(s, wait_id, wait_name="waited_id[]"):
while not s.eof:
addr = s.absolute_address+s.current_size
uid = s.stream.readBits(addr, 8, LITTLE_ENDIAN)
if uid == wait_id:
yield Enum(UInt8(s, wait_name), ID_INFO)
s.info("Found ID %s (%u)" % (ID_INFO[uid], uid))
return
s.info("Skipping ID %u!=%u" % (uid, wait_id))
yield SkippedData(s, "skipped_id[]", "%u != %u" % (uid, wait_id))
class ArchiveProperties(FieldSet):
def createFields(self):
yield PropID(self, "id")
while not self.eof:
uid = ReadNextByte(self)
if uid == kEnd:
yield PropID(self, "end_marker")
break
yield ArchiveProperty(self, "prop[]")
class HashDigest(FieldSet):
def __init__(self, parent, name, num_digests, desc=None):
class Digests(FieldSet):
def __init__(self, parent, name, num_digests, digest_desc=None, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.num_digests = num_digests
if digest_desc is None:
self.digest_desc = ['stream %d'%i for i in xrange(num_digests)]
else:
self.digest_desc = digest_desc
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
bytes = self.stream.readBytes(self.absolute_address, self.num_digests)
if self.num_digests > 0:
yield GenericVector(self, "defined[]", self.num_digests, UInt8, "bool")
for index in xrange(self.num_digests):
if bytes[index]:
yield textHandler(UInt32(self, "hash[]",
"Hash for digest %u" % index), hexadecimal)
yield PropID(self, "id")
definearr = SevenZipBitVector(self, "defined", self.num_digests, has_all_byte=True)
yield definearr
for index in definearr.value:
yield textHandler(UInt32(self, "digest[]",
"Digest for %s" % self.digest_desc[index]), hexadecimal)
class PackInfo(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
# Very important, helps determine where the data is
yield SZUInt64(self, "pack_pos", "Position of the packs")
num = SZUInt64(self, "num_pack_streams")
yield PropID(self, "id")
yield SZUInt64(self, "pack_pos", "File offset to the packed data")
num = SZUInt64(self, "num_pack_streams", "Number of packed streams")
yield num
num = num.value
for field in waitForID(self, ID_SIZE, "size_marker"):
yield field
for size in xrange(num):
yield SZUInt64(self, "pack_size[]")
while not self.eof:
addr = self.absolute_address+self.current_size
uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
if uid == ID_END:
yield Enum(UInt8(self, "end_marker"), ID_INFO)
uid = ReadNextByte(self)
if uid == kEnd:
yield PropID(self, "end_marker")
break
elif uid == ID_CRC:
yield HashDigest(self, "hash_digest", size)
elif uid == kSize:
yield PropID(self, "size_marker")
for index in xrange(num.value):
yield SZUInt64(self, "pack_size[]")
elif uid == kCRC:
yield Digests(self, "digests", num.value)
else:
yield SkippedData(self, "skipped_data")
raise ParserError("Unexpected ID (%i)" % uid)
def lzmaParams(value):
    """
    Format a packed LZMA properties value as "lc=.. pb=.. lp=..".

    `value` is any object exposing the packed integer as `.value`.  The
    integer is decoded as (pb * 5 + lp) * 9 + lc.
    """
    param = value.value
    # Floor division keeps the arithmetic integral on every Python version.
    remainder, lc = divmod(param, 9)   # lc: literal coder context bits
    pb, lp = divmod(remainder, 5)      # pb: position state bits, lp: literal coder position bits
    # Argument order must follow the labels in the format string; pb and lp
    # were previously passed the wrong way round.
    return "lc=%u pb=%u lp=%u" % (lc, pb, lp)
# Maps a raw 7z method (codec) ID, as a byte string, to a readable name.
# Bytes 0x08 and 0x09 must be written as "\x08" / "\x09": "\8" and "\9" are
# not octal escapes and would produce a two-character key (backslash + digit)
# that never matches the ID read from the file.
METHODS = {
    "\0": "Copy",
    "\3": "Delta",
    "\4": "x86_BCJ",
    "\5": "PowerPC",
    "\6": "IA64",
    "\7": "ARM_LE",
    "\x08": "ARMT_LE", # thumb
    "\x09": "SPARC",
    "\x21": "LZMA2",
    "\2\3\2": "Common-Swap-2",
    "\2\3\4": "Common-Swap-4",
    "\3\1\1": "7z-LZMA",
    "\3\3\1\3": "7z-Branch-x86-BCJ",
    "\3\3\1\x1b": "7z-Branch-x86-BCJ2",
    "\3\3\2\5": "7z-Branch-PowerPC-BE",
    "\3\3\3\1": "7z-Branch-Alpha-LE",
    "\3\3\4\1": "7z-Branch-IA64-LE",
    "\3\3\5\1": "7z-Branch-ARM-LE",
    "\3\3\6\5": "7z-Branch-M68-BE",
    "\3\3\7\1": "7z-Branch-ARMT-LE",
    "\3\3\x08\5": "7z-Branch-SPARC-BE",
    "\3\4\1": "7z-PPMD",
    "\3\x7f\1": "7z-Experimental",
    "\4\0": "Reserved",
    "\4\1\0": "Zip-Copy",
    "\4\1\1": "Zip-Shrink",
    "\4\1\6": "Zip-Implode",
    "\4\1\x08": "Zip-Deflate",
    "\4\1\x09": "Zip-Deflate64",
    # NOTE(review): the next two IDs may be decimal zip method numbers (12, 14)
    # written as hex; confirm against 7-Zip's Methods.txt before relying on them.
    "\4\1\x10": "Zip-BZip2",
    "\4\1\x14": "Zip-LZMA",
    "\4\1\x60": "Zip-JPEG",
    "\4\1\x61": "Zip-WavPack",
    "\4\1\x62": "Zip-PPMD",
    "\4\1\x63": "Zip-wzAES",
    "\4\2\2": "BZip2",
    "\4\3\1": "RAR-15",
    "\4\3\2": "RAR-20",
    "\4\3\3": "RAR-29",
    "\4\4\1": "Arj3",
    "\4\4\2": "Arj4",
    "\4\5": "Z",
    "\4\6": "LZH",
    "\4\7": "7z-Reserved",
    "\4\x08": "CAB",
    # The two NSIS codecs need distinct IDs; with a duplicated key the
    # second entry silently replaced the first.
    "\4\x09\1": "NSIS-Deflate",
    "\4\x09\2": "NSIS-BZip2",
    "\6\0": "Crypto-Reserved",
    "\6\1\x00": "Crypto-AES128-ECB",
    "\6\1\x01": "Crypto-AES128-CBC",
    "\6\1\x02": "Crypto-AES128-CFB",
    "\6\1\x03": "Crypto-AES128-OFB",
    "\6\1\x40": "Crypto-AES192-ECB",
    "\6\1\x41": "Crypto-AES192-CBC",
    "\6\1\x42": "Crypto-AES192-CFB",
    "\6\1\x43": "Crypto-AES192-OFB",
    "\6\1\x80": "Crypto-AES256-ECB",
    "\6\1\x81": "Crypto-AES256-CBC",
    "\6\1\x82": "Crypto-AES256-CFB",
    "\6\1\x83": "Crypto-AES256-OFB",
    "\6\1\xc0": "Crypto-AES-ECB",
    "\6\1\xc1": "Crypto-AES-CBC",
    "\6\1\xc2": "Crypto-AES-CFB",
    "\6\1\xc3": "Crypto-AES-OFB",
    "\6\7": "Crypto-Reserved",
    "\6\x0f": "Crypto-Reserved",
    "\6\xf0": "Crypto-Misc",
    "\6\xf1\1\1": "Crypto-Zip",
    "\6\xf1\3\2": "Crypto-RAR-Unknown",
    "\6\xf1\3\3": "Crypto-RAR-29", # AES128
    "\6\xf1\7\1": "Crypto-7z", # AES256
    "\7\0": "Hash-None",
    "\7\1": "Hash-CRC",
    "\7\2": "Hash-SHA1",
    "\7\3": "Hash-SHA256",
    "\7\4": "Hash-SHA384",
    "\7\5": "Hash-SHA512",
    "\7\xf0": "Hash-Misc",
    "\7\xf1\3\3": "Hash-RAR-29", # modified SHA1
    "\7\xf1\7\1": "Hash-7z", # SHA256
}
class CoderID(FieldSet):
CODECS = {
# Only 2 methods ... and what about PPMD ?
"\0" : "copy",
"\3\1\1": "lzma",
}
class Coder(FieldSet):
def createFields(self):
byte = UInt8(self, "id_size")
yield byte
byte = byte.value
self.info("ID=%u" % byte)
size = byte & 0xF
yield Bits(self, "id_size", 4)
yield Bit(self, "is_not_simple", "If unset, stream setup is simple")
yield Bit(self, "has_attribs", "Are there compression properties attached?")
yield Bit(self, "unused[]")
yield Bit(self, "is_not_last_method", "Are there more methods after this one in the alternative method list?")
size = self['id_size'].value
if size > 0:
name = self.stream.readBytes(self.absolute_address+self.current_size, size)
if name in self.CODECS:
name = self.CODECS[name]
self.info("Codec is %s" % name)
else:
self.info("Undetermined codec %s" % name)
name = "unknown"
yield RawBytes(self, name, size)
#yield textHandler(Bytes(self, "id", size), lambda: name)
if byte & 0x10:
yield Enum(RawBytes(self, "id", size), METHODS)
if self['is_not_simple'].value:
yield SZUInt64(self, "num_stream_in")
yield SZUInt64(self, "num_stream_out")
self.info("Streams: IN=%u OUT=%u" % \
(self["num_stream_in"].value, self["num_stream_out"].value))
if byte & 0x20:
size = SZUInt64(self, "properties_size[]")
if self['has_attribs'].value:
size = SZUInt64(self, "properties_size")
yield size
if size.value == 5:
#LzmaDecodeProperties@LZMAStateDecode.c
yield textHandler(UInt8(self, "parameters"), lzmaParams)
yield filesizeHandler(UInt32(self, "dictionary_size"))
elif size.value > 0:
yield RawBytes(self, "properties[]", size.value)
yield RawBytes(self, "properties", size.value)
def _get_num_streams(self, direction):
if self['is_not_simple'].value:
return self['num_stream_%s'%direction].value
return 1
in_streams = property(lambda self: self._get_num_streams('in'))
out_streams = property(lambda self: self._get_num_streams('out'))
class CoderInfo(FieldSet):
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.in_streams = 1
self.out_streams = 1
class CoderList(FieldSet):
def createFields(self):
# The real ID
addr = self.absolute_address + self.current_size
b = self.parent.stream.readBits(addr, 8, LITTLE_ENDIAN)
cid = CoderID(self, "coder_id")
yield cid
if b&0x10: # Work repeated, ...
self.in_streams = cid["num_stream_in"].value
self.out_streams = cid["num_stream_out"].value
# Skip other IDs
while b&0x80:
addr = self.absolute_address + self.current_size
b = self.parent.stream.readBits(addr, 8, LITTLE_ENDIAN)
yield CoderID(self, "unused_codec_id[]")
while not self.eof:
field = Coder(self, "coder[]")
yield field
if not field['is_not_last_method'].value:
break
class BindPairInfo(FieldSet):
def createFields(self):
@ -208,45 +311,46 @@ class BindPairInfo(FieldSet):
self.info("Indexes: IN=%u OUT=%u" % \
(self["in_index"].value, self["out_index"].value))
class FolderItem(FieldSet):
def __init__(self, parent, name, desc=None):
FieldSet.__init__(self, parent, name, desc)
self.in_streams = 0
self.out_streams = 0
class Folder(FieldSet):
def createFields(self):
yield SZUInt64(self, "num_coders")
num = self["num_coders"].value
self.info("Folder: %u codecs" % num)
# Coders info
for index in xrange(num):
ci = CoderInfo(self, "coder_info[]")
yield ci
self.in_streams += ci.in_streams
self.out_streams += ci.out_streams
in_streams = out_streams = 0
# Bin pairs
self.info("out streams: %u" % self.out_streams)
for index in xrange(self.out_streams-1):
# Coder info
for index in xrange(num):
ci = CoderList(self, "coders[]")
yield ci
in_streams += ci['coder[0]'].in_streams
out_streams += ci['coder[0]'].out_streams
self._in_streams = in_streams
self._out_streams = out_streams
# Bind pairs
self.info("out streams: %u" % out_streams)
for index in xrange(out_streams-1):
yield BindPairInfo(self, "bind_pair[]")
# Packed streams
# @todo: Actually find mapping
packed_streams = self.in_streams - self.out_streams + 1
if packed_streams == 1:
pass
else:
packed_streams = in_streams - out_streams + 1
if packed_streams > 1:
for index in xrange(packed_streams):
yield SZUInt64(self, "pack_stream[]")
def _get_num_streams(self, direction):
list(self)
return getattr(self, '_'+direction+'_streams')
in_streams = property(lambda self: self._get_num_streams('in'))
out_streams = property(lambda self: self._get_num_streams('out'))
class UnpackInfo(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
# Wait for synch
for field in waitForID(self, ID_FOLDER, "folder_marker"):
yield field
yield PropID(self, "id")
yield PropID(self, "folder_marker")
assert self['folder_marker'].value == kFolder
yield SZUInt64(self, "num_folders")
# Get generic info
@ -254,97 +358,277 @@ class UnpackInfo(FieldSet):
self.info("%u folders" % num)
yield UInt8(self, "is_external")
# Read folder items
for folder_index in xrange(num):
yield FolderItem(self, "folder_item[]")
if self['is_external'].value:
yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
else:
# Read folder items
for folder_index in xrange(num):
yield Folder(self, "folder[]")
# Get unpack sizes for each coder of each folder
for field in waitForID(self, ID_CODERS_UNPACK_SIZE, "coders_unpsize_marker"):
yield field
yield PropID(self, "unpacksize_marker")
assert self['unpacksize_marker'].value == kCodersUnPackSize
for folder_index in xrange(num):
folder_item = self["folder_item[%u]" % folder_index]
for index in xrange(folder_item.out_streams):
#yield UInt8(self, "unpack_size[]")
yield SZUInt64(self, "unpack_size[]")
folder = self["folder[%u]" % folder_index]
for index in xrange(folder.out_streams):
yield SZUInt64(self, "unpack_size[%d][%d]"%(folder_index,index))
# Extract digests
while not self.eof:
addr = self.absolute_address+self.current_size
uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
if uid == ID_END:
yield Enum(UInt8(self, "end_marker"), ID_INFO)
uid = ReadNextByte(self)
if uid == kEnd:
yield PropID(self, "end_marker")
break
elif uid == ID_CRC:
yield HashDigest(self, "hash_digest", num)
elif uid == kCRC:
yield Digests(self, "digests", num)
else:
yield SkippedData(self, "skip_data")
raise ParserError("Unexpected ID (%i)" % uid)
class SubStreamInfo(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
raise ParserError("SubStreamInfo not implemented yet")
class EncodedHeader(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
yield PropID(self, "id")
num_folders = self['../unpack_info/num_folders'].value
num_unpackstreams = [1]*num_folders
while not self.eof:
addr = self.absolute_address+self.current_size
uid = self.stream.readBits(addr, 8, LITTLE_ENDIAN)
if uid == ID_END:
yield Enum(UInt8(self, "end_marker"), ID_INFO)
uid = ReadNextByte(self)
if uid == kEnd:
yield PropID(self, "end_marker")
break
elif uid == ID_PACK_INFO:
yield PackInfo(self, "pack_info", ID_INFO[ID_PACK_INFO])
elif uid == ID_UNPACK_INFO:
yield UnpackInfo(self, "unpack_info", ID_INFO[ID_UNPACK_INFO])
elif uid == ID_SUBSTREAMS_INFO:
yield SubStreamInfo(self, "substreams_info", ID_INFO[ID_SUBSTREAMS_INFO])
elif uid == kNumUnPackStream:
yield PropID(self, "num_unpackstream_marker")
for i in xrange(num_folders):
field = SZUInt64(self, "num_unpackstreams[]")
yield field
num_unpackstreams[i] = field.value
elif uid == kSize:
yield PropID(self, "size_marker")
for i in xrange(num_folders):
# The last substream's size is the stream size minus the other substreams.
for j in xrange(num_unpackstreams[i]-1):
yield SZUInt64(self, "unpack_size[%d][%d]"%(i,j))
elif uid == kCRC:
digests = []
for i in xrange(num_folders):
if num_unpackstreams[i] == 1 and 'digests' in self['../unpack_info']:
continue
for j in xrange(num_unpackstreams[i]):
digests.append('folder %i, stream %i'%(i, j))
yield Digests(self, "digests", len(digests), digests)
else:
self.info("Unexpected ID (%i)" % uid)
break
raise ParserError("Unexpected ID (%i)" % uid)
class IDHeader(FieldSet):
class StreamsInfo(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "id"), ID_INFO)
ParserError("IDHeader not implemented")
yield PropID(self, "id")
while not self.eof:
uid = ReadNextByte(self)
if uid == kEnd:
yield PropID(self, "end")
break
elif uid == kPackInfo:
yield PackInfo(self, "pack_info", PROP_DESC[uid])
elif uid == kUnPackInfo:
yield UnpackInfo(self, "unpack_info", PROP_DESC[uid])
elif uid == kSubStreamsInfo:
yield SubStreamInfo(self, "substreams_info", PROP_DESC[uid])
else:
raise ParserError("Unexpected ID (%i)" % uid)
class EncodedHeader(StreamsInfo):
    """Encoded header record; adds nothing to the StreamsInfo parsing it inherits."""
class EmptyStreamProperty(FieldSet):
    """Property made of an id, a size and a bit vector sized by ``../num_files``.

    The value and display of this field set are those of the "vec" field.
    """

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "size")
        # One vector entry per file declared by the parent field set.
        file_count = self['../num_files'].value
        yield SevenZipBitVector(self, "vec", file_count)

    def createValue(self):
        bit_vector = self['vec']
        return bit_vector.value

    def createDisplay(self):
        bit_vector = self['vec']
        return bit_vector.display
class EmptyFileProperty(FieldSet):
    """Bit vector defined relative to the sibling ``empty_streams`` property.

    The vector has one entry per item of ``../empty_streams/vec``; the value
    maps each entry of "vec" back through that list.
    """

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "size")
        stream_entries = self['../empty_streams/vec'].value
        yield SevenZipBitVector(self, "vec", len(stream_entries))

    def createValue(self):
        stream_entries = self['../empty_streams/vec'].value
        # Each item of "vec" is used as an index into the empty-streams list.
        selected = []
        for position in self['vec'].value:
            selected.append(stream_entries[position])
        return selected

    def createDisplay(self):
        return ','.join(map(str, self.value))
class FileTimeProperty(FieldSet):
    """Timestamp property: a "defined" vector, an external flag, then the data.

    When "is_external" is set only an offset is stored; otherwise one
    TimestampWin64 is read for every entry of the "defined" vector.
    """

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "size")
        defined = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True)
        yield defined
        yield UInt8(self, "is_external")
        if not self['is_external'].value:
            # Inline data: the field name carries the index taken from the vector.
            for file_index in defined.value:
                yield TimestampWin64(self, "timestamp[%d]" % file_index)
            return
        yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
class FileNames(FieldSet):
    """File name property: external offset, or one UTF-16-LE C string per file."""

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "size")
        yield UInt8(self, "is_external")
        if not self['is_external'].value:
            # Inline data: as many names as ``../num_files`` announces.
            file_count = self['../num_files'].value
            for file_index in xrange(file_count):
                yield CString(self, "name[%d]" % file_index, charset="UTF-16-LE")
            return
        yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
class FileAttributes(FieldSet):
    """Attribute property: a "defined" vector, an external flag, then the data.

    When "is_external" is set only an offset is stored; otherwise one
    MSDOSFileAttr32 is read for every entry of the "defined" vector.
    """

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "size")
        defined = SevenZipBitVector(self, "defined", self['../num_files'].value, has_all_byte=True)
        yield defined
        yield UInt8(self, "is_external")
        if not self['is_external'].value:
            # Inline data: the field name carries the index taken from the vector.
            for file_index in defined.value:
                yield MSDOSFileAttr32(self, "attributes[%d]" % file_index)
            return
        yield SZUInt64(self, "folder_data_offset", "Offset to folder data within data stream")
class FilesInfo(FieldSet):
    """Files information: a file count followed by properties until an end marker.

    Each property is selected by the next byte of the stream; ids without a
    dedicated parser are read as a generic ArchiveProperty.
    """

    def createFields(self):
        yield PropID(self, "id")
        yield SZUInt64(self, "num_files")
        # (property id, field class, field name), tested in this order.
        known_properties = (
            (kEmptyStream, EmptyStreamProperty, "empty_streams"),
            (kEmptyFile, EmptyFileProperty, "empty_files"),
            (kAnti, EmptyFileProperty, "anti_files"),
            (kCreationTime, FileTimeProperty, "creation_time"),
            (kLastAccessTime, FileTimeProperty, "access_time"),
            (kLastWriteTime, FileTimeProperty, "modified_time"),
            (kName, FileNames, "filenames"),
            (kWinAttributes, FileAttributes, "attributes"),
        )
        while not self.eof:
            uid = ReadNextByte(self)
            if uid == kEnd:
                yield PropID(self, "end_marker")
                break
            for prop_id, field_class, field_name in known_properties:
                if uid == prop_id:
                    yield field_class(self, field_name)
                    break
            else:
                # Unknown property id: fall back to the generic parser.
                yield ArchiveProperty(self, "prop[]")
class Header(FieldSet):
    """Header record: sections selected by id byte until the end marker.

    Raises ParserError when the next id is not one of the handled sections.
    """

    def createFields(self):
        yield PropID(self, "id")
        # (section id, field class, field name), tested in this order.
        sections = (
            (kArchiveProperties, ArchiveProperties, "props"),
            (kAdditionalStreamsInfo, StreamsInfo, "additional_streams"),
            (kMainStreamsInfo, StreamsInfo, "main_streams"),
            (kFilesInfo, FilesInfo, "files_info"),
        )
        while not self.eof:
            uid = ReadNextByte(self)
            if uid == kEnd:
                yield PropID(self, "end")
                break
            for section_id, field_class, field_name in sections:
                if uid == section_id:
                    yield field_class(self, field_name, PROP_DESC[uid])
                    break
            else:
                raise ParserError("Unexpected ID %u" % uid)
class NextHeader(FieldSet):
def __init__(self, parent, name, desc="Next header"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8*self["/signature/start_hdr/next_hdr_size"].value
# Less work, as much interpretable information as the other
# version... what an obnoxious format
def createFields2(self):
yield Enum(UInt8(self, "header_type"), ID_INFO)
yield RawBytes(self, "header_data", self._size-1)
def createFields(self):
uid = self.stream.readBits(self.absolute_address, 8, LITTLE_ENDIAN)
if uid == ID_HEADER:
yield IDHeader(self, "header", ID_INFO[ID_HEADER])
elif uid == ID_ENCODED_HEADER:
yield EncodedHeader(self, "encoded_hdr", ID_INFO[ID_ENCODED_HEADER])
# Game Over: this is usually encoded using LZMA, not copy
# See SzReadAndDecodePackedStreams/SzDecode being called with the
# data position from "/next_hdr/encoded_hdr/pack_info/pack_pos"
# We should process further, yet we can't...
uid = ReadNextByte(self)
if uid == kHeader:
yield Header(self, "header", PROP_DESC[uid])
elif uid == kEncodedHeader:
yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid])
else:
ParserError("Unexpected ID %u" % uid)
size = self._size - self.current_size
if size > 0:
yield RawBytes(self, "next_hdr_data", size//8, "Next header's data")
raise ParserError("Unexpected ID %u" % uid)
class NextHeaderParser(Parser):
    """Root parser for a stream that starts with a Header or an EncodedHeader.

    It carries no parser tags and always validates; it is passed explicitly
    as ``parser_class`` by get_header_field().
    """
    PARSER_TAGS = {}
    endian = LITTLE_ENDIAN

    def createFields(self):
        uid = ReadNextByte(self)
        if uid == kHeader:
            yield Header(self, "header", PROP_DESC[uid])
            return
        if uid != kEncodedHeader:
            raise ParserError("Unexpected ID %u" % uid)
        yield EncodedHeader(self, "encoded_hdr", PROP_DESC[uid])

    def validate(self):
        return True
class CompressedData(Bytes):
    """Bytes field whose content is exposed as a decompressed sub-stream.

    Parameters:
    - length: size of the compressed data, passed to Bytes.
    - decompressor: callable receiving the raw field value and returning the
      decompressed data.
    - description: field description; when omitted and a filename is given,
      a 'File "name" (size)' description is generated.
    - parser, filename, mime_type, parser_class: optional hints turned into
      tags ("id", "filename", "mime", "class") for the sub-stream.
    """

    def __init__(self, parent, name, length, decompressor, description=None,
                 parser=None, filename=None, mime_type=None, parser_class=None):
        if filename:
            # Non-unicode names are made printable before being used in tags
            # and in the generated description.
            if not isinstance(filename, unicode):
                filename = makePrintable(filename, "ISO-8859-1")
            if not description:
                description = 'File "%s" (%s)' % (filename, humanFilesize(length))
        Bytes.__init__(self, parent, name, length, description)
        self.setupInputStream(decompressor, parser, filename, mime_type, parser_class)

    def setupInputStream(self, decompressor, parser, filename, mime_type, parser_class):
        """Register the factory that builds the decompressed input stream."""
        def createInputStream(cis, **args):
            # Append our hints to whatever tags the caller already supplied.
            tags = args.setdefault("tags", [])
            if parser_class:
                tags.append(("class", parser_class))
            if parser is not None:
                tags.append(("id", parser.PARSER_TAGS["id"]))
            if mime_type:
                tags.append(("mime", mime_type))
            if filename:
                tags.append(("filename", filename))
            # The previous debug "print args" was removed: library code must
            # not write to stdout each time a sub-stream is opened.
            return StringInputStream(decompressor(self.value), **args)
        self.setSubIStream(createInputStream)
def get_header_decompressor(self):
    """Return a callable decompressing the encoded header, or None.

    Reads the single folder's first coder under
    /next_hdr/encoded_hdr/unpack_info. 'Copy' yields an identity function and
    '7z-LZMA' a function calling lzmadecompress() on the coder properties
    followed by the data; any other method name yields None.
    """
    unpack_info = self['/next_hdr/encoded_hdr/unpack_info']
    assert unpack_info['num_folders'].value == 1
    coder = unpack_info['folder[0]/coders[0]/coder[0]']
    method = METHODS[coder['id'].value]

    if method == 'Copy':
        def passthrough(data):
            return data
        return passthrough

    if method == '7z-LZMA':
        props = coder['properties'].value
        length = unpack_info['unpack_size[0][0]'].value

        def inflate(data):
            return lzmadecompress(props+data, maxlength=length)
        return inflate

    # Unsupported method: callers fall back to raw bytes.
    return None
def get_header_field(self, name, size, description=None):
    """Build the field holding the encoded header data.

    Returns a CompressedData field (parsed with NextHeaderParser) when a
    decompressor is available, and a plain RawBytes field otherwise.
    """
    decompressor = get_header_decompressor(self)
    if decompressor is not None:
        return CompressedData(self, name, size, decompressor, description=description, parser_class=NextHeaderParser)
    return RawBytes(self, name, size, description=description)
class Body(FieldSet):
def __init__(self, parent, name, desc="Body data"):
FieldSet.__init__(self, parent, name, desc)
self._size = 8*self["/signature/start_hdr/next_hdr_offset"].value
def createFields(self):
if "encoded_hdr" in self["/next_hdr/"]:
if "encoded_hdr" in self["/next_hdr"]:
pack_size = sum([s.value for s in self.array("/next_hdr/encoded_hdr/pack_info/pack_size")])
body_size = self["/next_hdr/encoded_hdr/pack_info/pack_pos"].value
yield RawBytes(self, "compressed_data", body_size, "Compressed data")
if body_size:
yield RawBytes(self, "compressed_data", body_size, "Compressed data")
# Here we could check if copy method was used to "compress" it,
# but this never happens, so just output "compressed file info"
yield RawBytes(self, "compressed_file_info", pack_size,
yield get_header_field(self, "compressed_file_info", pack_size,
"Compressed file information")
size = (self._size//8) - pack_size - body_size
if size > 0:
@ -372,13 +656,14 @@ class SignatureHeader(FieldSet):
yield StartHeader(self, "start_hdr", "Start header")
class SevenZipParser(Parser):
MAGIC = "7z\xbc\xaf\x27\x1c"
PARSER_TAGS = {
"id": "7zip",
"category": "archive",
"file_ext": ("7z",),
"mime": (u"application/x-7z-compressed",),
"min_size": 32*8,
"magic": (("7z\xbc\xaf\x27\x1c", 0),),
"magic": ((MAGIC, 0),),
"description": "Compressed archive in 7z format"
}
endian = LITTLE_ENDIAN
@ -389,13 +674,12 @@ class SevenZipParser(Parser):
yield NextHeader(self, "next_hdr")
def validate(self):
if self.stream.readBytes(0,6) != "7z\xbc\xaf'\x1c":
if self.stream.readBytes(0,len(self.MAGIC)) != self.MAGIC:
return "Invalid signature"
return True
def createContentSize(self):
size = self["/signature/start_hdr/next_hdr_offset"].value
size += self["/signature/start_hdr/next_hdr_size"].value
size += 12 # Signature size
size += 20 # Start header size
return size*8
size = self["/signature/start_hdr/next_hdr_offset"].value*8
size += self["/signature/start_hdr/next_hdr_size"].value*8
size += SignatureHeader.static_size
return size

View file

@ -329,6 +329,9 @@ class ZipFile(Parser):
u"application/x-jar": "jar",
u"application/java-archive": "jar",
# Android APK
u"application/vnd.android.package-archive": "apk",
# OpenOffice 1.0
u"application/vnd.sun.xml.calc": "sxc",
u"application/vnd.sun.xml.draw": "sxd",

View file

@ -1,6 +1,7 @@
from hachoir_parser.audio.aiff import AiffFile
from hachoir_parser.audio.au import AuFile
from hachoir_parser.audio.itunesdb import ITunesDBFile
from hachoir_parser.audio.ipod_playcounts import PlayCountFile
from hachoir_parser.audio.midi import MidiFile
from hachoir_parser.audio.mpeg_audio import MpegAudioFile
from hachoir_parser.audio.real_audio import RealAudioFile

View file

@ -0,0 +1,60 @@
"""
iPod Play Count parser.
Documentation:
- http://ipl.derpapst.org/wiki/ITunesDB/Play_Counts_File
(formerly known as http://ipodlinux.org)
Author: m42i
Creation date: 01 March 2014
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32,
String, Float32, NullBytes, Enum, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.tools import humanDuration
from hachoir_core.text_handler import displayHandler, filesizeHandler
class PlayCountFile(Parser):
    """Parser for iPod Play Counts files (streams starting with 'mhdp')."""
    PARSER_TAGS = {
        "id": "playcounts",
        "category": "audio",
        "min_size": 44*8,
        "magic": (('mhdp',0),),
        "description": "iPod Play Counts file"
    }

    endian = LITTLE_ENDIAN

    def validate(self):
        signature = self.stream.readBytes(0, 4)
        return signature == 'mhdp'

    def createFields(self):
        yield String(self, "header_id", 4, "Play Count Header Markup (\"mhdp\")", charset="ISO-8859-1")
        yield UInt32(self, "header_length", "Header Length")
        yield UInt32(self, "entry_length", "Single Entry Length")
        yield UInt32(self, "entry_number", "Number of Songs on iPod")
        # Skip whatever remains of the header before the first entry.
        header_end = self["header_length"].value
        padding = self.seekByte(header_end, "header padding")
        if padding:
            yield padding

        track_count = self["entry_number"].value
        for _ in xrange(track_count):
            yield PlayCountEntry(self, "track[]")
class PlayCountEntry(FieldSet):
    """One track entry of a Play Counts file; its size is fixed to 28*8."""

    def __init__(self, *args, **kw):
        FieldSet.__init__(self, *args, **kw)
        self._size = 28 * 8

    def createFields(self):
        # (field class, name, description) in stream order.
        layout = (
            (UInt32, "play_count", "Playcount since last sync"),
            (TimestampMac32, "last_played", "Time of the last play of the track"),
            (UInt32, "audio_bookmark", "Last position in milliseconds"),
            (UInt32, "rating", "Rating in steps of 20 up to 100"),
            (UInt32, "unknown", "unknown"),
            (UInt32, "skip_count", "Number of skips since last sync"),
            (TimestampMac32, "last_skipped", "Time of the last skip"),
        )
        for field_class, field_name, field_desc in layout:
            yield field_class(self, field_name, field_desc)

View file

@ -2,7 +2,7 @@
iPod iTunesDB parser.
Documentation:
- http://ipodlinux.org/ITunesDB
- http://ipl.derpapst.org/wiki/ITunesDB/iTunesDB_File
Author: Romain HERAULT
Creation date: 19 august 2006
@ -71,13 +71,27 @@ class DataObject(FieldSet):
19:"Show (for TV Shows only)",
20:"Episode",
21:"TV Network",
22:"Album-Artist",
23:"Artist for Sorting",
24:"List of keywords pretaining track",
25:"Locale for TV show(?)",
27:"Title for Sorting",
28:"Album for Sorting",
29:"Album-Artist for Sorting",
30:"Composer for Sorting",
31:"Show for Sorting",
# 32:"Unknown binary field for video tracks",
50:"Smart Playlist Data",
51:"Smart Playlist Rules",
52:"Library Playlist Index",
100:"Column info",
53:"Library Playlist Index letter in jump table",
100:"Ccolumn Sizing Info as well as an order indicator in playlists.",
102:"For iPhone",
200:"Album name (for album descriptions)",
201:"Album artist (for album descriptions)",
202:"Album sort artist (for album descriptions)"
202:"Album sort artist (for album descriptions)",
203:"Podcast URL in Album List",
204:"TV Show in Album List"
}
mhod52_sort_index_type_name={
@ -97,15 +111,7 @@ class DataObject(FieldSet):
yield UInt32(self, "header_length", "Header Length")
yield UInt32(self, "entry_length", "Entry Length")
yield Enum(UInt32(self, "type", "type"),self.type_name)
if(self["type"].value<15) or (self["type"].value >= 200):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield UInt32(self, "position", "Position")
yield UInt32(self, "length", "String Length in bytes")
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE")
elif (self["type"].value<17):
if (self["type"].value == 15) or (self["type"].value == 16):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield String(self, "string", self._size/8-self["header_length"].value, "String Data", charset="UTF-8")
@ -121,6 +127,14 @@ class DataObject(FieldSet):
yield padding
for i in xrange(self["entry_count"].value):
yield UInt32(self, "index["+str(i)+"]", "Index of the "+str(i)+"nth mhit")
elif(self["type"].value<15) or (self["type"].value>17) or (self["type"].value >= 200):
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield UInt32(self, "position", "Position")
yield UInt32(self, "length", "String Length in bytes")
yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]")
yield String(self, "string", self["length"].value, "String Data", charset="UTF-16-LE")
else:
padding = self.seekByte(self["header_length"].value, "header padding")
if padding:
@ -178,8 +192,8 @@ class TrackItem(FieldSet):
yield UInt32(self, "stop_time", "Stop playing at, in milliseconds")
yield UInt32(self, "soundcheck", "SoundCheck preamp")
yield UInt32(self, "playcount_1", "Play count of the track")
yield UInt32(self, "playcount_2", "Play count of the track (identical to playcount_1)")
yield UInt32(self, "last_played_time", "Time the song was last played")
yield UInt32(self, "playcount_2", "Play count of the track when last synced")
yield TimestampMac32(self, "last_played_time", "Time the song was last played")
yield UInt32(self, "disc_number", "disc number in multi disc sets")
yield UInt32(self, "total_discs", "Total number of discs in the disc set")
yield UInt32(self, "userid", "User ID in the DRM scheme")

View file

@ -1,4 +1,5 @@
from hachoir_parser.game.zsnes import ZSNESFile
from hachoir_parser.game.spider_man_video import SpiderManVideoFile
from hachoir_parser.game.laf import LafFile
from hachoir_parser.game.blp import BLP1File, BLP2File
from hachoir_parser.game.blp import BLP1File, BLP2File
from hachoir_parser.game.uasset import UAssetFile

View file

@ -0,0 +1,199 @@
"""
Unreal 4 .uasset file parser
Author: Robert Xiao
Creation date: 2015-01-17
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, StaticFieldSet, SeekableFieldSet, Int32, UInt32,
String, PascalString32, PaddingBytes, Bytes, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
class StringTable(FieldSet):
    """Sequence of ``count`` PascalString32 fields named "string[]"."""

    def __init__(self, parent, name, count, *args):
        FieldSet.__init__(self, parent, name, *args)
        # Number of strings to read, supplied by the caller.
        self.count = count

    def createFields(self):
        remaining = self.count
        for _ in xrange(remaining):
            yield PascalString32(self, "string[]", strip='\0')
def getObject(self, val):
    """Resolve an object index into a header entry.

    0 means "no object" (None); a negative value selects reference
    number ``-val-1``; a positive value selects asset number ``val-1``.
    """
    if val == 0:
        return None
    if val < 0:
        path = '/header/refs/ref[%d]' % (-val-1)
    else:
        path = '/header/assets/asset[%d]' % (val-1)
    return self[path]
class AssetHeader(FieldSet):
    """Asset table entry made of seventeen Int32 fields.

    Named fields are type1, type2, parent, name_index, size and offset; the
    rest are unknown values, listed by createDescription().
    """

    def createFields(self):
        # "parent" is 0 when the asset has no parent.
        leading = ("type1", "type2", "parent", "name_index",
                   "unk[]", "unk[]", "size", "offset")
        for field_name in leading:
            yield Int32(self, field_name)
        for _ in xrange(9):
            yield Int32(self, "unk[]")

    @property
    def typeName(self):
        type_object = getObject(self, self["type1"].value)
        return type_object.objectName

    @property
    def objectName(self):
        string_path = '/header/strings/string[%d]' % self['name_index'].value
        return self[string_path].value

    @property
    def fullObjectName(self):
        own_name = self.objectName
        if not self['parent'].value:
            return own_name
        # Prefix with the parent's full name, resolved recursively.
        owner = getObject(self, self['parent'].value)
        return '%s.%s' % (owner.fullObjectName, own_name)

    def createValue(self):
        details = (self.fullObjectName, self.typeName, self['size'].value)
        return '<Asset %s of type %s, size %d>' % details

    def createDescription(self):
        unknown_values = []
        for unknown in self.array('unk'):
            unknown_values.append(unknown.value)
        return str(unknown_values)
class AssetTable(FieldSet):
    """Sequence of ``count`` AssetHeader entries named "asset[]"."""

    def __init__(self, parent, name, count, *args):
        FieldSet.__init__(self, parent, name, *args)
        # Number of entries to read, supplied by the caller.
        self.count = count

    def createFields(self):
        remaining = self.count
        for _ in xrange(remaining):
            yield AssetHeader(self, "asset[]")
class ReferenceHeader(FieldSet):
    """Reference table entry made of seven Int32 fields.

    Named fields are type_index, parent and name_index; the rest are unknown
    values, listed by createDescription().
    """

    def createFields(self):
        layout = ("unk[]", "unk[]", "type_index", "unk[]",
                  "parent", "name_index", "unk[]")
        for field_name in layout:
            yield Int32(self, field_name)

    @property
    def typeName(self):
        string_path = '/header/strings/string[%d]' % self['type_index'].value
        return self[string_path].value

    @property
    def objectName(self):
        string_path = '/header/strings/string[%d]' % self['name_index'].value
        return self[string_path].value

    @property
    def fullObjectName(self):
        own_name = self.objectName
        if not self['parent'].value:
            return own_name
        # The parent's full name is shown in brackets before our own name.
        owner = getObject(self, self['parent'].value)
        return '[%s].%s' % (owner.fullObjectName, own_name)

    def createValue(self):
        details = (self.fullObjectName, self.typeName)
        return '<Reference %s of type %s>' % details

    def createDescription(self):
        unknown_values = []
        for unknown in self.array('unk'):
            unknown_values.append(unknown.value)
        return str(unknown_values)
class ReferenceTable(FieldSet):
    """Sequence of ``count`` ReferenceHeader entries named "ref[]"."""

    def __init__(self, parent, name, count, *args):
        FieldSet.__init__(self, parent, name, *args)
        # Number of entries to read, supplied by the caller.
        self.count = count

    def createFields(self):
        remaining = self.count
        for _ in xrange(remaining):
            yield ReferenceHeader(self, "ref[]")
class UAssetHeader(SeekableFieldSet):
    """Header of a .uasset file, followed by its string, asset and reference tables.

    The field set size comes from its own "header_size" field. The three
    tables are located through the offset_* fields and sized by the num_*
    fields.
    """

    def __init__(self, *args):
        SeekableFieldSet.__init__(self, *args)
        self._size = self["header_size"].value * 8

    def createFields(self):
        yield UInt32(self, "magic")
        yield Int32(self, "version")
        yield RawBytes(self, "unk[]", 16)
        yield UInt32(self, "header_size")
        yield PascalString32(self, "none", strip='\0')
        yield RawBytes(self, "unk[]", 4)

        yield UInt32(self, "num_strings", "Number of strings in the header")
        yield UInt32(self, "offset_strings", "Offset to string table within the header")
        yield UInt32(self, "num_assets", "Number of assets described in the header")
        yield UInt32(self, "offset_assets", "Offset to asset table within the header")
        yield UInt32(self, "num_refs", "Number of references? described in the header")
        yield UInt32(self, "offset_refs", "Offset to reference table within the header")

        yield UInt32(self, "offset_unk[]", "Offset to something")
        yield UInt32(self, "unk[]")
        yield UInt32(self, "offset_unk[]", "Offset to some other thing")
        yield UInt32(self, "unk[]")

        yield RawBytes(self, "signature", 16, "Some kind of hash")

        yield UInt32(self, "unk[]")
        # The asset and string counts are repeated and must match.
        yield UInt32(self, "num_assets2", "num_assets again")
        assert self['num_assets'].value == self['num_assets2'].value
        yield UInt32(self, "num_strings2", "num_strings again")
        assert self['num_strings'].value == self['num_strings2'].value
        yield RawBytes(self, "unk[]", 34)
        yield UInt32(self, "unk[]")
        yield UInt32(self, "size_unk", "Size of something")
        yield RawBytes(self, "unk[]", 12)

        # (offset field, table class, table name, count field) in read order.
        tables = (
            ("offset_strings", StringTable, "strings", "num_strings"),
            ("offset_assets", AssetTable, "assets", "num_assets"),
            ("offset_refs", ReferenceTable, "refs", "num_refs"),
        )
        for offset_field, table_class, table_name, count_field in tables:
            self.seekByte(self[offset_field].value)
            yield table_class(self, table_name, self[count_field].value)
class Asset(FieldSet):
    """Minimal asset body: only its leading "type" UInt32 is parsed."""

    def createFields(self):
        type_field = UInt32(self, "type")
        yield type_field
class UAssetFile(Parser):
    """Parser for Unreal .uasset files: a header, then one raw blob per asset."""
    MAGIC = "\xc1\x83\x2a\x9e"
    # NOTE(review): other parsers in this commit declare "file_ext" without a
    # leading dot and "min_size" as a multiple of 8 -- confirm these two tags.
    PARSER_TAGS = {
        "id": "uasset",
        "category": "game",
        "description": "Unreal .uasset file",
        "min_size": 32,
        "file_ext": (".uasset",),
        "magic": ((MAGIC, 0),),
    }
    endian = LITTLE_ENDIAN

    def validate(self):
        signature = self.stream.readBytes(0, 4)
        if signature == self.MAGIC:
            return True
        return "Wrong header"

    def createFields(self):
        yield UAssetHeader(self, "header")
        asset_table = self['/header/assets']
        for entry in asset_table.array('asset'):
            # Each asset's data lives at the offset recorded in its header entry.
            self.seekByte(entry['offset'].value)
            data_size = entry['size'].value
            label = "Data for asset %s" % entry.fullObjectName
            yield RawBytes(self, "asset[]", data_size, description=label)

View file

@ -15,4 +15,5 @@ from hachoir_parser.misc.dsstore import DSStore
from hachoir_parser.misc.word_doc import WordDocumentParser
from hachoir_parser.misc.word_2 import Word2DocumentParser
from hachoir_parser.misc.mstask import MSTaskFile
from hachoir_parser.misc.androidxml import AndroidXMLFile
from hachoir_parser.misc.mapsforge_map import MapsforgeMapFile

View file

@ -0,0 +1,220 @@
'''
AndroidManifest.xml parser
References:
- http://code.google.com/p/androguard/source/browse/core/bytecodes/apk.py
Author: Robert Xiao
Creation Date: May 29, 2011
'''
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, ParserError,
String, Enum, GenericVector,
UInt8, UInt16, UInt32, Int32,
Float32, Bits,)
from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir_core.tools import createDict
from hachoir_core.endian import LITTLE_ENDIAN
class PascalCString16(FieldSet):
    """UTF-16-LE string prefixed by a UInt16 "size".

    The string field spans ``size + 1`` two-byte units, and the whole field
    set is sized as ``size + 2`` units. The value is that of the string.
    """

    def createFields(self):
        yield UInt16(self, "size")
        unit_count = self['size'].value
        self._size = (unit_count+2)*16
        yield String(self, "string", (unit_count+1)*2, strip='\0', charset="UTF-16-LE")

    def createValue(self):
        text_field = self['string']
        return text_field.value
class StringTable(FieldSet):
    """String pool: one PascalCString16 per entry of the sibling "offsets" vector."""

    def createFields(self):
        offsets = self['../offsets']
        for offset_field in offsets:
            # Seek to the recorded offset, exposing any gap as a padding field.
            gap = self.seekByte(offset_field.value)
            if gap:
                yield gap
            yield PascalCString16(self, "string[]")
def Top(self):
    """Chunk body parser: yield nested chunks until the end of the field set."""
    while True:
        if self.eof:
            break
        yield Chunk(self, "chunk[]")
def StringChunk(self):
    """Chunk body parser for the string table: counts, offsets, then the strings."""
    # TODO: styles
    header_fields = ("string_count", "style_count", "reserved[]",
                     "string_offset", "style_offset")
    for field_name in header_fields:
        yield UInt32(self, field_name)
    string_count = self['string_count'].value
    yield GenericVector(self, "offsets", string_count, UInt32,
                        description="Offsets for string table")
    # Skip to the start of the string data, exposing any gap as padding.
    gap = self.seekByte(self['string_offset'].value)
    if gap:
        yield gap
    yield StringTable(self, "table")
def ResourceIDs(self):
    """Chunk body parser: hexadecimal UInt32 ids filling the rest of the chunk."""
    while True:
        if not self._current_size < self._size:
            break
        id_field = UInt32(self, "resource_id[]")
        yield textHandler(id_field, hexadecimal)
def stringIndex(field):
    """Display handler: show the string-table entry a field's value points to.

    A value of -1 is displayed as an empty string.
    """
    if field.value != -1:
        entry_path = '/xml_file/string_table/table/string[%d]'%field.value
        return field[entry_path].display
    return ''
def NamespaceTag(self):
    """Chunk body parser shared by the namespace start and end chunks."""
    yield UInt32(self, "lineno", "Line number from original XML file")
    yield Int32(self, "unk[]", "Always -1")
    # Both fields are string-table indexes, displayed as the referenced text.
    for field_name in ("prefix", "uri"):
        yield textHandler(Int32(self, field_name), stringIndex)
def NamespaceStartValue(self):
    """Value of a namespace start chunk, formatted as an xmlns declaration."""
    prefix = self['prefix'].display
    uri = self['uri'].display
    return "xmlns:%s='%s'" % (prefix, uri)
def NamespaceEndValue(self):
    """Value of a namespace end chunk: the prefix preceded by a slash."""
    prefix = self['prefix'].display
    return "/%s" % prefix
def IntTextHandler(func):
    """Return a field factory creating an Int32 displayed through ``func``."""
    def make_field(*args, **kwargs):
        return textHandler(Int32(*args, **kwargs), func)
    return make_field
def booleanText(field):
    """Display handler: 'false' for a zero value, 'true' for anything else."""
    return 'false' if field.value == 0 else 'true'
class XMLUnitFloat(FieldSet):
    """32-bit number made of a unit, a radix selector and a mantissa.

    Fields, in order: "unit" (4 bits), "exponent" (2 bits), reserved
    (2 bits) and "mantissa" (24 bits). RADIX_MAP translates the exponent
    selector into the number of fractional bits of the mantissa; UNIT_MAP
    (filled in by subclasses) gives the unit suffix used for display.
    """
    static_size = 32
    UNIT_MAP = {}
    RADIX_MAP = {
        0: 0,
        1: 7,
        2: 15,
        3: 23,
    }

    def createFields(self):
        yield Enum(Bits(self, "unit", 4), self.UNIT_MAP)
        yield Enum(Bits(self, "exponent", 2), self.RADIX_MAP)
        yield Bits(self, "reserved[]", 2)
        yield Bits(self, "mantissa", 24)

    def createValue(self):
        # Scale the mantissa down by 2**shift. The former
        # "float(mantissa) >> shift" always raised TypeError because floats
        # do not support the shift operator.
        shift = self.RADIX_MAP[self['exponent'].value]
        return float(self['mantissa'].value) / (1 << shift)

    def createDisplay(self):
        return '%f%s'%(self.value, self.UNIT_MAP.get(self['unit'].value, ''))
class XMLDimensionFloat(XMLUnitFloat):
    """XMLUnitFloat whose unit is one of the dimension suffixes below."""
    UNIT_MAP = {
        0: "px",
        1: "dip",
        2: "sp",
        3: "pt",
        4: "in",
        5: "mm",
    }
class XMLFractionFloat(XMLUnitFloat):
    """XMLUnitFloat whose unit is '%' (0) or '%p' (1)."""
    UNIT_MAP = dict(enumerate(['%', '%p']))
class XMLAttribute(FieldSet):
    """One attribute of a start tag.

    TYPE_INFO maps the "value_type" byte to a (name, field factory) pair;
    the factory builds the "value_data" field. Types missing from the table
    are read as a plain UInt32.
    """
    TYPE_INFO = {
        0: ('Null', IntTextHandler(lambda field: '')),
        1: ('Reference', IntTextHandler(lambda field: '@%08x'%field.value)),
        2: ('Attribute', IntTextHandler(lambda field: '?%08x'%field.value)),
        3: ('String', IntTextHandler(stringIndex)),
        4: ('Float', Float32),
        5: ('Dimension', XMLDimensionFloat),
        6: ('Fraction', XMLFractionFloat),
        16: ('Int_Dec', Int32),
        17: ('Int_Hex', IntTextHandler(hexadecimal)),
        18: ('Int_Boolean', IntTextHandler(booleanText)),
        28: ('Int_Color_Argb8', IntTextHandler(lambda field: '#%08x'%field.value)),
        29: ('Int_Color_Rgb8', IntTextHandler(lambda field: '#%08x'%field.value)),
        30: ('Int_Color_Argb4', IntTextHandler(lambda field: '#%08x'%field.value)),
        31: ('Int_Color_Rgb4', IntTextHandler(lambda field: '#%08x'%field.value)),
    }
    TYPE_NAME = createDict(TYPE_INFO, 0)
    TYPE_FUNC = createDict(TYPE_INFO, 1)
    static_size = 5*32

    def createFields(self):
        # Three string-table indexes, displayed as the referenced text.
        for field_name in ("ns", "name", "value_string"):
            yield textHandler(Int32(self, field_name), stringIndex)
        yield UInt16(self, "unk[]")
        yield UInt8(self, "unk[]")
        yield Enum(UInt8(self, "value_type"), self.TYPE_NAME)
        # Pick the field factory matching the declared type.
        factory = self.TYPE_FUNC.get(self['value_type'].value) or UInt32
        yield factory(self, "value_data")

    def createValue(self):
        attribute_name = self['name'].display
        return (attribute_name, self['value_data'].value)

    def createDisplay(self):
        attribute_name = self['name'].display
        return '%s="%s"'%(attribute_name, self['value_data'].display)
def TagStart(self):
    """Chunk body parser for a start tag: fixed header, then its attributes."""
    yield UInt32(self, "lineno", "Line number from original XML file")
    yield Int32(self, "unk[]", "Always -1")
    for field_name in ("ns", "name"):
        yield textHandler(Int32(self, field_name), stringIndex)
    yield UInt32(self, "flags")
    for field_name in ("attrib_count", "attrib_id", "attrib_class", "attrib_style"):
        yield UInt16(self, field_name)
    attribute_count = self['attrib_count'].value
    for _ in xrange(attribute_count):
        yield XMLAttribute(self, "attrib[]")
def TagStartValue(self):
    """Value of a start tag chunk: '<ns:name attr="value" ...>'.

    The "ns:" part is omitted when the namespace display is empty.
    """
    rendered = [attr.display for attr in self.array('attrib')]
    suffix = ' '.join(rendered)
    if suffix:
        suffix = ' '+suffix
    if self['ns'].display:
        return "<%s:%s%s>"%(self['ns'].display, self['name'].display, suffix)
    return '<%s%s>'%(self['name'].display, suffix)
def TagEnd(self):
    """Chunk body parser for an end tag: line number, unknown, namespace and name."""
    yield UInt32(self, "lineno", "Line number from original XML file")
    yield Int32(self, "unk[]", "Always -1")
    # Both fields are string-table indexes, displayed as the referenced text.
    for field_name in ("ns", "name"):
        yield textHandler(Int32(self, field_name), stringIndex)
def TagEndValue(self):
    """Value of an end tag chunk: '</ns:name>', or '</name>' without namespace."""
    if self['ns'].display:
        return "</%s:%s>"%(self['ns'].display, self['name'].display)
    return '</%s>'%self['name'].display
def TextChunk(self):
    """Chunk body parser for text chunks; only the two leading fields are read."""
    # TODO
    line_field = UInt32(self, "lineno", "Line number from original XML file")
    yield line_field
    unknown_field = Int32(self, "unk[]", "Always -1")
    yield unknown_field
class Chunk(FieldSet):
    """Generic chunk: type, header size, chunk size, then a type-specific body.

    CHUNK_INFO maps a chunk type to (field name, description, body parser,
    value function). Known types rename the field and replace its
    description; unknown types keep only the three header fields.
    """
    CHUNK_INFO = {
        0x0001: ("string_table", "String Table", StringChunk, None),
        0x0003: ("xml_file", "XML File", Top, None),
        0x0100: ("namespace_start[]", "Start Namespace", NamespaceTag, NamespaceStartValue),
        0x0101: ("namespace_end[]", "End Namespace", NamespaceTag, NamespaceEndValue),
        0x0102: ("tag_start[]", "Start Tag", TagStart, TagStartValue),
        0x0103: ("tag_end[]", "End Tag", TagEnd, TagEndValue),
        0x0104: ("text[]", "Text", TextChunk, None),
        0x0180: ("resource_ids", "Resource IDs", ResourceIDs, None),
    }
    CHUNK_DESC = createDict(CHUNK_INFO, 1)

    def __init__(self, parent, name, description=None):
        FieldSet.__init__(self, parent, name, description)
        # "chunk_size" is multiplied by 8 to get the field set size.
        self._size = self['chunk_size'].value* 8
        chunk_type = self['type'].value
        self.parse_func = None
        info = self.CHUNK_INFO.get(chunk_type)
        if info is not None:
            self._name, self._description, self.parse_func, value_func = info
            if value_func:
                # Install a per-instance value computed by the table's function.
                self.createValue = lambda: value_func(self)

    def createFields(self):
        yield Enum(UInt16(self, "type"), self.CHUNK_DESC)
        yield UInt16(self, "header_size")
        yield UInt32(self, "chunk_size")
        body_parser = self.parse_func
        if body_parser:
            for field in body_parser(self):
                yield field
class AndroidXMLFile(Parser):
    """Parser for Android binary XML: a single top-level "xml_file" chunk."""
    MAGIC = "\x03\x00\x08\x00"
    PARSER_TAGS = {
        "id": "axml",
        "category": "misc",
        "file_ext": ("xml",),
        "min_size": 32*8,
        "magic": ((MAGIC, 0),),
        "description": "Android binary XML format",
    }
    endian = LITTLE_ENDIAN

    def validate(self):
        signature = self.stream.readBytes(0, len(self.MAGIC))
        if signature == self.MAGIC:
            return True
        return "Invalid magic"

    def createFields(self):
        root_chunk = Chunk(self, "xml_file")
        yield root_chunk

View file

@ -10,7 +10,7 @@ References:
from hachoir_parser import Parser
from hachoir_core.field import (ParserError,
Bit, Bits, UInt8, UInt16, UInt32, UInt64, String, RawBytes,
Bit, Bits, UInt8, UInt16, UInt32, Int32, UInt64, String, RawBytes,
PaddingBits, PaddingBytes,
Enum, Field, FieldSet, SeekableFieldSet, RootSeekableFieldSet)
from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
@ -140,6 +140,11 @@ class TileHeader(FieldSet):
class POIData(FieldSet):
def createFields(self):
if self["/have_debug"].value:
yield String(self, "signature", 32)
if not self['signature'].value.startswith("***POIStart"):
raise ValueError
yield IntVbe(self, "lat_diff")
yield IntVbe(self, "lon_diff")
yield Bits(self, "layer", 4)
@ -179,6 +184,11 @@ class SubTileBitmap(FieldSet):
class WayProperties(FieldSet):
def createFields(self):
if self["/have_debug"].value:
yield String(self, "signature", 32)
if not self['signature'].value.startswith("---WayStart"):
raise ValueError
yield UIntVbe(self, "way_data_size")
# WayProperties is split into an outer and an inner field, to allow specifying data size for inner part:
@ -251,6 +261,11 @@ class TileData(FieldSet):
self.zoomIntervalCfg = zoomIntervalCfg
def createFields(self):
if self["/have_debug"].value:
yield String(self, "signature", 32)
if not self['signature'].value.startswith("###TileStart"):
raise ValueError
yield TileHeader(self, "tile_header", self.zoomIntervalCfg)
numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1
@ -272,6 +287,11 @@ class ZoomSubFile(SeekableFieldSet):
self.zoomIntervalCfg = zoomIntervalCfg
def createFields(self):
if self["/have_debug"].value:
yield String(self, "signature", 16)
if self['signature'].value != "+++IndexStart+++":
raise ValueError
indexEntries = []
numTiles = None
i = 0
@ -284,13 +304,24 @@ class ZoomSubFile(SeekableFieldSet):
if numTiles is None:
# calculate number of tiles (TODO: better calc this from map bounding box)
firstOffset = self["tile_index_entry[0]"]["offset"].value
if self["/have_debug"].value:
firstOffset -= 16
numTiles = firstOffset / 5
if i >= numTiles:
break
for indexEntry in indexEntries:
self.seekByte(indexEntry["offset"].value, relative=True)
yield TileData(self, "tile_data[]", zoomIntervalCfg=self.zoomIntervalCfg)
for i, indexEntry in enumerate(indexEntries):
offset = indexEntry["offset"].value
self.seekByte(offset, relative=True)
if i != len(indexEntries) - 1:
next_offset = indexEntries[i + 1]["offset"].value
size = (next_offset - offset) * 8
else:
size = self.size - offset * 8
if size == 0:
# hachoir doesn't support empty field.
continue
yield TileData(self, "tile_data[%d]" % i, zoomIntervalCfg=self.zoomIntervalCfg, size=size)
@ -314,10 +345,10 @@ class MapsforgeMapFile(Parser, RootSeekableFieldSet):
yield UInt32(self, "file_version")
yield UInt64(self, "file_size")
yield UInt64(self, "creation_date")
yield UInt32(self, "min_lat")
yield UInt32(self, "min_lon")
yield UInt32(self, "max_lat")
yield UInt32(self, "max_lon")
yield Int32(self, "min_lat")
yield Int32(self, "min_lon")
yield Int32(self, "max_lat")
yield Int32(self, "max_lon")
yield UInt16(self, "tile_size")
yield VbeString(self, "projection")

View file

@ -1,7 +1,9 @@
from hachoir_parser.program.elf import ElfFile
from hachoir_parser.program.exe import ExeFile
from hachoir_parser.program.macho import MachoFile, MachoFatFile
from hachoir_parser.program.python import PythonCompiledFile
from hachoir_parser.program.java import JavaCompiledClassFile
from hachoir_parser.program.prc import PRCFile
from hachoir_parser.program.nds import NdsFile
from hachoir_parser.program.dex import DexFile
from hachoir_parser.program.java_serialized import JavaSerializedFile

View file

@ -0,0 +1,238 @@
'''
Dalvik Executable (dex) parser.
References:
- http://www.dalvikvm.com/
- http://code.google.com/p/androguard/source/browse/core/bytecodes/dvm.py
- http://androguard.googlecode.com/hg/specs/dalvik/dex-format.html
Author: Robert Xiao
Creation Date: May 29, 2011
'''
from hachoir_parser import HachoirParser
from hachoir_core.field import (SeekableFieldSet, RootSeekableFieldSet, FieldSet, ParserError,
String, RawBytes, GenericVector,
UInt8, UInt16, UInt32, NullBits, Bit)
from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_parser.program.java import eat_descriptor
class DexHeader(FieldSet):
    """Header of a dex file: identification fields followed by table counts and offsets."""

    # UInt32 fields that follow "endian", in file order, as (name, description).
    _TABLE_FIELDS = (
        ("link_count", None),
        ("link_offset", None),
        ("map_offset", "offset to map footer"),
        ("string_count", "number of entries in string table"),
        ("string_offset", "offset to string table"),
        ("type_desc_count", "number of entries in type descriptor table"),
        ("type_desc_offset", "offset to type descriptor table"),
        ("meth_desc_count", "number of entries in method descriptor table"),
        ("meth_desc_offset", "offset to method descriptor table"),
        ("field_count", "number of entries in field table"),
        ("field_offset", "offset to field table"),
        ("method_count", "number of entries in method table"),
        ("method_offset", "offset to method table"),
        ("class_count", "number of entries in class table"),
        ("class_offset", "offset to class table"),
        ("data_size", "size of data region"),
        ("data_offset", "offset to data region"),
    )

    def createFields(self):
        yield String(self, "magic", 4)
        yield String(self, "version", 4, strip='\0')
        yield textHandler(UInt32(self, "checksum"), hexadecimal)
        yield RawBytes(self, "signature", 20, description="SHA1 sum over all subsequent data")
        yield filesizeHandler(UInt32(self, "filesize"))
        yield UInt32(self, "size", description="Header size")
        # The header states its own length; use it as this field set's size.
        self._size = self['size'].value * 8
        yield textHandler(UInt32(self, "endian"), hexadecimal)
        for field_name, field_desc in self._TABLE_FIELDS:
            yield UInt32(self, field_name, description=field_desc)
def stringIndex(field):
    """Return the display text of the string table item whose index is field.value."""
    path = '/string_table/item[%d]' % field.value
    return field[path].display
def classDisplay(field):
    """Return the first element of eat_descriptor() applied to the indexed string."""
    parsed = eat_descriptor(stringIndex(field))
    # eat_descriptor returns a (display, tail) pair; only the display part is used.
    display, _tail = parsed
    return display
def classIndex(field):
    """Return the display text of the type descriptor table item indexed by field.value."""
    path = '/type_desc_table/item[%d]' % field.value
    return field[path].display
# modified from java.py
# Maps a one-letter type code to a readable type name.
code_to_type_name = dict(
    B="byte",
    C="char",
    D="double",
    F="float",
    I="int",
    J="long",
    L="object",
    S="short",
    Z="boolean",
)
def argumentDisplay(field):
    """Return a list of type names for a "shorty" descriptor string.

    The first character (the return code, which is redundant here) is
    dropped; characters missing from code_to_type_name are kept unchanged.
    """
    shorty = stringIndex(field)
    argument_codes = shorty[1:]
    return [code_to_type_name.get(code, code) for code in argument_codes]
def signatureIndex(field):
    """Return the display text of the method descriptor table item indexed by field.value."""
    path = '/meth_desc_table/item[%d]' % field.value
    return field[path].display
class PascalCString(FieldSet):
    """One-byte "size" followed by a string of size+1 bytes with NULs stripped."""

    def createFields(self):
        yield UInt8(self, "size")
        length = self['size'].value
        # Total size in bits: the size byte plus length+1 string bytes.
        self._size = (length + 2) * 8
        yield String(self, "string", length + 1, strip='\0')

    def createValue(self):
        """Return the value of the "string" field."""
        return self['string'].value
class StringTable(SeekableFieldSet):
    """One PascalCString per entry of the root "string_offsets" vector."""

    def createFields(self):
        for offset_field in self['/string_offsets'].array('item'):
            # Offsets are used as absolute positions (relative=False).
            self.seekByte(offset_field.value, relative=False)
            yield PascalCString(self, "item[]")
class TypeDescriptorEntry(FieldSet):
    """Type descriptor table entry: a single 32-bit string index."""

    static_size = 32

    def createFields(self):
        descriptor = UInt32(self, "desc", description="Type descriptor")
        yield textHandler(descriptor, classDisplay)

    def createValue(self):
        """Return a one-element tuple holding the raw "desc" value."""
        return (self['desc'].value,)

    def createDisplay(self):
        """Return the display text of the "desc" field."""
        return self['desc'].display
class MethodDescriptorEntry(FieldSet):
    """Method descriptor table entry: three 32-bit values (args, return, param_offset)."""

    static_size = 96

    def createFields(self):
        args = UInt32(self, "args", description="Argument type")
        yield textHandler(args, argumentDisplay)
        ret = UInt32(self, "return", description="Return type")
        yield textHandler(ret, classIndex)
        yield UInt32(self, "param_offset", "Offset to parameter detail list")

    def createValue(self):
        """Return the raw (args, return) index pair."""
        return (self['args'].value, self['return'].value)

    def createDisplay(self):
        """Return "<return type> (<comma separated argument types>)"."""
        # The display of "args" is a list of names (see argumentDisplay).
        arguments = ', '.join(self['args'].display)
        return "%s (%s)" % (self['return'].display, arguments)
class FieldEntry(FieldSet):
    """Field table entry: class index, type index and name index."""

    static_size = 64

    def createFields(self):
        owner = UInt16(self, "class", description="Class containing this field")
        yield textHandler(owner, classIndex)
        kind = UInt16(self, "type", description="Field type")
        yield textHandler(kind, classIndex)
        label = UInt32(self, "name", description="Field name")
        yield textHandler(label, stringIndex)

    def createValue(self):
        """Return the raw (class, type, name) index triple."""
        return (self['class'].value, self['type'].value, self['name'].value)

    def createDisplay(self):
        """Return "<type> <class>.<name>" built from the field displays."""
        parts = (self['type'].display, self['class'].display, self['name'].display)
        return "%s %s.%s" % parts
class MethodEntry(FieldSet):
    """Method table entry: class index, signature index and name index."""

    static_size = 64

    def createFields(self):
        owner = UInt16(self, "class", description="Class containing this method")
        yield textHandler(owner, classIndex)
        signature = UInt16(self, "sig", description="Method signature")
        yield textHandler(signature, signatureIndex)
        label = UInt32(self, "name", description="Method name")
        yield textHandler(label, stringIndex)

    def createValue(self):
        """Return the raw (class, sig, name) index triple."""
        return (self['class'].value, self['sig'].value, self['name'].value)

    def createDisplay(self):
        """Return "<return type> <class>.<name>(<argument types>)"."""
        # Resolve the signature entry itself so its parts can be laid out separately.
        sig = self['/meth_desc_table/item[%d]' % self['sig'].value]
        return "%s %s.%s(%s)" % (
            sig['return'].display,
            self['class'].display,
            self['name'].display,
            ', '.join(sig['args'].display),
        )
class AccessFlags(FieldSet):
    """32-bit access flag word, one Bit field per flag."""

    static_size = 32

    # Names of the first fifteen one-bit flags, in field order.
    _LEADING_FLAGS = (
        "public", "private", "protected", "static", "final",
        "synchronized", "volatile", "transient", "native", "interface",
        "abstract", "strictfp", "synthetic", "annotation", "enum",
    )

    def createFields(self):
        for flag_name in self._LEADING_FLAGS:
            yield Bit(self, flag_name)
        yield NullBits(self, "reserved[]", 1)
        yield Bit(self, "constructor")
        yield NullBits(self, "reserved[]", 15)

    def createValue(self):
        """Return a tuple of the child fields whose value is exactly True."""
        return tuple([flag for flag in self if flag.value is True])

    def createDisplay(self):
        """Return the space-separated names of the flags that are set."""
        set_names = [flag.name for flag in self if flag.value is True]
        return ' '.join(set_names)
class ClassEntry(FieldSet):
    """Class table entry: eight 32-bit slots describing one class."""

    static_size = 8*32

    def createFields(self):
        yield textHandler(UInt32(self, "class", description="Class being described"), classIndex)
        yield AccessFlags(self, "flags")
        yield textHandler(UInt32(self, "superclass", description="Superclass"), classIndex)
        yield UInt32(self, "interfaces_offset", description="Offset to interface list")
        yield textHandler(UInt32(self, "filename", description="Filename"), stringIndex)
        for offset_name in ("annotations_offset", "class_data_offset", "static_values_offset"):
            yield UInt32(self, offset_name)

    def createValue(self):
        """Return a tuple with the value of every child field."""
        return tuple([child.value for child in self])

    def createDisplay(self):
        """Return a Java-like declaration: flags, 'class', name and superclass."""
        text = self['flags'].display
        if not self['flags/interface'].value:
            # Interfaces already show up through the flag display; others get "class".
            text = (text + ' class') if text else 'class'
        text += ' ' + self['class'].display
        if self['superclass'].display != 'java.lang.Object':
            text += ' extends ' + self['superclass'].display
        return text
class DexFile(HachoirParser, RootSeekableFieldSet):
    """Root parser for Dalvik VM executables."""

    MAGIC = "dex\n"
    PARSER_TAGS = {
        "id": "dex",
        "category": "program",
        "file_ext": ("dex",),
        "min_size": 80*8,
        "magic": ((MAGIC, 0),),
        "description": "Dalvik VM Executable",
    }
    endian = LITTLE_ENDIAN

    # Fixed-size tables located through the header, in parsing order:
    # (field name, header key prefix, entry class, description).
    _HEADER_TABLES = (
        ("type_desc_table", "type_desc", TypeDescriptorEntry, "Type descriptor table"),
        ("meth_desc_table", "meth_desc", MethodDescriptorEntry, "Method descriptor table"),
        ("field_table", "field", FieldEntry, "Field definition table"),
        ("method_table", "method", MethodEntry, "Method definition table"),
        ("class_table", "class", ClassEntry, "Class definition table"),
    )

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True for a 'dex\\n' stream of version '035', else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        if self['header/version'].value != '035':
            return "Unknown version"
        return True

    def createFields(self):
        yield DexHeader(self, "header")

        # String offsets, then the strings themselves starting at the first offset.
        self.seekByte(self['header/string_offset'].value)
        yield GenericVector(self, "string_offsets", self['header/string_count'].value, UInt32,
            description="Offsets for string table")
        self.seekByte(self['string_offsets/item[0]'].value)
        yield StringTable(self, "string_table",
            description="String table")

        for table_name, key, entry_class, table_desc in self._HEADER_TABLES:
            self.seekByte(self['header/%s_offset' % key].value)
            yield GenericVector(self, table_name, self['header/%s_count' % key].value, entry_class,
                description=table_desc)

View file

@ -60,7 +60,7 @@ class ExeFile(HachoirParser, RootSeekableFieldSet):
PARSER_TAGS = {
"id": "exe",
"category": "program",
"file_ext": ("exe", "dll", "ocx"),
"file_ext": ("exe", "dll", "ocx", "pyd", "scr"),
"mime": (u"application/x-dosexec",),
"min_size": 64*8,
#"magic": (("MZ", 0),),

View file

@ -732,6 +732,14 @@ class FieldInfo(FieldSet):
yield FieldArray(self, "attributes", AttributeInfo,
self["attributes_count"].value)
def createDescription(self):
    """Return "<modifiers> <type> <name>" for this field.

    Modifiers are the flag fields that are set, in a fixed order; type and
    name are resolved through the constant pool entries.
    """
    modifiers = ('transient', 'protected', 'private', 'public', 'static', 'final', 'volatile')
    words = [mod for mod in modifiers if self[mod].value]
    descriptor = str(self['descriptor_index'].get_cp_entry())
    words.append(parse_field_descriptor(descriptor))
    words.append(str(self['name_index'].get_cp_entry()))
    return ' '.join(words)
###############################################################################
# method_info {
@ -766,6 +774,15 @@ class MethodInfo(FieldSet):
yield FieldArray(self, "attributes", AttributeInfo,
self["attributes_count"].value)
def createDescription(self):
    """Return the set modifiers followed by the parsed method descriptor."""
    modifiers = ('strict', 'static', 'native', 'synchronized', 'protected',
                 'private', 'public', 'final', 'abstract')
    words = [mod for mod in modifiers if self[mod].value]
    # Name and descriptor both come from the constant pool.
    method_name = str(self['name_index'].get_cp_entry())
    descriptor = str(self['descriptor_index'].get_cp_entry())
    words.append(parse_method_descriptor(descriptor, method_name))
    return ' '.join(words)
###############################################################################
# attribute_info {
@ -954,6 +971,18 @@ class InnerClassesEntry(StaticFieldSet):
(Bit, "public"),
)
def createDescription(self):
    """Return the set modifiers, 'class' unless it is an interface, then the class name."""
    modifiers = ('super', 'static', 'protected', 'private', 'public',
                 'abstract', 'final', 'interface')
    words = [mod for mod in modifiers if self[mod].value]
    if not self['interface'].value:
        words.append('class')
    words.append(str(self['inner_class_info_index'].get_cp_entry()))
    return ' '.join(words)
class LineNumberTableEntry(StaticFieldSet):
format = (
(UInt16, "start_pc"),

View file

@ -0,0 +1,372 @@
'''
Java Object Serialization Stream parser.
References:
- http://docs.oracle.com/javase/7/docs/platform/serialization/spec/protocol.html
- http://www.javaworld.com/article/2072752/the-java-serialization-algorithm-revealed.html
Author: Robert Xiao <nneonneo@gmail.com>
Creation Date: Jun 18, 2015
'''
from hachoir_parser import Parser
from hachoir_core.field import (
ParserError, FieldSet, StaticFieldSet,
Enum, RawBytes, String, PascalString16, Float32, Float64,
Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
Bit, NullBits)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.tools import paddingSize
from .java import parse_field_descriptor
class LongString(FieldSet):
    """String prefixed by a 64-bit length, decoded as UTF-8."""

    def createFields(self):
        yield Int64(self, "length")
        # String needs an explicit byte count; the original call omitted it,
        # so the field could never be constructed.
        yield String(self, "value", self["length"].value, charset="UTF-8")

    def createDescription(self):
        """Return the description of the "value" field."""
        return self['value'].description

    def createValue(self):
        """Return the decoded string."""
        return self['value'].value
class UTF16Character(UInt16):
    """16-bit integer displayed as the repr of the corresponding unicode character."""

    def createDisplay(self):
        character = unichr(self.value)
        return repr(character)
class JavaBool(UInt8):
    """8-bit integer whose value is exposed as a bool (any non-zero byte is True)."""

    def createValue(self):
        return UInt8.createValue(self) != 0
class SerializedNull(FieldSet):
    """Null content: only the typecode byte; value None, display 'null'."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)

    def createValue(self):
        return None

    def createDisplay(self):
        return 'null'
class SerializedReference(FieldSet):
    """Back-reference: typecode byte plus a 32-bit handle into the root handle table."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        yield Int32(self, "handle")

    @property
    def referent(self):
        """Field registered on the root under this reference's handle."""
        handle = self['handle'].value
        return self.root.handles[handle]

    def createValue(self):
        """Return the referent's value."""
        return self.referent.value

    def createDisplay(self):
        """Return the referent's display prefixed with '-> '."""
        target_display = str(self.referent.display)
        return "-> " + target_display
class FieldDesc(FieldSet):
    """Field descriptor: typecode character, field name and, for '[' / 'L', a class name."""

    # Typecodes that are followed by a "className" content item.
    _REFERENCE_CODES = ('[', 'L')

    def createFields(self):
        yield String(self, "typecode", 1)
        yield PascalString16(self, "fieldName", charset="UTF-8")
        if self['typecode'].value in self._REFERENCE_CODES:
            yield SerializedContent(self, "className")

    @property
    def typeDescriptor(self):
        """Class name value for '[' / 'L' fields, otherwise the typecode itself."""
        code = self['typecode'].value
        if code not in self._REFERENCE_CODES:
            return code
        return self['className'].value

    @property
    def typeName(self):
        """Type descriptor passed through parse_field_descriptor."""
        return parse_field_descriptor(self.typeDescriptor)

    @property
    def fieldName(self):
        """Value of the "fieldName" field."""
        return self['fieldName'].value

    def createValue(self):
        """Return the (typeDescriptor, fieldName) pair."""
        return (self.typeDescriptor, self.fieldName)

    def createDisplay(self):
        """Return "<type name> <field name>"."""
        return '%s %s' % (self.typeName, self.fieldName)
class ClassAnnotation(FieldSet):
    """Class annotation; currently only a single byte named "endBlockData" is parsed."""

    def createFields(self):
        # TODO
        end_marker = UInt8(self, "endBlockData")
        yield Enum(end_marker, TYPECODE_NAMES)
class SerializedClassDesc(FieldSet):
    """Class descriptor: name, serialVersionUID, flags, field descriptors and super class."""

    # One-bit flags that follow the three reserved bits, as (name, description).
    _FLAG_BITS = (
        ("classDescFlags_enum", "Is the class an Enum?"),
        ("classDescFlags_block_data", "Was the externalizable's block data written using stream version 2?"),
        ("classDescFlags_externalizable", "Does the class implement java.io.Externalizable?"),
        ("classDescFlags_serializable", "Does the class implement java.io.Serializable?"),
        ("classDescFlags_write_method", "Does the class have a writeObject method?"),
    )

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield PascalString16(self, "className", charset="UTF-8")
        yield Int64(self, "serialVersionUID")
        # Register the handle before the remaining fields are parsed.
        self.root.newHandle(self)
        yield NullBits(self, "classDescFlags_reserved", 3)
        for flag_name, flag_desc in self._FLAG_BITS:
            yield Bit(self, flag_name, flag_desc)
        yield Int16(self, "fieldDesc_count")
        remaining = self['fieldDesc_count'].value
        while remaining > 0:
            yield FieldDesc(self, "fieldDesc[]")
            remaining -= 1
        yield ClassAnnotation(self, "classAnnotation")
        yield SerializedContent(self, "superClassDesc")

    @property
    def className(self):
        """Value of the "className" field."""
        return self['className'].value
class ObjectValue(FieldSet):
    """Field values of an object, laid out according to its class descriptor chain."""

    def gen_values(self, classDesc):
        """Yield value fields for classDesc, super class fields first.

        References are followed to their referent; a null descriptor ends
        the recursion.  Each yielded field gets a fieldName attribute.
        """
        if isinstance(classDesc, SerializedReference):
            classDesc = classDesc.referent
        if isinstance(classDesc, SerializedNull):
            return
        # TODO: proxy class desc
        for inherited in self.gen_values(classDesc['superClassDesc']):
            yield inherited
        owner = classDesc.className
        for descriptor in classDesc.array('fieldDesc'):
            value_class = VALUE_CLASS_MAP[descriptor['typecode'].value]
            label = "%s.%s" % (owner, descriptor.fieldName)
            value_field = value_class(self, "field[]", description=label)
            value_field.fieldName = descriptor.fieldName
            yield value_field

    def createFields(self):
        # The class descriptor is taken from the enclosing field set.
        for value_field in self.gen_values(self.parent.classDesc):
            yield value_field
class SerializedObject(FieldSet):
    """Object content: typecode, class descriptor and the object's field values."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # The handle is registered after the class descriptor, before the values.
        self.root.newHandle(self)
        yield ObjectValue(self, "value")

    @property
    def classDesc(self):
        """The "classDesc" field, with a reference replaced by its referent."""
        descriptor = self['classDesc']
        if not isinstance(descriptor, SerializedReference):
            return descriptor
        return descriptor.referent

    def createValue(self):
        """Return a tuple with the value of every field[] item."""
        return tuple([item.value for item in self['value'].array('field')])

    def createDisplay(self):
        """Return "<class name>(<name>=<display>, ...)"."""
        def render(item):
            is_reference = isinstance(item, SerializedReference)
            if is_reference and not isinstance(item.referent, SerializedString):
                # Avoid recursive references
                return '%s=#<REF:%s>' % (item.fieldName, item.referent.classDesc.className)
            return '%s=%s' % (item.fieldName, item.display)

        rendered = [render(item) for item in self['value'].array('field')]
        return '%s(%s)' % (self.classDesc.className, ', '.join(rendered))
class SerializedString(FieldSet):
    """String content: typecode byte followed by a 16-bit-length UTF-8 string."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        self.root.newHandle(self)
        yield PascalString16(self, "value", charset="UTF-8")

    def createValue(self):
        """Return the string value."""
        return self['value'].value

    def createDisplay(self):
        """Return the display of the string field."""
        return self['value'].display
class SerializedArray(FieldSet):
    """Array content: typecode, class descriptor, 32-bit size, then the elements."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        self.root.newHandle(self)
        yield Int32(self, "size")
        # className is [<elementType>
        element_code = self.classDesc.className[1]
        element_class = VALUE_CLASS_MAP[element_code]
        for _ in xrange(self['size'].value):
            yield element_class(self, "value[]")

    @property
    def classDesc(self):
        """The "classDesc" field, with a reference replaced by its referent."""
        descriptor = self['classDesc']
        if not isinstance(descriptor, SerializedReference):
            return descriptor
        return descriptor.referent

    def createValue(self):
        """Return a list with the value of every element."""
        return [element.value for element in self.array('value')]

    def createDisplay(self):
        """Return "[<element display>, ...]"."""
        def render(element):
            is_reference = isinstance(element, SerializedReference)
            if is_reference and not isinstance(element.referent, SerializedString):
                # Avoid recursive references
                return '#<REF:%s>' % (element.referent.classDesc.className,)
            return '%s' % (element.display,)

        rendered = [render(element) for element in self.array('value')]
        return '[%s]' % ', '.join(rendered)
class SerializedClass(FieldSet):
    """Class content: typecode byte followed by a class descriptor item."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # The handle is registered once the descriptor has been produced.
        self.root.newHandle(self)
class BlockData(FieldSet):
    """Block data; only the typecode byte is parsed so far."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        # TODO
class StreamReset(FieldSet):
    """Reset marker: a typecode byte, after which the root handle table is reset."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        self.root.resetHandles()
class BlockDataLong(FieldSet):
    """Long block data; only the typecode byte is parsed so far."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        # TODO
class SerializedException(FieldSet):
    """Exception content: typecode byte, then one content item, with handle resets around it."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        self.root.resetHandles()
        # The original referenced an undefined name (SerializableObject), which
        # raised NameError; the typecode dispatcher builds the nested item.
        yield SerializedContent(self, "object")
        self.root.resetHandles()
class SerializedLongString(FieldSet):
    """Long string content: typecode byte followed by a LongString."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        self.root.newHandle(self)
        yield LongString(self, "value")

    def createValue(self):
        """Return the value of the nested LongString."""
        return self['value'].value
class SerializedProxyClassDesc(FieldSet):
    """Proxy class descriptor; only the typecode byte is parsed so far."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
        # TODO
class SerializedEnum(FieldSet):
    """Enum content: typecode, class descriptor and the constant's name item."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        self.root.newHandle(self)
        yield SerializedContent(self, "enumConstantName")

    @property
    def classDesc(self):
        """The "classDesc" field, with a reference replaced by its referent."""
        descriptor = self['classDesc']
        if not isinstance(descriptor, SerializedReference):
            return descriptor
        return descriptor.referent

    def createValue(self):
        """Return the value of the "enumConstantName" item."""
        return self['enumConstantName'].value

    def createDisplay(self):
        """Return "<class name>.<constant name>"."""
        return '%s.%s' % (self.classDesc.className, self.value)
# Display names for the typecode byte; the codes are consecutive from 0x70 to 0x7E.
TYPECODE_NAMES = dict(
    (0x70 + offset, label)
    for offset, label in enumerate((
        "NULL",
        "REFERENCE",
        "CLASSDESC",
        "OBJECT",
        "STRING",
        "ARRAY",
        "CLASS",
        "BLOCKDATA",
        "ENDBLOCKDATA",
        "RESET",
        "BLOCKDATALONG",
        "EXCEPTION",
        "LONGSTRING",
        "PROXYCLASSDESC",
        "ENUM",
    ))
)
# Field class used by SerializedContent for each typecode byte.
TYPECODE_TABLE = {
    0x70: SerializedNull,
    0x71: SerializedReference,
    0x72: SerializedClassDesc,
    0x73: SerializedObject,
    0x74: SerializedString,
    0x75: SerializedArray,
    0x76: SerializedClass,
    0x77: BlockData,
    # 0x78: EndBlockData,  (no class registered for this typecode)
    0x79: StreamReset,
    0x7a: BlockDataLong,
    0x7b: SerializedException,
    0x7c: SerializedLongString,
    0x7d: SerializedProxyClassDesc,
    0x7e: SerializedEnum,
}
def SerializedContent(parent, name, description=None):
    """Build the field matching the next typecode byte in parent's stream.

    The byte at parent's current position is peeked and looked up in
    TYPECODE_TABLE; the matching class is called with
    (parent, name, description).

    Raises ParserError when the typecode has no entry in the table.
    """
    position = parent.absolute_address + parent.current_size
    typecode = parent.stream.readBits(position, 8, parent.endian)
    field_class = TYPECODE_TABLE.get(typecode)
    if field_class is not None:
        return field_class(parent, name, description)
    raise ParserError("Unknown typecode 0x%02x" % typecode)
# Field class (or factory) used to read a value, keyed by its one-character typecode.
VALUE_CLASS_MAP = {
    # Fixed-size values
    'B': Int8,
    'C': UTF16Character,
    'D': Float64,
    'F': Float32,
    'I': Int32,
    'J': Int64,
    'S': Int16,
    'Z': JavaBool,
    # Nested content, resolved from the next typecode byte
    '[': SerializedContent,  # SerializedArray or reference
    'L': SerializedContent,  # SerializedObject or reference
}
class JavaSerializedFile(Parser):
    """Root parser for Java object serialization streams."""

    endian = BIG_ENDIAN
    MAGIC = 0xaced
    KNOWN_VERSIONS = (5,)
    PARSER_TAGS = {
        "id": "java_serialized",
        "category": "program",
        "file_ext": ("ser",),
        "mime": (u"application/java-serialized-object",),
        "min_size": 4*4,
        "magic": (("\xac\xed", 0),),
        "description": "Serialized Java object",
    }

    def validate(self):
        """Return True for a known magic and version, else an error string."""
        if self["magic"].value != self.MAGIC:
            return "Wrong magic signature!"
        version = self["version"].value
        if version in self.KNOWN_VERSIONS:
            return True
        return "Unknown version (%d)" % version

    def createDescription(self):
        return "Serialized Java object, version %s" % self["version"].value

    def resetHandles(self):
        """Empty the handle table and restart numbering at 0x7E0000."""
        self.handles = {}
        self.nextHandleNum = 0x7E0000

    def newHandle(self, obj):
        """Register obj under the next handle number and advance the counter."""
        handle = self.nextHandleNum
        self.handles[handle] = obj
        self.nextHandleNum = handle + 1

    def createFields(self):
        self.resetHandles()
        magic = UInt16(self, "magic", "Java serialized object signature")
        yield textHandler(magic, hexadecimal)
        yield UInt16(self, "version", "Stream version")
        # Content items follow one another until the end of the stream.
        while not self.eof:
            yield SerializedContent(self, "object[]")

View file

@ -0,0 +1,471 @@
"""
Mach-O (Mac OS X executable file format) parser.
Author: Robert Xiao
Creation date: February 11, 2015
"""
from hachoir_parser import HachoirParser
from hachoir_core.field import (RootSeekableFieldSet, FieldSet, ParserError, Bit, NullBits, RawBits,
Int32, UInt8, UInt16, UInt32, UInt64, Enum,
String, RawBytes, Bytes)
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN
class ElfHeader(FieldSet):
    """ELF file header, with lookup tables for its enumerated fields."""

    LITTLE_ENDIAN_ID = 1
    BIG_ENDIAN_ID = 2

    MACHINE_NAME = {
        # e_machine, EM_ defines
        0: u"No machine",
        1: u"AT&T WE 32100",
        2: u"SPARC",
        3: u"Intel 80386",
        4: u"Motorola 68000",
        5: u"Motorola 88000",
        6: u"Intel 80486",
        7: u"Intel 80860",
        8: u"MIPS I Architecture",
        9: u"Amdahl UTS on System/370",
        10: u"MIPS RS3000 Little-endian",
        11: u"IBM RS/6000 XXX reserved",
        15: u"Hewlett-Packard PA-RISC",
        16: u"NCube XXX reserved",
        17: u"Fujitsu VPP500",
        18: u"Enhanced instruction set SPARC",
        19: u"Intel 80960",
        20: u"PowerPC 32-bit",
        21: u"PowerPC 64-bit",
        36: u"NEC V800",
        37: u"Fujitsu FR20",
        38: u"TRW RH-32",
        39: u"Motorola RCE",
        40: u"Advanced RISC Machines (ARM)",
        41: u"DIGITAL Alpha",
        42: u"Hitachi Super-H",
        43: u"SPARC Version 9",
        44: u"Siemens Tricore",
        45: u"Argonaut RISC Core",
        46: u"Hitachi H8/300",
        47: u"Hitachi H8/300H",
        48: u"Hitachi H8S",
        49: u"Hitachi H8/500",
        50: u"Intel Merced (IA-64) Processor",
        51: u"Stanford MIPS-X",
        52: u"Motorola Coldfire",
        53: u"Motorola MC68HC12",
        62: u"Advanced Micro Devices x86-64",
        75: u"DIGITAL VAX",
        36902: u"used by NetBSD/alpha; obsolete",
    }

    CLASS_NAME = {
        # e_ident[EI_CLASS], ELFCLASS defines
        1: u"32 bits",
        2: u"64 bits"
    }

    TYPE_NAME = {
        # e_type, ET_ defines
        0: u"No file type",
        1: u"Relocatable file",
        2: u"Executable file",
        3: u"Shared object file",
        4: u"Core file",
        0xFF00: u"Processor-specific (0xFF00)",
        0xFFFF: u"Processor-specific (0xFFFF)",
    }

    OSABI_NAME = {
        # e_ident[EI_OSABI], ELFOSABI_ defines
        0: u"UNIX System V ABI",
        1: u"HP-UX operating system",
        2: u"NetBSD",
        3: u"GNU/Linux",
        4: u"GNU/Hurd",
        5: u"86Open common IA32 ABI",
        6: u"Solaris",
        7: u"Monterey",
        8: u"IRIX",
        9: u"FreeBSD",
        10: u"TRU64 UNIX",
        11: u"Novell Modesto",
        12: u"OpenBSD",
        97: u"ARM",
        255: u"Standalone (embedded) application",
    }

    ENDIAN_NAME = {
        # e_ident[EI_DATA], ELFDATA defines
        LITTLE_ENDIAN_ID: "Little endian",
        BIG_ENDIAN_ID: "Big endian",
    }

    def createFields(self):
        yield Bytes(self, "signature", 4, r'ELF signature ("\x7fELF")')
        yield Enum(UInt8(self, "class", "Class"), self.CLASS_NAME)
        # Class 1 selects 32-bit words for entry/phoff/shoff; anything else selects 64-bit.
        ElfLongWord = UInt32 if self["class"].value == 1 else UInt64
        yield Enum(UInt8(self, "endian", "Endian"), self.ENDIAN_NAME)
        yield UInt8(self, "file_version", "File version")
        yield Enum(UInt8(self, "osabi_ident", "OS/syscall ABI identification"), self.OSABI_NAME)
        yield UInt8(self, "abi_version", "syscall ABI version")
        yield String(self, "pad", 7, "Pad")

        yield Enum(UInt16(self, "type", "File type"), self.TYPE_NAME)
        yield Enum(UInt16(self, "machine", "Machine type"), self.MACHINE_NAME)
        yield UInt32(self, "version", "ELF format version")
        yield textHandler(ElfLongWord(self, "entry", "Entry point"), hexadecimal)
        yield ElfLongWord(self, "phoff", "Program header file offset")
        yield ElfLongWord(self, "shoff", "Section header file offset")
        yield UInt32(self, "flags", "Architecture-specific flags")
        for field_name, field_desc in (
            ("ehsize", "Elf header size (this header)"),
            ("phentsize", "Program header entry size"),
            ("phnum", "Program header entry count"),
            ("shentsize", "Section header entry size"),
            ("shnum", "Section header entry count"),
            ("shstrndx", "Section header string table index"),
        ):
            yield UInt16(self, field_name, field_desc)

    def isValid(self):
        """Return an empty string when the header looks valid, else an error message."""
        if self["signature"].value != "\x7FELF":
            return "Wrong ELF signature"
        if self["class"].value not in self.CLASS_NAME:
            return "Unknown class"
        endian_id = self["endian"].value
        if endian_id not in self.ENDIAN_NAME:
            return "Unknown endian (%s)" % endian_id
        return ""
class SectionFlags(FieldSet):
    """Section flag word, split into bit fields.

    The word is 32 bits, plus 32 reserved bits when root.is64bit is set.
    The two branches list the same fields in mirrored order, following the
    root's endianness.
    """

    def createFields(self):
        if self.root.endian == BIG_ENDIAN:
            if self.root.is64bit:
                yield RawBits(self, "reserved[]", 32)
            yield RawBits(self, "processor_specific", 4, "Processor specific flags")
            yield NullBits(self, "reserved[]", 17)
            yield Bit(self, "is_tls", "Section contains TLS data?")
            yield NullBits(self, "reserved[]", 7)
            yield Bit(self, "is_exec", "Section contains executable instructions?")
            yield Bit(self, "is_alloc", "Section occupies memory?")
            yield Bit(self, "is_writable", "Section contains writable data?")
        else:
            yield Bit(self, "is_writable", "Section contains writable data?")
            yield Bit(self, "is_alloc", "Section occupies memory?")
            yield Bit(self, "is_exec", "Section contains executable instructions?")
            yield NullBits(self, "reserved[]", 7)
            yield Bit(self, "is_tls", "Section contains TLS data?")
            # Exact mirror of the big-endian layout: the 17 reserved bits come
            # before the processor-specific bits, which are the top four bits
            # of the 32-bit word.  The original had these two swapped.
            yield NullBits(self, "reserved[]", 17)
            yield RawBits(self, "processor_specific", 4, "Processor specific flags")
            if self.root.is64bit:
                yield RawBits(self, "reserved[]", 32)
class SymbolStringTableOffset(UInt32):
    """32-bit offset displayed as the NUL-terminated text found at that offset.

    The text is read from the value of the section whose index is given by
    the header's "shstrndx" field.
    """

    def createDisplay(self):
        table_index = self['/header/shstrndx'].value
        table = self['/section[' + str(table_index) + ']']
        tail = table.value[self.value:]
        # Keep only what precedes the first NUL byte.
        return tail.split('\0', 1)[0]
class SectionHeader32(FieldSet):
    """Section header with 32-bit address and size fields (40 bytes)."""

    static_size = 40*8

    TYPE_NAME = {
        # sh_type, SHT_ defines
        0: "Inactive",
        1: "Program defined information",
        2: "Symbol table section",
        3: "String table section",
        4: "Relocation section with addends",
        5: "Symbol hash table section",
        6: "Dynamic section",
        7: "Note section",
        8: "Block started by symbol (BSS) or No space section",
        9: "Relocation section without addends",
        10:"Reserved - purpose unknown",
        11:"Dynamic symbol table section",
    }

    def createFields(self):
        yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)")
        yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME)
        yield SectionFlags(self, "flags", "Section flags")
        # Addresses and size are shown in hexadecimal.
        for field_name, field_desc in (
            ("VMA", "Virtual memory address"),
            ("LMA", "Logical memory address (offset in file)"),
            ("size", "Section size (bytes)"),
        ):
            yield textHandler(UInt32(self, field_name, field_desc), hexadecimal)
        for field_name, field_desc in (
            ("link", "Index of a related section"),
            ("info", "Type-dependent information"),
            ("addr_align", "Address alignment (bytes)"),
            ("entry_size", "Size of each entry in section"),
        ):
            yield UInt32(self, field_name, field_desc)

    def createDescription(self):
        details = (self["name"].display, self["type"].display)
        return "Section header (name: %s, type: %s)" % details
class SectionHeader64(SectionHeader32):
    """Section header with 64-bit address, size and alignment fields (64 bytes)."""

    static_size = 64*8

    def createFields(self):
        yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)")
        yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME)
        yield SectionFlags(self, "flags", "Section flags")
        for field_name, field_desc in (
            ("VMA", "Virtual memory address"),
            ("LMA", "Logical memory address (offset in file)"),
            ("size", "Section size (bytes)"),
        ):
            yield textHandler(UInt64(self, field_name, field_desc), hexadecimal)
        # link and info stay 32 bits wide in this layout.
        yield UInt32(self, "link", "Index of a related section")
        yield UInt32(self, "info", "Type-dependent information")
        yield UInt64(self, "addr_align", "Address alignment (bytes)")
        yield UInt64(self, "entry_size", "Size of each entry in section")
class ProgramFlags(FieldSet):
    """32-bit segment flag word: three permission bits plus 29 padding bits."""

    static_size = 32
    FLAGS = (('pf_r','readable'),('pf_w','writable'),('pf_x','executable'))

    def createFields(self):
        big_endian = (self.root.endian == BIG_ENDIAN)
        if big_endian:
            yield NullBits(self, "padding[]", 29)
        # Flag order is mirrored for little-endian roots.
        ordered = self.FLAGS if big_endian else reversed(self.FLAGS)
        for bit_name, adjective in ordered:
            yield Bit(self, bit_name, "Segment is " + adjective)
        if not big_endian:
            yield NullBits(self, "padding[]", 29)

    def createDescription(self):
        """Return 'Segment is ' followed by the adjectives of the flags that are set."""
        active = [adjective for bit_name, adjective in self.FLAGS if self[bit_name].value]
        return 'Segment is ' + ', '.join(active)
class ProgramHeader32(FieldSet):
    """Program (segment) header whose offset, address and size fields are UInt32."""
    TYPE_NAME = {
        # p_type, PT_ defines
        0: u"Unused program header table entry",
        1: u"Loadable program segment",
        2: u"Dynamic linking information",
        3: u"Program interpreter",
        4: u"Auxiliary information",
        5: u"Reserved, unspecified semantics",
        6: u"Entry for header table itself",
        7: u"Thread Local Storage segment",
        0x70000000: u"MIPS_REGINFO",
    }
    static_size = 32*8

    def createFields(self):
        """Yield the header fields; the flags word comes after the two sizes."""
        yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME)
        yield UInt32(self, "offset", "Offset")
        # Addresses are displayed in hexadecimal.
        for key, text in (("vaddr", "V. address"), ("paddr", "P. address")):
            yield textHandler(UInt32(self, key, text), hexadecimal)
        for key, text in (("file_size", "File size"), ("mem_size", "Memory size")):
            yield UInt32(self, key, text)
        yield ProgramFlags(self, "flags")
        yield UInt32(self, "align", "Alignment padding")

    def createDescription(self):
        """Return the header label with the displayed segment type."""
        segment_kind = self["type"].display
        return "Program Header (%s)" % segment_kind
class ProgramHeader64(ProgramHeader32):
    """Program header variant with UInt64 offset, address, size and alignment fields.

    Unlike the 32-bit layout, the flags word directly follows the type.
    """
    static_size = 56*8

    def createFields(self):
        """Yield the header fields in stream order."""
        yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME)
        yield ProgramFlags(self, "flags")
        yield UInt64(self, "offset", "Offset")
        # Addresses are displayed in hexadecimal.
        for key, text in (("vaddr", "V. address"), ("paddr", "P. address")):
            yield textHandler(UInt64(self, key, text), hexadecimal)
        wide_fields = (
            ("file_size", "File size"),
            ("mem_size", "Memory size"),
            ("align", "Alignment padding"),
        )
        for key, text in wide_fields:
            yield UInt64(self, key, text)
# Bit OR-ed into a CPU type value to name its 64-bit counterpart below.
CPU_ARCH_ABI64 = 0x01000000

# Display names for the "cputype" header field.
CPU_TYPE = {
    -1: 'Any',
    1: 'VAX',
    6: 'MC680x0',
    7: 'i386',
    7 | CPU_ARCH_ABI64: 'x86_64',
    8: 'MIPS',
    10: 'MC98000',
    11: 'HPPA',
    12: 'ARM',
    12 | CPU_ARCH_ABI64: 'ARM64',
    13: 'MC88000',
    14: 'SPARC',
    15: 'I860',
    16: 'Alpha',
    18: 'PowerPC',
    18 | CPU_ARCH_ABI64: 'PowerPC64',
}

# Display names for the "filetype" header field.
FILE_TYPE = {
    1: 'Relocatable object',
    2: 'Demand-paged executable',
    3: 'Fixed VM shared library',
    4: 'Core file',
    5: 'Preloaded executable',
    6: 'Dynamically bound shared library',
    7: 'Dynamic link editor',
    8: 'Dynamically bound bundle',
    9: 'Shared library stub for static linking only',
    10: 'Companion file with only debug sections',
    11: 'x86_64 kext',
}

# Maps the four signature bytes to (is 64-bit flag, byte order).
MACHO_MAGICS = {
    "\xfe\xed\xfa\xce": (0, BIG_ENDIAN),     # 32-bit big endian
    "\xce\xfa\xed\xfe": (0, LITTLE_ENDIAN),  # 32-bit little endian
    "\xfe\xed\xfa\xcf": (1, BIG_ENDIAN),     # 64-bit big endian
    "\xcf\xfa\xed\xfe": (1, LITTLE_ENDIAN),  # 64-bit little endian
}
class MachoHeader(FieldSet):
    """Mach-O file header; a trailing "reserved" word is read when the parent is 64-bit."""

    def createFields(self):
        """Yield the signature, CPU and file type fields, then the command counters."""
        yield Bytes(self, "magic", 4, "Mach-O signature")
        yield Enum(Int32(self, "cputype"), CPU_TYPE)
        yield Int32(self, "cpusubtype")
        yield Enum(UInt32(self, "filetype"), FILE_TYPE)
        for key in ("ncmds", "sizeofcmds", "flags"):
            yield UInt32(self, key)
        # The parent field set decides the word size from the file signature.
        if self.parent.is64bit:
            yield UInt32(self, "reserved")
class MachoLoadCommand(FieldSet):
    """Load command: only the command id and its size are decoded.

    The field set size is taken from "cmdsize" (bytes), so it spans the
    whole command even though no further fields are yielded.
    """
    # No command names are registered yet; "cmd" is shown as a raw number.
    LOAD_COMMANDS = {
    }

    def createFields(self):
        """Yield "cmd" and "cmdsize", then fix the field set size from "cmdsize"."""
        yield Enum(UInt32(self, "cmd"), self.LOAD_COMMANDS)
        yield UInt32(self, "cmdsize")
        command_bytes = self['cmdsize'].value
        self._size = command_bytes * 8
class MachoFileBase(RootSeekableFieldSet):
    """Field set for one Mach-O image: a header followed by its load commands.

    Word size and byte order are chosen from the four signature bytes found
    at this field set's absolute address.
    """
    # Maps the four signature bytes to (is 64-bit flag, byte order).
    MAGICS = {"\xfe\xed\xfa\xce": (0, BIG_ENDIAN), # 32-bit big endian
              "\xce\xfa\xed\xfe": (0, LITTLE_ENDIAN), # 32-bit little endian
              "\xfe\xed\xfa\xcf": (1, BIG_ENDIAN), # 64-bit big endian
              "\xcf\xfa\xed\xfe": (1, LITTLE_ENDIAN), # 64-bit little endian
              }

    def createFields(self):
        """Yield the header and then one load command per "header/ncmds".

        Raises KeyError when the signature is not one of MAGICS.
        """
        baseaddr = self.absolute_address
        # Choose size and endianness based on magic
        magic = self.stream.readBytes(baseaddr, 4)
        self.is64bit, self.endian = self.MAGICS[magic]

        yield MachoHeader(self, "header", "Header")
        for _ in xrange(self['header/ncmds'].value):
            yield MachoLoadCommand(self, "load_command[]")

    def createDescription(self):
        """Return the file label with the displayed CPU type."""
        return "Mach-O program/library: %s" % (self["header/cputype"].display)
class MachoFile(HachoirParser, MachoFileBase):
    """Stand-alone parser for a single-architecture Mach-O file."""
    PARSER_TAGS = {
        "id": "macho",
        "category": "program",
        "file_ext": ("dylib", "bundle", "o", ""),
        "min_size": (28+56)*8, # Header + one segment load command
        "mime": (
            u"application/x-executable",
            u"application/x-object",
            u"application/x-sharedlib",
            u"application/x-executable-file",
            u"application/x-coredump"),
        "magic": tuple((m,0) for m in MachoFileBase.MAGICS),
        "description": "Mach-O program/library"
    }
    endian = BIG_ENDIAN

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        total_size = stream.askSize(self)
        MachoFileBase.__init__(self, None, "root", stream, None, total_size)
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the first four bytes are a known signature, else an error string."""
        signature = self.stream.readBytes(0, 4)
        if signature in self.MAGICS:
            return True
        return "Invalid magic"
class MachoFatArch(FieldSet):
    """One architecture entry of a fat header: CPU type, offset, size and alignment."""

    def createFields(self):
        """Yield the entry fields and attach a computed description to "align"."""
        yield Enum(Int32(self, "cputype"), CPU_TYPE)
        yield Int32(self, "cpusubtype")
        yield textHandler(UInt32(self, "offset"), hexadecimal)
        for key in ("size", "align"):
            yield UInt32(self, key)
        # "align" is described as 1 shifted left by its stored value.
        self['align'].createDescription = lambda: str(1 << self['align'].value)
class MachoFatHeader(FieldSet):
    """Fat file header: signature, architecture count, then one entry per architecture."""

    def createFields(self):
        """Yield the signature and count, followed by "nfat_arch" architecture entries."""
        yield Bytes(self, "magic", 4, "Mach-O signature")
        yield UInt32(self, "nfat_arch", "Number of architectures in this fat file")
        arch_total = self['nfat_arch'].value
        for _ in xrange(arch_total):
            yield MachoFatArch(self, 'arch[]')
class MachoFatFile(HachoirParser, RootSeekableFieldSet):
    """Parser for fat Mach-O files that bundle several architecture images."""
    MAGIC_BE = "\xca\xfe\xba\xbe"
    MAGIC_LE = "\xbe\xba\xfe\xca"

    PARSER_TAGS = {
        "id": "macho_fat",
        "category": "program",
        "file_ext": ("dylib", "bundle", ""),
        "min_size": 4096*8 + MachoFile.PARSER_TAGS['min_size'], # One page + size for one arch
        "mime": (
            u"application/x-executable",
            u"application/x-object",
            u"application/x-sharedlib",
            u"application/x-executable-file",
            u"application/x-coredump"),
        "magic": ((MAGIC_LE, 0), (MAGIC_BE, 0)),
        "description": "Mach-O fat program/library"
    }
    endian = BIG_ENDIAN

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        total_size = stream.askSize(self)
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, total_size)
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True for a plausible fat file, else a string naming the problem."""
        signature = self.stream.readBytes(0, 4)
        if signature not in (self.MAGIC_LE, self.MAGIC_BE):
            return "Invalid magic"
        arch_total = self['header/nfat_arch'].value
        if arch_total >= 16:
            # This helps to distinguish mach-o from java.
            return "Too many architectures"
        return True

    def createFields(self):
        """Yield the fat header, then one embedded Mach-O image per architecture entry."""
        # Choose the right endian based on file magic
        is_little = self.stream.readBytes(0, 4) == self.MAGIC_LE
        self.endian = LITTLE_ENDIAN if is_little else BIG_ENDIAN

        # Parse the fat header, then each image at the offset its entry gives
        yield MachoFatHeader(self, "header", "Header")
        for entry in self['header'].array('arch'):
            self.seekByte(entry['offset'].value)
            image_bits = entry['size'].value * 8
            yield MachoFileBase(self, 'file[]', self.stream, None, image_bits)

View file

@ -3,4 +3,4 @@ from hachoir_parser.video.flv import FlvFile
from hachoir_parser.video.mov import MovFile
from hachoir_parser.video.mpeg_video import MPEGVideoFile
from hachoir_parser.video.mpeg_ts import MPEG_TS
from hachoir_parser.video.avchd import AVCHDINDX, AVCHDMOBJ, AVCHDMPLS, AVCHDCLPI

View file

@ -0,0 +1,433 @@
"""
Parser for AVCHD/Blu-ray formats
Notice: This parser is based off reverse-engineering efforts.
It is NOT based on official specifications, and is subject to change as
more information becomes available. There's a lot of guesswork here, so if you find
that something disagrees with an official specification, please change it.
Notice: This parser has NOT been tested on Blu-ray disc data, only on files
taken from AVCHD camcorders.
Author: Robert Xiao
Creation: December 30, 2010
References:
- Wikipedia: http://en.wikipedia.org/wiki/AVCHD
- European patent EP1821310: http://www.freepatentsonline.com/EP1821310.html
"""
"""
File structure:
Root (/PRIVATE/AVCHD, /AVCHD, /, etc.)
AVCHDTN/: (AVCHD only)
THUMB.TDT: Thumbnail Data: stored as a series of 16KiB pages, where each thumbnail starts on a page boundary
THUMB.TID: Thumbnail Index (TIDX), unknown format
BDMV/:
INDEX.BDM|index.bdmv: Bluray Disc Metadata (INDX): Clip index file
MOVIEOBJ.BDM|MovieObject.bdmv: Bluray Disc Metadata (MOBJ): Clip description file
AUXDATA/: (Optional, Blu-ray only)
sound.bdmv: Sound(s) associated with HDMV Interactive Graphic streams applications
?????.otf: Font(s) associated with Text subtitle applications
BACKUP/: (Optional)
[Copies of *.bdmv, CLIPINF/* and PLAYLIST/*]
CLIPINF/:
?????.CPI/?????.clpi: Clip information (HDMV)
PLAYLIST/:
?????.MPL/?????.mpls: Movie Playlist information (MPLS)
STREAM/:
?????.MTS|?????.m2ts: BDAV MPEG-2 Transport Stream (video file)
SSIF/: (Blu-ray 3D only)
?????.ssif: Stereoscopic Interleaved file
IISVPL/: (Optional?, AVCHD only?)
?????.VPL: Virtual Playlist? (MPLS)
"""
from hachoir_parser import HachoirParser
from hachoir_core.field import (RootSeekableFieldSet, FieldSet,
RawBytes, Bytes, String, Bits, UInt8, UInt16, UInt32, PascalString8, Enum)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.iso639 import ISO639_2
from hachoir_core.text_handler import textHandler, hexadecimal
from datetime import datetime
def fromhex(field):
    """Read the hexadecimal digits of ``field.value`` as a decimal number.

    For example a value of 0x20 yields 20. Raises ValueError when the
    hexadecimal form contains a letter digit (a-f).
    """
    hex_digits = '%x' % field.value
    return int(hex_digits)
class AVCHDTimestamp(FieldSet):
    """Eight-byte timestamp: one unknown byte, then century, year, month, day, hour, minute, second.

    Each component is decoded with fromhex(), i.e. its hexadecimal digits
    are read as a decimal number.
    """
    static_size = 8*8

    def createFields(self):
        """Yield the eight single-byte fields, all displayed in hexadecimal."""
        yield textHandler(UInt8(self, "unknown", description="0x1E"), hexadecimal)
        components = ("century", "year", "month", "day", "hour", "minute", "second")
        for key in components:
            yield textHandler(UInt8(self, key), hexadecimal)

    def createValue(self):
        """Build a datetime from the decoded components."""
        century, year, month, day, hour, minute, second = [
            fromhex(self[key])
            for key in ("century", "year", "month", "day", "hour", "minute", "second")
        ]
        return datetime(century*100 + year, month, day, hour, minute, second)
class AVCHDGenericChunk(FieldSet):
    """Undecoded chunk: a 32-bit byte count followed by that many raw bytes."""

    def createFields(self):
        """Yield the size field and the raw payload it announces."""
        yield UInt32(self, "size")
        payload_bytes = self['size'].value
        # The total size includes the 4-byte size field itself.
        self._size = (payload_bytes+4)*8
        yield RawBytes(self, "raw[]", payload_bytes)
class AVCHDINDX_0(FieldSet):
    """First chunk of an INDX file: 22 unknown bytes, a count, then 12-byte records."""

    def createFields(self):
        """Yield the size, the unknown block, the count and "count" raw records."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield RawBytes(self, "unknown[]", 22)
        yield UInt32(self, "count")
        record_total = self['count'].value
        for _ in xrange(record_total):
            yield RawBytes(self, "data[]", 12)
class AVCHDIDEX_0(FieldSet):
    """First IDEX sub-chunk: 40 unknown bytes, a last-modified timestamp, then the remainder."""

    def createFields(self):
        """Yield the size, the unknown block, the timestamp and the trailing raw bytes."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield RawBytes(self, "unknown[]", 40)
        yield AVCHDTimestamp(self, "last_modified")
        # 52 = 4 (size) + 40 (unknown) + 8 (timestamp) bytes already consumed.
        trailing_bytes = self._size//8-52
        yield RawBytes(self, "unknown[]", trailing_bytes)
class AVCHDMOBJ_Chunk(FieldSet):
    """Movie object record: seven 32-bit words, of which index, unknown_id and playlist_id are named."""

    def createFields(self):
        """Yield the seven words; playlist_id is displayed zero-padded to five digits."""
        yield UInt32(self, "unknown[]")
        yield UInt32(self, "index")
        yield UInt32(self, "unknown[]")
        yield textHandler(UInt32(self, "unknown_id"), hexadecimal)
        yield UInt32(self, "unknown[]")
        playlist_field = UInt32(self, "playlist_id")
        yield textHandler(playlist_field, lambda fld: '%05d'%fld.value)
        yield UInt32(self, "unknown[]")
class AVCHDMPLS_StreamEntry(FieldSet):
    """Stream entry: a one-byte size, an entry type and, for PlayItem types, a PID."""
    ENTRYTYPE = {1:'PlayItem on disc',
                 2:'SubPath on disc',
                 3:'PlayItem in local storage',
                 4:'SubPath in local storage'}

    def createFields(self):
        """Yield the size and type, plus the PID when the type is 1 or 3."""
        yield UInt8(self, "size")
        # The total size includes the 1-byte size field itself.
        self._size = (self['size'].value+1)*8
        yield Enum(UInt8(self, "type"), self.ENTRYTYPE)
        if self['type'].value not in (1,3):
            # Types 2 and 4 (SubPath). The patent says:
            #   ref_to_SubPath_id
            #   ref_to_SubClip_entry_id
            #   ref_to_Stream_PID_of_subClip
            # Sizes aren't given, though, so the format cannot be determined
            # without a sample; nothing more is decoded.
            return
        yield textHandler(UInt16(self, "pid", "PID of item in clip stream m2ts file"), hexadecimal)
class AVCHDMPLS_StreamAttribs(FieldSet):
    """Stream attributes whose layout depends on the first letter of the stream type name."""
    STREAMTYPE = {
        0x01: "V_MPEG1",
        0x02: "V_MPEG2",
        0x1B: "V_AVC",
        0xEA: "V_VC1",
        0x03: "A_MPEG1",
        0x04: "A_MPEG2",
        0x80: "A_LPCM",
        0x81: "A_AC3",
        0x84: "A_AC3_PLUS",
        0xA1: "A_AC3_PLUS_SEC",
        0x83: "A_TRUEHD",
        0x82: "A_DTS",
        0x85: "A_DTS-HD",
        0xA2: "A_DTS-HD_SEC",
        0x86: "A_DTS-MA",
        0x90: "S_PGS",
        0x91: "S_IGS",
        0x92: "T_SUBTITLE",
    }
    # Enumerations taken from "ClownBD's CLIPINF Editor". Values may not be accurate.

    def createFields(self):
        """Yield the size and type, then the fields matching the type's category letter."""
        yield UInt8(self, "size")
        # The total size includes the 1-byte size field itself.
        self._size = (self['size'].value+1)*8
        yield Enum(UInt8(self, "type"), self.STREAMTYPE)
        # The displayed name starts with V, A, T or S for known types.
        kind = self['type'].display
        if kind.startswith('V'): # Video
            yield Enum(Bits(self, "resolution", 4), {1:'480i', 2:'576i', 3:'480p', 4:'1080i', 5:'720p', 6:'1080p', 7:'576p'})
            yield Enum(Bits(self, "fps", 4), {1:'24/1.001', 2:'24', 3:'25', 4:'30/1.001', 6:'50', 7:'60/1.001'})
            yield Enum(UInt8(self, "aspect_ratio"), {0x20:'4:3', 0x30:'16:9'})
        elif kind.startswith('A'): # Audio
            yield Enum(Bits(self, "channel_layout", 4), {1:'Mono', 3:'Stereo', 6:'Multi', 12:'Combi'})
            yield Enum(Bits(self, "sample_rate", 4), {1:'48KHz', 4:'96KHz', 5:'192KHz', 12:'48-192KHz', 14:'48-96KHz'})
            yield Enum(String(self, "language", 3), ISO639_2)
        elif kind.startswith('T'): # Text subtitle
            yield UInt8(self, "unknown[]")
            yield Enum(String(self, "language", 3), ISO639_2)
        elif kind.startswith('S'): # Graphics
            yield Enum(String(self, "language", 3), ISO639_2)
        # Any other type: no further fields are decoded.
class AVCHDMPLS_Stream(FieldSet):
    """A stream description: its entry followed by its attributes."""

    def createFields(self):
        """Yield the "entry" and "attribs" sub-structures, in that order."""
        parts = (
            (AVCHDMPLS_StreamEntry, "entry"),
            (AVCHDMPLS_StreamAttribs, "attribs"),
        )
        for part_class, key in parts:
            yield part_class(self, key)
class AVCHDMPLS_PlayItem(FieldSet):
    """PlayItem: four per-category stream counts followed by the streams themselves.

    Streams are stored grouped by category, in the order video, audio,
    subtitle, interactive graphics.
    """

    def createFields(self):
        """Yield the size, the four counts, then each category's streams."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield UInt16(self, "unknown[]")
        yield UInt8(self, "video_count", "Number of video stream entries")
        yield UInt8(self, "audio_count", "Number of audio stream entries")
        yield UInt8(self, "subtitle_count", "Number of presentation graphics/text subtitle entries")
        yield UInt8(self, "ig_count", "Number of interactive graphics entries")
        yield RawBytes(self, "unknown[]", 8)
        # Each "<category>_count" field says how many "<category>[]" streams follow.
        for category in ("video", "audio", "subtitle", "ig"):
            for _ in xrange(self[category + '_count'].value):
                yield AVCHDMPLS_Stream(self, category + "[]")
class AVCHDMPLS_0_Chunk(FieldSet):
    """Playlist entry: clip id and type, start/end times and an embedded PlayItem."""

    def createFields(self):
        """Yield the 16-bit size, the clip fields and the trailing PlayItem."""
        yield UInt16(self, "size")
        # The total size includes the 2-byte size field itself.
        entry_bytes = self['size'].value+2
        self._size = entry_bytes*8
        yield Bytes(self, "clip_id", 5)
        yield Bytes(self, "clip_type", 4)
        yield RawBytes(self, "unknown[]", 3)
        time_fields = (
            ("clip_start_time[]", "clip start time (units unknown)"),
            ("clip_end_time[]", "clip end time (units unknown)"),
        )
        for key, text in time_fields:
            yield UInt32(self, key, text)
        yield RawBytes(self, "unknown[]", 10)
        yield AVCHDMPLS_PlayItem(self, "playitem")
class AVCHDMPLS_0(FieldSet):
    """First MPLS chunk: a 32-bit count, one unknown word and "count" playlist entries."""

    def createFields(self):
        """Yield the size, count and unknown word, then one chunk per counted entry."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield UInt32(self, "count")
        yield UInt16(self, "unknown[]")
        entry_total = self['count'].value
        for _ in xrange(entry_total):
            yield AVCHDMPLS_0_Chunk(self, "chunk[]")
class AVCHDMPLS_PlayItemMark(FieldSet):
    """Marker record: the index of a PlayItem and a time within its clip."""

    def createFields(self):
        """Yield an unknown word, the PlayItem index, the mark time and 6 unknown bytes."""
        yield UInt16(self, "unknown[]")
        yield UInt16(self, "playitem_idx", "Index of the associated PlayItem")
        yield UInt32(self, "mark_time", "Marker time in clip (units unknown)")
        # Note: this trailing field is named "unknown", not "unknown[]".
        yield RawBytes(self, "unknown", 6)
class AVCHDMPLS_1(FieldSet):
    """Second MPLS chunk: a 16-bit count followed by that many PlayItem marks."""

    def createFields(self):
        """Yield the size and count, then one mark per counted entry."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield UInt16(self, "count")
        mark_total = self['count'].value
        for _ in xrange(mark_total):
            yield AVCHDMPLS_PlayItemMark(self, "chunk[]")
class AVCHDPLEX_1_Chunk(FieldSet):
    """Fixed-size (66-byte) PLEX record carrying a timestamp and a Pascal string."""
    static_size = 66*8

    def createFields(self):
        """Yield 10 unknown bytes, the timestamp, 1 unknown byte and the string."""
        yield RawBytes(self, "unknown[]", 10)
        yield AVCHDTimestamp(self, "date")
        yield RawBytes(self, "unknown[]", 1)
        # NOTE(review): this reuses the name "date" already given to the
        # timestamp above -- confirm how the framework handles the duplicate.
        yield PascalString8(self, "date")

    def createValue(self):
        """Return the value of the field looked up as "date"."""
        date_field = self['date']
        return date_field.value
class AVCHDPLEX_0(FieldSet):
    """First PLEX sub-chunk: a last-modified timestamp and a Pascal string named "date"."""

    def createFields(self):
        """Yield the size, 10 unknown bytes, the timestamp, 2 unknown bytes and the string."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        chunk_bytes = self['size'].value+4
        self._size = chunk_bytes*8
        yield RawBytes(self, "unknown[]", 10)
        yield AVCHDTimestamp(self, "last_modified")
        yield RawBytes(self, "unknown[]", 2)
        yield PascalString8(self, "date")
class AVCHDPLEX_1(FieldSet):
    """Second PLEX sub-chunk: a 16-bit count followed by that many fixed-size records."""

    def createFields(self):
        """Yield the size and count, then one record per counted entry."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield UInt16(self, "count")
        record_total = self['count'].value
        for _ in xrange(record_total):
            yield AVCHDPLEX_1_Chunk(self, "chunk[]")
class AVCHDCLPI_1(FieldSet):
    """Clip information chunk: PID and stream attributes for video, then for audio."""

    def createFields(self):
        """Yield the size, 10 unknown bytes, then a PID/attributes pair per stream kind."""
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        yield RawBytes(self, "unknown[]", 10)
        streams = (
            ("video_pid", "PID of video data in stream file", "video_attribs"),
            ("audio_pid", "PID of audio data in stream file", "audio_attribs"),
        )
        for pid_key, pid_text, attribs_key in streams:
            yield textHandler(UInt16(self, pid_key, pid_text), hexadecimal)
            yield AVCHDMPLS_StreamAttribs(self, attribs_key)
def AVCHDIDEX(self):
    """Yield the sub-chunks of an IDEX block, created on the field set *self*."""
    for chunk_class in (AVCHDIDEX_0, AVCHDGenericChunk):
        yield chunk_class(self, "chunk[]")
def AVCHDPLEX(self):
    """Yield the sub-chunks of a PLEX block, created on the field set *self*."""
    for chunk_class in (AVCHDPLEX_0, AVCHDPLEX_1, AVCHDGenericChunk):
        yield chunk_class(self, "chunk[]")
def AVCHDCLEX(self):
    """Yield the two undecoded sub-chunks of a CLEX block, created on the field set *self*."""
    for chunk_class in (AVCHDGenericChunk, AVCHDGenericChunk):
        yield chunk_class(self, "chunk[]")
class AVCHDChunkWithHeader(FieldSet):
    """Chunk with a fixed header and a four-byte magic selecting how its body is parsed."""
    # Maps the magic to the generator function that yields the body fields.
    TYPES = {'IDEX': AVCHDIDEX,
             'PLEX': AVCHDPLEX,
             'CLEX': AVCHDCLEX,}

    def createFields(self):
        """Yield the header fields, then delegate the body to the parser chosen by "magic".

        Raises AssertionError when "size" is not "size2" + 20, and KeyError
        when the magic is not listed in TYPES.
        """
        yield UInt32(self, "size")
        # The total size includes the 4-byte size field itself.
        self._size = (self['size'].value+4)*8
        # Four unknown words; each description records a value noted by the author.
        for noted_value in ("24", "1", "0x10000100", "24"):
            yield UInt32(self, "unknown[]", noted_value)
        yield UInt32(self, "size2")
        assert self['size'].value == self['size2'].value+20
        yield Bytes(self, "magic", 4)
        yield RawBytes(self, "unknown[]", 36)
        body_parser = self.TYPES[self['magic'].value]
        for field in body_parser(self):
            yield field
class AVCHDINDX(HachoirParser, RootSeekableFieldSet):
    """Parser for files starting with "INDX0": a header, two offsets and the chunks they point to."""
    endian = BIG_ENDIAN
    MAGIC = "INDX0"
    PARSER_TAGS = {
        "id": "bdmv_index",
        "category": "video",
        "file_ext": ("bdm","bdmv"),
        "magic": ((MAGIC, 0),),
        "min_size": 8*8, # INDX0?00 (8 bytes, expressed in bits)
        "description": "INDEX.BDM",
    }

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the stream starts with MAGIC, else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        return True

    def createFields(self):
        """Yield the file type/version, the two offsets, then the chunk at each offset."""
        yield Bytes(self, "filetype", 4, "File type (INDX)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")
        yield UInt32(self, "offset[0]")
        yield UInt32(self, "offset[1]")
        self.seekByte(self['offset[0]'].value)
        yield AVCHDINDX_0(self, "chunk[]")
        self.seekByte(self['offset[1]'].value)
        yield AVCHDChunkWithHeader(self, "chunk[]")
class AVCHDMOBJ(HachoirParser, RootSeekableFieldSet):
    """Parser for files starting with "MOBJ0": a header followed by movie object records."""
    endian = BIG_ENDIAN
    MAGIC = "MOBJ0"
    PARSER_TAGS = {
        "id": "bdmv_mobj",
        "category": "video",
        "file_ext": ("bdm","bdmv"),
        "magic": ((MAGIC, 0),),
        "min_size": 8*8, # MOBJ0?00 (8 bytes, expressed in bits)
        "description": "MOVIEOBJ.BDM",
    }

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the stream starts with MAGIC, else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        return True

    def createFields(self):
        """Yield the header fields, then the movie object records."""
        yield Bytes(self, "filetype", 4, "File type (MOBJ)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")
        yield RawBytes(self, "unknown[]", 32)
        yield UInt32(self, "size")
        yield UInt32(self, "unknown[]")
        yield UInt16(self, "count")
        yield textHandler(UInt32(self, "unknown_id"), hexadecimal)
        # NOTE(review): the range starts at 1, so count-1 records are read --
        # confirm this is intended.
        for _ in xrange(1, self['count'].value):
            yield AVCHDMOBJ_Chunk(self, "movie_object[]")
class AVCHDMPLS(HachoirParser, RootSeekableFieldSet):
    """Parser for files starting with "MPLS0": a header, three offsets and the chunks they point to."""
    endian = BIG_ENDIAN
    MAGIC = "MPLS0"
    PARSER_TAGS = {
        "id": "bdmv_mpls",
        "category": "video",
        "file_ext": ("mpl","mpls","vpl"),
        "magic": ((MAGIC, 0),),
        "min_size": 8*8, # MPLS0?00 (8 bytes, expressed in bits)
        "description": "MPLS",
    }

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the stream starts with MAGIC, else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        return True

    def createFields(self):
        """Yield the file type/version, the three offsets, then the chunk at each offset."""
        yield Bytes(self, "filetype", 4, "File type (MPLS)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")
        yield UInt32(self, "offset[0]")
        yield UInt32(self, "offset[1]")
        yield UInt32(self, "offset[2]")
        self.seekByte(self['offset[0]'].value)
        yield AVCHDMPLS_0(self, "chunk[]")
        self.seekByte(self['offset[1]'].value)
        yield AVCHDMPLS_1(self, "chunk[]")
        self.seekByte(self['offset[2]'].value)
        yield AVCHDChunkWithHeader(self, "chunk[]")
class AVCHDCLPI(HachoirParser, RootSeekableFieldSet):
    """Parser for files starting with "HDMV0": a header, five offsets and the chunks they point to."""
    endian = BIG_ENDIAN
    MAGIC = "HDMV0"
    PARSER_TAGS = {
        "id": "bdmv_clpi",
        "category": "video",
        "file_ext": ("cpi","clpi"),
        "magic": ((MAGIC, 0),),
        "min_size": 8*8, # HDMV0?00 (8 bytes, expressed in bits)
        "description": "HDMV",
    }

    def __init__(self, stream, **args):
        """Initialise the root field set over *stream*, then the parser base."""
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the stream starts with MAGIC, else an error string."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic"
        return True

    def createFields(self):
        """Yield the file type/version, the five offsets, then the chunk at each offset."""
        yield Bytes(self, "filetype", 4, "File type (HDMV)")
        yield Bytes(self, "fileversion", 4, "File version (0?00)")
        yield UInt32(self, "offset[]")
        yield UInt32(self, "offset[]")
        yield UInt32(self, "offset[]")
        yield UInt32(self, "offset[]")
        yield UInt32(self, "offset[]")
        # Only the chunk at offset[1] and the last one have a decoded layout.
        self.seekByte(self['offset[0]'].value)
        yield AVCHDGenericChunk(self, "chunk[]")
        self.seekByte(self['offset[1]'].value)
        yield AVCHDCLPI_1(self, "chunk[]")
        self.seekByte(self['offset[2]'].value)
        yield AVCHDGenericChunk(self, "chunk[]")
        self.seekByte(self['offset[3]'].value)
        yield AVCHDGenericChunk(self, "chunk[]")
        self.seekByte(self['offset[4]'].value)
        yield AVCHDChunkWithHeader(self, "chunk[]")

View file

@ -718,7 +718,7 @@ class Atom(FieldSet):
# ipmc: IPMP control
"moof": (AtomList, "moof", "movie fragment"),
"mfhd": (MovieFragmentHeader, "mfhd", "movie fragment header"),
# traf: track fragment
"traf": (AtomList, "traf", "track fragment"),
# tfhd: track fragment header
# trun: track fragment run
# sdtp: independent and disposable samples

View file

@ -11,10 +11,38 @@ Creation date: 13 january 2007
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet, ParserError, MissingField,
UInt8, Enum, Bit, Bits, RawBytes)
UInt8, Enum, Bit, Bits, RawBytes, RawBits)
from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
class AdaptationField(FieldSet):
    """Adaptation field of a transport stream packet.

    Layout (ISO/IEC 13818-1): a length byte; then, only when the length is
    non-zero, eight flag bits, the optional PCR / OPCR / splice countdown
    selected by those flags, and the rest of the declared length as raw
    stuffing bits. Private data and extension contents are not decoded and
    end up in the stuffing.
    """

    def createFields(self):
        """Yield the length, the flags and every optional field they announce."""
        yield UInt8(self, "length")
        if not self['length'].value:
            # A zero length means the field is this single byte only: there
            # is no flag byte, so stop before consuming payload data.
            return

        flag_names = (
            "discontinuity_indicator",
            "random_access_indicator",
            "es_prio_indicator",
            "has_pcr",
            "has_opcr",
            "has_splice_point",
            "private_data",
            "has_extension",
        )
        for flag in flag_names:
            yield Bit(self, flag)

        # PCR and OPCR are 48 bits each: 33-bit base, 6 reserved bits, 9-bit
        # extension. The reserved bits must be consumed to keep later fields
        # aligned.
        if self['has_pcr'].value:
            yield Bits(self, "pcr_base", 33)
            yield Bits(self, "pcr_reserved", 6)
            yield Bits(self, "pcr_ext", 9)

        if self['has_opcr'].value:
            yield Bits(self, "opcr_base", 33)
            yield Bits(self, "opcr_reserved", 6)
            yield Bits(self, "opcr_ext", 9)

        if self['has_splice_point'].value:
            yield Bits(self, "splice_countdown", 8)

        # Whatever remains of the declared length (plus the length byte).
        stuff_len = ((self['length'].value+1)*8) - self.current_size
        if stuff_len:
            yield RawBits(self, 'stuffing', stuff_len)
class Packet(FieldSet):
def __init__(self, *args):
FieldSet.__init__(self, *args)
@ -46,7 +74,11 @@ class Packet(FieldSet):
yield Bit(self, "has_adaptation")
yield Bit(self, "has_payload")
yield Bits(self, "counter", 4)
yield RawBytes(self, "payload", 184)
if self["has_adaptation"].value:
yield AdaptationField(self, "adaptation_field")
if self["has_payload"].value:
yield RawBytes(self, "payload", 188-(self.current_size/8))
if self["has_error"].value:
yield RawBytes(self, "error_correction", 16)
@ -54,6 +86,8 @@ class Packet(FieldSet):
text = "Packet: PID %s" % self["pid"].display
if self["payload_unit_start"].value:
text += ", start of payload"
if self["has_adaptation"].value:
text += ", with adaptation field"
return text
def isValid(self):
@ -96,7 +130,7 @@ class MPEG_TS(Parser):
sync = self.stream.searchBytes("\x47", self.current_size, self.current_size+204*8)
if sync is None:
raise ParserError("Unable to find synchronization byte")
elif sync:
elif sync-self.current_size:
yield RawBytes(self, "incomplete_packet[]", (sync-self.current_size)//8)
yield Packet(self, "packet[]")