JackDandy 980e05cc99 Change Hachoir can't support PY2, so backport their PY3 code to prevent a need for system-dependent external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong field order when parsing little-endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace with
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

403 lines
12 KiB

"""
Java Object Serialization Stream parser.

Author: Robert Xiao
Creation Date: Jun 18, 2015
"""
from hachoir.parser import Parser
from hachoir.field import (
ParserError, FieldSet, StaticFieldSet,
Enum, RawBytes, String, PascalString16, Float32, Float64,
Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
Bit, NullBits)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal
from import paddingSize
from .java import parse_field_descriptor
class LongString(FieldSet):
    """String prefixed by a signed 64-bit byte count.

    Fields:
        length: number of bytes occupied by the string data.
        value: the string data, decoded as UTF-8.
    """

    def createFields(self):
        yield Int64(self, "length")
        # The string has no terminator: its size comes from the preceding
        # length field, so it must be passed explicitly as the byte count.
        # NOTE(review): a zero length may need special-casing -- confirm how
        # String handles a byte count of 0.
        yield String(self, "value", self["length"].value, charset="UTF-8")

    def createDescription(self):
        """Reuse the description of the inner string field."""
        return self['value'].description

    def createValue(self):
        """Return the decoded string held by the inner field."""
        return self['value'].value
class UTF16Character(UInt16):
    """16-bit unsigned value displayed as the repr() of its character."""

    def createDisplay(self):
        character = unichr(self.value)
        return repr(character)
class JavaBool(UInt8):
    """One-byte boolean: the value is True for any non-zero byte."""

    def createValue(self):
        # Delegate the raw read to UInt8, then collapse it to a bool.
        return UInt8.createValue(self) != 0
class SerializedNull(FieldSet):
    """Null marker: a lone typecode byte whose value is None."""

    def createFields(self):
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)

    def createValue(self):
        """A null entry carries no value."""
        return None

    def createDisplay(self):
        """Shown as the literal text 'null'."""
        return 'null'
class SerializedReference(FieldSet):
    """Back-reference to a field that was registered in the handle table.

    Fields:
        typecode: record type byte.
        handle: key into the root parser's ``handles`` dictionary.
    """

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield Int32(self, "handle")

    # Must be a property: this class and its callers read ``.referent``
    # as an attribute (e.g. ``self.referent.value``), never call it.
    @property
    def referent(self):
        """Field stored in the root handle table under this handle.

        Raises KeyError when the handle has not been registered.
        """
        return self.root.handles[self['handle'].value]

    def createValue(self):
        """Value of the referenced field."""
        return self.referent.value

    def createDisplay(self):
        """Display of the referenced field, prefixed with an arrow."""
        return "-> " + str(self.referent.display)
class FieldDesc(FieldSet):
    """Description of one field of a serialized class.

    Fields:
        typecode: single-character type code.
        fieldName: name of the field (16-bit length prefixed, UTF-8).
        className: only present for '[' and 'L' typecodes; holds the type
            descriptor as serialized content.
    """

    def createFields(self):
        yield String(self, "typecode", 1)
        yield PascalString16(self, "fieldName", charset="UTF-8")
        if self['typecode'].value in ('[', 'L'):
            yield SerializedContent(self, "className")

    # The three accessors below are read as plain attributes throughout
    # the file (``fieldDesc.fieldName``, ``self.typeName``, ...), so they
    # have to be properties rather than methods.
    @property
    def typeDescriptor(self):
        """Type descriptor: the className value for '[' / 'L', else the typecode."""
        typecode = self['typecode'].value
        if typecode in ('[', 'L'):
            return self['className'].value
        return typecode

    @property
    def typeName(self):
        """Type descriptor converted by parse_field_descriptor()."""
        return parse_field_descriptor(self.typeDescriptor)

    @property
    def fieldName(self):
        """Name of the described field."""
        return self['fieldName'].value

    def createValue(self):
        """Return the pair (typeDescriptor, fieldName)."""
        return self.typeDescriptor, self.fieldName

    def createDisplay(self):
        """Return '<typeName> <fieldName>'."""
        return '%s %s' % (self.typeName, self.fieldName)
class ClassAnnotation(FieldSet):
    """Class annotation section; only a single marker byte is read."""

    def createFields(self):
        # NOTE(review): presumably annotation contents may precede this
        # marker; only the marker byte itself is parsed here -- confirm.
        marker = UInt8(self, "endBlockData")
        yield Enum(marker, TYPECODE_NAMES)
class SerializedClassDesc(FieldSet):
    """Class descriptor record.

    Fields, in stream order: typecode, className, serialVersionUID, eight
    flag bits, fieldDesc_count, that many fieldDesc entries, the class
    annotation and finally the super class descriptor.
    """

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield PascalString16(self, "className", charset="UTF-8")
        yield Int64(self, "serialVersionUID")
        # Register the descriptor so later reference records can resolve
        # it. The stream grammar assigns the handle right after the
        # serialVersionUID, before flags and fields.
        self.root.newHandle(self)
        yield NullBits(self, "classDescFlags_reserved", 3)
        yield Bit(self, "classDescFlags_enum", "Is the class an Enum?")
        yield Bit(self, "classDescFlags_block_data",
                  "Was the externalizable's block data written using stream version 2?")
        yield Bit(self, "classDescFlags_externalizable",
                  "Does the class implement java.io.Externalizable?")
        yield Bit(self, "classDescFlags_serializable",
                  "Does the class implement java.io.Serializable?")
        yield Bit(self, "classDescFlags_write_method", "Does the class have a writeObject method?")
        yield Int16(self, "fieldDesc_count")
        for _ in xrange(self['fieldDesc_count'].value):
            yield FieldDesc(self, "fieldDesc[]")
        yield ClassAnnotation(self, "classAnnotation")
        yield SerializedContent(self, "superClassDesc")

    # Read as an attribute by callers (``classDesc.className``), so it
    # must be a property.
    @property
    def className(self):
        """Name of the described class."""
        return self['className'].value
class ObjectValue(FieldSet):
    """Field values of one serialized object, super classes first."""

    def gen_values(self, classDesc):
        """Yield one value field per field description of classDesc.

        The super class chain is walked recursively so inherited fields
        come before the fields of the class itself. A reference is
        resolved to its referent first; a null descriptor ends the chain.
        """
        if isinstance(classDesc, SerializedReference):
            classDesc = classDesc.referent
        if isinstance(classDesc, SerializedNull):
            # End of the super class chain: a null descriptor has neither
            # a superClassDesc nor fieldDesc entries to descend into.
            return
        # TODO: proxy class desc
        for field in self.gen_values(classDesc['superClassDesc']):
            yield field
        for fieldDesc in classDesc.array('fieldDesc'):
            tc = fieldDesc['typecode'].value
            klass = VALUE_CLASS_MAP[tc]
            field = klass(self, "field[]",
                          description="%s.%s" % (classDesc.className, fieldDesc.fieldName))
            # Remember the field name for the owner's display.
            field.fieldName = fieldDesc.fieldName
            yield field

    def createFields(self):
        for field in self.gen_values(self.parent.classDesc):
            yield field
class SerializedObject(FieldSet):
    """Object record: typecode, class descriptor, then the field values."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # Register the object before its values are parsed so that
        # references inside the values can point back to it.
        self.root.newHandle(self)
        yield ObjectValue(self, "value")

    # Read as an attribute (``self.parent.classDesc``,
    # ``self.classDesc.className``), so it must be a property.
    @property
    def classDesc(self):
        """Class descriptor of the object, with a reference resolved."""
        classDesc = self['classDesc']
        if isinstance(classDesc, SerializedReference):
            classDesc = classDesc.referent
        return classDesc

    def createValue(self):
        """Tuple of the values of all 'field' entries."""
        return tuple(field.value for field in self['value'].array('field'))

    def createDisplay(self):
        """Return 'ClassName(name=value, ...)'.

        References to anything but strings are shown as a '#<REF:...>'
        placeholder instead of being expanded.
        """
        out = []
        for field in self['value'].array('field'):
            if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString):
                # Avoid recursive references
                out.append('%s=#<REF:%s>' % (field.fieldName, field.referent.classDesc.className))
            else:
                out.append('%s=%s' % (field.fieldName, field.display))
        return '%s(%s)' % (self.classDesc.className, ', '.join(out))
class SerializedString(FieldSet):
    """String record: typecode followed by a 16-bit length prefixed string."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        # Register the string so later reference records can resolve it.
        self.root.newHandle(self)
        yield PascalString16(self, "value", charset="UTF-8")

    def createValue(self):
        """Decoded text of the string."""
        return self['value'].value

    def createDisplay(self):
        """Display of the inner string field."""
        return self['value'].display
class SerializedArray(FieldSet):
    """Array record: typecode, class descriptor, element count, elements."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # Register the array before its elements are parsed so that
        # references inside the elements can point back to it.
        self.root.newHandle(self)
        yield Int32(self, "size")
        klass = VALUE_CLASS_MAP[self.classDesc.className[1]]  # className is [<elementType>
        for _ in xrange(self['size'].value):
            yield klass(self, "value[]")

    # Read as an attribute (``self.classDesc.className``), so it must be
    # a property.
    @property
    def classDesc(self):
        """Class descriptor of the array, with a reference resolved."""
        classDesc = self['classDesc']
        if isinstance(classDesc, SerializedReference):
            classDesc = classDesc.referent
        return classDesc

    def createValue(self):
        """List of the element values."""
        return [v.value for v in self.array('value')]

    def createDisplay(self):
        """Return '[elem, elem, ...]'.

        References to anything but strings are shown as a '#<REF:...>'
        placeholder instead of being expanded.
        """
        out = []
        for field in self.array('value'):
            if isinstance(field, SerializedReference) and not isinstance(field.referent, SerializedString):
                # Avoid recursive references
                out.append('#<REF:%s>' % (field.referent.classDesc.className,))
            else:
                out.append('%s' % (field.display,))
        return '[%s]' % ', '.join(out)
class SerializedClass(FieldSet):
    """Class record: typecode followed by a class descriptor."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # Register the record so later reference records can resolve it.
        self.root.newHandle(self)
class BlockData(FieldSet):
    """Block data record; only the typecode byte is parsed."""

    def createFields(self):
        # NOTE(review): presumably a size and payload follow the typecode;
        # they are not consumed here -- confirm against the stream format.
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
class StreamReset(FieldSet):
    """Reset record: typecode byte that clears the handle table."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        # A reset discards every handle assigned so far; numbering starts
        # over for the records that follow.
        self.root.resetHandles()
class BlockDataLong(FieldSet):
    """Long block data record; only the typecode byte is parsed."""

    def createFields(self):
        # NOTE(review): presumably a size and payload follow the typecode;
        # they are not consumed here -- confirm against the stream format.
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
class SerializedException(FieldSet):
    """Exception record: typecode followed by the serialized exception object."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        # The stream grammar resets the handle table both before and
        # after the exception object.
        self.root.resetHandles()
        # The object is ordinary serialized content, dispatched on its
        # typecode like every other nested record in this file.
        yield SerializedContent(self, "object")
        self.root.resetHandles()
class SerializedLongString(FieldSet):
    """Long string record: typecode followed by a LongString."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        # Register the string so later reference records can resolve it.
        self.root.newHandle(self)
        yield LongString(self, "value")

    def createValue(self):
        """Decoded text of the string."""
        return self['value'].value
class SerializedProxyClassDesc(FieldSet):
    """Proxy class descriptor record; only the typecode byte is parsed."""

    def createFields(self):
        # NOTE(review): the body of the descriptor is not parsed here --
        # confirm what follows the typecode in the stream format.
        typecode = UInt8(self, "typecode")
        yield Enum(typecode, TYPECODE_NAMES)
class SerializedEnum(FieldSet):
    """Enum constant record: typecode, class descriptor, constant name."""

    def createFields(self):
        yield Enum(UInt8(self, "typecode"), TYPECODE_NAMES)
        yield SerializedContent(self, "classDesc")
        # Register the constant so later reference records can resolve
        # it; the handle is assigned before the constant name is read.
        self.root.newHandle(self)
        yield SerializedContent(self, "enumConstantName")

    # Read as an attribute (``self.classDesc.className``), so it must be
    # a property.
    @property
    def classDesc(self):
        """Class descriptor of the enum, with a reference resolved."""
        classDesc = self['classDesc']
        if isinstance(classDesc, SerializedReference):
            classDesc = classDesc.referent
        return classDesc

    def createValue(self):
        """Name of the enum constant."""
        return self['enumConstantName'].value

    def createDisplay(self):
        """Return 'ClassName.CONSTANT'."""
        return '%s.%s' % (self.classDesc.className, self.value)
# Display names for the record typecode byte, used by every Enum(UInt8(...))
# field in this module. Covers the full 0x70-0x7E range so that each typecode
# dispatched through TYPECODE_TABLE (and the end-of-block marker, 0x78)
# is shown by name rather than as a bare number.
TYPECODE_NAMES = {
    0x70: "NULL",
    0x71: "REFERENCE",
    0x72: "CLASSDESC",
    0x73: "OBJECT",
    0x74: "STRING",
    0x75: "ARRAY",
    0x76: "CLASS",
    0x77: "BLOCKDATA",
    0x78: "ENDBLOCKDATA",
    0x79: "RESET",
    0x7A: "BLOCKDATALONG",
    0x7B: "EXCEPTION",
    0x7C: "LONGSTRING",
    0x7D: "PROXYCLASSDESC",
    0x7E: "ENUM",
}
# Maps a record typecode byte to the field set class that parses the
# record; consulted by SerializedContent().
TYPECODE_TABLE = {
    0x70: SerializedNull,
    0x71: SerializedReference,
    0x72: SerializedClassDesc,
    0x73: SerializedObject,
    0x74: SerializedString,
    0x75: SerializedArray,
    0x76: SerializedClass,
    0x77: BlockData,
    # 0x78: EndBlockData,
    0x79: StreamReset,
    0x7a: BlockDataLong,
    0x7b: SerializedException,
    0x7c: SerializedLongString,
    0x7d: SerializedProxyClassDesc,
    0x7e: SerializedEnum,
}
def SerializedContent(parent, name, description=None):
    """Create the field matching the typecode of the next record.

    Acts as a field factory: it has the (parent, name, description)
    signature of a field class, so it can be yielded from createFields()
    and stored in VALUE_CLASS_MAP like one.

    Raises ParserError when the typecode has no entry in TYPECODE_TABLE.
    """
    # Peek at the next byte without consuming it: the record class that
    # gets instantiated reads the typecode again as its own first field.
    address = parent.absolute_address + parent.current_size
    tc = parent.stream.readBits(address, 8, parent.endian)
    klass = TYPECODE_TABLE.get(tc, None)
    if klass is None:
        raise ParserError("Unknown typecode 0x%02x" % tc)
    return klass(parent, name, description)
# Maps the single-character typecode of a FieldDesc (or the element type
# of an array class name) to the field class that reads one such value.
VALUE_CLASS_MAP = {
    'B': Int8,
    'C': UTF16Character,
    'D': Float64,
    'F': Float32,
    'I': Int32,
    'J': Int64,
    'S': Int16,
    'Z': JavaBool,
    '[': SerializedContent,  # SerializedArray or reference
    'L': SerializedContent,  # SerializedObject or reference
}
class JavaSerializedFile(Parser):
    """Parser for serialized Java object streams.

    The stream is a 16-bit magic, a 16-bit version, then records until
    end of file. The parser also owns the handle table that reference
    records use to point back at earlier records.
    """
    endian = BIG_ENDIAN

    MAGIC = 0xaced
    # Stream versions accepted by validate(); 5 is the version defined by
    # the serialization stream protocol.
    KNOWN_VERSIONS = (5,)

    PARSER_TAGS = {
        "id": "java_serialized",
        "category": "program",
        "file_ext": ("ser",),
        "mime": (u"application/java-serialized-object",),
        "min_size": 4 * 4,
        "magic": (("\xac\xed", 0),),
        "description": "Serialized Java object",
    }

    def validate(self):
        """Return True for a supported stream, else an error message."""
        if self["magic"].value != self.MAGIC:
            return "Wrong magic signature!"
        if self["version"].value not in self.KNOWN_VERSIONS:
            return "Unknown version (%d)" % self["version"].value
        return True

    def createDescription(self):
        return "Serialized Java object, version %s" % self["version"].value

    def resetHandles(self):
        """Empty the handle table and restart handle numbering."""
        self.handles = {}
        self.nextHandleNum = 0x7E0000

    def newHandle(self, obj):
        """Store obj under the next free handle number."""
        self.handles[self.nextHandleNum] = obj
        self.nextHandleNum += 1

    def createFields(self):
        # The handle table must exist before any record tries to
        # register itself or resolve a reference.
        self.resetHandles()
        yield textHandler(UInt16(self, "magic", "Java serialized object signature"),
                          hexadecimal)
        yield UInt16(self, "version", "Stream version")
        while not self.eof:
            yield SerializedContent(self, "object[]")