Merge branch 'feature/UpdateHachoir3' into dev

This commit is contained in:
JackDandy 2023-10-08 00:06:15 +01:00
commit 9962c1a112
124 changed files with 1639 additions and 482 deletions

View file

@ -2,6 +2,7 @@
* Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1) * Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1)
* Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495) * Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495)
* Update hachoir 3.1.2 (f739b43) to 3.2.0 (38d759f)
### 3.30.1 (2023-10-02 22:50:00 UTC) ### 3.30.1 (2023-10-02 22:50:00 UTC)

View file

@ -1,2 +1,2 @@
VERSION = (3, 1, 2) VERSION = (3, 2, 0)
__version__ = ".".join(map(str, VERSION)) __version__ = ".".join(map(str, VERSION))

View file

@ -4,7 +4,7 @@ string, number, hexadecimal, etc.
""" """
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from struct import calcsize, unpack, error as struct_error from struct import calcsize, error as struct_error
def swap16(value): def swap16(value):
@ -292,20 +292,11 @@ def str2long(data, endian):
>>> str2long(b"\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d >>> str2long(b"\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True True
""" """
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits if endian == LITTLE_ENDIAN:
try: return int.from_bytes(data, "little")
return unpack(_struct_format[endian][len(data)], data)[0] elif endian == BIG_ENDIAN:
except KeyError: return int.from_bytes(data, "big")
pass elif endian == MIDDLE_ENDIAN:
return int.from_bytes(strswapmid(data), "big")
assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) else:
shift = 0 raise ValueError("Invalid endian %s" % (endian,))
value = 0
if endian is BIG_ENDIAN:
data = reversed(data)
elif endian is MIDDLE_ENDIAN:
data = reversed(strswapmid(data))
for byte in data:
value += (byte << shift)
shift += 8
return value

View file

@ -168,7 +168,7 @@ class Dict(object):
_index = index _index = index
if index < 0: if index < 0:
index += len(self._value_list) index += len(self._value_list)
if not(0 <= index <= len(self._value_list)): if not (0 <= index <= len(self._value_list)):
raise IndexError("Insert error: index '%s' is invalid" % _index) raise IndexError("Insert error: index '%s' is invalid" % _index)
for item_key, item_index in self._index.items(): for item_key, item_index in self._index.items():
if item_index >= index: if item_index >= index:

View file

@ -493,7 +493,7 @@ def timestampUNIX(value):
""" """
if not isinstance(value, (float, int)): if not isinstance(value, (float, int)):
raise TypeError("timestampUNIX(): an integer or float is required") raise TypeError("timestampUNIX(): an integer or float is required")
if not(0 <= value <= 2147483647): if not (0 <= value <= 2147483647):
raise ValueError("timestampUNIX(): value have to be in 0..2147483647") raise ValueError("timestampUNIX(): value have to be in 0..2147483647")
return UNIX_TIMESTAMP_T0 + timedelta(seconds=value) return UNIX_TIMESTAMP_T0 + timedelta(seconds=value)
@ -514,7 +514,7 @@ def timestampMac32(value):
""" """
if not isinstance(value, (float, int)): if not isinstance(value, (float, int)):
raise TypeError("an integer or float is required") raise TypeError("an integer or float is required")
if not(0 <= value <= 4294967295): if not (0 <= value <= 4294967295):
return "invalid Mac timestamp (%s)" % value return "invalid Mac timestamp (%s)" % value
return MAC_TIMESTAMP_T0 + timedelta(seconds=value) return MAC_TIMESTAMP_T0 + timedelta(seconds=value)

View file

@ -20,7 +20,7 @@ class RawBytes(Field):
def __init__(self, parent, name, length, description="Raw data"): def __init__(self, parent, name, length, description="Raw data"):
assert issubclass(parent.__class__, Field) assert issubclass(parent.__class__, Field)
if not(0 < length <= MAX_LENGTH): if not (0 < length <= MAX_LENGTH):
raise FieldError("Invalid RawBytes length (%s)!" % length) raise FieldError("Invalid RawBytes length (%s)!" % length)
Field.__init__(self, parent, name, length * 8, description) Field.__init__(self, parent, name, length * 8, description)
self._display = None self._display = None

View file

@ -117,7 +117,7 @@ class GenericFieldSet(BasicFieldSet):
_getSize, doc="Size in bits, may create all fields to get size") _getSize, doc="Size in bits, may create all fields to get size")
def _getCurrentSize(self): def _getCurrentSize(self):
assert not(self.done) assert not (self.done)
return self._current_size return self._current_size
current_size = property(_getCurrentSize) current_size = property(_getCurrentSize)

View file

@ -23,7 +23,7 @@ class PaddingBits(Bits):
self._display_pattern = self.checkPattern() self._display_pattern = self.checkPattern()
def checkPattern(self): def checkPattern(self):
if not(config.check_padding_pattern): if not (config.check_padding_pattern):
return False return False
if self.pattern != 0: if self.pattern != 0:
return False return False
@ -72,7 +72,7 @@ class PaddingBytes(Bytes):
self._display_pattern = self.checkPattern() self._display_pattern = self.checkPattern()
def checkPattern(self): def checkPattern(self):
if not(config.check_padding_pattern): if not (config.check_padding_pattern):
return False return False
if self.pattern is None: if self.pattern is None:
return False return False

View file

@ -244,7 +244,7 @@ class GenericString(Bytes):
and err.end == len(text) \ and err.end == len(text) \
and self._charset == "UTF-16-LE": and self._charset == "UTF-16-LE":
try: try:
text = str(text + "\0", self._charset, "strict") text = str(text + b"\0", self._charset, "strict")
self.warning( self.warning(
"Fix truncated %s string: add missing nul byte" % self._charset) "Fix truncated %s string: add missing nul byte" % self._charset)
return text return text

View file

@ -61,7 +61,7 @@ class TimeDateMSDOS32(FieldSet):
def createValue(self): def createValue(self):
return datetime( return datetime(
1980 + self["year"].value, self["month"].value, self["day"].value, 1980 + self["year"].value, self["month"].value or 1, self["day"].value or 1,
self["hour"].value, self["minute"].value, 2 * self["second"].value) self["hour"].value, self["minute"].value, 2 * self["second"].value)
def createDisplay(self): def createDisplay(self):

View file

@ -7,7 +7,7 @@ class GenericVector(FieldSet):
# Sanity checks # Sanity checks
assert issubclass(item_class, Field) assert issubclass(item_class, Field)
assert isinstance(item_class.static_size, int) assert isinstance(item_class.static_size, int)
if not(0 < nb_items): if not (0 < nb_items):
raise ParserError('Unable to create empty vector "%s" in %s' raise ParserError('Unable to create empty vector "%s" in %s'
% (name, parent.path)) % (name, parent.path))
size = nb_items * item_class.static_size size = nb_items * item_class.static_size

View file

@ -85,7 +85,7 @@ def processFile(values, filename,
with parser: with parser:
# Extract metadata # Extract metadata
extract_metadata = not(values.mime or values.type) extract_metadata = not (values.mime or values.type)
if extract_metadata: if extract_metadata:
try: try:
metadata = extractMetadata(parser, values.quality) metadata = extractMetadata(parser, values.quality)
@ -124,7 +124,7 @@ def processFile(values, filename,
def processFiles(values, filenames, display=True): def processFiles(values, filenames, display=True):
human = not(values.raw) human = not values.raw
ok = True ok = True
priority = int(values.level) * 100 + 99 priority = int(values.level) * 100 + 99
display_filename = (1 < len(filenames)) display_filename = (1 < len(filenames))

View file

@ -1,5 +1,6 @@
from hachoir.parser.archive.ace import AceFile # noqa from hachoir.parser.archive.ace import AceFile # noqa
from hachoir.parser.archive.ar import ArchiveFile # noqa from hachoir.parser.archive.ar import ArchiveFile # noqa
from hachoir.parser.archive.arj import ArjParser # noqa
from hachoir.parser.archive.bomstore import BomFile # noqa from hachoir.parser.archive.bomstore import BomFile # noqa
from hachoir.parser.archive.bzip2_parser import Bzip2Parser # noqa from hachoir.parser.archive.bzip2_parser import Bzip2Parser # noqa
from hachoir.parser.archive.cab import CabFile # noqa from hachoir.parser.archive.cab import CabFile # noqa

View file

@ -0,0 +1,155 @@
"""
ARJ archive file parser
https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt
"""
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.field import (FieldSet, ParserError,
CString, Enum, RawBytes,
UInt8, UInt16, UInt32,
Bytes)
from hachoir.parser import Parser
# Names for the values of the "host_os" header byte, indexed by value.
HOST_OS = dict(enumerate((
    "MSDOS",
    "PRIMOS",
    "UNIX",
    "AMIGA",
    "MACDOS",
    "OS/2",
    "APPLE GS",
    "ATARI ST",
    "NEXT",
    "VAX VMS",
    "WIN95",
    "WIN32",
)))

# Names for the values of the "file_type" header byte, indexed by value.
FILE_TYPE = dict(enumerate((
    "BINARY",
    "TEXT",
    "COMMENT",
    "DIRECTORY",
    "VOLUME",
    "CHAPTER",
)))

# Two-byte signature expected at the start of every ARJ header.
MAGIC = bytes((0x60, 0xEA))
class BaseBlock(FieldSet):
    """Layout shared by the ARJ main header and the per-file headers.

    Subclasses build ``createFields`` from ``_header_start_fields()``,
    their own fixed fields, and ``_header_end_fields()``.
    """

    @property
    def isEmpty(self):
        """Return True when ``basic_header_size`` is zero."""
        return self["basic_header_size"].value == 0

    def _header_start_fields(self):
        """Yield the magic, the basic header size and the leading fixed bytes.

        Raises:
            ParserError: if the two magic bytes differ from ``MAGIC``.
        """
        yield Bytes(self, "magic", len(MAGIC))
        if self["magic"].value != MAGIC:
            raise ParserError("Wrong header magic")
        yield UInt16(self, "basic_header_size", "zero if end of archive")
        # A zero size means no further header bytes are read for this block.
        if not self.isEmpty:
            yield UInt8(self, "first_hdr_size")
            yield UInt8(self, "archiver_version")
            yield UInt8(self, "min_archiver_version")
            yield Enum(UInt8(self, "host_os"), HOST_OS)
            yield UInt8(self, "arj_flags")

    def _header_end_fields(self):
        """Yield the header tail: names, header CRC and extended headers.

        Must only be used after ``first_hdr_size`` has been yielded, since
        its address and value locate the file name.
        """
        yield UInt8(self, "last_chapter")

        # The file name is looked for at (byte offset of first_hdr_size) +
        # (value of first_hdr_size). Bytes between the last fixed field and
        # that offset are exposed as raw "reserved2" data.
        fhs = self["first_hdr_size"]
        name_position = fhs.address // 8 + fhs.value
        current_position = self["last_chapter"].address // 8 + 1
        if name_position > current_position:
            yield RawBytes(self, "reserved2", name_position - current_position)

        yield CString(self, "filename", "File name", charset="ASCII")
        yield CString(self, "comment", "Comment", charset="ASCII")
        yield UInt32(self, "crc", "Header CRC")

        # Extended headers repeat until a zero size is read. The data field
        # is indexed like its size/crc siblings so that a second extended
        # header does not reuse the name "extended_header_data".
        # NOTE(review): hachoir is expected to reject duplicate field names
        # within one field set -- confirm.
        i = 0
        while not self.eof:
            size_field = UInt16(self, f"extended_header_size_{i}")
            yield size_field
            cur_size = size_field.value
            if cur_size == 0:
                break
            yield RawBytes(self, f"extended_header_data_{i}", cur_size)
            yield UInt32(self, f"extended_header_crc_{i}")
            i += 1

    def validate(self):
        """Return True if the stream starts with ``MAGIC``, else an error string."""
        if self.stream.readBytes(0, 2) != MAGIC:
            return "Invalid magic"
        return True
class Header(BaseBlock):
    """Main header of an ARJ archive."""

    def createFields(self):
        """Yield the shared leading fields, the main-header fields, then the shared tail."""
        yield from self._header_start_fields()
        if self.isEmpty:
            # Zero basic_header_size: nothing follows the size field.
            return

        yield UInt8(self, "security_version")
        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
        yield UInt8(self, "reserved")
        for dword_name in (
            "date_time_created",
            "date_time_modified",
            "archive_size",
            "security_envelope_file_position",
        ):
            yield UInt32(self, dword_name)
        for word_name in ("filespec_position", "security_envelope_data_len"):
            yield UInt16(self, word_name)
        yield UInt8(self, "encryption_version")
        yield from self._header_end_fields()

    def createDescription(self):
        """Describe the header, naming the archive unless the header is empty."""
        if not self.isEmpty:
            return "Main header of '%s'" % self["filename"].value
        return "Empty main header"
class Block(BaseBlock):
    """Per-file header followed by that file's compressed data."""

    def createFields(self):
        """Yield the file header fields and, when non-empty, the compressed payload."""
        yield from self._header_start_fields()
        if self.isEmpty:
            # Zero basic_header_size: nothing follows the size field.
            return

        yield UInt8(self, "method")
        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
        yield UInt8(self, "reserved")
        for dword_name in (
            "date_time_modified",
            "compressed_size",
            "original_size",
            "original_file_crc",
        ):
            yield UInt32(self, dword_name)
        for word_name in ("filespec_position", "file_access_mode"):
            yield UInt16(self, word_name)
        yield UInt8(self, "first_chapter")
        yield from self._header_end_fields()

        # The payload length comes from the header; zero means no data field.
        payload_length = self["compressed_size"].value
        if payload_length > 0:
            yield RawBytes(self, "compressed_data", payload_length)

    def createDescription(self):
        """Describe the block, naming the file unless the header is empty."""
        if not self.isEmpty:
            return "File header of '%s'" % self["filename"].value
        return "Empty file header"
class ArjParser(Parser):
    """Parser for ARJ archives: one main header followed by file blocks."""

    endian = LITTLE_ENDIAN
    PARSER_TAGS = {
        "id": "arj",
        "category": "archive",
        "file_ext": ("arj",),
        "min_size": 4 * 8,
        "description": "ARJ archive"
    }

    def validate(self):
        """Return True if the stream starts with ``MAGIC``, else an error string."""
        signature = self.stream.readBytes(0, 2)
        if signature == MAGIC:
            return True
        return "Invalid magic"

    def createFields(self):
        """Yield the main header, then file blocks up to the first empty one."""
        main_header = Header(self, "header")
        yield main_header
        if main_header.isEmpty:
            return

        while not self.eof:
            file_block = Block(self, "file_header[]")
            yield file_block
            # An empty block stops the iteration.
            if file_block.isEmpty:
                return

View file

@ -218,7 +218,7 @@ class Bzip2Parser(Parser):
def validate(self): def validate(self):
if self.stream.readBytes(0, 3) != b'BZh': if self.stream.readBytes(0, 3) != b'BZh':
return "Wrong file signature" return "Wrong file signature"
if not("1" <= self["blocksize"].value <= "9"): if not ("1" <= self["blocksize"].value <= "9"):
return "Wrong blocksize" return "Wrong blocksize"
return True return True

View file

@ -13,6 +13,7 @@ from hachoir.field import (FieldSet,
from hachoir.core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN from hachoir.core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN
from hachoir.core.tools import paddingSize from hachoir.core.tools import paddingSize
from hachoir.parser.archive.zlib import build_tree, HuffmanCode, extend_data from hachoir.parser.archive.zlib import build_tree, HuffmanCode, extend_data
import struct
class LZXPreTreeEncodedTree(FieldSet): class LZXPreTreeEncodedTree(FieldSet):
@ -146,6 +147,8 @@ class LZXBlock(FieldSet):
self.window_size = self.WINDOW_SIZE[self.compression_level] self.window_size = self.WINDOW_SIZE[self.compression_level]
self.block_type = self["block_type"].value self.block_type = self["block_type"].value
curlen = len(self.parent.uncompressed_data) curlen = len(self.parent.uncompressed_data)
intel_started = False # Do we perform Intel jump fixups on this block?
if self.block_type in (1, 2): # Verbatim or aligned offset block if self.block_type in (1, 2): # Verbatim or aligned offset block
if self.block_type == 2: if self.block_type == 2:
for i in range(8): for i in range(8):
@ -156,6 +159,8 @@ class LZXBlock(FieldSet):
yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8) yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
main_tree = build_tree( main_tree = build_tree(
self["main_tree_start"].lengths + self["main_tree_rest"].lengths) self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
if self["main_tree_start"].lengths[0xE8]:
intel_started = True
yield LZXPreTreeEncodedTree(self, "length_tree", 249) yield LZXPreTreeEncodedTree(self, "length_tree", 249)
length_tree = build_tree(self["length_tree"].lengths) length_tree = build_tree(self["length_tree"].lengths)
current_decoded_size = 0 current_decoded_size = 0
@ -169,7 +174,7 @@ class LZXBlock(FieldSet):
field._description = "Literal value %r" % chr( field._description = "Literal value %r" % chr(
field.realvalue) field.realvalue)
current_decoded_size += 1 current_decoded_size += 1
self.parent.uncompressed_data += chr(field.realvalue) self.parent._lzx_window.append(field.realvalue)
yield field yield field
continue continue
position_header, length_header = divmod( position_header, length_header = divmod(
@ -243,8 +248,7 @@ class LZXBlock(FieldSet):
self.parent.r2 = self.parent.r1 self.parent.r2 = self.parent.r1
self.parent.r1 = self.parent.r0 self.parent.r1 = self.parent.r0
self.parent.r0 = position self.parent.r0 = position
self.parent.uncompressed_data = extend_data( extend_data(self.parent._lzx_window, length, position)
self.parent.uncompressed_data, length, position)
current_decoded_size += length current_decoded_size += length
elif self.block_type == 3: # Uncompressed block elif self.block_type == 3: # Uncompressed block
padding = paddingSize(self.address + self.current_size, 16) padding = paddingSize(self.address + self.current_size, 16)
@ -253,6 +257,7 @@ class LZXBlock(FieldSet):
else: else:
yield PaddingBits(self, "padding[]", 16) yield PaddingBits(self, "padding[]", 16)
self.endian = LITTLE_ENDIAN self.endian = LITTLE_ENDIAN
intel_started = True # apparently intel fixup may be needed on uncompressed blocks?
yield UInt32(self, "r[]", "New value of R0") yield UInt32(self, "r[]", "New value of R0")
yield UInt32(self, "r[]", "New value of R1") yield UInt32(self, "r[]", "New value of R1")
yield UInt32(self, "r[]", "New value of R2") yield UInt32(self, "r[]", "New value of R2")
@ -260,18 +265,50 @@ class LZXBlock(FieldSet):
self.parent.r1 = self["r[1]"].value self.parent.r1 = self["r[1]"].value
self.parent.r2 = self["r[2]"].value self.parent.r2 = self["r[2]"].value
yield RawBytes(self, "data", self.uncompressed_size) yield RawBytes(self, "data", self.uncompressed_size)
self.parent.uncompressed_data += self["data"].value self.parent._lzx_window += self["data"].value
if self["block_size"].value % 2: if self["block_size"].value % 2:
yield PaddingBits(self, "padding", 8) yield PaddingBits(self, "padding", 8)
else: else:
raise ParserError("Unknown block type %d!" % self.block_type) raise ParserError("Unknown block type %d!" % self.block_type)
# Fixup Intel jumps if necessary (fixups are only applied to the final output, not to the LZX window)
self.parent.uncompressed_data += self.parent._lzx_window[-self.uncompressed_size:]
self.parent._lzx_window = self.parent._lzx_window[-(1 << self.root.compr_level):]
if (
intel_started
and self.parent["filesize_indicator"].value
and self.parent["filesize"].value > 0
):
# Note that we're decoding a block-at-a-time instead of a frame-at-a-time,
# so we need to handle the frame boundaries carefully.
filesize = self.parent["filesize"].value
start_pos = max(0, curlen - 10) # We may need to correct something from the last block
end_pos = len(self.parent.uncompressed_data) - 10
while 1:
jmp_pos = self.parent.uncompressed_data.find(b"\xE8", start_pos, end_pos)
if jmp_pos == -1:
break
if (jmp_pos % 32768) >= (32768 - 10):
# jumps at the end of frames are not fixed up
start_pos = jmp_pos + 1
continue
abs_off, = struct.unpack("<i", self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5])
if -jmp_pos <= abs_off < filesize:
if abs_off < 0:
rel_off = abs_off + filesize
else:
rel_off = abs_off - jmp_pos
self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5] = struct.pack("<i", rel_off)
start_pos = jmp_pos + 5
class LZXStream(Parser): class LZXStream(Parser):
endian = MIDDLE_ENDIAN endian = MIDDLE_ENDIAN
def createFields(self): def createFields(self):
self.uncompressed_data = "" self.uncompressed_data = bytearray()
self._lzx_window = bytearray()
self.r0 = 1 self.r0 = 1
self.r1 = 1 self.r1 = 1
self.r2 = 1 self.r2 = 1
@ -291,6 +328,6 @@ class LZXStream(Parser):
def lzx_decompress(stream, window_bits): def lzx_decompress(stream, window_bits):
data = LZXStream(stream) data = LZXStream(stream)
data.compr_level = window_bits data.compr_level = window_bits
for unused in data: for _ in data:
pass pass
return data.uncompressed_data return data.uncompressed_data

View file

@ -44,7 +44,7 @@ class MarFile(Parser):
return "Invalid magic" return "Invalid magic"
if self["version"].value != 3: if self["version"].value != 3:
return "Invalid version" return "Invalid version"
if not(1 <= self["nb_file"].value <= MAX_NB_FILE): if not (1 <= self["nb_file"].value <= MAX_NB_FILE):
return "Invalid number of file" return "Invalid number of file"
return True return True

View file

@ -14,13 +14,13 @@ from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import paddingSize, alignValue from hachoir.core.tools import paddingSize, alignValue
def extend_data(data, length, offset): def extend_data(data: bytearray, length, offset):
"""Extend data using a length and an offset.""" """Extend data using a length and an offset, LZ-style."""
if length >= offset: if length >= offset:
new_data = data[-offset:] * (alignValue(length, offset) // offset) new_data = data[-offset:] * (alignValue(length, offset) // offset)
return data + new_data[:length] data += new_data[:length]
else: else:
return data + data[-offset:-offset + length] data += data[-offset:-offset + length]
def build_tree(lengths): def build_tree(lengths):
@ -136,9 +136,9 @@ class DeflateBlock(FieldSet):
CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9, CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9,
6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
def __init__(self, parent, name, uncomp_data="", *args, **kwargs): def __init__(self, parent, name, uncomp_data=b"", *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs) FieldSet.__init__(self, parent, name, *args, **kwargs)
self.uncomp_data = uncomp_data self.uncomp_data = bytearray(uncomp_data)
def createFields(self): def createFields(self):
yield Bit(self, "final", "Is this the final block?") # BFINAL yield Bit(self, "final", "Is this the final block?") # BFINAL
@ -227,7 +227,7 @@ class DeflateBlock(FieldSet):
field._description = "Literal Code %r (Huffman Code %i)" % ( field._description = "Literal Code %r (Huffman Code %i)" % (
chr(value), field.value) chr(value), field.value)
yield field yield field
self.uncomp_data += chr(value) self.uncomp_data.append(value)
if value == 256: if value == 256:
field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value
yield field yield field
@ -267,15 +267,14 @@ class DeflateBlock(FieldSet):
extrafield._description = "Distance Extra Bits (%i), total length %i" % ( extrafield._description = "Distance Extra Bits (%i), total length %i" % (
extrafield.value, distance) extrafield.value, distance)
yield extrafield yield extrafield
self.uncomp_data = extend_data( extend_data(self.uncomp_data, length, distance)
self.uncomp_data, length, distance)
class DeflateData(GenericFieldSet): class DeflateData(GenericFieldSet):
endian = LITTLE_ENDIAN endian = LITTLE_ENDIAN
def createFields(self): def createFields(self):
uncomp_data = "" uncomp_data = bytearray()
blk = DeflateBlock(self, "compressed_block[]", uncomp_data) blk = DeflateBlock(self, "compressed_block[]", uncomp_data)
yield blk yield blk
uncomp_data = blk.uncomp_data uncomp_data = blk.uncomp_data
@ -326,11 +325,11 @@ class ZlibData(Parser):
yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal) yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal)
def zlib_inflate(stream, wbits=None, prevdata=""): def zlib_inflate(stream, wbits=None):
if wbits is None or wbits >= 0: if wbits is None or wbits >= 0:
return ZlibData(stream)["data"].uncompressed_data return ZlibData(stream)["data"].uncompressed_data
else: else:
data = DeflateData(None, "root", stream, "", stream.askSize(None)) data = DeflateData(None, "root", stream, "", stream.askSize(None))
for unused in data: for _ in data:
pass pass
return data.uncompressed_data return data.uncompressed_data

View file

@ -451,7 +451,7 @@ class ID3_Chunk(FieldSet):
if size: if size:
cls = None cls = None
if not(is_compressed): if not is_compressed:
tag = self["tag"].value tag = self["tag"].value
if tag in ID3_Chunk.handler: if tag in ID3_Chunk.handler:
cls = ID3_Chunk.handler[tag] cls = ID3_Chunk.handler[tag]

View file

@ -128,7 +128,7 @@ class DataObject(FieldSet):
yield padding yield padding
for i in range(self["entry_count"].value): for i in range(self["entry_count"].value):
yield UInt32(self, "index[" + str(i) + "]", "Index of the " + str(i) + "nth mhit") yield UInt32(self, "index[" + str(i) + "]", "Index of the " + str(i) + "nth mhit")
elif(self["type"].value < 15) or (self["type"].value > 17) or (self["type"].value >= 200): elif (self["type"].value < 15) or (self["type"].value > 17) or (self["type"].value >= 200):
yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]")
yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]")
yield UInt32(self, "position", "Position") yield UInt32(self, "position", "Position")

View file

@ -29,7 +29,7 @@ class Integer(Bits):
while True: while True:
bits = stream.readBits(addr, 8, parent.endian) bits = stream.readBits(addr, 8, parent.endian)
value = (value << 7) + (bits & 127) value = (value << 7) + (bits & 127)
if not(bits & 128): if not (bits & 128):
break break
addr += 8 addr += 8
self._size += 8 self._size += 8

View file

@ -1312,7 +1312,7 @@ class MP4File(Parser):
if size < 8: if size < 8:
return "Invalid first atom size" return "Invalid first atom size"
tag = self.stream.readBytes(4 * 8, 4) tag = self.stream.readBytes(4 * 8, 4)
if tag not in (b"ftyp", b"moov", b"free"): if tag not in (b"ftyp", b"moov", b"free", b"skip"):
return "Unknown MOV file type" return "Unknown MOV file type"
return True return True

View file

@ -240,11 +240,13 @@ class Inode(FieldSet):
return out return out
def is_fast_symlink(self): def is_fast_symlink(self):
self.seekByte(4 * 15 + 4) acl_addr = self.absolute_address + self.current_size
acl = UInt32(self, "file_acl") # skip 15 blocks + version field
acl_addr += (4 * 15 + 4) * 8
acl = self.stream.readBits(acl_addr, 32, self.endian)
b = 0 b = 0
if acl.value > 0: if acl > 0:
b = (2 << self["/superblock/log_block_size"].value) b = (2 << self["/superblock/log_block_size"].value)
return (self['blocks'].value - b == 0) return (self['blocks'].value - b == 0)
@ -747,7 +749,7 @@ class EXT2_FS(HachoirParser, RootSeekableFieldSet):
def validate(self): def validate(self):
if self.stream.readBytes((1024 + 56) * 8, 2) != b"\x53\xEF": if self.stream.readBytes((1024 + 56) * 8, 2) != b"\x53\xEF":
return "Invalid magic number" return "Invalid magic number"
if not(0 <= self["superblock/log_block_size"].value <= 2): if not (0 <= self["superblock/log_block_size"].value <= 2):
return "Invalid (log) block size" return "Invalid (log) block size"
if self["superblock/inode_size"].value not in (0, 128): if self["superblock/inode_size"].value not in (0, 128):
return "Unsupported inode size" return "Unsupported inode size"

View file

@ -139,4 +139,7 @@ def createParser(filename, real_filename=None, tags=None):
if not tags: if not tags:
tags = [] tags = []
stream = FileInputStream(filename, real_filename, tags=tags) stream = FileInputStream(filename, real_filename, tags=tags)
return guessParser(stream) guess = guessParser(stream)
if guess is None:
stream.close()
return guess

View file

@ -205,7 +205,7 @@ class SOSComponent(FieldSet):
def createFields(self): def createFields(self):
comp_id = UInt8(self, "component_id") comp_id = UInt8(self, "component_id")
yield comp_id yield comp_id
if not(1 <= comp_id.value <= self["../nr_components"].value): if not (1 <= comp_id.value <= self["../nr_components"].value):
raise ParserError("JPEG error: Invalid component-id") raise ParserError("JPEG error: Invalid component-id")
yield Bits(self, "dc_coding_table", 4, "DC entropy coding table destination selector") yield Bits(self, "dc_coding_table", 4, "DC entropy coding table destination selector")
yield Bits(self, "ac_coding_table", 4, "AC entropy coding table destination selector") yield Bits(self, "ac_coding_table", 4, "AC entropy coding table destination selector")
@ -387,7 +387,10 @@ class JpegImageData(FieldSet):
end = self.stream.searchBytes(b"\xff", start, MAX_FILESIZE * 8) end = self.stream.searchBytes(b"\xff", start, MAX_FILESIZE * 8)
if end is None: if end is None:
# this is a bad sign, since it means there is no terminator # this is a bad sign, since it means there is no terminator
# we ignore this; it likely means a truncated image # this likely means a truncated image:
# set the size to the remaining length of the stream
# to avoid being forced to parse subfields to calculate size
self._size = self.stream._size - self.absolute_address
break break
if self.stream.readBytes(end, 2) == b'\xff\x00': if self.stream.readBytes(end, 2) == b'\xff\x00':
# padding: false alarm # padding: false alarm

View file

@ -45,7 +45,7 @@ UNIT_NAME = {1: "Meter"}
COMPRESSION_NAME = { COMPRESSION_NAME = {
0: "deflate" # with 32K sliding window 0: "deflate" # with 32K sliding window
} }
MAX_CHUNK_SIZE = 5 * 1024 * 1024 # Maximum chunk size (5 MB) MAX_CHUNK_SIZE = 64 * 1024 * 1024 # Maximum chunk size heuristic (64 MB)
def headerParse(parent): def headerParse(parent):

View file

@ -597,7 +597,7 @@ class WMF_File(Parser):
yield UInt32(self, "max_record_size", "The size of largest record in 16-bit words") yield UInt32(self, "max_record_size", "The size of largest record in 16-bit words")
yield UInt16(self, "nb_params", "Not Used (always 0)") yield UInt16(self, "nb_params", "Not Used (always 0)")
while not(self.eof): while not self.eof:
yield Function(self, "func[]") yield Function(self, "func[]")
def isEMF(self): def isEMF(self):

View file

@ -16,3 +16,4 @@ from hachoir.parser.misc.word_doc import WordDocumentParser # noqa
from hachoir.parser.misc.word_2 import Word2DocumentParser # noqa from hachoir.parser.misc.word_2 import Word2DocumentParser # noqa
from hachoir.parser.misc.mstask import MSTaskFile # noqa from hachoir.parser.misc.mstask import MSTaskFile # noqa
from hachoir.parser.misc.mapsforge_map import MapsforgeMapFile # noqa from hachoir.parser.misc.mapsforge_map import MapsforgeMapFile # noqa
from hachoir.parser.misc.fit import FITFile # noqa

View file

@ -0,0 +1,173 @@
"""
Garmin fit file Format parser.
Author: Sebastien Ponce <sebastien.ponce@cern.ch>
"""
from hachoir.parser import Parser
from hachoir.field import FieldSet, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, RawBytes, Bit, Bits, Bytes, String, Float32, Float64
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN
# Maps the "type" byte of a field definition to the hachoir field class
# used to read that field in a data message.
field_types = {
    0x00: UInt8,    # enum
    0x01: Int8,     # signed int of 8 bits
    0x02: UInt8,    # unsigned int of 8 bits
    0x83: Int16,    # signed int of 16 bits
    0x84: UInt16,   # unsigned int of 16 bits
    0x85: Int32,    # signed int of 32 bits
    0x86: UInt32,   # unsigned int of 32 bits
    0x07: String,   # string
    0x88: Float32,  # float
    0x89: Float64,  # double
    0x0A: UInt8,    # unsigned int of 8 bits with 0 as invalid value
    0x8B: UInt16,   # unsigned int of 16 bits with 0 as invalid value
    0x8C: UInt32,   # unsigned int of 32 bits with 0 as invalid value
    0x0D: Bytes,    # bytes
    0x8E: Int64,    # signed int of 64 bits
    0x8F: UInt64,   # unsigned int of 64 bits
    0x90: UInt64,   # unsigned int of 64 bits with 0 as invalid value
}
class Header(FieldSet):
    """File header of a FIT file.

    The first byte gives the header length. The trailing CRC field is
    skipped only for the 12-byte form, which has no CRC; every other
    length value is parsed as before, with the CRC present.
    """

    endian = LITTLE_ENDIAN

    def createFields(self):
        """Yield the header fields, omitting the CRC for a 12-byte header."""
        yield UInt8(self, "size", "Header size")
        yield UInt8(self, "protocol", "Protocol version")
        yield UInt16(self, "profile", "Profile version")
        yield UInt32(self, "datasize", "Data size")
        yield RawBytes(self, "datatype", 4)
        # The fields above total 12 bytes. A header declaring exactly that
        # length ends here; reading a CRC would consume the first two bytes
        # of the record data instead.
        if self["size"].value != 12:
            yield UInt16(self, "crc", "CRC of first 11 bytes or 0x0")

    def createDescription(self):
        """Describe the header, including the declared data size."""
        return "Header of fit file. Data size is %d" % (self["datasize"].value)
class NormalRecordHeader(FieldSet):
    """One-byte record header: four flag bits followed by a 4-bit message type."""

    def createFields(self):
        """Yield the four single-bit flags, then the 4-bit ``msgType``."""
        flag_bits = (
            ("normal", "Normal header (0)"),
            ("type", "Message type (0 data, 1 definition"),
            ("typespecific", "0"),
            ("reserved", "0"),
        )
        for flag_name, flag_text in flag_bits:
            yield Bit(self, flag_name, flag_text)
        yield Bits(self, "msgType", 4, description="Message type")

    def createDescription(self):
        """Say whether the ``type`` bit marks a definition or a data message."""
        if self["type"].value:
            kind = "definition"
        else:
            kind = "data"
        return "Record header, this is a %s message" % kind
class FieldDefinition(FieldSet):
    """Three-byte description of one field: number, size in bytes, base type."""

    def createFields(self):
        """Yield the ``number``, ``size`` and ``type`` bytes in that order."""
        layout = (
            ("number", "Field definition number"),
            ("size", "Size in bytes"),
            ("type", "Base type"),
        )
        for byte_name, byte_text in layout:
            yield UInt8(self, byte_name, byte_text)

    def createDescription(self):
        """Describe the definition by its field number and size."""
        number = self["number"].value
        size = self["size"].value
        return "Field Definition. Number %d, Size %d" % (number, size)
class DefinitionMessage(FieldSet):
    """Definition record: header, architecture, message number and field list."""

    def createFields(self):
        """Yield the record header, the fixed fields and every field definition."""
        yield NormalRecordHeader(self, "RecordHeader")
        yield UInt8(self, "reserved", "Reserved (0)")

        architecture = UInt8(self, "architecture", "Architecture (0 little, 1 big endian")
        yield architecture
        # The architecture byte selects the byte order for the rest of this message.
        if architecture.value:
            self.endian = BIG_ENDIAN
        else:
            self.endian = LITTLE_ENDIAN

        yield UInt16(self, "msgNumber", "Message Number")
        field_count = UInt8(self, "nbFields", "Number of fields")
        yield field_count
        for _ in range(field_count.value):
            yield FieldDefinition(self, "fieldDefinition[]")

    def createDescription(self):
        """Describe the message by its number of field definitions."""
        count = self["nbFields"].value
        return "Definition Message. Contains %d fields" % count
class DataMessage(FieldSet):
    """Data record laid out according to a previously seen definition message.

    The definition is looked up in ``self.parent.msgDefs`` using the
    ``msgType`` value of this record's header.
    """

    def createFields(self):
        """Yield the record header, then one field (or array) per field definition.

        Raises:
            KeyError: if no definition is registered for the message type,
                or if a field definition uses a base type missing from
                ``field_types``.
        """
        hdr = NormalRecordHeader(self, "RecordHeader")
        yield hdr
        msgDef = self.parent.msgDefs[hdr["msgType"].value]
        # The byte order is a property of the definition, so set it once
        # rather than on every loop iteration.
        self.endian = BIG_ENDIAN if msgDef["architecture"].value else LITTLE_ENDIAN
        for n in range(msgDef["nbFields"].value):
            desc = msgDef["fieldDefinition[%d]" % n]
            typ = field_types[desc["type"].value]
            size = desc["size"].value
            if typ in (String, Bytes):
                # Variable-length types take the byte length as an argument.
                yield typ(self, "field%d" % n, size)
            elif typ.static_size // 8 == size:
                # Fixed-size types take no length; the size used to be
                # passed here and ended up as the field description.
                yield typ(self, "field%d" % n)
            else:
                # Declared size differs from the type size: read as an array.
                for _ in range(size * 8 // typ.static_size):
                    yield typ(self, "field%d[]" % n)

    def createDescription(self):
        """Return the fixed description of a data message."""
        return "Data Message"
class TimeStamp(FieldSet):
    """Compressed-timestamp record header (one byte).

    Layout, most significant bit first: a flag bit set to 1, a 2-bit
    message type and a 5-bit time offset.
    """

    def createFields(self):
        yield Bit(self, "timestamp", "TimeStamp (1)")
        # The FIT protocol assigns bits 5-6 to the message type and bits 0-4
        # to the time offset (the previous 3/4 split mis-decoded both).
        yield Bits(self, "msgType", 2, description="Message type")
        yield Bits(self, "time", 5, description="TimeOffset")

    def createDescription(self):
        return "TimeStamp"
class CRC(FieldSet):
    """Field set wrapping a single 16-bit CRC value."""

    def createFields(self):
        checksum = UInt16(self, "crc", "CRC")
        yield checksum

    def createDescription(self):
        return "CRC"
class FITFile(Parser):
    """Parser for Garmin FIT files.

    The file is a header, a sequence of records (definition messages, data
    messages or timestamp headers) covering the header's "datasize" bytes,
    and a final CRC.
    """

    endian = BIG_ENDIAN
    PARSER_TAGS = {
        "id": "fit",
        "category": "misc",
        "file_ext": ("fit",),
        "mime": ("application/fit",),
        "min_size": 14 * 8,
        "description": "Garmin binary fit format"
    }

    def __init__(self, *args, **kwargs):
        Parser.__init__(self, *args, **kwargs)
        # Maps a message type to the most recent DefinitionMessage seen for
        # it; DataMessage reads this through self.parent.msgDefs.
        self.msgDefs = {}

    def validate(self):
        """Return True when bytes 8..11 are b'.FIT', else an error string."""
        s = self.stream.readBytes(0, 12)
        signature = s[8:12]
        if signature != b'.FIT':
            # Iterating bytes yields integers, which is what %d expects.
            return "Invalid header %d %d %d %d" % tuple(signature)
        return True

    def createFields(self):
        yield Header(self, "header")
        while self.current_size < self["header"]["datasize"].value * 8:
            # Peek at the two leading bits of the next record header to pick
            # the record kind without consuming them.
            b = self.stream.readBits(self.absolute_address + self.current_size, 2, self.endian)
            if b == 1:
                defMsg = DefinitionMessage(self, "definition[]")
                # Register the definition so later data messages of the same
                # message type can find their layout.
                self.msgDefs[defMsg["RecordHeader"]["msgType"].value] = defMsg
                yield defMsg
            elif b == 0:
                yield DataMessage(self, "data[]")
            else:
                yield TimeStamp(self, "timestamp[]")
        yield CRC(self, "crc")

View file

@ -41,7 +41,7 @@ class UIntVbe(Field):
size += 1 size += 1
assert size < 100, "UIntVBE is too large" assert size < 100, "UIntVBE is too large"
if not(haveMoreData): if not haveMoreData:
break break
self._size = size * 8 self._size = size * 8
@ -71,7 +71,7 @@ class IntVbe(Field):
size += 1 size += 1
assert size < 100, "IntVBE is too large" assert size < 100, "IntVBE is too large"
if not(haveMoreData): if not haveMoreData:
break break
if isNegative: if isNegative:
@ -142,7 +142,7 @@ class TileHeader(FieldSet):
def createFields(self): def createFields(self):
numLevels = int(self.zoomIntervalCfg[ numLevels = int(self.zoomIntervalCfg[
"max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) + 1 "max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) + 1
assert(numLevels < 50) assert (numLevels < 50)
for i in range(numLevels): for i in range(numLevels):
yield TileZoomTable(self, "zoom_table_entry[]") yield TileZoomTable(self, "zoom_table_entry[]")
yield UIntVbe(self, "first_way_offset") yield UIntVbe(self, "first_way_offset")

View file

@ -211,7 +211,7 @@ class OLE2_File(HachoirParser, RootSeekableFieldSet):
return "Unknown major version (%s)" % self["header/ver_maj"].value return "Unknown major version (%s)" % self["header/ver_maj"].value
if self["header/endian"].value not in (b"\xFF\xFE", b"\xFE\xFF"): if self["header/endian"].value not in (b"\xFF\xFE", b"\xFE\xFF"):
return "Unknown endian (%s)" % self["header/endian"].raw_display return "Unknown endian (%s)" % self["header/endian"].raw_display
if not(MIN_BIG_BLOCK_LOG2 <= self["header/bb_shift"].value <= MAX_BIG_BLOCK_LOG2): if not (MIN_BIG_BLOCK_LOG2 <= self["header/bb_shift"].value <= MAX_BIG_BLOCK_LOG2):
return "Invalid (log 2 of) big block size (%s)" % self["header/bb_shift"].value return "Invalid (log 2 of) big block size (%s)" % self["header/bb_shift"].value
if self["header/bb_shift"].value < self["header/sb_shift"].value: if self["header/bb_shift"].value < self["header/sb_shift"].value:
return "Small block size (log2=%s) is bigger than big block size (log2=%s)!" \ return "Small block size (log2=%s) is bigger than big block size (log2=%s)!" \

View file

@ -44,7 +44,7 @@ def getElementEnd(s, limit=b' ', offset=0):
class PDFNumber(Field): class PDFNumber(Field):
LIMITS = [b'[', b'/', b'\x0D', b']'] LIMITS = [b'[', b'/', b'\x0A', b'\x0D', b'>', b']']
""" """
sprintf("%i") or sprinf("%.?f") sprintf("%i") or sprinf("%.?f")
""" """
@ -81,18 +81,18 @@ class PDFString(Field):
def __init__(self, parent, name, desc=None): def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc) Field.__init__(self, parent, name, description=desc)
val = "" val = bytearray()
count = 1 count = 1
off = 1 off = 1
while not parent.eof: while not parent.eof:
char = parent.stream.readBytes(self.absolute_address + 8 * off, 1) char = parent.stream.readBytes(self.absolute_address + 8 * off, 1)
# Non-ASCII # Non-ASCII
if not char.isalpha() or char == '\\': if not char.isalpha() or char == b'\\':
off += 1 off += 1
continue continue
if char == '(': if char == b'(':
count += 1 count += 1
if char == ')': if char == b')':
count -= 1 count -= 1
# Parenthesis block = 0 => end of string # Parenthesis block = 0 => end of string
if count == 0: if count == 0:
@ -101,13 +101,15 @@ class PDFString(Field):
# Add it to the string # Add it to the string
val += char val += char
off += 1
val = bytes(val)
self._size = 8 * off self._size = 8 * off
self.createValue = lambda: val self.createValue = lambda: val
class PDFName(Field): class PDFName(Field):
LIMITS = [b'[', b'/', b'<', b']'] LIMITS = [b'[', b'/', b'<', b'>', b']']
""" """
String starting with '/', where characters may be written using their String starting with '/', where characters may be written using their
ASCII code (exemple: '#20' would be ' ' ASCII code (exemple: '#20' would be ' '
@ -145,7 +147,7 @@ class PDFID(Field):
def __init__(self, parent, name, desc=None): def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc) Field.__init__(self, parent, name, description=desc)
self._size = 8 * getElementEnd(parent, '>') self._size = 8 * getElementEnd(parent, b'>')
self.createValue = lambda: parent.stream.readBytes( self.createValue = lambda: parent.stream.readBytes(
self.absolute_address + 8, (self._size // 8) - 1) self.absolute_address + 8, (self._size // 8) - 1)
@ -254,7 +256,7 @@ def parsePDFType(s):
else: else:
# First parse size # First parse size
size = getElementEnd(s) size = getElementEnd(s)
for limit in ['/', '>', '<']: for limit in [b'/', b'>', b'<']:
other_size = getElementEnd(s, limit) other_size = getElementEnd(s, limit)
if other_size is not None: if other_size is not None:
other_size -= 1 other_size -= 1
@ -424,7 +426,7 @@ class Catalog(FieldSet):
new_length = getElementEnd(self, limit) new_length = getElementEnd(self, limit)
if length is None or (new_length is not None and new_length - len(limit) < length): if length is None or (new_length is not None and new_length - len(limit) < length):
length = new_length - len(limit) length = new_length - len(limit)
yield String(self, "object", length, strip=' ') yield String(self, "object", length, strip=' \n')
if self.stream.readBytes(self.absolute_address + self.current_size, 2) == b'<<': if self.stream.readBytes(self.absolute_address + self.current_size, 2) == b'<<':
yield PDFDictionary(self, "key_list") yield PDFDictionary(self, "key_list")
# End of catalog: this one has "endobj" # End of catalog: this one has "endobj"
@ -441,9 +443,9 @@ class Trailer(FieldSet):
yield RawBytes(self, "marker", len(self.MAGIC)) yield RawBytes(self, "marker", len(self.MAGIC))
yield WhiteSpace(self, "sep[]") yield WhiteSpace(self, "sep[]")
yield String(self, "start_attribute_marker", 2) yield String(self, "start_attribute_marker", 2)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size addr = self.absolute_address + self.current_size
while self.stream.readBytes(addr, 2) != b'>>': while self.stream.readBytes(addr, 2) != b'>>':
yield WhiteSpace(self, "sep[]")
t = PDFName(self, "type[]") t = PDFName(self, "type[]")
yield t yield t
name = t.value.decode() name = t.value.decode()
@ -462,6 +464,7 @@ class Trailer(FieldSet):
yield PDFDictionary(self, "decrypt") yield PDFDictionary(self, "decrypt")
else: else:
raise ParserError("Don't know trailer type '%s'" % name) raise ParserError("Don't know trailer type '%s'" % name)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size addr = self.absolute_address + self.current_size
yield String(self, "end_attribute_marker", 2) yield String(self, "end_attribute_marker", 2)
yield LineEnd(self, "line_end[]") yield LineEnd(self, "line_end[]")

View file

@ -2,6 +2,8 @@
TrueType Font parser. TrueType Font parser.
Documents: Documents:
- "The OpenType Specification"
https://docs.microsoft.com/en-us/typography/opentype/spec/
- "An Introduction to TrueType Fonts: A look inside the TTF format" - "An Introduction to TrueType Fonts: A look inside the TTF format"
written by "NRSI: Computers & Writing Systems" written by "NRSI: Computers & Writing Systems"
http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter08 http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter08
@ -11,11 +13,26 @@ Creation date: 2007-02-08
""" """
from hachoir.parser import Parser from hachoir.parser import Parser
from hachoir.field import (FieldSet, ParserError, from hachoir.field import (
UInt16, UInt32, Bit, Bits, FieldSet,
PaddingBits, NullBytes, ParserError,
String, RawBytes, Bytes, Enum, UInt8,
TimestampMac32) UInt16,
UInt24,
UInt32,
Int16,
Bit,
Bits,
PaddingBits,
NullBytes,
String,
RawBytes,
Bytes,
Enum,
TimestampMac32,
GenericVector,
PascalString8,
)
from hachoir.core.endian import BIG_ENDIAN from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
@ -69,11 +86,65 @@ CHARSET_MAP = {
3: {1: "UTF-16-BE"}, 3: {1: "UTF-16-BE"},
} }
# Display names for the values of the 4-bit "usage_permissions" field,
# passed as the Enum mapping by the fsType field set.
PERMISSIONS = {
0: "Installable embedding",
2: "Restricted License embedding",
4: "Preview & Print embedding",
8: "Editable embedding",
}
class TableHeader(FieldSet): FWORD = Int16
UFWORD = UInt16
class Tag(String):
    """String field with a fixed length of four."""

    def __init__(self, parent, name, description=None):
        super().__init__(parent, name, 4, description)
class Version16Dot16(FieldSet):
    """Version number stored as two 16-bit halves, exposed as a float."""

    static_size = 32

    def createFields(self):
        for part in ("major", "minor"):
            yield UInt16(self, part)

    def createValue(self):
        # The minor half is rendered in hexadecimal before the text is
        # converted to a float.
        major = self["major"].value
        minor = self["minor"].value
        return float("%u.%x" % (major, minor))
class Fixed(FieldSet):
    """32-bit signed fixed-point number (16.16).

    The value is the signed 16-bit integer part plus the unsigned 16-bit
    fraction divided by 65536.
    """

    def createFields(self):
        # The integer half carries the sign (two's complement); reading it
        # unsigned turned negative values such as a -12.0 italic angle into
        # large positive numbers.
        yield Int16(self, "int_part")
        yield UInt16(self, "float_part")

    def createValue(self):
        return self["int_part"].value + float(self["float_part"].value) / 65536
class Tuple(FieldSet):
    """Sequence of axisCount Fixed coordinates."""

    def __init__(self, parent, name, axisCount):
        FieldSet.__init__(self, parent, name, description="Tuple Record")
        self.axisCount = axisCount

    def createFields(self):
        # One Fixed coordinate per axis.
        for _axis in range(self.axisCount):
            coordinate = Fixed(self, "coordinate[]")
            yield coordinate
class F2DOT14(FieldSet):
    """16-bit signed value whose field value is the raw integer divided by 16384."""

    static_size = 16

    def createFields(self):
        yield Int16(self, "int_part")

    def createValue(self):
        raw = self["int_part"].value
        return raw / 16384
class TableHeader(FieldSet):
def createFields(self):
yield Tag(self, "tag")
yield textHandler(UInt32(self, "checksum"), hexadecimal) yield textHandler(UInt32(self, "checksum"), hexadecimal)
yield UInt32(self, "offset") yield UInt32(self, "offset")
yield filesizeHandler(UInt32(self, "size")) yield filesizeHandler(UInt32(self, "size"))
@ -83,7 +154,6 @@ class TableHeader(FieldSet):
class NameHeader(FieldSet): class NameHeader(FieldSet):
def createFields(self): def createFields(self):
yield Enum(UInt16(self, "platformID"), PLATFORM_NAME) yield Enum(UInt16(self, "platformID"), PLATFORM_NAME)
yield UInt16(self, "encodingID") yield UInt16(self, "encodingID")
@ -135,7 +205,7 @@ def parseFontHeader(self):
yield Bits(self, "adobe", 2, "(used by Adobe)") yield Bits(self, "adobe", 2, "(used by Adobe)")
yield UInt16(self, "unit_per_em", "Units per em") yield UInt16(self, "unit_per_em", "Units per em")
if not(16 <= self["unit_per_em"].value <= 16384): if not (16 <= self["unit_per_em"].value <= 16384):
raise ParserError("TTF: Invalid unit/em value") raise ParserError("TTF: Invalid unit/em value")
yield UInt32(self, "created_high") yield UInt32(self, "created_high")
yield TimestampMac32(self, "created") yield TimestampMac32(self, "created")
@ -162,17 +232,273 @@ def parseFontHeader(self):
yield UInt16(self, "glyph_format", "(=0)") yield UInt16(self, "glyph_format", "(=0)")
class AxisValueMap(FieldSet):
    """Pair of F2DOT14 values: a 'from' coordinate and a 'to' coordinate."""

    static_size = 32

    def createFields(self):
        for field_name in ("fromCoordinate", "toCoordinate"):
            yield F2DOT14(self, field_name)
class SegmentMaps(FieldSet):
    """Counted list of AxisValueMap records for one axis."""

    def createFields(self):
        yield UInt16(
            self, "positionMapCount", "The number of correspondence pairs for this axis"
        )
        pair_count = self["positionMapCount"].value
        for _ in range(pair_count):
            yield AxisValueMap(self, "axisValueMaps[]")
def parseAvar(self):
    """Yield the fields of an 'avar' table: version, axis count and one
    SegmentMaps record per axis."""
    for field_name, field_desc in (
        ("majorVersion", "Major version"),
        ("minorVersion", "Minor version"),
    ):
        yield UInt16(self, field_name, field_desc)
    yield PaddingBits(self, "reserved[]", 16)
    yield UInt16(self, "axisCount", "The number of variation axes for this font")
    axis_count = self["axisCount"].value
    for _ in range(axis_count):
        yield SegmentMaps(self, "segmentMaps[]")
class VariationAxisRecord(FieldSet):
    """Axis record: tag, min/default/max Fixed values, flag bits and name ID."""

    def createFields(self):
        yield Tag(self, "axisTag", "Tag identifying the design variation for the axis")
        # The three coordinate limits share the same Fixed type.
        limits = (
            ("minValue", "The minimum coordinate value for the axis"),
            ("defaultValue", "The default coordinate value for the axis"),
            ("maxValue", "The maximum coordinate value for the axis"),
        )
        for field_name, field_desc in limits:
            yield Fixed(self, field_name, field_desc)
        # 15 reserved bits followed by the single "hidden" bit.
        yield PaddingBits(self, "reservedFlags", 15)
        yield Bit(
            self, "hidden", "The axis should not be exposed directly in user interfaces"
        )
        name_id_desc = (
            "The name ID for entries in the 'name' table that provide a display name for this axis"
        )
        yield UInt16(self, "axisNameID", name_id_desc)
class InstanceRecord(FieldSet):
    """Named-instance record: subfamily name ID, reserved flags, a coordinate
    Tuple and, when hasPSNameID is set, a PostScript name ID."""

    def __init__(self, parent, name, axisCount, hasPSNameID=False):
        FieldSet.__init__(self, parent, name, description="Instance record")
        self.axisCount = axisCount
        self.hasPSNameID = hasPSNameID

    def createFields(self):
        yield UInt16(
            self, "subfamilyNameID", "Name ID for subfamily names for this instance"
        )
        yield PaddingBits(self, "reservedFlags", 16)
        yield Tuple(self, "coordinates", axisCount=self.axisCount)
        if not self.hasPSNameID:
            return
        # Optional trailing field, present only in the longer record form.
        yield UInt16(
            self,
            "postScriptNameID",
            "Name ID for PostScript names for this instance",
        )
def parseFvar(self):
    """Yield the fields of an 'fvar' table.

    Parses the 16-byte header, optional padding up to axisArrayOffset, then
    axisCount VariationAxisRecord entries and instanceCount InstanceRecord
    entries.
    """
    yield UInt16(self, "majorVersion", "Major version")
    yield UInt16(self, "minorVersion", "Minor version")
    yield UInt16(
        self, "axisArrayOffset", "Offset to the start of the VariationAxisRecord array."
    )
    yield PaddingBits(self, "reserved[]", 16)
    yield UInt16(self, "axisCount", "The number of variation axes for this font")
    yield UInt16(self, "axisSize", "The size in bytes of each VariationAxisRecord")
    yield UInt16(self, "instanceCount", "The number of named instances for this font")
    yield UInt16(self, "instanceSize", "The size in bytes of each InstanceRecord")

    # The header is 16 bytes long; skip any gap before the axis array.
    if self["axisArrayOffset"].value > 16:
        yield PaddingBits(self, "padding", 8 * (self["axisArrayOffset"].value - 16))

    axis_count = self["axisCount"].value
    for _ in range(axis_count):
        yield VariationAxisRecord(self, "axes[]")

    # An instance record is subfamilyNameID (2) + flags (2) + one 4-byte
    # Fixed coordinate per axis, optionally followed by postScriptNameID (2).
    # The previous check used 2 bytes per coordinate and so never matched
    # the record size that actually carries the PostScript name ID.
    has_ps_name_id = self["instanceSize"].value == 4 * axis_count + 6
    for _ in range(self["instanceCount"].value):
        yield InstanceRecord(
            self,
            "instances[]",
            axisCount=axis_count,
            hasPSNameID=has_ps_name_id,
        )
class EncodingRecord(FieldSet):
    """Encoding record: platform ID, encoding ID and a 32-bit subtable offset."""

    static_size = 64

    def createFields(self):
        platform = UInt16(self, "platformID")
        yield Enum(platform, PLATFORM_NAME)
        yield UInt16(self, "encodingID")
        # The offset field is also kept on the instance as self.offset.
        self.offset = UInt32(self, "subtableOffset")
        yield self.offset
class CmapTable0(FieldSet):
    """Format 0 subtable: three 16-bit header fields and 256 one-byte entries."""

    def createFields(self):
        header = (
            ("format", "Table format"),
            ("length", "Length in bytes"),
            ("language", "Language ID"),
        )
        for field_name, field_desc in header:
            yield UInt16(self, field_name, field_desc)
        yield GenericVector(self, "mapping", 256, UInt8)
class CmapTable4(FieldSet):
    """Format 4 subtable: header, four parallel per-segment arrays and an
    optional trailing glyph ID array filling the rest of "length"."""

    def createFields(self):
        yield UInt16(self, "format", "Table format")
        yield UInt16(self, "length", "Length in bytes")
        yield UInt16(self, "language", "Language ID")
        yield UInt16(self, "segCountX2", "Twice the number of segments")
        segments = self["segCountX2"].value // 2
        yield UInt16(self, "searchRange")
        yield UInt16(self, "entrySelector")
        yield UInt16(self, "rangeShift")
        yield GenericVector(self, "endCode", segments, UInt16)
        yield PaddingBits(self, "reserved[]", 16)
        yield GenericVector(self, "startCode", segments, UInt16)
        yield GenericVector(self, "idDelta", segments, Int16)
        yield GenericVector(self, "idRangeOffsets", segments, UInt16)
        # Number of 16-bit glyph IDs left in the subtable. Integer division
        # keeps the count an int (true division produced a float item count),
        # and a declared length shorter than the parsed part is ignored.
        remainder = (self["length"].value - self.current_size // 8) // 2
        if remainder > 0:
            yield GenericVector(self, "glyphIdArray", remainder, UInt16)
class CmapTable6(FieldSet):
    """Format 6 subtable: header, first code, entry count and glyph ID array."""

    def createFields(self):
        header = (
            ("format", "Table format"),
            ("length", "Length in bytes"),
            ("language", "Language ID"),
            ("firstCode", "First character code of subrange"),
            ("entryCount", "Number of character codes in subrange"),
        )
        for field_name, field_desc in header:
            yield UInt16(self, field_name, field_desc)
        entry_count = self["entryCount"].value
        yield GenericVector(self, "glyphIdArray", entry_count, UInt16)
class SequentialMapGroup(FieldSet):
    """Group of consecutive character codes mapped to consecutive glyph IDs:
    start code, end code and the glyph ID of the start code."""

    def createFields(self):
        yield UInt32(self, "startCharCode", "First character code in this group")
        # Description corrected: this field is the end of the range, not
        # another "first" code.
        yield UInt32(self, "endCharCode", "Last character code in this group")
        yield UInt32(
            self,
            "startGlyphID",
            "Glyph index corresponding to the starting character code",
        )
class CmapTable12(FieldSet):
    """Format 12 subtable: 32-bit header fields followed by numGroups
    SequentialMapGroup records."""

    def createFields(self):
        yield UInt16(self, "format", "Table format")
        yield PaddingBits(self, "reserved[]", 16)
        for field_name, field_desc in (
            ("length", "Length in bytes"),
            ("language", "Language ID"),
            ("numGroups", "Number of groupings which follow"),
        ):
            yield UInt32(self, field_name, field_desc)
        group_count = self["numGroups"].value
        for _ in range(group_count):
            yield SequentialMapGroup(self, "mapgroup[]")
class VariationSelector(FieldSet):
    """Variation selector record: 24-bit selector and two 32-bit UVS offsets."""

    def createFields(self):
        yield UInt24(self, "varSelector", "Variation selector")
        uvs_offsets = (
            ("defaultUVSOffset", "Offset to default UVS table"),
            ("nonDefaultUVSOffset", "Offset to non-default UVS table"),
        )
        for field_name, field_desc in uvs_offsets:
            yield UInt32(self, field_name, field_desc)
class CmapTable14(FieldSet):
    """Format 14 subtable: header followed by a counted list of
    VariationSelector records."""

    def createFields(self):
        yield UInt16(self, "format", "Table format")
        yield UInt32(self, "length", "Length in bytes")
        yield UInt32(
            self, "numVarSelectorRecords", "Number of variation selector records"
        )
        record_count = self["numVarSelectorRecords"].value
        for _ in range(record_count):
            yield VariationSelector(self, "variationSelector[]")
def parseCmap(self):
    """Yield the fields of a 'cmap' table.

    Reads the header and encoding records, then visits each distinct
    subtable offset in ascending order and yields the subtable parser that
    matches its format number (formats 0, 4, 6, 12 and 14).
    """
    yield UInt16(self, "version")
    table_count = UInt16(self, "numTables", "Number of encoding tables")
    yield table_count

    records = []
    for _ in range(table_count.value):
        record = EncodingRecord(self, "encodingRecords[]")
        yield record
        records.append(record)

    # Format number -> (subtable class, field name).
    subtable_parsers = {
        0: (CmapTable0, "cmap table format 0"),
        4: (CmapTable4, "cmap table format 4"),
        6: (CmapTable6, "cmap table format 6"),
        12: (CmapTable12, "cmap table format 12"),
        14: (CmapTable14, "cmap table format 14"),
    }

    previous = None
    ordered = sorted(records, key=lambda field: field["subtableOffset"].value)
    for record in ordered:
        offset = record["subtableOffset"].value
        # Several encoding records may share one subtable; parse it once.
        if previous and previous == offset:
            continue
        previous = offset

        # Add padding if any
        padding = self.seekByte(offset, relative=True, null=False)
        if padding:
            yield padding

        # Peek at the format number: this field is built but not yielded,
        # the subtable field set reads it again as its own first field.
        subtable_format = UInt16(self, "format").value
        entry = subtable_parsers.get(subtable_format)
        if entry is not None:
            subtable_cls, subtable_name = entry
            yield subtable_cls(self, subtable_name)
class SignatureRecord(FieldSet):
    """DSIG signature record: format, length and offset of a signature block.

    All three members are 32-bit values in the OpenType DSIG table; reading
    them as 16-bit fields produced wrong offsets and misaligned every
    following record.
    """

    def createFields(self):
        yield UInt32(self, "format", "Table format")
        yield UInt32(self, "length", "Length of signature")
        yield UInt32(self, "signatureBlockOffset", "Offset to signature block")
class SignatureBlock(FieldSet):
    """Signature block: 32 reserved bits, a 32-bit length and the signature."""

    def createFields(self):
        yield PaddingBits(self, "reserved[]", 32)
        length_desc = "Length (in bytes) of the PKCS#7 packet in the signature field"
        yield UInt32(self, "length", length_desc)
        signature_size = self["length"].value
        yield String(self, "signature", signature_size, "Signature block")
def parseDSIG(self):
    """Yield the fields of a 'DSIG' table.

    Parses the header and the signature records, emits padding up to each
    distinct signature block offset, and covers whatever remains of the
    table with a final NullBytes field.
    """
    yield UInt32(self, "version")
    yield UInt16(self, "numSignatures", "Number of signatures in the table")
    # "flags" is a 16-bit word whose bit 0 is the "cannot be resigned" flag.
    # Bits are read most significant first, so the 15 reserved bits come
    # before the flag. Parsing only 8 bits here shifted all later fields.
    yield PaddingBits(self, "reserved[]", 15)
    yield Bit(self, "flag", "Cannot be resigned")

    entries = []
    for _ in range(self["numSignatures"].value):
        record = SignatureRecord(self, "signatureRecords[]")
        entries.append(record)
        yield record

    entries.sort(key=lambda field: field["signatureBlockOffset"].value)
    last = None
    for entry in entries:
        offset = entry["signatureBlockOffset"].value
        # Skip records that point at an offset already handled.
        if last and last == offset:
            continue
        last = offset

        # Add padding if any
        padding = self.seekByte(offset, relative=True, null=False)
        if padding:
            yield padding

    # Whatever is left in the table is exposed as one trailing field.
    padding = (self.size - self.current_size) // 8
    if padding:
        yield NullBytes(self, "padding_end", padding)
def parseNames(self): def parseNames(self):
# Read header # Read header
yield UInt16(self, "format") yield UInt16(self, "format")
if self["format"].value != 0: if self["format"].value != 0:
raise ParserError("TTF (names): Invalid format (%u)" % raise ParserError("TTF (names): Invalid format (%u)" % self["format"].value)
self["format"].value)
yield UInt16(self, "count") yield UInt16(self, "count")
yield UInt16(self, "offset") yield UInt16(self, "offset")
if MAX_NAME_COUNT < self["count"].value: if MAX_NAME_COUNT < self["count"].value:
raise ParserError("Invalid number of names (%s)" raise ParserError("Invalid number of names (%s)" % self["count"].value)
% self["count"].value)
# Read name index # Read name index
entries = [] entries = []
@ -208,17 +534,210 @@ def parseNames(self):
# Read value # Read value
size = entry["length"].value size = entry["length"].value
if size: if size:
yield String(self, "value[]", size, entry.description, charset=entry.getCharset()) yield String(
self, "value[]", size, entry.description, charset=entry.getCharset()
)
padding = (self.size - self.current_size) // 8 padding = (self.size - self.current_size) // 8
if padding: if padding:
yield NullBytes(self, "padding_end", padding) yield NullBytes(self, "padding_end", padding)
def parseMaxp(self):
    """Yield the fields of a 'maxp' table; the limits after numGlyphs are
    only read when the format version is at least 1."""
    # Read header
    yield Version16Dot16(self, "format", "format version")
    yield UInt16(self, "numGlyphs", "Number of glyphs")
    if self["format"].value < 1:
        return

    limits = (
        ("maxPoints", "Maximum points in a non-composite glyph"),
        ("maxContours", "Maximum contours in a non-composite glyph"),
        ("maxCompositePoints", "Maximum points in a composite glyph"),
        ("maxCompositeContours", "Maximum contours in a composite glyph"),
        ("maxZones", "Do instructions use the twilight zone?"),
        ("maxTwilightPoints", "Maximum points used in Z0"),
        ("maxStorage", "Number of Storage Area locations"),
        ("maxFunctionDefs", "Number of function definitions"),
        ("maxInstructionDefs", "Number of instruction definitions"),
        ("maxStackElements", "Maximum stack depth"),
        ("maxSizeOfInstructions", "Maximum byte count for glyph instructions"),
        ("maxComponentElements", "Maximum number of components at glyph top level"),
        ("maxComponentDepth", "Maximum level of recursion"),
    )
    for field_name, field_desc in limits:
        yield UInt16(self, field_name, field_desc)
def parseHhea(self):
    """Yield the fields of an 'hhea' table in file order."""
    leading = (
        (UInt16, "majorVersion", "Major version"),
        (UInt16, "minorVersion", "Minor version"),
        (FWORD, "ascender", "Typographic ascent"),
        (FWORD, "descender", "Typographic descent"),
        (FWORD, "lineGap", "Typographic linegap"),
        (UFWORD, "advanceWidthMax", "Maximum advance width"),
        (FWORD, "minLeftSideBearing", "Minimum left sidebearing value"),
        (FWORD, "minRightSideBearing", "Minimum right sidebearing value"),
        (FWORD, "xMaxExtent", "Maximum X extent"),
        (Int16, "caretSlopeRise", "Caret slope rise"),
        (Int16, "caretSlopeRun", "Caret slope run"),
        (Int16, "caretOffset", "Caret offset"),
    )
    for field_cls, field_name, field_desc in leading:
        yield field_cls(self, field_name, field_desc)
    # Four reserved 16-bit values grouped into one vector field.
    yield GenericVector(self, "reserved", 4, Int16)
    yield Int16(self, "metricDataFormat", "Metric data format")
    yield UInt16(self, "numberOfHMetrics", "Number of horizontal metrics")
class fsType(FieldSet):
    """Embedding-licensing flags of the OS/2 table (one 16-bit word).

    The word is stored big-endian and bit fields are read most significant
    bit first, so the fields are listed from bit 15 down to bit 0:
    bits 15-10 reserved, bit 9 bitmap embedding only, bit 8 no subsetting,
    bits 7-4 reserved, bits 3-0 usage permissions. The previous order
    listed them from bit 0 upwards and therefore decoded the wrong bits.
    """

    def createFields(self):
        yield PaddingBits(self, "reserved[]", 6)
        yield Bit(
            self,
            "bitmap_embedding",
            "Only bitmaps contained in the font may be embedded",
        )
        yield Bit(self, "no_subsetting", "Font may not be subsetted prior to embedding")
        yield PaddingBits(self, "reserved[]", 4)
        yield Enum(Bits(self, "usage_permissions", 4), PERMISSIONS)
def parseOS2(self):
    """Yield the fields of an 'OS/2' table; trailing groups of fields are
    read only when the table version is at least 1, 2 and 5 respectively."""
    yield UInt16(self, "version", "Table version")
    yield Int16(self, "xAvgCharWidth")
    yield UInt16(self, "usWeightClass")
    yield UInt16(self, "usWidthClass")
    yield fsType(self, "fsType")
    for field_name in (
        "ySubscriptXSize",
        "ySubscriptYSize",
        "ySubscriptXOffset",
        "ySubscriptYOffset",
        "ySuperscriptXSize",
        "ySuperscriptYSize",
        "ySuperscriptXOffset",
        "ySuperscriptYOffset",
        "yStrikeoutSize",
        "yStrikeoutPosition",
        "sFamilyClass",
    ):
        yield Int16(self, field_name)
    yield GenericVector(self, "panose", 10, UInt8)
    for field_name in (
        "ulUnicodeRange1",
        "ulUnicodeRange2",
        "ulUnicodeRange3",
        "ulUnicodeRange4",
    ):
        yield UInt32(self, field_name)
    yield Tag(self, "achVendID", "Vendor ID")
    for field_name in ("fsSelection", "usFirstCharIndex", "usLastCharIndex"):
        yield UInt16(self, field_name)
    for field_name in ("sTypoAscender", "sTypoDescender", "sTypoLineGap"):
        yield Int16(self, field_name)
    for field_name in ("usWinAscent", "usWinDescent"):
        yield UInt16(self, field_name)

    # Fields added by later table versions.
    version = self["version"].value
    if version >= 1:
        for field_name in ("ulCodePageRange1", "ulCodePageRange2"):
            yield UInt32(self, field_name)
    if version >= 2:
        for field_name in ("sxHeight", "sCapHeight"):
            yield Int16(self, field_name)
        for field_name in ("usDefaultChar", "usBreakChar", "usMaxContext"):
            yield UInt16(self, field_name)
    if version >= 5:
        for field_name in ("usLowerOpticalPointSize", "usUpperOpticalPointSize"):
            yield UInt16(self, field_name)
def parsePost(self):
    """Yield the fields of a 'post' table.

    After the common header, version 2.0 adds a glyph-name index array and
    Pascal strings for the custom names, and version 2.5 adds an array of
    per-glyph offsets.
    """
    yield Version16Dot16(self, "version", "Table version")
    yield Fixed(
        self,
        "italicAngle",
        "Italic angle in counter-clockwise degrees from the vertical.",
    )
    yield FWORD(self, "underlinePosition", "Top of underline to baseline")
    yield FWORD(self, "underlineThickness", "Suggested underline thickness")
    yield UInt32(self, "isFixedPitch", "Is the font fixed pitch?")
    yield UInt32(self, "minMemType42", "Minimum memory usage (OpenType)")
    yield UInt32(self, "maxMemType42", "Maximum memory usage (OpenType)")
    yield UInt32(self, "minMemType1", "Minimum memory usage (Type 1)")
    yield UInt32(self, "maxMemType1", "Maximum memory usage (Type 1)")

    version = self["version"].value
    if version == 2.0:
        yield UInt16(self, "numGlyphs")
        indices = GenericVector(
            self,
            "Array of indices into the string data",
            self["numGlyphs"].value,
            UInt16,
            "glyphNameIndex",
        )
        yield indices
        # Indices of 258 and above have their name stored as a Pascal string.
        for gid, index in enumerate(indices):
            if index.value >= 258:
                yield PascalString8(self, "glyphname[%i]" % gid)
    elif version == 2.5:
        # This branch used to test for 2.0 again and could never run.
        # Version 2.5 stores one signed 8-bit offset per glyph.
        from hachoir.field import Int8

        yield UInt16(self, "numGlyphs")
        indices = GenericVector(
            self,
            "Difference between graphic index and standard order of glyph",
            self["numGlyphs"].value,
            Int8,
            "offset",
        )
        yield indices
# Placeholder sub-table parsers: all four names are bound to the same
# one-argument function that returns None.
# This is work-in-progress until I work out good ways to do random-access on offsets
parseScriptList = (
parseFeatureList
) = parseLookupList = parseFeatureVariationsTable = lambda x: None
def parseGSUB(self):
    """Yield the fields of a 'GSUB' table.

    Parses the version and the sub-table offsets, then covers the data
    between consecutive offsets with raw byte fields (the sub-tables
    themselves are not decoded yet).
    """
    yield UInt16(self, "majorVersion", "Major version")
    yield UInt16(self, "minorVersion", "Minor version")
    SUBTABLES = [
        ("script list", parseScriptList),
        ("feature list", parseFeatureList),
        ("lookup list", parseLookupList),
    ]
    offsets = []
    for description, parser in SUBTABLES:
        # "script list" -> "scriptList", "feature list" -> "featureList", ...
        name = description.title().replace(" ", "")
        offset = UInt16(
            self, name[0].lower() + name[1:], "Offset to %s table" % description
        )
        yield offset
        offsets.append((offset.value, parser))
    # The version field is named "minorVersion"; the previous lookup of
    # "min_ver" referred to a field that this table never defines.
    if self["minorVersion"].value == 1:
        offset = UInt32(
            self, "featureVariationsOffset", "Offset to feature variations table"
        )
        # The offset is part of the header, so it must be emitted as a field.
        yield offset
        # A zero offset means the optional table is absent.
        if offset.value:
            offsets.append((offset.value, parseFeatureVariationsTable))

    offsets.sort(key=lambda entry: entry[0])
    padding = self.seekByte(offsets[0][0], null=True)
    if padding:
        yield padding
    lastOffset, first_parser = offsets[0]
    for offset, parser in offsets[1:]:
        # yield parser(self)
        yield RawBytes(self, "content", offset - lastOffset)
        lastOffset = offset
class Table(FieldSet): class Table(FieldSet):
TAG_INFO = { TAG_INFO = {
"DSIG": ("DSIG", "Digital Signature", parseDSIG),
"GSUB": ("GSUB", "Glyph Substitutions", parseGSUB),
"avar": ("avar", "Axis variation table", parseAvar),
"cmap": ("cmap", "Character to Glyph Index Mapping", parseCmap),
"fvar": ("fvar", "Font variations table", parseFvar),
"head": ("header", "Font header", parseFontHeader), "head": ("header", "Font header", parseFontHeader),
"hhea": ("hhea", "Horizontal Header", parseHhea),
"maxp": ("maxp", "Maximum Profile", parseMaxp),
"name": ("names", "Names", parseNames), "name": ("names", "Names", parseNames),
"OS/2": ("OS_2", "OS/2 and Windows Metrics", parseOS2),
"post": ("post", "PostScript", parsePost),
} }
def __init__(self, parent, name, table, **kw): def __init__(self, parent, name, table, **kw):
@ -251,10 +770,15 @@ class TrueTypeFontFile(Parser):
} }
def validate(self): def validate(self):
if self["maj_ver"].value != 1: if self["maj_ver"].value == 1 and self["min_ver"].value == 0:
return "Invalid major version (%u)" % self["maj_ver"].value pass
if self["min_ver"].value != 0: elif self["maj_ver"].value == 0x4F54 and self["min_ver"].value == 0x544F:
return "Invalid minor version (%u)" % self["min_ver"].value pass
else:
return "Invalid version (%u.%u)" % (
self["maj_ver"].value,
self["min_ver"].value,
)
if not (MIN_NB_TABLE <= self["nb_table"].value <= MAX_NB_TABLE): if not (MIN_NB_TABLE <= self["nb_table"].value <= MAX_NB_TABLE):
return "Invalid number of table (%u)" % self["nb_table"].value return "Invalid number of table (%u)" % self["nb_table"].value
return True return True

View file

@ -13,7 +13,7 @@ class HachoirParser(object):
""" """
A parser is the root of all other fields. It create first level of fields A parser is the root of all other fields. It create first level of fields
and have special attributes and methods: and have special attributes and methods:
- tags: dictionnary with keys: - tags: dictionary with keys:
- "file_ext": classical file extensions (string or tuple of strings) ; - "file_ext": classical file extensions (string or tuple of strings) ;
- "mime": MIME type(s) (string or tuple of strings) ; - "mime": MIME type(s) (string or tuple of strings) ;
- "description": String describing the parser. - "description": String describing the parser.

View file

@ -19,7 +19,7 @@ from hachoir.parser.program.exe_ne import NE_Header
from hachoir.parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader from hachoir.parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader
from hachoir.parser.program.exe_res import PE_Resource, NE_VersionInfoNode from hachoir.parser.program.exe_res import PE_Resource, NE_VersionInfoNode
MAX_NB_SECTION = 50 MAX_NB_SECTION = 100
class MSDosHeader(FieldSet): class MSDosHeader(FieldSet):

View file

@ -1,5 +1,5 @@
from hachoir.field import (FieldSet, ParserError, from hachoir.field import (FieldSet, ParserError,
Bit, UInt8, UInt16, UInt32, TimestampUnix32, Bit, UInt8, UInt16, UInt32, UInt64, TimestampUnix32,
Bytes, String, Enum, Bytes, String, Enum,
PaddingBytes, PaddingBits, NullBytes, NullBits) PaddingBytes, PaddingBits, NullBytes, NullBits)
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
@ -175,10 +175,13 @@ class PE_OptHeader(FieldSet):
} }
def createFields(self): def createFields(self):
yield UInt16(self, "signature", "PE optional header signature (0x010b)") yield UInt16(self, "signature", "PE optional header signature (0x010b | 0x020b)")
# TODO: Support PE32+ (signature=0x020b)
if self["signature"].value != 0x010b: if self["signature"].value != 0x010b and self["signature"].value != 0x020b:
raise ParserError("Invalid PE optional header signature") raise ParserError("Invalid PE optional header signature")
is_pe32plus = self["signature"].value == 0x020b
VarUInt = UInt64 if is_pe32plus else UInt32
yield UInt8(self, "maj_lnk_ver", "Major linker version") yield UInt8(self, "maj_lnk_ver", "Major linker version")
yield UInt8(self, "min_lnk_ver", "Minor linker version") yield UInt8(self, "min_lnk_ver", "Minor linker version")
yield filesizeHandler(UInt32(self, "size_code", "Size of code")) yield filesizeHandler(UInt32(self, "size_code", "Size of code"))
@ -186,8 +189,9 @@ class PE_OptHeader(FieldSet):
yield filesizeHandler(UInt32(self, "size_uninit_data", "Size of uninitialized data")) yield filesizeHandler(UInt32(self, "size_uninit_data", "Size of uninitialized data"))
yield textHandler(UInt32(self, "entry_point", "Address (RVA) of the code entry point"), hexadecimal) yield textHandler(UInt32(self, "entry_point", "Address (RVA) of the code entry point"), hexadecimal)
yield textHandler(UInt32(self, "base_code", "Base (RVA) of code"), hexadecimal) yield textHandler(UInt32(self, "base_code", "Base (RVA) of code"), hexadecimal)
if not is_pe32plus:
yield textHandler(UInt32(self, "base_data", "Base (RVA) of data"), hexadecimal) yield textHandler(UInt32(self, "base_data", "Base (RVA) of data"), hexadecimal)
yield textHandler(UInt32(self, "image_base", "Image base (RVA)"), hexadecimal) yield textHandler(VarUInt(self, "image_base", "Image base (RVA)"), hexadecimal)
yield filesizeHandler(UInt32(self, "sect_align", "Section alignment")) yield filesizeHandler(UInt32(self, "sect_align", "Section alignment"))
yield filesizeHandler(UInt32(self, "file_align", "File alignment")) yield filesizeHandler(UInt32(self, "file_align", "File alignment"))
yield UInt16(self, "maj_os_ver", "Major OS version") yield UInt16(self, "maj_os_ver", "Major OS version")
@ -202,10 +206,10 @@ class PE_OptHeader(FieldSet):
yield textHandler(UInt32(self, "checksum"), hexadecimal) yield textHandler(UInt32(self, "checksum"), hexadecimal)
yield Enum(UInt16(self, "subsystem"), self.SUBSYSTEM_NAME) yield Enum(UInt16(self, "subsystem"), self.SUBSYSTEM_NAME)
yield UInt16(self, "dll_flags") yield UInt16(self, "dll_flags")
yield filesizeHandler(UInt32(self, "size_stack_reserve")) yield filesizeHandler(VarUInt(self, "size_stack_reserve"))
yield filesizeHandler(UInt32(self, "size_stack_commit")) yield filesizeHandler(VarUInt(self, "size_stack_commit"))
yield filesizeHandler(UInt32(self, "size_heap_reserve")) yield filesizeHandler(VarUInt(self, "size_heap_reserve"))
yield filesizeHandler(UInt32(self, "size_heap_commit")) yield filesizeHandler(VarUInt(self, "size_heap_commit"))
yield UInt32(self, "loader_flags") yield UInt32(self, "loader_flags")
yield UInt32(self, "nb_directory", "Number of RVA and sizes") yield UInt32(self, "nb_directory", "Number of RVA and sizes")
for index in range(self["nb_directory"].value): for index in range(self["nb_directory"].value):

View file

@ -435,6 +435,19 @@ class OpcodeSpecial_invokeinterface(JavaOpcode):
return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["count"].value, self["zero"].value) return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["count"].value, self["zero"].value)
class OpcodeSpecial_invokedynamic(JavaOpcode):
    # Field layout for the "invokedynamic" instruction: one opcode byte,
    # a constant pool index, then two single-byte operands that are
    # described as "Must be zero.".
    OPSIZE = 5

    def createFields(self):
        yield UInt8(self, "opcode")
        yield CPIndex(self, "index")
        # Both trailing operands share the same type and description.
        for padding_name in ("zero1", "zero2"):
            yield UInt8(self, padding_name, "Must be zero.")

    def createDisplay(self):
        # Render as "<op>(index,zero1,zero2)".
        operands = tuple(
            self[field_name].value
            for field_name in ("index", "zero1", "zero2")
        )
        return "%s(%i,%i,%i)" % ((self.op,) + operands)
class OpcodeSpecial_newarray(JavaOpcode): class OpcodeSpecial_newarray(JavaOpcode):
OPSIZE = 2 OPSIZE = 2
@ -659,6 +672,7 @@ class JavaBytecode(FieldSet):
0x98: ("dcmpg", OpcodeNoArgs, "compares two doubles. Stack: value1, value2 -> result"), 0x98: ("dcmpg", OpcodeNoArgs, "compares two doubles. Stack: value1, value2 -> result"),
0x99: ("ifeq", OpcodeShortJump, "if 'value' is 0, branch to the 16-bit instruction offset argument. Stack: value ->"), 0x99: ("ifeq", OpcodeShortJump, "if 'value' is 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9a: ("ifne", OpcodeShortJump, "if 'value' is not 0, branch to the 16-bit instruction offset argument. Stack: value ->"), 0x9a: ("ifne", OpcodeShortJump, "if 'value' is not 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9b: ("iflt", OpcodeShortJump, "if 'value' is less than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9c: ("ifge", OpcodeShortJump, "if 'value' is greater than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"), 0x9c: ("ifge", OpcodeShortJump, "if 'value' is greater than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9d: ("ifgt", OpcodeShortJump, "if 'value' is greater than 0, branch to the 16-bit instruction offset argument. Stack: value ->"), 0x9d: ("ifgt", OpcodeShortJump, "if 'value' is greater than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9e: ("ifle", OpcodeShortJump, "if 'value' is less than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"), 0x9e: ("ifle", OpcodeShortJump, "if 'value' is less than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
@ -689,7 +703,7 @@ class JavaBytecode(FieldSet):
0xb7: ("invokespecial", OpcodeCPIndex, "invoke instance method on object 'objectref', where the method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"), 0xb7: ("invokespecial", OpcodeCPIndex, "invoke instance method on object 'objectref', where the method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xb8: ("invokestatic", OpcodeCPIndex, "invoke a static method, where the method is identified by method reference <argument> in the constant pool. Stack: [arg1, arg2, ...] ->"), 0xb8: ("invokestatic", OpcodeCPIndex, "invoke a static method, where the method is identified by method reference <argument> in the constant pool. Stack: [arg1, arg2, ...] ->"),
0xb9: ("invokeinterface", OpcodeSpecial_invokeinterface, "invokes an interface method on object 'objectref', where the interface method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"), 0xb9: ("invokeinterface", OpcodeSpecial_invokeinterface, "invokes an interface method on object 'objectref', where the interface method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xba: ("xxxunusedxxx", OpcodeNoArgs, "this opcode is reserved for historical reasons. Stack: "), 0xba: ("invokedynamic", OpcodeSpecial_invokedynamic, "invokes a dynamically-computed call site, where the bootstrap method is identified by <argument> in constant pool. Stack: [arg1, arg2, ...] -> "),
0xbb: ("new", OpcodeCPIndex, "creates new object of type identified by class reference <argument> in constant pool. Stack: -> objectref"), 0xbb: ("new", OpcodeCPIndex, "creates new object of type identified by class reference <argument> in constant pool. Stack: -> objectref"),
0xbc: ("newarray", OpcodeSpecial_newarray, "creates new array with 'count' elements of primitive type given in the argument. Stack: count -> arrayref"), 0xbc: ("newarray", OpcodeSpecial_newarray, "creates new array with 'count' elements of primitive type given in the argument. Stack: count -> arrayref"),
0xbd: ("anewarray", OpcodeCPIndex, "creates a new array of references of length 'count' and component type identified by the class reference <argument> in the constant pool. Stack: count -> arrayref"), 0xbd: ("anewarray", OpcodeCPIndex, "creates a new array of references of length 'count' and component type identified by the class reference <argument> in the constant pool. Stack: count -> arrayref"),
@ -762,6 +776,33 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType": elif self.constant_type == "NameAndType":
yield CPIndex(self, "name_index", target_types="Utf8") yield CPIndex(self, "name_index", target_types="Utf8")
yield CPIndex(self, "descriptor_index", target_types="Utf8") yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "MethodHandle":
refkind_map = {
1: ("getField", "Fieldref"),
2: ("getStatic", "Fieldref"),
3: ("putField", "Fieldref"),
4: ("putStatic", "Fieldref"),
5: ("invokeVirtual", "Methodref"),
6: ("invokeStatic", ("Methodref", "InterfaceMethodref")),
7: ("invokeSpecial", ("Methodref", "InterfaceMethodref")),
8: ("newInvokeSpecial", "Methodref"),
9: ("invokeInterface", "InterfaceMethodref"),
}
yield Enum(UInt8(self, "reference_kind"), {k: v[0] for k, v in refkind_map.items()})
target_types = refkind_map[self["reference_kind"].value][1]
yield CPIndex(self, "reference_index", target_types=target_types)
elif self.constant_type == "MethodType":
yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "Dynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "InvokeDynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "Module":
yield CPIndex(self, "name_index", target_types="Utf8")
elif self.constant_type == "Package":
yield CPIndex(self, "name_index", target_types="Utf8")
else: else:
raise ParserError("Not a valid constant pool element type: " raise ParserError("Not a valid constant pool element type: "
+ self["tag"].value) + self["tag"].value)
@ -785,6 +826,21 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType": elif self.constant_type == "NameAndType":
return (self["descriptor_index"].rawvalue(), return (self["descriptor_index"].rawvalue(),
self["name_index"].rawvalue()) self["name_index"].rawvalue())
elif self.constant_type == "MethodHandle":
return (self["reference_kind"].display,
self["reference_index"].rawvalue())
elif self.constant_type == "MethodType":
return self["descriptor_index"].rawvalue()
elif self.constant_type == "Dynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "InvokeDynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "Module":
return self["name_index"].rawvalue()
elif self.constant_type == "Package":
return self["name_index"].rawvalue()
else: else:
# FIXME: Return "<error>" instead of raising an exception? # FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: " raise ParserError("Not a valid constant pool element type: "
@ -811,6 +867,24 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType": elif self.constant_type == "NameAndType":
descriptor, name = self.rawvalue() descriptor, name = self.rawvalue()
return parse_any_descriptor(descriptor, name=name) return parse_any_descriptor(descriptor, name=name)
elif self.constant_type == "MethodHandle":
return "%s(%s)" % (self["reference_kind"].display, self["reference_index"].str())
elif self.constant_type == "MethodType":
return self["descriptor_index"].str()
elif self.constant_type == "Dynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "InvokeDynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "Module":
return self["name_index"].str()
elif self.constant_type == "Package":
return self["name_index"].str()
else: else:
# FIXME: Return "<error>" instead of raising an exception? # FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: " raise ParserError("Not a valid constant pool element type: "
@ -1192,6 +1266,12 @@ class JavaCompiledClassFile(Parser):
"50.0": "JDK 1.6", "50.0": "JDK 1.6",
"51.0": "JDK 1.7", "51.0": "JDK 1.7",
"52.0": "JDK 1.8", "52.0": "JDK 1.8",
"53.0": "JDK 9",
"54.0": "JDK 10",
"55.0": "JDK 11",
"56.0": "JDK 12",
"57.0": "JDK 13",
"58.0": "JDK 14",
} }
# Constants go here since they will probably depend on the detected format # Constants go here since they will probably depend on the detected format
@ -1208,7 +1288,13 @@ class JavaCompiledClassFile(Parser):
9: "Fieldref", 9: "Fieldref",
10: "Methodref", 10: "Methodref",
11: "InterfaceMethodref", 11: "InterfaceMethodref",
12: "NameAndType" 12: "NameAndType",
15: "MethodHandle",
16: "MethodType",
17: "Dynamic",
18: "InvokeDynamic",
19: "Module",
20: "Package",
} }
def validate(self): def validate(self):

View file

@ -10,13 +10,16 @@ Creation: 25 march 2005
""" """
from hachoir.parser import Parser from hachoir.parser import Parser
from hachoir.field import (FieldSet, UInt8, from hachoir.field import (
UInt16, Int32, UInt32, Int64, ParserError, Float64, Field, FieldSet, UInt8,
UInt16, Int32, UInt32, Int64, UInt64,
ParserError, Float64,
Character, RawBytes, PascalString8, TimestampUnix32, Character, RawBytes, PascalString8, TimestampUnix32,
Bit, String) Bit, String, NullBits)
from hachoir.core.endian import LITTLE_ENDIAN from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.bits import long2raw from hachoir.core.bits import long2raw
from hachoir.core.text_handler import textHandler, hexadecimal from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core import config
DISASSEMBLE = False DISASSEMBLE = False
@ -51,6 +54,12 @@ def parseString(parent):
disassembleBytecode(parent["text"]) disassembleBytecode(parent["text"])
def createStringValue(parent):
    """Return the value source for a string-like object.

    The result is one of:

    - the literal "<lnotab>" when the object is named "lnotab";
    - "" when the object has no "text" sub-field. The ASCII parsers only
      create "text" when the declared length is non-zero, so an empty
      string would otherwise fail on the field lookup;
    - the "text" field itself in every other case (the field is returned,
      not its value).
    """
    if parent.name == "lnotab":
        return "<lnotab>"
    # Empty strings have a length field but no "text" field at all.
    if "text" not in parent:
        return ""
    return parent["text"]
def parseStringRef(parent): def parseStringRef(parent):
yield textHandler(UInt32(parent, "ref"), hexadecimal) yield textHandler(UInt32(parent, "ref"), hexadecimal)
@ -58,6 +67,13 @@ def parseStringRef(parent):
def createStringRefDesc(parent): def createStringRefDesc(parent):
return "String ref: %s" % parent["ref"].display return "String ref: %s" % parent["ref"].display
def createStringRefValue(parent):
    """Look up the entry that a string reference points to.

    Reads the "ref" field as an index into ``parent.root.string_table``.
    Returns the table entry, or None when the root has no string table or
    the index is outside of it.
    """
    index = parent["ref"].value
    root = parent.root
    if not hasattr(root, 'string_table'):
        return None
    table = root.string_table
    if index < 0 or index >= len(table):
        return None
    return table[index]
# --- Integers --- # --- Integers ---
@ -69,17 +85,37 @@ def parseInt64(parent):
yield Int64(parent, "value") yield Int64(parent, "value")
def createIntValue(parent):
    """Return the "value" sub-field of *parent* (the field, not its value)."""
    value_field = parent["value"]
    return value_field
def parseLong(parent): def parseLong(parent):
yield Int32(parent, "digit_count") yield Int32(parent, "digit_count")
for index in range(abs(parent["digit_count"].value)): for index in range(abs(parent["digit_count"].value)):
yield UInt16(parent, "digit[]") yield UInt16(parent, "digit[]")
def createLongValue(parent):
    """Rebuild an integer from its "digit_count" and "digit[N]" fields.

    Each digit contributes 15 bits and "digit[0]" is the least significant
    one. The number of digits is the absolute value of "digit_count"; a
    negative "digit_count" makes the result negative.
    """
    digit_count = parent["digit_count"].value
    magnitude = 0
    # Walk from the most significant digit down to digit[0].
    for position in reversed(range(abs(digit_count))):
        magnitude = (magnitude << 15) + parent["digit[%u]" % position].value
    return -magnitude if digit_count < 0 else magnitude
# --- Float and complex --- # --- Float and complex ---
def parseFloat(parent): def parseFloat(parent):
yield PascalString8(parent, "value") yield PascalString8(parent, "value")
def createFloatValue(parent):
    """Convert the value of the "value" sub-field to a Python float."""
    raw_value = parent["value"].value
    return float(raw_value)
def parseBinaryFloat(parent): def parseBinaryFloat(parent):
yield Float64(parent, "value") yield Float64(parent, "value")
@ -94,6 +130,12 @@ def parseBinaryComplex(parent):
yield Float64(parent, "complex") yield Float64(parent, "complex")
def createComplexValue(parent):
    """Build a complex number from the "real" and "complex" sub-fields.

    Both field values are converted with float() first; "complex" holds
    the imaginary part.
    """
    real_part = float(parent["real"].value)
    imaginary_part = float(parent["complex"].value)
    return complex(real_part, imaginary_part)
# --- Tuple and list --- # --- Tuple and list ---
def parseTuple(parent): def parseTuple(parent):
yield UInt32(parent, "count", "Item count") yield UInt32(parent, "count", "Item count")
@ -119,6 +161,12 @@ def createTupleDesc(parent):
return "%s: %s" % (parent.code_info[2], items) return "%s: %s" % (parent.code_info[2], items)
def tupleValueCreator(constructor):
    """Return a value builder for sequence-like objects.

    The returned function collects the values of every "item" field of
    its argument into a list and passes that list to *constructor*.
    """
    def createTupleValue(parent):
        values = []
        for item in parent.array("item"):
            values.append(item.value)
        return constructor(values)

    return createTupleValue
# --- Dict --- # --- Dict ---
def parseDict(parent): def parseDict(parent):
""" """
@ -139,26 +187,58 @@ def createDictDesc(parent):
return "Dict: %s" % ("%s keys" % parent.count) return "Dict: %s" % ("%s keys" % parent.count)
def createDictValue(parent):
    """Build a dict by pairing the "key" fields with the "value" fields.

    Fields are paired in order; each entry maps a key field's value to the
    matching value field's value.
    """
    result = {}
    for key_field, value_field in zip(parent.array("key"), parent.array("value")):
        # Read the key before the value, as a dict display would.
        key = key_field.value
        result[key] = value_field.value
    return result
def parseRef(parent): def parseRef(parent):
yield UInt32(parent, "n", "Reference") yield UInt32(parent, "n", "Reference")
def createRefDesc(parent):
    """Describe a reference object.

    When the "n" field is a valid index into ``parent.root.object_table``
    the description of the referenced entry is shown; otherwise only the
    numeric index is shown.
    """
    index = parent["n"].value
    root = parent.root
    resolvable = hasattr(root, 'object_table') and 0 <= index < len(root.object_table)
    if not resolvable:
        return 'Reference: %d' % index
    return 'Reference: %s' % root.object_table[index].description
def createRefValue(parent):
    """Return the entry of ``parent.root.object_table`` selected by "n".

    Returns None when the root has no object table or the index is
    outside of it.
    """
    index = parent["n"].value
    root = parent.root
    if hasattr(root, 'object_table'):
        table = root.object_table
        if 0 <= index < len(table):
            return table[index]
    return None
def parseASCII(parent):
    """Yield the fields of an ASCII string object.

    A 32-bit "len" field comes first. The "text" field is only produced
    when that length is non-zero, so an empty string has no "text" field.
    """
    length_field = UInt32(parent, "len", "Number of ASCII characters")
    yield length_field
    char_count = length_field.value
    if char_count:
        yield String(parent, "text", char_count, "String content", charset="ASCII")
def parseShortASCII(parent): def parseShortASCII(parent):
size = UInt8(parent, "len", "Number of ASCII characters") size = UInt8(parent, "len", "Number of ASCII characters")
yield size yield size
if size.value:
yield String(parent, "text", size.value, "String content", charset="ASCII") yield String(parent, "text", size.value, "String content", charset="ASCII")
# --- Code --- # --- Code ---
def parseCode(parent): def parseCode(parent):
if 0x3000000 <= parent.root.getVersion(): version = parent.root.getVersion()
if 0x3000000 <= version:
yield UInt32(parent, "arg_count", "Argument count") yield UInt32(parent, "arg_count", "Argument count")
if 0x3080000 <= version:
yield UInt32(parent, "posonlyargcount", "Positional only argument count")
yield UInt32(parent, "kwonlyargcount", "Keyword only argument count") yield UInt32(parent, "kwonlyargcount", "Keyword only argument count")
if version < 0x30B0000:
yield UInt32(parent, "nb_locals", "Number of local variables") yield UInt32(parent, "nb_locals", "Number of local variables")
yield UInt32(parent, "stack_size", "Stack size") yield UInt32(parent, "stack_size", "Stack size")
yield UInt32(parent, "flags") yield UInt32(parent, "flags")
elif 0x2030000 <= parent.root.getVersion(): elif 0x2030000 <= version:
yield UInt32(parent, "arg_count", "Argument count") yield UInt32(parent, "arg_count", "Argument count")
yield UInt32(parent, "nb_locals", "Number of local variables") yield UInt32(parent, "nb_locals", "Number of local variables")
yield UInt32(parent, "stack_size", "Stack size") yield UInt32(parent, "stack_size", "Stack size")
@ -168,54 +248,70 @@ def parseCode(parent):
yield UInt16(parent, "nb_locals", "Number of local variables") yield UInt16(parent, "nb_locals", "Number of local variables")
yield UInt16(parent, "stack_size", "Stack size") yield UInt16(parent, "stack_size", "Stack size")
yield UInt16(parent, "flags") yield UInt16(parent, "flags")
yield Object(parent, "compiled_code") yield Object(parent, "compiled_code")
yield Object(parent, "consts") yield Object(parent, "consts")
yield Object(parent, "names") yield Object(parent, "names")
if 0x30B0000 <= version:
yield Object(parent, "co_localsplusnames")
yield Object(parent, "co_localspluskinds")
else:
yield Object(parent, "varnames") yield Object(parent, "varnames")
if 0x2000000 <= parent.root.getVersion(): if 0x2000000 <= version:
yield Object(parent, "freevars") yield Object(parent, "freevars")
yield Object(parent, "cellvars") yield Object(parent, "cellvars")
yield Object(parent, "filename") yield Object(parent, "filename")
yield Object(parent, "name") yield Object(parent, "name")
if 0x2030000 <= parent.root.getVersion(): if 0x30B0000 <= version:
yield Object(parent, "qualname")
if 0x2030000 <= version:
yield UInt32(parent, "firstlineno", "First line number") yield UInt32(parent, "firstlineno", "First line number")
else: else:
yield UInt16(parent, "firstlineno", "First line number") yield UInt16(parent, "firstlineno", "First line number")
if 0x30A0000 <= version:
yield Object(parent, "linetable")
if 0x30B0000 <= version:
yield Object(parent, "exceptiontable")
else:
yield Object(parent, "lnotab") yield Object(parent, "lnotab")
class Object(FieldSet): class Object(FieldSet):
bytecode_info = { bytecode_info = {
# Don't contains any data # Don't contains any data
'0': ("null", None, "NULL", None), '0': ("null", None, "NULL", None, None),
'N': ("none", None, "None", None), 'N': ("none", None, "None", None, lambda parent: None),
'F': ("false", None, "False", None), 'F': ("false", None, "False", None, lambda parent: False),
'T': ("true", None, "True", None), 'T': ("true", None, "True", None, lambda parent: True),
'S': ("stop_iter", None, "StopIter", None), 'S': ("stop_iter", None, "StopIter", None, None),
'.': ("ellipsis", None, "ELLIPSIS", None), '.': ("ellipsis", None, "ELLIPSIS", None, lambda parent: ...),
'?': ("unknown", None, "Unknown", None), '?': ("unknown", None, "Unknown", None, None),
'i': ("int32", parseInt32, "Int32", None), 'i': ("int32", parseInt32, "Int32", None, createIntValue),
'I': ("int64", parseInt64, "Int64", None), 'I': ("int64", parseInt64, "Int64", None, createIntValue),
'f': ("float", parseFloat, "Float", None), 'f': ("float", parseFloat, "Float", None, createFloatValue),
'g': ("bin_float", parseBinaryFloat, "Binary float", None), 'g': ("bin_float", parseBinaryFloat, "Binary float", None, createFloatValue),
'x': ("complex", parseComplex, "Complex", None), 'x': ("complex", parseComplex, "Complex", None, createComplexValue),
'y': ("bin_complex", parseBinaryComplex, "Binary complex", None), 'y': ("bin_complex", parseBinaryComplex, "Binary complex", None, createComplexValue),
'l': ("long", parseLong, "Long", None), 'l': ("long", parseLong, "Long", None, createLongValue),
's': ("string", parseString, "String", None), 's': ("string", parseString, "String", None, createStringValue),
't': ("interned", parseString, "Interned", None), 't': ("interned", parseString, "Interned", None, createStringValue),
'u': ("unicode", parseString, "Unicode", None), 'u': ("unicode", parseString, "Unicode", None, createStringValue),
'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc), 'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc, createStringRefValue),
'(': ("tuple", parseTuple, "Tuple", createTupleDesc), '(': ("tuple", parseTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc), ')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
'[': ("list", parseTuple, "List", createTupleDesc), '[': ("list", parseTuple, "List", createTupleDesc, tupleValueCreator(list)),
'<': ("set", parseTuple, "Set", createTupleDesc), '<': ("set", parseTuple, "Set", createTupleDesc, tupleValueCreator(set)),
'>': ("frozenset", parseTuple, "Frozen set", createTupleDesc), '>': ("frozenset", parseTuple, "Frozen set", createTupleDesc, tupleValueCreator(frozenset)),
'{': ("dict", parseDict, "Dict", createDictDesc), '{': ("dict", parseDict, "Dict", createDictDesc, createDictValue),
'c': ("code", parseCode, "Code", None), 'c': ("code", parseCode, "Code", None, None),
'r': ("ref", parseRef, "Reference", None), 'r': ("ref", parseRef, "Reference", createRefDesc, createRefValue),
'z': ("short_ascii", parseShortASCII, "Short ASCII", None), 'a': ("ascii", parseASCII, "ASCII", None, createStringValue),
'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None), 'A': ("ascii_interned", parseASCII, "ASCII interned", None, createStringValue),
'z': ("short_ascii", parseShortASCII, "Short ASCII", None, createStringValue),
'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None, createStringValue),
} }
def __init__(self, parent, name, **kw): def __init__(self, parent, name, **kw):
@ -227,64 +323,40 @@ class Object(FieldSet):
self.code_info = self.bytecode_info[code] self.code_info = self.bytecode_info[code]
if not name: if not name:
self._name = self.code_info[0] self._name = self.code_info[0]
if code == "l": if code in ("t", "A", "Z"):
self.createValue = self.createValueLong
elif code in ("i", "I", "f", "g"):
self.createValue = lambda: self["value"].value
elif code == "T":
self.createValue = lambda: True
elif code == "F":
self.createValue = lambda: False
elif code in ("x", "y"):
self.createValue = self.createValueComplex
elif code in ("s", "t", "u"):
self.createValue = self.createValueString
self.createDisplay = self.createDisplayString
if code == 't':
if not hasattr(self.root, 'string_table'): if not hasattr(self.root, 'string_table'):
self.root.string_table = [] self.root.string_table = []
self.root.string_table.append(self) self.root.string_table.append(self)
elif code == 'R':
if hasattr(self.root, 'string_table'):
self.createValue = self.createValueStringRef
def createValueString(self): def createValue(self):
if "text" in self: create = self.code_info[4]
return self["text"].value if create:
res = create(self)
if isinstance(res, Field):
return res.value
else: else:
return "" return res
return None
def createDisplayString(self): def createDisplay(self):
if "text" in self: create = self.code_info[4]
return self["text"].display if create:
else: res = create(self)
return "(empty)" if isinstance(res, Field):
return res.display
def createValueLong(self): res = repr(res)
is_negative = self["digit_count"].value < 0 if len(res) >= config.max_string_length:
count = abs(self["digit_count"].value) res = res[:config.max_string_length] + "..."
total = 0 return res
for index in range(count - 1, -1, -1): return None
total <<= 15
total += self["digit[%u]" % index].value
if is_negative:
total = -total
return total
def createValueStringRef(self):
return self.root.string_table[self['ref'].value].value
def createDisplayStringRef(self):
return self.root.string_table[self['ref'].value].display
def createValueComplex(self):
return complex(
float(self["real"].value),
float(self["complex"].value))
def createFields(self): def createFields(self):
yield BytecodeChar(self, "bytecode", "Bytecode") yield BytecodeChar(self, "bytecode", "Bytecode")
yield Bit(self, "flag_ref", "Is a reference?") yield Bit(self, "flag_ref", "Is a reference?")
if self["flag_ref"].value:
if not hasattr(self.root, 'object_table'):
self.root.object_table = []
self.root.object_table.append(self)
parser = self.code_info[1] parser = self.code_info[1]
if parser: if parser:
yield from parser(self) yield from parser(self)
@ -301,6 +373,16 @@ class BytecodeChar(Character):
static_size = 7 static_size = 7
PY_RELEASE_LEVEL_ALPHA = 0xA
PY_RELEASE_LEVEL_FINAL = 0xF


def VERSION(major, minor, release_level=PY_RELEASE_LEVEL_FINAL, serial=0):
    """Pack version components into a single integer.

    The components are shifted into place and added: major << 24,
    minor << 16, micro << 8, release_level << 4 and serial << 0.
    The micro component is always 0.
    """
    micro = 0
    components = (
        major << 24,
        minor << 16,
        micro << 8,
        release_level << 4,
        serial << 0,
    )
    return sum(components)
class PythonCompiledFile(Parser): class PythonCompiledFile(Parser):
PARSER_TAGS = { PARSER_TAGS = {
"id": "python", "id": "python",
@ -394,7 +476,90 @@ class PythonCompiledFile(Parser):
3377: ("Python 3.6b1 ", 0x3060000), 3377: ("Python 3.6b1 ", 0x3060000),
3378: ("Python 3.6b2 ", 0x3060000), 3378: ("Python 3.6b2 ", 0x3060000),
3379: ("Python 3.6rc1", 0x3060000), 3379: ("Python 3.6rc1", 0x3060000),
3390: ("Python 3.7a0 ", 0x3070000), 3390: ("Python 3.7a1", 0x30700A1),
3391: ("Python 3.7a2", 0x30700A2),
3392: ("Python 3.7a4", 0x30700A4),
3393: ("Python 3.7b1", 0x30700B1),
3394: ("Python 3.7b5", 0x30700B5),
3400: ("Python 3.8a1", VERSION(3, 8)),
3401: ("Python 3.8a1", VERSION(3, 8)),
3410: ("Python 3.8a1", VERSION(3, 8)),
3411: ("Python 3.8b2", VERSION(3, 8)),
3412: ("Python 3.8b2", VERSION(3, 8)),
3413: ("Python 3.8b4", VERSION(3, 8)),
3420: ("Python 3.9a0", VERSION(3, 9)),
3421: ("Python 3.9a0", VERSION(3, 9)),
3422: ("Python 3.9a0", VERSION(3, 9)),
3423: ("Python 3.9a2", VERSION(3, 9)),
3424: ("Python 3.9a2", VERSION(3, 9)),
3425: ("Python 3.9a2", VERSION(3, 9)),
3430: ("Python 3.10a1", VERSION(3, 10)),
3431: ("Python 3.10a1", VERSION(3, 10)),
3432: ("Python 3.10a2", VERSION(3, 10)),
3433: ("Python 3.10a2", VERSION(3, 10)),
3434: ("Python 3.10a6", VERSION(3, 10)),
3435: ("Python 3.10a7", VERSION(3, 10)),
3436: ("Python 3.10b1", VERSION(3, 10)),
3437: ("Python 3.10b1", VERSION(3, 10)),
3438: ("Python 3.10b1", VERSION(3, 10)),
3439: ("Python 3.10b1", VERSION(3, 10)),
3450: ("Python 3.11a1", VERSION(3, 11)),
3451: ("Python 3.11a1", VERSION(3, 11)),
3452: ("Python 3.11a1", VERSION(3, 11)),
3453: ("Python 3.11a1", VERSION(3, 11)),
3454: ("Python 3.11a1", VERSION(3, 11)),
3455: ("Python 3.11a1", VERSION(3, 11)),
3456: ("Python 3.11a1", VERSION(3, 11)),
3457: ("Python 3.11a1", VERSION(3, 11)),
3458: ("Python 3.11a1", VERSION(3, 11)),
3459: ("Python 3.11a1", VERSION(3, 11)),
3460: ("Python 3.11a1", VERSION(3, 11)),
3461: ("Python 3.11a1", VERSION(3, 11)),
3462: ("Python 3.11a2", VERSION(3, 11)),
3463: ("Python 3.11a3", VERSION(3, 11)),
3464: ("Python 3.11a3", VERSION(3, 11)),
3465: ("Python 3.11a3", VERSION(3, 11)),
3466: ("Python 3.11a4", VERSION(3, 11)),
3467: ("Python 3.11a4", VERSION(3, 11)),
3468: ("Python 3.11a4", VERSION(3, 11)),
3469: ("Python 3.11a4", VERSION(3, 11)),
3470: ("Python 3.11a4", VERSION(3, 11)),
3471: ("Python 3.11a4", VERSION(3, 11)),
3472: ("Python 3.11a4", VERSION(3, 11)),
3473: ("Python 3.11a4", VERSION(3, 11)),
3474: ("Python 3.11a4", VERSION(3, 11)),
3475: ("Python 3.11a5", VERSION(3, 11)),
3476: ("Python 3.11a5", VERSION(3, 11)),
3477: ("Python 3.11a5", VERSION(3, 11)),
3478: ("Python 3.11a5", VERSION(3, 11)),
3479: ("Python 3.11a5", VERSION(3, 11)),
3480: ("Python 3.11a5", VERSION(3, 11)),
3481: ("Python 3.11a5", VERSION(3, 11)),
3482: ("Python 3.11a5", VERSION(3, 11)),
3483: ("Python 3.11a5", VERSION(3, 11)),
3484: ("Python 3.11a5", VERSION(3, 11)),
3485: ("Python 3.11a5", VERSION(3, 11)),
3486: ("Python 3.11a6", VERSION(3, 11)),
3487: ("Python 3.11a6", VERSION(3, 11)),
3488: ("Python 3.11a6", VERSION(3, 11)),
3489: ("Python 3.11a6", VERSION(3, 11)),
3490: ("Python 3.11a6", VERSION(3, 11)),
3491: ("Python 3.11a6", VERSION(3, 11)),
3492: ("Python 3.11a7", VERSION(3, 11)),
3493: ("Python 3.11a7", VERSION(3, 11)),
3494: ("Python 3.11a7", VERSION(3, 11)),
3500: ("Python 3.12a1", VERSION(3, 12)),
3501: ("Python 3.12a1", VERSION(3, 12)),
3502: ("Python 3.12a1", VERSION(3, 12)),
3503: ("Python 3.12a1", VERSION(3, 12)),
3504: ("Python 3.12a1", VERSION(3, 12)),
3505: ("Python 3.12a1", VERSION(3, 12)),
3506: ("Python 3.12a1", VERSION(3, 12)),
3507: ("Python 3.12a1", VERSION(3, 12)),
3508: ("Python 3.12a1", VERSION(3, 12)),
3509: ("Python 3.12a1", VERSION(3, 12)),
3510: ("Python 3.12a1", VERSION(3, 12)),
3511: ("Python 3.12a1", VERSION(3, 12)),
} }
# Dictionnary which associate the pyc signature (4-byte long string) # Dictionnary which associate the pyc signature (4-byte long string)
@ -411,13 +576,7 @@ class PythonCompiledFile(Parser):
if self["magic_string"].value != "\r\n": if self["magic_string"].value != "\r\n":
return r"Wrong magic string (\r\n)" return r"Wrong magic string (\r\n)"
version = self.getVersion() if self["content/bytecode"].value != "c":
if version >= 0x3030000 and self['magic_number'].value >= 3200:
offset = 12
else:
offset = 8
value = self.stream.readBits(offset * 8, 7, self.endian)
if value != ord(b'c'):
return "First object bytecode is not code" return "First object bytecode is not code"
return True return True
@ -430,8 +589,23 @@ class PythonCompiledFile(Parser):
def createFields(self): def createFields(self):
yield UInt16(self, "magic_number", "Magic number") yield UInt16(self, "magic_number", "Magic number")
yield String(self, "magic_string", 2, r"Magic string \r\n", charset="ASCII") yield String(self, "magic_string", 2, r"Magic string \r\n", charset="ASCII")
yield TimestampUnix32(self, "timestamp", "Timestamp")
version = self.getVersion() version = self.getVersion()
# PEP 552: Deterministic pycs #31650 (Python 3.7a4); magic=3392
if version >= 0x30700A4:
yield Bit(self, "use_hash", "Is hash based?")
yield Bit(self, "checked")
yield NullBits(self, "reserved", 30)
use_hash = self['use_hash'].value
else:
use_hash = False
if use_hash:
yield UInt64(self, "hash", "SipHash hash of the source file")
else:
yield TimestampUnix32(self, "timestamp", "Timestamp modulo 2**32")
if version >= 0x3030000 and self['magic_number'].value >= 3200: if version >= 0x3030000 and self['magic_number'].value >= 3200:
yield UInt32(self, "filesize", "Size of the Python source file (.py) modulo 2**32") yield UInt32(self, "filesize", "Size of the Python source file (.py) modulo 2**32")
yield Object(self, "content") yield Object(self, "content")

View file

@ -355,7 +355,7 @@ class AsfFile(Parser):
if self.stream.readBytes(0, len(magic)) != magic: if self.stream.readBytes(0, len(magic)) != magic:
return "Invalid magic" return "Invalid magic"
header = self[0] header = self[0]
if not(30 <= header["size"].value <= MAX_HEADER_SIZE): if not (30 <= header["size"].value <= MAX_HEADER_SIZE):
return "Invalid header size (%u)" % header["size"].value return "Invalid header size (%u)" % header["size"].value
return True return True

View file

@ -134,7 +134,7 @@ class MPEG_TS(Parser):
# FIXME: detect using file content, not file name # FIXME: detect using file content, not file name
# maybe detect sync at offset+4 bytes? # maybe detect sync at offset+4 bytes?
source = self.stream.source source = self.stream.source
if not(source and source.startswith("file:")): if not (source and source.startswith("file:")):
return True return True
filename = source[5:].lower() filename = source[5:].lower()
return filename.endswith((".m2ts", ".mts")) return filename.endswith((".m2ts", ".mts"))

View file

@ -244,7 +244,7 @@ class PacketElement(FieldSet):
yield Bits(self, "sync[]", 4) # =2, or 3 if has_dts=True yield Bits(self, "sync[]", 4) # =2, or 3 if has_dts=True
yield Timestamp(self, "pts") yield Timestamp(self, "pts")
if self["has_dts"].value: if self["has_dts"].value:
if not(self["has_pts"].value): if not self["has_pts"].value:
raise ParserError("Invalid PTS/DTS values") raise ParserError("Invalid PTS/DTS values")
yield Bits(self, "sync[]", 4) # =1 yield Bits(self, "sync[]", 4) # =1
yield Timestamp(self, "dts") yield Timestamp(self, "dts")

View file

@ -164,7 +164,7 @@ def _parse(text, start=0, until=None):
if char == 'b': if char == 'b':
new_regex = RegexWord() new_regex = RegexWord()
else: else:
if not(char in REGEX_COMMAND_CHARACTERS or char in " '"): if not (char in REGEX_COMMAND_CHARACTERS or char in " '"):
raise SyntaxError( raise SyntaxError(
"Operator '\\%s' is not supported" % char) "Operator '\\%s' is not supported" % char)
new_regex = RegexString(char) new_regex = RegexString(char)

View file

@ -125,7 +125,7 @@ class PatternMatching:
item = RegexPattern(regex, user) item = RegexPattern(regex, user)
if item.regex.maxLength() is None: if item.regex.maxLength() is None:
raise ValueError( raise ValueError(
"Regular expression with no maximum size has forbidden") "Regular expression with no maximum size is forbidden")
self.regex_patterns.append(item) self.regex_patterns.append(item)
self._need_commit = True self._need_commit = True

View file

@ -1,3 +1,3 @@
#!/usr/bin/env python #!/usr/bin/env python3
from hachoir.subfile.main import main from hachoir.subfile.main import main
main() main()

View file

@ -85,7 +85,7 @@ def main():
stream = FileInputStream(filename) stream = FileInputStream(filename)
with stream: with stream:
subfile = SearchSubfile(stream, values.offset, values.size) subfile = SearchSubfile(stream, values.offset, values.size)
subfile.verbose = not(values.quiet) subfile.verbose = not values.quiet
subfile.debug = values.debug subfile.debug = values.debug
if output: if output:
subfile.setOutput(output) subfile.setOutput(output)

View file

@ -95,7 +95,7 @@ class SearchSubfile:
print("[!] Memory error!", file=stderr) print("[!] Memory error!", file=stderr)
self.mainFooter() self.mainFooter()
self.stream.close() self.stream.close()
return not(main_error) return (not main_error)
def mainHeader(self): def mainHeader(self):
# Fix slice size if needed # Fix slice size if needed
@ -149,7 +149,7 @@ class SearchSubfile:
if parser.content_size is not None: if parser.content_size is not None:
text += " size=%s (%s)" % (parser.content_size // text += " size=%s (%s)" % (parser.content_size //
8, humanFilesize(parser.content_size // 8)) 8, humanFilesize(parser.content_size // 8))
if not(parser.content_size) or parser.content_size // 8 < FILE_MAX_SIZE: if not parser.content_size or parser.content_size // 8 < FILE_MAX_SIZE:
text += ": " + parser.description text += ": " + parser.description
else: else:
text += ": " + parser.__class__.__name__ text += ": " + parser.__class__.__name__