Update hachoir 3.1.2 (f739b43) → 3.2.0 (38d759f).

This commit is contained in:
JackDandy 2023-10-08 00:04:41 +01:00
parent ecd70f546f
commit cbde47c95c
124 changed files with 1639 additions and 482 deletions

View file

@ -2,6 +2,7 @@
* Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1)
* Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495)
* Update hachoir 3.1.2 (f739b43) to 3.2.0 (38d759f)
### 3.30.1 (2023-10-02 22:50:00 UTC)

View file

@ -1,2 +1,2 @@
VERSION = (3, 1, 2)
VERSION = (3, 2, 0)
__version__ = ".".join(map(str, VERSION))

View file

@ -4,7 +4,7 @@ string, number, hexadecimal, etc.
"""
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from struct import calcsize, unpack, error as struct_error
from struct import calcsize, error as struct_error
def swap16(value):
@ -292,20 +292,11 @@ def str2long(data, endian):
>>> str2long(b"\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True
"""
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
try:
return unpack(_struct_format[endian][len(data)], data)[0]
except KeyError:
pass
assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
shift = 0
value = 0
if endian is BIG_ENDIAN:
data = reversed(data)
elif endian is MIDDLE_ENDIAN:
data = reversed(strswapmid(data))
for byte in data:
value += (byte << shift)
shift += 8
return value
if endian == LITTLE_ENDIAN:
return int.from_bytes(data, "little")
elif endian == BIG_ENDIAN:
return int.from_bytes(data, "big")
elif endian == MIDDLE_ENDIAN:
return int.from_bytes(strswapmid(data), "big")
else:
raise ValueError("Invalid endian %s" % (endian,))

View file

@ -244,7 +244,7 @@ class GenericString(Bytes):
and err.end == len(text) \
and self._charset == "UTF-16-LE":
try:
text = str(text + "\0", self._charset, "strict")
text = str(text + b"\0", self._charset, "strict")
self.warning(
"Fix truncated %s string: add missing nul byte" % self._charset)
return text

View file

@ -61,7 +61,7 @@ class TimeDateMSDOS32(FieldSet):
def createValue(self):
return datetime(
1980 + self["year"].value, self["month"].value, self["day"].value,
1980 + self["year"].value, self["month"].value or 1, self["day"].value or 1,
self["hour"].value, self["minute"].value, 2 * self["second"].value)
def createDisplay(self):

View file

@ -124,7 +124,7 @@ def processFile(values, filename,
def processFiles(values, filenames, display=True):
human = not(values.raw)
human = not values.raw
ok = True
priority = int(values.level) * 100 + 99
display_filename = (1 < len(filenames))

View file

@ -1,5 +1,6 @@
from hachoir.parser.archive.ace import AceFile # noqa
from hachoir.parser.archive.ar import ArchiveFile # noqa
from hachoir.parser.archive.arj import ArjParser # noqa
from hachoir.parser.archive.bomstore import BomFile # noqa
from hachoir.parser.archive.bzip2_parser import Bzip2Parser # noqa
from hachoir.parser.archive.cab import CabFile # noqa

View file

@ -0,0 +1,155 @@
"""
ARJ archive file parser
https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt
"""
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.field import (FieldSet, ParserError,
CString, Enum, RawBytes,
UInt8, UInt16, UInt32,
Bytes)
from hachoir.parser import Parser
# Display names for the numeric "host_os" header byte (used with Enum).
HOST_OS = {
0: "MSDOS",
1: "PRIMOS",
2: "UNIX",
3: "AMIGA",
4: "MACDOS",
5: "OS/2",
6: "APPLE GS",
7: "ATARI ST",
8: "NEXT",
9: "VAX VMS",
10: "WIN95",
11: "WIN32",
}
# Display names for the numeric "file_type" header byte (used with Enum).
FILE_TYPE = {
0: "BINARY",
1: "TEXT",
2: "COMMENT",
3: "DIRECTORY",
4: "VOLUME",
5: "CHAPTER",
}
# Two-byte signature expected at the start of every header and of the file.
MAGIC = b"\x60\xEA"
class BaseBlock(FieldSet):
    """Layout shared by the ARJ main header and the per-file headers.

    Subclasses yield ``_header_start_fields()``, then their own specific
    fields, then ``_header_end_fields()``.
    """

    @property
    def isEmpty(self):
        """True when ``basic_header_size`` is zero (end-of-archive marker)."""
        return self["basic_header_size"].value == 0

    def _header_start_fields(self):
        """Yield the magic, the basic header size and the common leading bytes.

        Raises:
            ParserError: if the first two bytes differ from ``MAGIC``.
        """
        yield Bytes(self, "magic", len(MAGIC))
        if self["magic"].value != MAGIC:
            raise ParserError("Wrong header magic")
        yield UInt16(self, "basic_header_size", "zero if end of archive")
        if not self.isEmpty:
            yield UInt8(self, "first_hdr_size")
            yield UInt8(self, "archiver_version")
            yield UInt8(self, "min_archiver_version")
            yield Enum(UInt8(self, "host_os"), HOST_OS)
            yield UInt8(self, "arj_flags")

    def _header_end_fields(self):
        """Yield the trailing common fields: names, CRC and extended headers."""
        yield UInt8(self, "last_chapter")
        fhs = self["first_hdr_size"]
        # Byte offset at which the file name starts: "first_hdr_size" is
        # counted from the position of the field itself.
        name_position = fhs.address // 8 + fhs.value
        current_position = self["last_chapter"].address // 8 + 1
        if name_position > current_position:
            # Skip header bytes this parser does not know about.
            yield RawBytes(self, "reserved2", name_position - current_position)
        yield CString(self, "filename", "File name", charset="ASCII")
        yield CString(self, "comment", "Comment", charset="ASCII")
        yield UInt32(self, "crc", "Header CRC")
        i = 0
        while not self.eof:
            yield UInt16(self, f"extended_header_size_{i}")
            cur_size = self[f"extended_header_size_{i}"].value
            if cur_size == 0:
                # A zero size terminates the list of extended headers.
                break
            # Index the data field like its size/crc siblings: without the
            # suffix a second extended header would reuse the same field
            # name (field names are expected to be unique in a field set).
            yield RawBytes(self, f"extended_header_data_{i}", cur_size)
            yield UInt32(self, f"extended_header_crc_{i}")
            i += 1

    def validate(self):
        """Return True if the stream starts with ``MAGIC``, else an error text."""
        if self.stream.readBytes(0, 2) != MAGIC:
            return "Invalid magic"
        return True
class Header(BaseBlock):
    """Main (archive-level) ARJ header."""

    def createFields(self):
        yield from self._header_start_fields()
        if self.isEmpty:
            # No further fields are yielded for a zero-sized basic header.
            return
        yield UInt8(self, "security_version")
        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
        yield UInt8(self, "reserved")
        for name in (
            "date_time_created",
            "date_time_modified",
            "archive_size",
            "security_envelope_file_position",
        ):
            yield UInt32(self, name)
        for name in ("filespec_position", "security_envelope_data_len"):
            yield UInt16(self, name)
        yield UInt8(self, "encryption_version")
        yield from self._header_end_fields()

    def createDescription(self):
        if not self.isEmpty:
            return "Main header of '%s'" % self["filename"].value
        return "Empty main header"
class Block(BaseBlock):
    """Per-file ARJ header followed by the file's compressed data."""

    def createFields(self):
        yield from self._header_start_fields()
        if self.isEmpty:
            # No further fields are yielded for a zero-sized basic header.
            return
        yield UInt8(self, "method")
        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
        yield UInt8(self, "reserved")
        for name in (
            "date_time_modified",
            "compressed_size",
            "original_size",
            "original_file_crc",
        ):
            yield UInt32(self, name)
        for name in ("filespec_position", "file_access_mode"):
            yield UInt16(self, name)
        yield UInt8(self, "first_chapter")
        yield from self._header_end_fields()
        payload_size = self["compressed_size"].value
        if payload_size > 0:
            yield RawBytes(self, "compressed_data", payload_size)

    def createDescription(self):
        if not self.isEmpty:
            return "File header of '%s'" % self["filename"].value
        return "Empty file header"
class ArjParser(Parser):
    """Parser for ARJ archives: one main header, then file headers."""

    endian = LITTLE_ENDIAN
    PARSER_TAGS = {
        "id": "arj",
        "category": "archive",
        "file_ext": ("arj",),
        "min_size": 4 * 8,
        "description": "ARJ archive"
    }

    def validate(self):
        # Only the two-byte signature at offset 0 is checked.
        signature_ok = self.stream.readBytes(0, 2) == MAGIC
        return True if signature_ok else "Invalid magic"

    def createFields(self):
        main_header = Header(self, "header")
        yield main_header
        if main_header.isEmpty:
            return
        while not self.eof:
            entry = Block(self, "file_header[]")
            yield entry
            if entry.isEmpty:
                # An empty file header ends the loop.
                break

View file

@ -13,6 +13,7 @@ from hachoir.field import (FieldSet,
from hachoir.core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN
from hachoir.core.tools import paddingSize
from hachoir.parser.archive.zlib import build_tree, HuffmanCode, extend_data
import struct
class LZXPreTreeEncodedTree(FieldSet):
@ -146,6 +147,8 @@ class LZXBlock(FieldSet):
self.window_size = self.WINDOW_SIZE[self.compression_level]
self.block_type = self["block_type"].value
curlen = len(self.parent.uncompressed_data)
intel_started = False # Do we perform Intel jump fixups on this block?
if self.block_type in (1, 2): # Verbatim or aligned offset block
if self.block_type == 2:
for i in range(8):
@ -156,6 +159,8 @@ class LZXBlock(FieldSet):
yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
main_tree = build_tree(
self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
if self["main_tree_start"].lengths[0xE8]:
intel_started = True
yield LZXPreTreeEncodedTree(self, "length_tree", 249)
length_tree = build_tree(self["length_tree"].lengths)
current_decoded_size = 0
@ -169,7 +174,7 @@ class LZXBlock(FieldSet):
field._description = "Literal value %r" % chr(
field.realvalue)
current_decoded_size += 1
self.parent.uncompressed_data += chr(field.realvalue)
self.parent._lzx_window.append(field.realvalue)
yield field
continue
position_header, length_header = divmod(
@ -243,8 +248,7 @@ class LZXBlock(FieldSet):
self.parent.r2 = self.parent.r1
self.parent.r1 = self.parent.r0
self.parent.r0 = position
self.parent.uncompressed_data = extend_data(
self.parent.uncompressed_data, length, position)
extend_data(self.parent._lzx_window, length, position)
current_decoded_size += length
elif self.block_type == 3: # Uncompressed block
padding = paddingSize(self.address + self.current_size, 16)
@ -253,6 +257,7 @@ class LZXBlock(FieldSet):
else:
yield PaddingBits(self, "padding[]", 16)
self.endian = LITTLE_ENDIAN
intel_started = True # apparently intel fixup may be needed on uncompressed blocks?
yield UInt32(self, "r[]", "New value of R0")
yield UInt32(self, "r[]", "New value of R1")
yield UInt32(self, "r[]", "New value of R2")
@ -260,18 +265,50 @@ class LZXBlock(FieldSet):
self.parent.r1 = self["r[1]"].value
self.parent.r2 = self["r[2]"].value
yield RawBytes(self, "data", self.uncompressed_size)
self.parent.uncompressed_data += self["data"].value
self.parent._lzx_window += self["data"].value
if self["block_size"].value % 2:
yield PaddingBits(self, "padding", 8)
else:
raise ParserError("Unknown block type %d!" % self.block_type)
# Fixup Intel jumps if necessary (fixups are only applied to the final output, not to the LZX window)
self.parent.uncompressed_data += self.parent._lzx_window[-self.uncompressed_size:]
self.parent._lzx_window = self.parent._lzx_window[-(1 << self.root.compr_level):]
if (
intel_started
and self.parent["filesize_indicator"].value
and self.parent["filesize"].value > 0
):
# Note that we're decoding a block-at-a-time instead of a frame-at-a-time,
# so we need to handle the frame boundaries carefully.
filesize = self.parent["filesize"].value
start_pos = max(0, curlen - 10) # We may need to correct something from the last block
end_pos = len(self.parent.uncompressed_data) - 10
while 1:
jmp_pos = self.parent.uncompressed_data.find(b"\xE8", start_pos, end_pos)
if jmp_pos == -1:
break
if (jmp_pos % 32768) >= (32768 - 10):
# jumps at the end of frames are not fixed up
start_pos = jmp_pos + 1
continue
abs_off, = struct.unpack("<i", self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5])
if -jmp_pos <= abs_off < filesize:
if abs_off < 0:
rel_off = abs_off + filesize
else:
rel_off = abs_off - jmp_pos
self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5] = struct.pack("<i", rel_off)
start_pos = jmp_pos + 5
class LZXStream(Parser):
endian = MIDDLE_ENDIAN
def createFields(self):
self.uncompressed_data = ""
self.uncompressed_data = bytearray()
self._lzx_window = bytearray()
self.r0 = 1
self.r1 = 1
self.r2 = 1
@ -291,6 +328,6 @@ class LZXStream(Parser):
def lzx_decompress(stream, window_bits):
data = LZXStream(stream)
data.compr_level = window_bits
for unused in data:
for _ in data:
pass
return data.uncompressed_data

View file

@ -14,13 +14,13 @@ from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core.tools import paddingSize, alignValue
def extend_data(data, length, offset):
"""Extend data using a length and an offset."""
def extend_data(data: bytearray, length, offset):
"""Extend data using a length and an offset, LZ-style."""
if length >= offset:
new_data = data[-offset:] * (alignValue(length, offset) // offset)
return data + new_data[:length]
data += new_data[:length]
else:
return data + data[-offset:-offset + length]
data += data[-offset:-offset + length]
def build_tree(lengths):
@ -136,9 +136,9 @@ class DeflateBlock(FieldSet):
CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9,
6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
def __init__(self, parent, name, uncomp_data="", *args, **kwargs):
def __init__(self, parent, name, uncomp_data=b"", *args, **kwargs):
FieldSet.__init__(self, parent, name, *args, **kwargs)
self.uncomp_data = uncomp_data
self.uncomp_data = bytearray(uncomp_data)
def createFields(self):
yield Bit(self, "final", "Is this the final block?") # BFINAL
@ -227,7 +227,7 @@ class DeflateBlock(FieldSet):
field._description = "Literal Code %r (Huffman Code %i)" % (
chr(value), field.value)
yield field
self.uncomp_data += chr(value)
self.uncomp_data.append(value)
if value == 256:
field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value
yield field
@ -267,15 +267,14 @@ class DeflateBlock(FieldSet):
extrafield._description = "Distance Extra Bits (%i), total length %i" % (
extrafield.value, distance)
yield extrafield
self.uncomp_data = extend_data(
self.uncomp_data, length, distance)
extend_data(self.uncomp_data, length, distance)
class DeflateData(GenericFieldSet):
endian = LITTLE_ENDIAN
def createFields(self):
uncomp_data = ""
uncomp_data = bytearray()
blk = DeflateBlock(self, "compressed_block[]", uncomp_data)
yield blk
uncomp_data = blk.uncomp_data
@ -326,11 +325,11 @@ class ZlibData(Parser):
yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal)
def zlib_inflate(stream, wbits=None, prevdata=""):
def zlib_inflate(stream, wbits=None):
if wbits is None or wbits >= 0:
return ZlibData(stream)["data"].uncompressed_data
else:
data = DeflateData(None, "root", stream, "", stream.askSize(None))
for unused in data:
for _ in data:
pass
return data.uncompressed_data

View file

@ -451,7 +451,7 @@ class ID3_Chunk(FieldSet):
if size:
cls = None
if not(is_compressed):
if not is_compressed:
tag = self["tag"].value
if tag in ID3_Chunk.handler:
cls = ID3_Chunk.handler[tag]

View file

@ -1312,7 +1312,7 @@ class MP4File(Parser):
if size < 8:
return "Invalid first atom size"
tag = self.stream.readBytes(4 * 8, 4)
if tag not in (b"ftyp", b"moov", b"free"):
if tag not in (b"ftyp", b"moov", b"free", b"skip"):
return "Unknown MOV file type"
return True

View file

@ -240,11 +240,13 @@ class Inode(FieldSet):
return out
def is_fast_symlink(self):
self.seekByte(4 * 15 + 4)
acl = UInt32(self, "file_acl")
acl_addr = self.absolute_address + self.current_size
# skip 15 blocks + version field
acl_addr += (4 * 15 + 4) * 8
acl = self.stream.readBits(acl_addr, 32, self.endian)
b = 0
if acl.value > 0:
if acl > 0:
b = (2 << self["/superblock/log_block_size"].value)
return (self['blocks'].value - b == 0)

View file

@ -139,4 +139,7 @@ def createParser(filename, real_filename=None, tags=None):
if not tags:
tags = []
stream = FileInputStream(filename, real_filename, tags=tags)
return guessParser(stream)
guess = guessParser(stream)
if guess is None:
stream.close()
return guess

View file

@ -387,7 +387,10 @@ class JpegImageData(FieldSet):
end = self.stream.searchBytes(b"\xff", start, MAX_FILESIZE * 8)
if end is None:
# this is a bad sign, since it means there is no terminator
# we ignore this; it likely means a truncated image
# this likely means a truncated image:
# set the size to the remaining length of the stream
# to avoid being forced to parse subfields to calculate size
self._size = self.stream._size - self.absolute_address
break
if self.stream.readBytes(end, 2) == b'\xff\x00':
# padding: false alarm

View file

@ -45,7 +45,7 @@ UNIT_NAME = {1: "Meter"}
COMPRESSION_NAME = {
0: "deflate" # with 32K sliding window
}
MAX_CHUNK_SIZE = 5 * 1024 * 1024 # Maximum chunk size (5 MB)
MAX_CHUNK_SIZE = 64 * 1024 * 1024 # Maximum chunk size heuristic (64 MB)
def headerParse(parent):

View file

@ -597,7 +597,7 @@ class WMF_File(Parser):
yield UInt32(self, "max_record_size", "The size of largest record in 16-bit words")
yield UInt16(self, "nb_params", "Not Used (always 0)")
while not(self.eof):
while not self.eof:
yield Function(self, "func[]")
def isEMF(self):

View file

@ -16,3 +16,4 @@ from hachoir.parser.misc.word_doc import WordDocumentParser # noqa
from hachoir.parser.misc.word_2 import Word2DocumentParser # noqa
from hachoir.parser.misc.mstask import MSTaskFile # noqa
from hachoir.parser.misc.mapsforge_map import MapsforgeMapFile # noqa
from hachoir.parser.misc.fit import FITFile # noqa

View file

@ -0,0 +1,173 @@
"""
Garmin fit file Format parser.
Author: Sebastien Ponce <sebastien.ponce@cern.ch>
"""
from hachoir.parser import Parser
from hachoir.field import FieldSet, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, RawBytes, Bit, Bits, Bytes, String, Float32, Float64
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN
# Maps the "type" byte of a field definition (its base type) to the hachoir
# field class used to decode the corresponding value in a data message.
field_types = {
0: UInt8, # enum
1: Int8, # signed int of 8 bits
2: UInt8, # unsigned int of 8 bits
131: Int16, # signed int of 16 bits
132: UInt16, # unsigned int of 16 bits
133: Int32, # signed int of 32 bits
134: UInt32, # unsigned int of 32 bits
7: String, # string
136: Float32, # float
137: Float64, # double
10: UInt8, # unsigned int of 8 bits with 0 as invalid value
139: UInt16, # unsigned int of 16 bits with 0 as invalid value
140: UInt32, # unsigned int of 32 bits with 0 as invalid value
13: Bytes, # bytes
142: Int64, # signed int of 64 bits
143: UInt64, # unsigned int of 64 bits
144: UInt64 # unsigned int of 64 bits with 0 as invalid value
}
class Header(FieldSet):
    """FIT file header (always little endian)."""

    endian = LITTLE_ENDIAN

    def createFields(self):
        yield UInt8(self, "size", "Header size")
        yield UInt8(self, "protocol", "Protocol version")
        yield UInt16(self, "profile", "Profile version")
        yield UInt32(self, "datasize", "Data size")
        yield RawBytes(self, "datatype", 4)
        # The FIT protocol also allows a legacy 12-byte header that has no
        # trailing CRC: only read the CRC when the declared size includes it,
        # otherwise the first two record bytes would be consumed as a CRC.
        if self["size"].value >= 14:
            yield UInt16(self, "crc", "CRC of first 11 bytes or 0x0")

    def createDescription(self):
        return "Header of fit file. Data size is %d" % (self["datasize"].value)
class NormalRecordHeader(FieldSet):
    """One-byte header of a normal (non compressed-timestamp) record."""

    def createFields(self):
        yield Bit(self, "normal", "Normal header (0)")
        # Description fixed: the closing parenthesis was missing.
        yield Bit(self, "type", "Message type (0 data, 1 definition)")
        yield Bit(self, "typespecific", "0")
        yield Bit(self, "reserved", "0")
        yield Bits(self, "msgType", 4, description="Message type")

    def createDescription(self):
        return "Record header, this is a %s message" % ("definition" if self["type"].value else "data")
class FieldDefinition(FieldSet):
    """Three-byte description of one field inside a definition message."""

    def createFields(self):
        layout = (
            ("number", "Field definition number"),
            ("size", "Size in bytes"),
            ("type", "Base type"),
        )
        for name, text in layout:
            yield UInt8(self, name, text)

    def createDescription(self):
        number = self["number"].value
        size = self["size"].value
        return "Field Definition. Number %d, Size %d" % (number, size)
class DefinitionMessage(FieldSet):
    """Definition message: describes the fields of later data messages."""

    def createFields(self):
        yield NormalRecordHeader(self, "RecordHeader")
        yield UInt8(self, "reserved", "Reserved (0)")
        # Description fixed: the closing parenthesis was missing.
        yield UInt8(self, "architecture", "Architecture (0 little, 1 big endian)")
        # The architecture byte selects the byte order of the fields below.
        self.endian = BIG_ENDIAN if self["architecture"].value else LITTLE_ENDIAN
        yield UInt16(self, "msgNumber", "Message Number")
        yield UInt8(self, "nbFields", "Number of fields")
        for _ in range(self["nbFields"].value):
            yield FieldDefinition(self, "fieldDefinition[]")

    def createDescription(self):
        return "Definition Message. Contains %d fields" % (self["nbFields"].value)
class DataMessage(FieldSet):
    """Data message decoded with the matching definition message.

    The definition is looked up in ``self.parent.msgDefs`` using the
    message type read from the record header; a message type with no
    registered definition raises ``KeyError``, as does a base type missing
    from ``field_types``.
    """

    def createFields(self):
        hdr = NormalRecordHeader(self, "RecordHeader")
        yield hdr
        msgType = hdr["msgType"].value
        msgDef = self.parent.msgDefs[msgType]
        # Byte order comes from the definition and is the same for every
        # field, so set it once instead of on each loop iteration.
        self.endian = BIG_ENDIAN if msgDef["architecture"].value else LITTLE_ENDIAN
        for n in range(msgDef["nbFields"].value):
            desc = msgDef["fieldDefinition[%d]" % n]
            typ = field_types[desc["type"].value]
            size = desc["size"].value
            if typ in (String, Bytes):
                # Variable-length types take their byte length explicitly.
                yield typ(self, "field%d" % n, size)
            elif typ.static_size // 8 == size:
                # Fixed-size numeric types take (parent, name, description):
                # passing the size here would store it as the description.
                yield typ(self, "field%d" % n)
            else:
                # Declared size differs from one element: emit an array.
                for _ in range(size * 8 // typ.static_size):
                    yield typ(self, "field%d[]" % n)

    def createDescription(self):
        return "Data Message"
class TimeStamp(FieldSet):
    """One-byte compressed timestamp record header."""

    def createFields(self):
        yield Bit(self, "timestamp", "TimeStamp (1)")
        # The FIT protocol splits the remaining seven bits into a 2-bit local
        # message type and a 5-bit time offset (previously read as 3 + 4).
        yield Bits(self, "msgType", 2, description="Message type")
        yield Bits(self, "time", 5, description="TimeOffset")

    def createDescription(self):
        return "TimeStamp"
class CRC(FieldSet):
    """Field set wrapping the single 16-bit CRC value."""

    def createFields(self):
        checksum = UInt16(self, "crc", "CRC")
        yield checksum

    def createDescription(self):
        description = "CRC"
        return description
class FITFile(Parser):
    """Parser for Garmin FIT files: header, records, then a CRC."""

    endian = BIG_ENDIAN
    PARSER_TAGS = {
        "id": "fit",
        "category": "misc",
        "file_ext": ("fit",),
        "mime": ("application/fit",),
        "min_size": 14 * 8,
        "description": "Garmin binary fit format"
    }

    def __init__(self, *args, **kwargs):
        Parser.__init__(self, *args, **kwargs)
        # Message type -> DefinitionMessage; read by DataMessage through
        # self.parent.msgDefs.
        self.msgDefs = {}

    def validate(self):
        """Return True if bytes 8..11 are b'.FIT', else an error text."""
        s = self.stream.readBytes(0, 12)
        if s[8:12] != b'.FIT':
            return "Invalid header %d %d %d %d" % tuple(s[8:12])
        return True

    def createFields(self):
        header = Header(self, "header")
        yield header
        # "datasize" counts only the record bytes: per the FIT protocol it
        # excludes the header and the trailing CRC. self.current_size is an
        # absolute position, so the header length must be added to the bound;
        # otherwise record parsing stops one header length too early.
        data_end = (header["size"].value + header["datasize"].value) * 8
        while self.current_size < data_end:
            # Peek at the two top bits of the next record header.
            b = self.stream.readBits(self.absolute_address + self.current_size, 2, self.endian)
            if b == 1:
                defMsg = DefinitionMessage(self, "definition[]")
                msgType = defMsg["RecordHeader"]["msgType"].value
                # Register before yielding so later data messages find it.
                self.msgDefs[msgType] = defMsg
                yield defMsg
            elif b == 0:
                yield DataMessage(self, "data[]")
            else:
                yield TimeStamp(self, "timestamp[]")
        yield CRC(self, "crc")

View file

@ -41,7 +41,7 @@ class UIntVbe(Field):
size += 1
assert size < 100, "UIntVBE is too large"
if not(haveMoreData):
if not haveMoreData:
break
self._size = size * 8
@ -71,7 +71,7 @@ class IntVbe(Field):
size += 1
assert size < 100, "IntVBE is too large"
if not(haveMoreData):
if not haveMoreData:
break
if isNegative:

View file

@ -44,7 +44,7 @@ def getElementEnd(s, limit=b' ', offset=0):
class PDFNumber(Field):
LIMITS = [b'[', b'/', b'\x0D', b']']
LIMITS = [b'[', b'/', b'\x0A', b'\x0D', b'>', b']']
"""
sprintf("%i") or sprintf("%.?f")
"""
@ -81,18 +81,18 @@ class PDFString(Field):
def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc)
val = ""
val = bytearray()
count = 1
off = 1
while not parent.eof:
char = parent.stream.readBytes(self.absolute_address + 8 * off, 1)
# Non-ASCII
if not char.isalpha() or char == '\\':
if not char.isalpha() or char == b'\\':
off += 1
continue
if char == '(':
if char == b'(':
count += 1
if char == ')':
if char == b')':
count -= 1
# Parenthesis block = 0 => end of string
if count == 0:
@ -101,13 +101,15 @@ class PDFString(Field):
# Add it to the string
val += char
off += 1
val = bytes(val)
self._size = 8 * off
self.createValue = lambda: val
class PDFName(Field):
LIMITS = [b'[', b'/', b'<', b']']
LIMITS = [b'[', b'/', b'<', b'>', b']']
"""
String starting with '/', where characters may be written using their
ASCII code (example: '#20' would be ' '
@ -145,7 +147,7 @@ class PDFID(Field):
def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc)
self._size = 8 * getElementEnd(parent, '>')
self._size = 8 * getElementEnd(parent, b'>')
self.createValue = lambda: parent.stream.readBytes(
self.absolute_address + 8, (self._size // 8) - 1)
@ -254,7 +256,7 @@ def parsePDFType(s):
else:
# First parse size
size = getElementEnd(s)
for limit in ['/', '>', '<']:
for limit in [b'/', b'>', b'<']:
other_size = getElementEnd(s, limit)
if other_size is not None:
other_size -= 1
@ -424,7 +426,7 @@ class Catalog(FieldSet):
new_length = getElementEnd(self, limit)
if length is None or (new_length is not None and new_length - len(limit) < length):
length = new_length - len(limit)
yield String(self, "object", length, strip=' ')
yield String(self, "object", length, strip=' \n')
if self.stream.readBytes(self.absolute_address + self.current_size, 2) == b'<<':
yield PDFDictionary(self, "key_list")
# End of catalog: this one has "endobj"
@ -441,9 +443,9 @@ class Trailer(FieldSet):
yield RawBytes(self, "marker", len(self.MAGIC))
yield WhiteSpace(self, "sep[]")
yield String(self, "start_attribute_marker", 2)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size
while self.stream.readBytes(addr, 2) != b'>>':
yield WhiteSpace(self, "sep[]")
t = PDFName(self, "type[]")
yield t
name = t.value.decode()
@ -462,6 +464,7 @@ class Trailer(FieldSet):
yield PDFDictionary(self, "decrypt")
else:
raise ParserError("Don't know trailer type '%s'" % name)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size
yield String(self, "end_attribute_marker", 2)
yield LineEnd(self, "line_end[]")

View file

@ -2,6 +2,8 @@
TrueType Font parser.
Documents:
- "The OpenType Specification"
https://docs.microsoft.com/en-us/typography/opentype/spec/
- "An Introduction to TrueType Fonts: A look inside the TTF format"
written by "NRSI: Computers & Writing Systems"
http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter08
@ -11,11 +13,26 @@ Creation date: 2007-02-08
"""
from hachoir.parser import Parser
from hachoir.field import (FieldSet, ParserError,
UInt16, UInt32, Bit, Bits,
PaddingBits, NullBytes,
String, RawBytes, Bytes, Enum,
TimestampMac32)
from hachoir.field import (
FieldSet,
ParserError,
UInt8,
UInt16,
UInt24,
UInt32,
Int16,
Bit,
Bits,
PaddingBits,
NullBytes,
String,
RawBytes,
Bytes,
Enum,
TimestampMac32,
GenericVector,
PascalString8,
)
from hachoir.core.endian import BIG_ENDIAN
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
@ -69,11 +86,65 @@ CHARSET_MAP = {
3: {1: "UTF-16-BE"},
}
PERMISSIONS = {
0: "Installable embedding",
2: "Restricted License embedding",
4: "Preview & Print embedding",
8: "Editable embedding",
}
class TableHeader(FieldSet):
FWORD = Int16
UFWORD = UInt16
class Tag(String):
    """String field with a fixed length of four bytes."""

    def __init__(self, parent, name, description=None):
        super().__init__(parent, name, 4, description)
class Version16Dot16(FieldSet):
static_size = 32
def createFields(self):
yield String(self, "tag", 4)
yield UInt16(self, "major")
yield UInt16(self, "minor")
def createValue(self):
return float("%u.%x" % (self["major"].value, self["minor"].value))
class Fixed(FieldSet):
    """32-bit signed fixed-point number (16.16)."""

    def createFields(self):
        # The OpenType "Fixed" type is signed: reading the high word as a
        # signed integer makes negative values (e.g. a negative axis minimum)
        # decode correctly with the unsigned fraction added on top.
        yield Int16(self, "int_part")
        yield UInt16(self, "float_part")

    def createValue(self):
        return self["int_part"].value + float(self["float_part"].value) / 65536
class Tuple(FieldSet):
    """Sequence of ``axisCount`` Fixed coordinates."""

    def __init__(self, parent, name, axisCount):
        super().__init__(parent, name, description="Tuple Record")
        self.axisCount = axisCount

    def createFields(self):
        for _axis in range(self.axisCount):
            coordinate = Fixed(self, "coordinate[]")
            yield coordinate
class F2DOT14(FieldSet):
    """16-bit value whose raw signed integer is scaled down by 16384."""

    static_size = 16

    def createFields(self):
        raw = Int16(self, "int_part")
        yield raw

    def createValue(self):
        raw_value = self["int_part"].value
        return raw_value / 16384
class TableHeader(FieldSet):
def createFields(self):
yield Tag(self, "tag")
yield textHandler(UInt32(self, "checksum"), hexadecimal)
yield UInt32(self, "offset")
yield filesizeHandler(UInt32(self, "size"))
@ -83,7 +154,6 @@ class TableHeader(FieldSet):
class NameHeader(FieldSet):
def createFields(self):
yield Enum(UInt16(self, "platformID"), PLATFORM_NAME)
yield UInt16(self, "encodingID")
@ -162,17 +232,273 @@ def parseFontHeader(self):
yield UInt16(self, "glyph_format", "(=0)")
class AxisValueMap(FieldSet):
    """Pair of F2DOT14 coordinates (from, to)."""

    static_size = 32

    def createFields(self):
        for name in ("fromCoordinate", "toCoordinate"):
            yield F2DOT14(self, name)
class SegmentMaps(FieldSet):
    """Counted list of AxisValueMap records for one axis."""

    def createFields(self):
        pair_count = UInt16(
            self, "positionMapCount", "The number of correspondence pairs for this axis"
        )
        yield pair_count
        for _pair in range(pair_count.value):
            yield AxisValueMap(self, "axisValueMaps[]")
def parseAvar(self):
    """Yield the fields of an 'avar' table: header, then one map per axis."""
    for name, text in (
        ("majorVersion", "Major version"),
        ("minorVersion", "Minor version"),
    ):
        yield UInt16(self, name, text)
    yield PaddingBits(self, "reserved[]", 16)
    axis_count = UInt16(self, "axisCount", "The number of variation axes for this font")
    yield axis_count
    for _axis in range(axis_count.value):
        yield SegmentMaps(self, "segmentMaps[]")
class VariationAxisRecord(FieldSet):
    """One variation axis: tag, value range, flags and name ID."""

    def createFields(self):
        yield Tag(self, "axisTag", "Tag identifying the design variation for the axis")
        for name, text in (
            ("minValue", "The minimum coordinate value for the axis"),
            ("defaultValue", "The default coordinate value for the axis"),
            ("maxValue", "The maximum coordinate value for the axis"),
        ):
            yield Fixed(self, name, text)
        # 16 flag bits: 15 reserved bits followed by the "hidden" bit.
        yield PaddingBits(self, "reservedFlags", 15)
        yield Bit(
            self, "hidden", "The axis should not be exposed directly in user interfaces"
        )
        name_id_text = (
            "The name ID for entries in the 'name' table that provide a display name for this axis"
        )
        yield UInt16(self, "axisNameID", name_id_text)
class InstanceRecord(FieldSet):
    """Named instance: name ID, flags, coordinates, optional PostScript name ID."""

    def __init__(self, parent, name, axisCount, hasPSNameID=False):
        super().__init__(parent, name, description="Instance record")
        self.hasPSNameID = hasPSNameID
        self.axisCount = axisCount

    def createFields(self):
        yield UInt16(
            self, "subfamilyNameID", "Name ID for subfamily names for this instance"
        )
        yield PaddingBits(self, "reservedFlags", 16)
        yield Tuple(self, "coordinates", axisCount=self.axisCount)
        if not self.hasPSNameID:
            return
        yield UInt16(
            self,
            "postScriptNameID",
            "Name ID for PostScript names for this instance",
        )
def parseFvar(self):
    """Yield the fields of an 'fvar' table: header, axes, then instances."""
    yield UInt16(self, "majorVersion", "Major version")
    yield UInt16(self, "minorVersion", "Minor version")
    yield UInt16(
        self, "axisArrayOffset", "Offset to the start of the VariationAxisRecord array."
    )
    yield PaddingBits(self, "reserved[]", 16)
    yield UInt16(self, "axisCount", "The number of variation axes for this font")
    yield UInt16(self, "axisSize", "The size in bytes of each VariationAxisRecord")
    yield UInt16(self, "instanceCount", "The number of named instances for this font")
    yield UInt16(self, "instanceSize", "The size in bytes of each InstanceRecord")
    # The fixed part above is 16 bytes; skip any gap before the axis array.
    if self["axisArrayOffset"].value > 16:
        yield PaddingBits(self, "padding", 8 * (self["axisArrayOffset"].value - 16))
    axis_count = self["axisCount"].value
    for _ in range(axis_count):
        yield VariationAxisRecord(self, "axes[]")
    # An InstanceRecord is 2 (subfamilyNameID) + 2 (flags) bytes plus one
    # 4-byte Fixed per axis, optionally followed by a 2-byte postScriptNameID.
    # The previous test used 2 bytes per axis and so never matched the size
    # of a record that carries the PostScript name ID.
    has_ps_name_id = self["instanceSize"].value == 4 * axis_count + 6
    for _ in range(self["instanceCount"].value):
        yield InstanceRecord(
            self,
            "instances[]",
            axisCount=axis_count,
            hasPSNameID=has_ps_name_id,
        )
class EncodingRecord(FieldSet):
    """Platform ID, encoding ID and the offset of the matching subtable."""

    static_size = 64

    def createFields(self):
        platform = UInt16(self, "platformID")
        yield Enum(platform, PLATFORM_NAME)
        yield UInt16(self, "encodingID")
        # The offset field is also kept on the instance as ``self.offset``.
        offset_field = UInt32(self, "subtableOffset")
        self.offset = offset_field
        yield offset_field
class CmapTable0(FieldSet):
    """cmap subtable format 0: header plus a 256-entry byte mapping."""

    def createFields(self):
        for name, text in (
            ("format", "Table format"),
            ("length", "Length in bytes"),
            ("language", "Language ID"),
        ):
            yield UInt16(self, name, text)
        yield GenericVector(self, "mapping", 256, UInt8)
class CmapTable4(FieldSet):
    """cmap subtable format 4: segment arrays plus a trailing glyph ID array."""

    def createFields(self):
        yield UInt16(self, "format", "Table format")
        yield UInt16(self, "length", "Length in bytes")
        yield UInt16(self, "language", "Language ID")
        yield UInt16(self, "segCountX2", "Twice the number of segments")
        segments = self["segCountX2"].value // 2
        yield UInt16(self, "searchRange")
        yield UInt16(self, "entrySelector")
        yield UInt16(self, "rangeShift")
        yield GenericVector(self, "endCode", segments, UInt16)
        yield PaddingBits(self, "reserved[]", 16)
        yield GenericVector(self, "startCode", segments, UInt16)
        yield GenericVector(self, "idDelta", segments, Int16)
        yield GenericVector(self, "idRangeOffsets", segments, UInt16)
        # Number of 16-bit glyph IDs left in the subtable. Integer division
        # is required: true division produced a float item count, which
        # GenericVector cannot use as a length.
        remainder = (self["length"].value - self.current_size // 8) // 2
        if remainder:
            yield GenericVector(self, "glyphIdArray", remainder, UInt16)
class CmapTable6(FieldSet):
    """cmap subtable format 6: header plus ``entryCount`` glyph IDs."""

    def createFields(self):
        for name, text in (
            ("format", "Table format"),
            ("length", "Length in bytes"),
            ("language", "Language ID"),
            ("firstCode", "First character code of subrange"),
        ):
            yield UInt16(self, name, text)
        entry_count = UInt16(self, "entryCount", "Number of character codes in subrange")
        yield entry_count
        yield GenericVector(self, "glyphIdArray", entry_count.value, UInt16)
class SequentialMapGroup(FieldSet):
    """Character code range and the glyph index of its first character."""

    def createFields(self):
        yield UInt32(self, "startCharCode", "First character code in this group")
        # Description fixed: it was copy-pasted from startCharCode.
        yield UInt32(self, "endCharCode", "Last character code in this group")
        yield UInt32(
            self,
            "startGlyphID",
            "Glyph index corresponding to the starting character code",
        )
class CmapTable12(FieldSet):
    """cmap subtable format 12: header plus ``numGroups`` map groups."""

    def createFields(self):
        yield UInt16(self, "format", "Table format")
        yield PaddingBits(self, "reserved[]", 16)
        for name, text in (
            ("length", "Length in bytes"),
            ("language", "Language ID"),
        ):
            yield UInt32(self, name, text)
        group_count = UInt32(self, "numGroups", "Number of groupings which follow")
        yield group_count
        for _group in range(group_count.value):
            yield SequentialMapGroup(self, "mapgroup[]")
class VariationSelector(FieldSet):
    """Variation selector with offsets to its two UVS tables."""

    def createFields(self):
        yield UInt24(self, "varSelector", "Variation selector")
        for name, text in (
            ("defaultUVSOffset", "Offset to default UVS table"),
            ("nonDefaultUVSOffset", "Offset to non-default UVS table"),
        ):
            yield UInt32(self, name, text)
class CmapTable14(FieldSet):
    """cmap subtable, format 14: header followed by variation selector records."""

    def createFields(self):
        """Yield the header, then one VariationSelector per declared record."""
        yield UInt16(self, "format", "Table format")
        yield UInt32(self, "length", "Length in bytes")
        record_count = UInt32(
            self, "numVarSelectorRecords", "Number of variation selector records"
        )
        yield record_count
        for _ in range(record_count.value):
            yield VariationSelector(self, "variationSelector[]")
def parseCmap(self):
    """Parse the "cmap" table: encoding records, then each referenced subtable.

    Subtables are visited in increasing offset order; an offset shared by
    several encoding records is parsed once. Subtable formats other than
    0, 4, 6, 12 and 14 are left unparsed.
    """
    subtable_parsers = {
        0: (CmapTable0, "cmap table format 0"),
        4: (CmapTable4, "cmap table format 4"),
        6: (CmapTable6, "cmap table format 6"),
        12: (CmapTable12, "cmap table format 12"),
        14: (CmapTable14, "cmap table format 14"),
    }

    yield UInt16(self, "version")
    numTables = UInt16(self, "numTables", "Number of encoding tables")
    yield numTables

    records = []
    for _ in range(numTables.value):
        record = EncodingRecord(self, "encodingRecords[]")
        yield record
        records.append(record)

    previous = None
    for record in sorted(records, key=lambda field: field["subtableOffset"].value):
        offset = record["subtableOffset"].value
        if previous and previous == offset:
            continue
        previous = offset
        # Add padding if any
        padding = self.seekByte(offset, relative=True, null=False)
        if padding:
            yield padding
        # Built only to read the format word; it is deliberately not yielded,
        # the chosen subtable class re-reads it as its own first field.
        subtable_format = UInt16(self, "format").value
        selected = subtable_parsers.get(subtable_format)
        if selected is not None:
            table_class, table_name = selected
            yield table_class(self, table_name)
class SignatureRecord(FieldSet):
    """One signature record of the DSIG table, read as three 16-bit values."""

    def createFields(self):
        """Yield the record's format, length and signature block offset."""
        layout = (
            ("format", "Table format"),
            ("length", "Length of signature"),
            ("signatureBlockOffset", "Offset to signature block"),
        )
        for name, description in layout:
            yield UInt16(self, name, description)
class SignatureBlock(FieldSet):
    """A DSIG signature block: reserved bits, a length, then the signature."""

    def createFields(self):
        """Yield 32 reserved bits, the length field and the signature bytes."""
        yield PaddingBits(self, "reserved[]", 32)
        signature_length = UInt32(
            self,
            "length",
            "Length (in bytes) of the PKCS#7 packet in the signature field",
        )
        yield signature_length
        yield String(self, "signature", signature_length.value, "Signature block")
def parseDSIG(self):
    """Parse the "DSIG" table header and its signature records.

    After the records, the parser seeks to each distinct signature block
    offset (in increasing order) and yields any gap as padding; the blocks
    themselves are not parsed here. Whatever is left is emitted as
    "padding_end".
    """
    yield UInt32(self, "version")
    signature_count = UInt16(
        self, "numSignatures", "Number of signatures in the table"
    )
    yield signature_count
    # NOTE(review): only 8 flag bits (1 + 7) are consumed here -- confirm the
    # width of the flags field against the DSIG table specification.
    yield Bit(self, "flag", "Cannot be resigned")
    yield PaddingBits(self, "reserved[]", 7)

    records = []
    for _ in range(signature_count.value):
        record = SignatureRecord(self, "signatureRecords[]")
        records.append(record)
        yield record

    previous = None
    by_offset = sorted(
        records, key=lambda field: field["signatureBlockOffset"].value
    )
    for record in by_offset:
        offset = record["signatureBlockOffset"].value
        if previous and previous == offset:
            continue
        previous = offset
        # Add padding if any
        gap = self.seekByte(offset, relative=True, null=False)
        if gap:
            yield gap

    remaining_bytes = (self.size - self.current_size) // 8
    if remaining_bytes:
        yield NullBytes(self, "padding_end", remaining_bytes)
def parseNames(self):
# Read header
yield UInt16(self, "format")
if self["format"].value != 0:
raise ParserError("TTF (names): Invalid format (%u)" %
self["format"].value)
raise ParserError("TTF (names): Invalid format (%u)" % self["format"].value)
yield UInt16(self, "count")
yield UInt16(self, "offset")
if MAX_NAME_COUNT < self["count"].value:
raise ParserError("Invalid number of names (%s)"
% self["count"].value)
raise ParserError("Invalid number of names (%s)" % self["count"].value)
# Read name index
entries = []
@ -208,17 +534,210 @@ def parseNames(self):
# Read value
size = entry["length"].value
if size:
yield String(self, "value[]", size, entry.description, charset=entry.getCharset())
yield String(
self, "value[]", size, entry.description, charset=entry.getCharset()
)
padding = (self.size - self.current_size) // 8
if padding:
yield NullBytes(self, "padding_end", padding)
def parseMaxp(self):
    """Parse the "maxp" table.

    The format version and glyph count are always read; the thirteen limit
    fields follow only when the format value is at least 1.
    """
    # Read header
    table_format = Version16Dot16(self, "format", "format version")
    yield table_format
    yield UInt16(self, "numGlyphs", "Number of glyphs")
    if table_format.value < 1:
        return

    limits = (
        ("maxPoints", "Maximum points in a non-composite glyph"),
        ("maxContours", "Maximum contours in a non-composite glyph"),
        ("maxCompositePoints", "Maximum points in a composite glyph"),
        ("maxCompositeContours", "Maximum contours in a composite glyph"),
        ("maxZones", "Do instructions use the twilight zone?"),
        ("maxTwilightPoints", "Maximum points used in Z0"),
        ("maxStorage", "Number of Storage Area locations"),
        ("maxFunctionDefs", "Number of function definitions"),
        ("maxInstructionDefs", "Number of instruction definitions"),
        ("maxStackElements", "Maximum stack depth"),
        ("maxSizeOfInstructions", "Maximum byte count for glyph instructions"),
        ("maxComponentElements", "Maximum number of components at glyph top level"),
        ("maxComponentDepth", "Maximum level of recursion"),
    )
    for name, description in limits:
        yield UInt16(self, name, description)
def parseHhea(self):
    """Parse the "hhea" table as a fixed sequence of 16-bit fields."""
    leading_fields = (
        (UInt16, "majorVersion", "Major version"),
        (UInt16, "minorVersion", "Minor version"),
        (FWORD, "ascender", "Typographic ascent"),
        (FWORD, "descender", "Typographic descent"),
        (FWORD, "lineGap", "Typographic linegap"),
        (UFWORD, "advanceWidthMax", "Maximum advance width"),
        (FWORD, "minLeftSideBearing", "Minimum left sidebearing value"),
        (FWORD, "minRightSideBearing", "Minimum right sidebearing value"),
        (FWORD, "xMaxExtent", "Maximum X extent"),
        (Int16, "caretSlopeRise", "Caret slope rise"),
        (Int16, "caretSlopeRun", "Caret slope run"),
        (Int16, "caretOffset", "Caret offset"),
    )
    for field_class, name, description in leading_fields:
        yield field_class(self, name, description)
    # Four reserved 16-bit slots sit between the caret data and the metrics.
    yield GenericVector(self, "reserved", 4, Int16)
    yield Int16(self, "metricDataFormat", "Metric data format")
    yield UInt16(self, "numberOfHMetrics", "Number of horizontal metrics")
class fsType(FieldSet):
    """The 16-bit "fsType" field of the OS/2 table, split into bit fields."""

    def createFields(self):
        """Yield the permission bits, two flags and the reserved bit runs."""
        permissions = Bits(self, "usage_permissions", 4)
        yield Enum(permissions, PERMISSIONS)
        yield PaddingBits(self, "reserved[]", 4)
        flags = (
            ("no_subsetting", "Font may not be subsetted prior to embedding"),
            ("bitmap_embedding", "Only bitmaps contained in the font may be embedded"),
        )
        for name, description in flags:
            yield Bit(self, name, description)
        yield PaddingBits(self, "reserved[]", 6)
def parseOS2(self):
    """Parse the "OS/2" table.

    The base fields are always read; further groups are appended when the
    table version is at least 1, 2 and 5 respectively.
    """
    table_version = UInt16(self, "version", "Table version")
    yield table_version
    yield Int16(self, "xAvgCharWidth")
    yield UInt16(self, "usWeightClass")
    yield UInt16(self, "usWidthClass")
    yield fsType(self, "fsType")

    signed_metrics = (
        "ySubscriptXSize",
        "ySubscriptYSize",
        "ySubscriptXOffset",
        "ySubscriptYOffset",
        "ySuperscriptXSize",
        "ySuperscriptYSize",
        "ySuperscriptXOffset",
        "ySuperscriptYOffset",
        "yStrikeoutSize",
        "yStrikeoutPosition",
        "sFamilyClass",
    )
    for name in signed_metrics:
        yield Int16(self, name)

    yield GenericVector(self, "panose", 10, UInt8)
    unicode_ranges = (
        "ulUnicodeRange1",
        "ulUnicodeRange2",
        "ulUnicodeRange3",
        "ulUnicodeRange4",
    )
    for name in unicode_ranges:
        yield UInt32(self, name)
    yield Tag(self, "achVendID", "Vendor ID")

    for name in ("fsSelection", "usFirstCharIndex", "usLastCharIndex"):
        yield UInt16(self, name)
    for name in ("sTypoAscender", "sTypoDescender", "sTypoLineGap"):
        yield Int16(self, name)
    for name in ("usWinAscent", "usWinDescent"):
        yield UInt16(self, name)

    # The thresholds are increasing, so stopping at the first one that fails
    # is the same as testing each of them independently.
    version = table_version.value
    if version < 1:
        return
    yield UInt32(self, "ulCodePageRange1")
    yield UInt32(self, "ulCodePageRange2")

    if version < 2:
        return
    yield Int16(self, "sxHeight")
    yield Int16(self, "sCapHeight")
    yield UInt16(self, "usDefaultChar")
    yield UInt16(self, "usBreakChar")
    yield UInt16(self, "usMaxContext")

    if version < 5:
        return
    yield UInt16(self, "usLowerOpticalPointSize")
    yield UInt16(self, "usUpperOpticalPointSize")
def parsePost(self):
    """Parse the "post" table.

    The fixed header is always read. Version 2.0 adds a glyph count, an
    index array and one Pascal string for every index >= 258. Version 2.5
    adds a glyph count and an offset array.
    """
    yield Version16Dot16(self, "version", "Table version")
    yield Fixed(
        self,
        "italicAngle",
        "Italic angle in counter-clockwise degrees from the vertical.",
    )
    yield FWORD(self, "underlinePosition", "Top of underline to baseline")
    yield FWORD(self, "underlineThickness", "Suggested underline thickness")
    yield UInt32(self, "isFixedPitch", "Is the font fixed pitch?")
    yield UInt32(self, "minMemType42", "Minimum memory usage (OpenType)")
    yield UInt32(self, "maxMemType42", "Maximum memory usage (OpenType)")
    yield UInt32(self, "minMemType1", "Minimum memory usage (Type 1)")
    yield UInt32(self, "maxMemType1", "Maximum memory usage (Type 1)")

    version = self["version"].value
    if version == 2.0:
        yield UInt16(self, "numGlyphs")
        indices = GenericVector(
            self,
            "Array of indices into the string data",
            self["numGlyphs"].value,
            UInt16,
            "glyphNameIndex",
        )
        yield indices
        for gid, index in enumerate(indices):
            # Only indices from 258 upwards have a name string stored here.
            if index.value >= 258:
                yield PascalString8(self, "glyphname[%i]" % gid)
    # This branch used to test "== 2.0" a second time and so could never run;
    # the offset-array layout it parses belongs to version 2.5.
    # NOTE(review): confirm the element width of the 2.5 offset array against
    # the 'post' table specification.
    elif version == 2.5:
        yield UInt16(self, "numGlyphs")
        indices = GenericVector(
            self,
            "Difference between graphic index and standard order of glyph",
            self["numGlyphs"].value,
            UInt16,
            "offset",
        )
        yield indices
# This is work-in-progress until I work out good ways to do random-access on offsets
# Placeholder sub-table parsers: all four names are bound to the same no-op
# lambda, which takes one argument and returns None.
parseScriptList = (
parseFeatureList
) = parseLookupList = parseFeatureVariationsTable = lambda x: None
def parseGSUB(self):
    """Parse the "GSUB" table header and emit its sub-tables as raw bytes.

    The version and the sub-table offsets are read as fields. The regions
    between consecutive (sorted) offsets are then yielded as RawBytes; the
    sub-table parsers are recorded but not invoked yet.
    """
    yield UInt16(self, "majorVersion", "Major version")
    minor_version = UInt16(self, "minorVersion", "Minor version")
    yield minor_version
    SUBTABLES = [
        ("script list", parseScriptList),
        ("feature list", parseFeatureList),
        ("lookup list", parseLookupList),
    ]
    offsets = []
    for description, parser in SUBTABLES:
        # "script list" -> "ScriptList" -> field name "scriptList"
        name = description.title().replace(" ", "")
        offset = UInt16(
            self, name[0].lower() + name[1:], "Offset to %s table" % description
        )
        yield offset
        offsets.append((offset.value, parser))
    # The minor version field of this table is "minorVersion"; the previous
    # lookup of "min_ver" named a field this table never defines.
    if minor_version.value == 1:
        offset = UInt32(
            self, "featureVariationsOffset", "Offset to feature variations table"
        )
        # Yield the field so its four bytes are part of the parsed header.
        yield offset
        offsets.append((offset.value, parseFeatureVariationsTable))
    offsets.sort(key=lambda field: field[0])
    padding = self.seekByte(offsets[0][0], null=True)
    if padding:
        yield padding
    lastOffset = offsets[0][0]
    for offset, parser in offsets[1:]:
        # The dedicated parser is not called yet: the region is kept as raw
        # content.
        yield RawBytes(self, "content", offset - lastOffset)
        lastOffset = offset
class Table(FieldSet):
TAG_INFO = {
"DSIG": ("DSIG", "Digital Signature", parseDSIG),
"GSUB": ("GSUB", "Glyph Substitutions", parseGSUB),
"avar": ("avar", "Axis variation table", parseAvar),
"cmap": ("cmap", "Character to Glyph Index Mapping", parseCmap),
"fvar": ("fvar", "Font variations table", parseFvar),
"head": ("header", "Font header", parseFontHeader),
"hhea": ("hhea", "Horizontal Header", parseHhea),
"maxp": ("maxp", "Maximum Profile", parseMaxp),
"name": ("names", "Names", parseNames),
"OS/2": ("OS_2", "OS/2 and Windows Metrics", parseOS2),
"post": ("post", "PostScript", parsePost),
}
def __init__(self, parent, name, table, **kw):
@ -251,10 +770,15 @@ class TrueTypeFontFile(Parser):
}
def validate(self):
if self["maj_ver"].value != 1:
return "Invalid major version (%u)" % self["maj_ver"].value
if self["min_ver"].value != 0:
return "Invalid minor version (%u)" % self["min_ver"].value
if self["maj_ver"].value == 1 and self["min_ver"].value == 0:
pass
elif self["maj_ver"].value == 0x4F54 and self["min_ver"].value == 0x544F:
pass
else:
return "Invalid version (%u.%u)" % (
self["maj_ver"].value,
self["min_ver"].value,
)
if not (MIN_NB_TABLE <= self["nb_table"].value <= MAX_NB_TABLE):
return "Invalid number of table (%u)" % self["nb_table"].value
return True

View file

@ -13,7 +13,7 @@ class HachoirParser(object):
"""
A parser is the root of all other fields. It creates the first level of fields
and has special attributes and methods:
- tags: dictionnary with keys:
- tags: dictionary with keys:
- "file_ext": classical file extensions (string or tuple of strings) ;
- "mime": MIME type(s) (string or tuple of strings) ;
- "description": String describing the parser.

View file

@ -19,7 +19,7 @@ from hachoir.parser.program.exe_ne import NE_Header
from hachoir.parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader
from hachoir.parser.program.exe_res import PE_Resource, NE_VersionInfoNode
MAX_NB_SECTION = 50
MAX_NB_SECTION = 100
class MSDosHeader(FieldSet):

View file

@ -1,5 +1,5 @@
from hachoir.field import (FieldSet, ParserError,
Bit, UInt8, UInt16, UInt32, TimestampUnix32,
Bit, UInt8, UInt16, UInt32, UInt64, TimestampUnix32,
Bytes, String, Enum,
PaddingBytes, PaddingBits, NullBytes, NullBits)
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
@ -175,10 +175,13 @@ class PE_OptHeader(FieldSet):
}
def createFields(self):
yield UInt16(self, "signature", "PE optional header signature (0x010b)")
# TODO: Support PE32+ (signature=0x020b)
if self["signature"].value != 0x010b:
yield UInt16(self, "signature", "PE optional header signature (0x010b | 0x020b)")
if self["signature"].value != 0x010b and self["signature"].value != 0x020b:
raise ParserError("Invalid PE optional header signature")
is_pe32plus = self["signature"].value == 0x020b
VarUInt = UInt64 if is_pe32plus else UInt32
yield UInt8(self, "maj_lnk_ver", "Major linker version")
yield UInt8(self, "min_lnk_ver", "Minor linker version")
yield filesizeHandler(UInt32(self, "size_code", "Size of code"))
@ -186,8 +189,9 @@ class PE_OptHeader(FieldSet):
yield filesizeHandler(UInt32(self, "size_uninit_data", "Size of uninitialized data"))
yield textHandler(UInt32(self, "entry_point", "Address (RVA) of the code entry point"), hexadecimal)
yield textHandler(UInt32(self, "base_code", "Base (RVA) of code"), hexadecimal)
if not is_pe32plus:
yield textHandler(UInt32(self, "base_data", "Base (RVA) of data"), hexadecimal)
yield textHandler(UInt32(self, "image_base", "Image base (RVA)"), hexadecimal)
yield textHandler(VarUInt(self, "image_base", "Image base (RVA)"), hexadecimal)
yield filesizeHandler(UInt32(self, "sect_align", "Section alignment"))
yield filesizeHandler(UInt32(self, "file_align", "File alignment"))
yield UInt16(self, "maj_os_ver", "Major OS version")
@ -202,10 +206,10 @@ class PE_OptHeader(FieldSet):
yield textHandler(UInt32(self, "checksum"), hexadecimal)
yield Enum(UInt16(self, "subsystem"), self.SUBSYSTEM_NAME)
yield UInt16(self, "dll_flags")
yield filesizeHandler(UInt32(self, "size_stack_reserve"))
yield filesizeHandler(UInt32(self, "size_stack_commit"))
yield filesizeHandler(UInt32(self, "size_heap_reserve"))
yield filesizeHandler(UInt32(self, "size_heap_commit"))
yield filesizeHandler(VarUInt(self, "size_stack_reserve"))
yield filesizeHandler(VarUInt(self, "size_stack_commit"))
yield filesizeHandler(VarUInt(self, "size_heap_reserve"))
yield filesizeHandler(VarUInt(self, "size_heap_commit"))
yield UInt32(self, "loader_flags")
yield UInt32(self, "nb_directory", "Number of RVA and sizes")
for index in range(self["nb_directory"].value):

View file

@ -435,6 +435,19 @@ class OpcodeSpecial_invokeinterface(JavaOpcode):
return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["count"].value, self["zero"].value)
class OpcodeSpecial_invokedynamic(JavaOpcode):
    """The invokedynamic instruction: opcode, constant pool index, two zero bytes."""

    OPSIZE = 5

    def createFields(self):
        """Yield the opcode byte, the constant pool index and both zero bytes."""
        yield UInt8(self, "opcode")
        yield CPIndex(self, "index")
        for name in ("zero1", "zero2"):
            yield UInt8(self, name, "Must be zero.")

    def createDisplay(self):
        """Return the opcode name followed by its three operand values."""
        operands = tuple(self[name].value for name in ("index", "zero1", "zero2"))
        return "%s(%i,%i,%i)" % ((self.op,) + operands)
class OpcodeSpecial_newarray(JavaOpcode):
OPSIZE = 2
@ -659,6 +672,7 @@ class JavaBytecode(FieldSet):
0x98: ("dcmpg", OpcodeNoArgs, "compares two doubles. Stack: value1, value2 -> result"),
0x99: ("ifeq", OpcodeShortJump, "if 'value' is 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9a: ("ifne", OpcodeShortJump, "if 'value' is not 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9b: ("iflt", OpcodeShortJump, "if 'value' is less than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9c: ("ifge", OpcodeShortJump, "if 'value' is greater than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9d: ("ifgt", OpcodeShortJump, "if 'value' is greater than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9e: ("ifle", OpcodeShortJump, "if 'value' is less than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
@ -689,7 +703,7 @@ class JavaBytecode(FieldSet):
0xb7: ("invokespecial", OpcodeCPIndex, "invoke instance method on object 'objectref', where the method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xb8: ("invokestatic", OpcodeCPIndex, "invoke a static method, where the method is identified by method reference <argument> in the constant pool. Stack: [arg1, arg2, ...] ->"),
0xb9: ("invokeinterface", OpcodeSpecial_invokeinterface, "invokes an interface method on object 'objectref', where the interface method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xba: ("xxxunusedxxx", OpcodeNoArgs, "this opcode is reserved for historical reasons. Stack: "),
0xba: ("invokedynamic", OpcodeSpecial_invokedynamic, "invokes a dynamically-computed call site, where the bootstrap method is identified by <argument> in constant pool. Stack: [arg1, arg2, ...] -> "),
0xbb: ("new", OpcodeCPIndex, "creates new object of type identified by class reference <argument> in constant pool. Stack: -> objectref"),
0xbc: ("newarray", OpcodeSpecial_newarray, "creates new array with 'count' elements of primitive type given in the argument. Stack: count -> arrayref"),
0xbd: ("anewarray", OpcodeCPIndex, "creates a new array of references of length 'count' and component type identified by the class reference <argument> in the constant pool. Stack: count -> arrayref"),
@ -762,6 +776,33 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType":
yield CPIndex(self, "name_index", target_types="Utf8")
yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "MethodHandle":
refkind_map = {
1: ("getField", "Fieldref"),
2: ("getStatic", "Fieldref"),
3: ("putField", "Fieldref"),
4: ("putStatic", "Fieldref"),
5: ("invokeVirtual", "Methodref"),
6: ("invokeStatic", ("Methodref", "InterfaceMethodref")),
7: ("invokeSpecial", ("Methodref", "InterfaceMethodref")),
8: ("newInvokeSpecial", "Methodref"),
9: ("invokeInterface", "InterfaceMethodref"),
}
yield Enum(UInt8(self, "reference_kind"), {k: v[0] for k, v in refkind_map.items()})
target_types = refkind_map[self["reference_kind"].value][1]
yield CPIndex(self, "reference_index", target_types=target_types)
elif self.constant_type == "MethodType":
yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "Dynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "InvokeDynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "Module":
yield CPIndex(self, "name_index", target_types="Utf8")
elif self.constant_type == "Package":
yield CPIndex(self, "name_index", target_types="Utf8")
else:
raise ParserError("Not a valid constant pool element type: "
+ self["tag"].value)
@ -785,6 +826,21 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType":
return (self["descriptor_index"].rawvalue(),
self["name_index"].rawvalue())
elif self.constant_type == "MethodHandle":
return (self["reference_kind"].display,
self["reference_index"].rawvalue())
elif self.constant_type == "MethodType":
return self["descriptor_index"].rawvalue()
elif self.constant_type == "Dynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "InvokeDynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "Module":
return self["name_index"].rawvalue()
elif self.constant_type == "Package":
return self["name_index"].rawvalue()
else:
# FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: "
@ -811,6 +867,24 @@ class CPInfo(FieldSet):
elif self.constant_type == "NameAndType":
descriptor, name = self.rawvalue()
return parse_any_descriptor(descriptor, name=name)
elif self.constant_type == "MethodHandle":
return "%s(%s)" % (self["reference_kind"].display, self["reference_index"].str())
elif self.constant_type == "MethodType":
return self["descriptor_index"].str()
elif self.constant_type == "Dynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "InvokeDynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "Module":
return self["name_index"].str()
elif self.constant_type == "Package":
return self["name_index"].str()
else:
# FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: "
@ -1192,6 +1266,12 @@ class JavaCompiledClassFile(Parser):
"50.0": "JDK 1.6",
"51.0": "JDK 1.7",
"52.0": "JDK 1.8",
"53.0": "JDK 9",
"54.0": "JDK 10",
"55.0": "JDK 11",
"56.0": "JDK 12",
"57.0": "JDK 13",
"58.0": "JDK 14",
}
# Constants go here since they will probably depend on the detected format
@ -1208,7 +1288,13 @@ class JavaCompiledClassFile(Parser):
9: "Fieldref",
10: "Methodref",
11: "InterfaceMethodref",
12: "NameAndType"
12: "NameAndType",
15: "MethodHandle",
16: "MethodType",
17: "Dynamic",
18: "InvokeDynamic",
19: "Module",
20: "Package",
}
def validate(self):

View file

@ -10,13 +10,16 @@ Creation: 25 march 2005
"""
from hachoir.parser import Parser
from hachoir.field import (FieldSet, UInt8,
UInt16, Int32, UInt32, Int64, ParserError, Float64,
from hachoir.field import (
Field, FieldSet, UInt8,
UInt16, Int32, UInt32, Int64, UInt64,
ParserError, Float64,
Character, RawBytes, PascalString8, TimestampUnix32,
Bit, String)
Bit, String, NullBits)
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.bits import long2raw
from hachoir.core.text_handler import textHandler, hexadecimal
from hachoir.core import config
DISASSEMBLE = False
@ -51,6 +54,12 @@ def parseString(parent):
disassembleBytecode(parent["text"])
def createStringValue(parent):
    """Return the value source for a string-like object.

    The "lnotab" object is summarised as "<lnotab>". A zero-length string
    has no "text" field at all (the string parsers only yield it when the
    length is non-zero), so it evaluates to "" instead of failing on the
    missing field. Otherwise the "text" field itself is returned.
    """
    if parent.name == "lnotab":
        return "<lnotab>"
    if "text" not in parent:
        return ""
    return parent["text"]
def parseStringRef(parent):
    """Yield the 32-bit "ref" field, displayed in hexadecimal."""
    reference = UInt32(parent, "ref")
    yield textHandler(reference, hexadecimal)
@ -58,6 +67,13 @@ def parseStringRef(parent):
def createStringRefDesc(parent):
    """Return a description built from the display text of the "ref" field."""
    shown = parent["ref"].display
    return "String ref: %s" % shown
def createStringRefValue(parent):
    """Return the string table entry that "ref" points at, or None.

    None is returned when the root has no string table or when the index is
    outside of it.
    """
    index = parent["ref"].value
    root = parent.root
    if not hasattr(root, 'string_table'):
        return None
    if not (0 <= index < len(root.string_table)):
        return None
    return root.string_table[index]
# --- Integers ---
@ -69,17 +85,37 @@ def parseInt64(parent):
yield Int64(parent, "value")
def createIntValue(parent):
    """Return the "value" field itself (the field object, not its number)."""
    value_field = parent["value"]
    return value_field
def parseLong(parent):
    """Yield the signed digit count, then abs(count) 16-bit digit fields."""
    digit_count = Int32(parent, "digit_count")
    yield digit_count
    for _ in range(abs(digit_count.value)):
        yield UInt16(parent, "digit[]")
def createLongValue(parent):
    """Rebuild the integer stored as a sequence of 15-bit digits.

    "digit[0]" is the least significant digit; the sign of "digit_count"
    is the sign of the result.
    """
    digit_count = parent["digit_count"].value
    magnitude = 0
    # Walk from the most significant digit down, shifting 15 bits per digit.
    for position in reversed(range(abs(digit_count))):
        magnitude = (magnitude << 15) + parent["digit[%u]" % position].value
    return -magnitude if digit_count < 0 else magnitude
# --- Float and complex ---
def parseFloat(parent):
    """Yield the float, stored as a Pascal string field named "value"."""
    text_field = PascalString8(parent, "value")
    yield text_field
def createFloatValue(parent):
    """Convert the value of the "value" field to a Python float."""
    raw = parent["value"].value
    return float(raw)
def parseBinaryFloat(parent):
    """Yield the float, stored as a 64-bit float field named "value"."""
    binary_field = Float64(parent, "value")
    yield binary_field
@ -94,6 +130,12 @@ def parseBinaryComplex(parent):
yield Float64(parent, "complex")
def createComplexValue(parent):
    """Build a complex number from the "real" and "complex" fields."""
    real_part = float(parent["real"].value)
    imaginary_part = float(parent["complex"].value)
    return complex(real_part, imaginary_part)
# --- Tuple and list ---
def parseTuple(parent):
yield UInt32(parent, "count", "Item count")
@ -119,6 +161,12 @@ def createTupleDesc(parent):
return "%s: %s" % (parent.code_info[2], items)
def tupleValueCreator(constructor):
    """Return a value creator that feeds the item values to *constructor*.

    The returned function collects the value of every "item" field of its
    parent into a list and passes that list to *constructor*.
    """
    def createTupleValue(parent):
        values = []
        for item in parent.array("item"):
            values.append(item.value)
        return constructor(values)

    return createTupleValue
# --- Dict ---
def parseDict(parent):
"""
@ -139,26 +187,58 @@ def createDictDesc(parent):
return "Dict: %s" % ("%s keys" % parent.count)
def createDictValue(parent):
    """Build a dict by pairing the "key" fields with the "value" fields."""
    key_fields = parent.array("key")
    value_fields = parent.array("value")
    return dict(
        (key_field.value, value_field.value)
        for key_field, value_field in zip(key_fields, value_fields)
    )
def parseRef(parent):
    """Yield the 32-bit reference index as field "n"."""
    index_field = UInt32(parent, "n", "Reference")
    yield index_field
def createRefDesc(parent):
    """Describe a reference.

    The description of the referenced object is used when the index falls
    inside the root's object table; otherwise the bare index is shown.
    """
    index = parent["n"].value
    root = parent.root
    if not hasattr(root, 'object_table') or not (0 <= index < len(root.object_table)):
        return 'Reference: %d' % index
    return 'Reference: %s' % root.object_table[index].description
def createRefValue(parent):
    """Return the object table entry that "n" points at, or None.

    None is returned when the root has no object table or when the index is
    outside of it.
    """
    index = parent["n"].value
    root = parent.root
    if not hasattr(root, 'object_table'):
        return None
    if not (0 <= index < len(root.object_table)):
        return None
    return root.object_table[index]
def parseASCII(parent):
    """Yield a 32-bit length, then the ASCII text when the length is non-zero."""
    length = UInt32(parent, "len", "Number of ASCII characters")
    yield length
    char_count = length.value
    if not char_count:
        # An empty string has no "text" field at all.
        return
    yield String(parent, "text", char_count, "String content", charset="ASCII")
def parseShortASCII(parent):
    """Yield an 8-bit length, then the ASCII text when the length is non-zero."""
    length = UInt8(parent, "len", "Number of ASCII characters")
    yield length
    char_count = length.value
    if not char_count:
        # An empty string has no "text" field at all.
        return
    yield String(parent, "text", char_count, "String content", charset="ASCII")
# --- Code ---
def parseCode(parent):
if 0x3000000 <= parent.root.getVersion():
version = parent.root.getVersion()
if 0x3000000 <= version:
yield UInt32(parent, "arg_count", "Argument count")
if 0x3080000 <= version:
yield UInt32(parent, "posonlyargcount", "Positional only argument count")
yield UInt32(parent, "kwonlyargcount", "Keyword only argument count")
if version < 0x30B0000:
yield UInt32(parent, "nb_locals", "Number of local variables")
yield UInt32(parent, "stack_size", "Stack size")
yield UInt32(parent, "flags")
elif 0x2030000 <= parent.root.getVersion():
elif 0x2030000 <= version:
yield UInt32(parent, "arg_count", "Argument count")
yield UInt32(parent, "nb_locals", "Number of local variables")
yield UInt32(parent, "stack_size", "Stack size")
@ -168,54 +248,70 @@ def parseCode(parent):
yield UInt16(parent, "nb_locals", "Number of local variables")
yield UInt16(parent, "stack_size", "Stack size")
yield UInt16(parent, "flags")
yield Object(parent, "compiled_code")
yield Object(parent, "consts")
yield Object(parent, "names")
if 0x30B0000 <= version:
yield Object(parent, "co_localsplusnames")
yield Object(parent, "co_localspluskinds")
else:
yield Object(parent, "varnames")
if 0x2000000 <= parent.root.getVersion():
if 0x2000000 <= version:
yield Object(parent, "freevars")
yield Object(parent, "cellvars")
yield Object(parent, "filename")
yield Object(parent, "name")
if 0x2030000 <= parent.root.getVersion():
if 0x30B0000 <= version:
yield Object(parent, "qualname")
if 0x2030000 <= version:
yield UInt32(parent, "firstlineno", "First line number")
else:
yield UInt16(parent, "firstlineno", "First line number")
if 0x30A0000 <= version:
yield Object(parent, "linetable")
if 0x30B0000 <= version:
yield Object(parent, "exceptiontable")
else:
yield Object(parent, "lnotab")
class Object(FieldSet):
bytecode_info = {
# Don't contains any data
'0': ("null", None, "NULL", None),
'N': ("none", None, "None", None),
'F': ("false", None, "False", None),
'T': ("true", None, "True", None),
'S': ("stop_iter", None, "StopIter", None),
'.': ("ellipsis", None, "ELLIPSIS", None),
'?': ("unknown", None, "Unknown", None),
'0': ("null", None, "NULL", None, None),
'N': ("none", None, "None", None, lambda parent: None),
'F': ("false", None, "False", None, lambda parent: False),
'T': ("true", None, "True", None, lambda parent: True),
'S': ("stop_iter", None, "StopIter", None, None),
'.': ("ellipsis", None, "ELLIPSIS", None, lambda parent: ...),
'?': ("unknown", None, "Unknown", None, None),
'i': ("int32", parseInt32, "Int32", None),
'I': ("int64", parseInt64, "Int64", None),
'f': ("float", parseFloat, "Float", None),
'g': ("bin_float", parseBinaryFloat, "Binary float", None),
'x': ("complex", parseComplex, "Complex", None),
'y': ("bin_complex", parseBinaryComplex, "Binary complex", None),
'l': ("long", parseLong, "Long", None),
's': ("string", parseString, "String", None),
't': ("interned", parseString, "Interned", None),
'u': ("unicode", parseString, "Unicode", None),
'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc),
'(': ("tuple", parseTuple, "Tuple", createTupleDesc),
')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc),
'[': ("list", parseTuple, "List", createTupleDesc),
'<': ("set", parseTuple, "Set", createTupleDesc),
'>': ("frozenset", parseTuple, "Frozen set", createTupleDesc),
'{': ("dict", parseDict, "Dict", createDictDesc),
'c': ("code", parseCode, "Code", None),
'r': ("ref", parseRef, "Reference", None),
'z': ("short_ascii", parseShortASCII, "Short ASCII", None),
'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None),
'i': ("int32", parseInt32, "Int32", None, createIntValue),
'I': ("int64", parseInt64, "Int64", None, createIntValue),
'f': ("float", parseFloat, "Float", None, createFloatValue),
'g': ("bin_float", parseBinaryFloat, "Binary float", None, createFloatValue),
'x': ("complex", parseComplex, "Complex", None, createComplexValue),
'y': ("bin_complex", parseBinaryComplex, "Binary complex", None, createComplexValue),
'l': ("long", parseLong, "Long", None, createLongValue),
's': ("string", parseString, "String", None, createStringValue),
't': ("interned", parseString, "Interned", None, createStringValue),
'u': ("unicode", parseString, "Unicode", None, createStringValue),
'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc, createStringRefValue),
'(': ("tuple", parseTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
'[': ("list", parseTuple, "List", createTupleDesc, tupleValueCreator(list)),
'<': ("set", parseTuple, "Set", createTupleDesc, tupleValueCreator(set)),
'>': ("frozenset", parseTuple, "Frozen set", createTupleDesc, tupleValueCreator(frozenset)),
'{': ("dict", parseDict, "Dict", createDictDesc, createDictValue),
'c': ("code", parseCode, "Code", None, None),
'r': ("ref", parseRef, "Reference", createRefDesc, createRefValue),
'a': ("ascii", parseASCII, "ASCII", None, createStringValue),
'A': ("ascii_interned", parseASCII, "ASCII interned", None, createStringValue),
'z': ("short_ascii", parseShortASCII, "Short ASCII", None, createStringValue),
'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None, createStringValue),
}
def __init__(self, parent, name, **kw):
@ -227,64 +323,40 @@ class Object(FieldSet):
self.code_info = self.bytecode_info[code]
if not name:
self._name = self.code_info[0]
if code == "l":
self.createValue = self.createValueLong
elif code in ("i", "I", "f", "g"):
self.createValue = lambda: self["value"].value
elif code == "T":
self.createValue = lambda: True
elif code == "F":
self.createValue = lambda: False
elif code in ("x", "y"):
self.createValue = self.createValueComplex
elif code in ("s", "t", "u"):
self.createValue = self.createValueString
self.createDisplay = self.createDisplayString
if code == 't':
if code in ("t", "A", "Z"):
if not hasattr(self.root, 'string_table'):
self.root.string_table = []
self.root.string_table.append(self)
elif code == 'R':
if hasattr(self.root, 'string_table'):
self.createValue = self.createValueStringRef
def createValueString(self):
if "text" in self:
return self["text"].value
def createValue(self):
create = self.code_info[4]
if create:
res = create(self)
if isinstance(res, Field):
return res.value
else:
return ""
return res
return None
def createDisplayString(self):
if "text" in self:
return self["text"].display
else:
return "(empty)"
def createValueLong(self):
is_negative = self["digit_count"].value < 0
count = abs(self["digit_count"].value)
total = 0
for index in range(count - 1, -1, -1):
total <<= 15
total += self["digit[%u]" % index].value
if is_negative:
total = -total
return total
def createValueStringRef(self):
return self.root.string_table[self['ref'].value].value
def createDisplayStringRef(self):
return self.root.string_table[self['ref'].value].display
def createValueComplex(self):
    """Build a complex number from the "real" and "complex" sub-fields.

    Both sub-field values are converted with float() before being combined;
    "complex" supplies the imaginary part.
    """
    real_part = float(self["real"].value)
    imaginary_part = float(self["complex"].value)
    return complex(real_part, imaginary_part)
def createDisplay(self):
    """Return the display string of this object, or None if it has none.

    The fifth entry of ``self.code_info`` is an optional factory called
    with this object. When the factory returns a Field, that field's
    ``display`` is returned. Any other result is shown as its repr(),
    cut to ``config.max_string_length`` characters followed by "..." when
    it is longer than that limit.
    """
    create = self.code_info[4]
    if not create:
        # This bytecode has no value factory registered.
        return None
    res = create(self)
    if isinstance(res, Field):
        return res.display
    res = repr(res)
    # Strictly greater: a repr that exactly fits the limit is not truncated,
    # so it must not get the "..." marker.
    if len(res) > config.max_string_length:
        res = res[:config.max_string_length] + "..."
    return res
def createFields(self):
    """Yield the bytecode character, the reference flag and the payload.

    When "flag_ref" is set, this object is appended to ``root.object_table``
    (created on first use). The payload fields come from the parser stored
    in the second entry of ``self.code_info``, if there is one.
    """
    yield BytecodeChar(self, "bytecode", "Bytecode")
    yield Bit(self, "flag_ref", "Is a reference?")
    if self["flag_ref"].value:
        root = self.root
        if not hasattr(root, 'object_table'):
            root.object_table = []
        root.object_table.append(self)
    parse = self.code_info[1]
    if not parse:
        # Bytecodes without a parser carry no payload fields.
        return
    yield from parse(self)
@ -301,6 +373,16 @@ class BytecodeChar(Character):
static_size = 7
PY_RELEASE_LEVEL_ALPHA = 0xA
PY_RELEASE_LEVEL_FINAL = 0xF


def VERSION(major, minor, release_level=PY_RELEASE_LEVEL_FINAL, serial=0,
            *, micro=0):
    """Pack a Python version into a single comparable integer.

    The layout is the same as CPython's PY_VERSION_HEX: major in bits
    24-31, minor in bits 16-23, micro in bits 8-15, release level in bits
    4-7 and serial in bits 0-3.

    major, minor: version numbers (e.g. 3 and 8 for Python 3.8).
    release_level: one of the PY_RELEASE_LEVEL_* constants (default: final).
    serial: serial number of a pre-release (default: 0).
    micro: keyword-only micro version; defaults to 0, which was the only
        value this function could produce before.
    """
    return ((major << 24) + (minor << 16) + (micro << 8)
            + (release_level << 4) + (serial << 0))
class PythonCompiledFile(Parser):
PARSER_TAGS = {
"id": "python",
@ -394,7 +476,90 @@ class PythonCompiledFile(Parser):
3377: ("Python 3.6b1 ", 0x3060000),
3378: ("Python 3.6b2 ", 0x3060000),
3379: ("Python 3.6rc1", 0x3060000),
3390: ("Python 3.7a0 ", 0x3070000),
3390: ("Python 3.7a1", 0x30700A1),
3391: ("Python 3.7a2", 0x30700A2),
3392: ("Python 3.7a4", 0x30700A4),
3393: ("Python 3.7b1", 0x30700B1),
3394: ("Python 3.7b5", 0x30700B5),
3400: ("Python 3.8a1", VERSION(3, 8)),
3401: ("Python 3.8a1", VERSION(3, 8)),
3410: ("Python 3.8a1", VERSION(3, 8)),
3411: ("Python 3.8b2", VERSION(3, 8)),
3412: ("Python 3.8b2", VERSION(3, 8)),
3413: ("Python 3.8b4", VERSION(3, 8)),
3420: ("Python 3.9a0", VERSION(3, 9)),
3421: ("Python 3.9a0", VERSION(3, 9)),
3422: ("Python 3.9a0", VERSION(3, 9)),
3423: ("Python 3.9a2", VERSION(3, 9)),
3424: ("Python 3.9a2", VERSION(3, 9)),
3425: ("Python 3.9a2", VERSION(3, 9)),
3430: ("Python 3.10a1", VERSION(3, 10)),
3431: ("Python 3.10a1", VERSION(3, 10)),
3432: ("Python 3.10a2", VERSION(3, 10)),
3433: ("Python 3.10a2", VERSION(3, 10)),
3434: ("Python 3.10a6", VERSION(3, 10)),
3435: ("Python 3.10a7", VERSION(3, 10)),
3436: ("Python 3.10b1", VERSION(3, 10)),
3437: ("Python 3.10b1", VERSION(3, 10)),
3438: ("Python 3.10b1", VERSION(3, 10)),
3439: ("Python 3.10b1", VERSION(3, 10)),
3450: ("Python 3.11a1", VERSION(3, 11)),
3451: ("Python 3.11a1", VERSION(3, 11)),
3452: ("Python 3.11a1", VERSION(3, 11)),
3453: ("Python 3.11a1", VERSION(3, 11)),
3454: ("Python 3.11a1", VERSION(3, 11)),
3455: ("Python 3.11a1", VERSION(3, 11)),
3456: ("Python 3.11a1", VERSION(3, 11)),
3457: ("Python 3.11a1", VERSION(3, 11)),
3458: ("Python 3.11a1", VERSION(3, 11)),
3459: ("Python 3.11a1", VERSION(3, 11)),
3460: ("Python 3.11a1", VERSION(3, 11)),
3461: ("Python 3.11a1", VERSION(3, 11)),
3462: ("Python 3.11a2", VERSION(3, 11)),
3463: ("Python 3.11a3", VERSION(3, 11)),
3464: ("Python 3.11a3", VERSION(3, 11)),
3465: ("Python 3.11a3", VERSION(3, 11)),
3466: ("Python 3.11a4", VERSION(3, 11)),
3467: ("Python 3.11a4", VERSION(3, 11)),
3468: ("Python 3.11a4", VERSION(3, 11)),
3469: ("Python 3.11a4", VERSION(3, 11)),
3470: ("Python 3.11a4", VERSION(3, 11)),
3471: ("Python 3.11a4", VERSION(3, 11)),
3472: ("Python 3.11a4", VERSION(3, 11)),
3473: ("Python 3.11a4", VERSION(3, 11)),
3474: ("Python 3.11a4", VERSION(3, 11)),
3475: ("Python 3.11a5", VERSION(3, 11)),
3476: ("Python 3.11a5", VERSION(3, 11)),
3477: ("Python 3.11a5", VERSION(3, 11)),
3478: ("Python 3.11a5", VERSION(3, 11)),
3479: ("Python 3.11a5", VERSION(3, 11)),
3480: ("Python 3.11a5", VERSION(3, 11)),
3481: ("Python 3.11a5", VERSION(3, 11)),
3482: ("Python 3.11a5", VERSION(3, 11)),
3483: ("Python 3.11a5", VERSION(3, 11)),
3484: ("Python 3.11a5", VERSION(3, 11)),
3485: ("Python 3.11a5", VERSION(3, 11)),
3486: ("Python 3.11a6", VERSION(3, 11)),
3487: ("Python 3.11a6", VERSION(3, 11)),
3488: ("Python 3.11a6", VERSION(3, 11)),
3489: ("Python 3.11a6", VERSION(3, 11)),
3490: ("Python 3.11a6", VERSION(3, 11)),
3491: ("Python 3.11a6", VERSION(3, 11)),
3492: ("Python 3.11a7", VERSION(3, 11)),
3493: ("Python 3.11a7", VERSION(3, 11)),
3494: ("Python 3.11a7", VERSION(3, 11)),
3500: ("Python 3.12a1", VERSION(3, 12)),
3501: ("Python 3.12a1", VERSION(3, 12)),
3502: ("Python 3.12a1", VERSION(3, 12)),
3503: ("Python 3.12a1", VERSION(3, 12)),
3504: ("Python 3.12a1", VERSION(3, 12)),
3505: ("Python 3.12a1", VERSION(3, 12)),
3506: ("Python 3.12a1", VERSION(3, 12)),
3507: ("Python 3.12a1", VERSION(3, 12)),
3508: ("Python 3.12a1", VERSION(3, 12)),
3509: ("Python 3.12a1", VERSION(3, 12)),
3510: ("Python 3.12a1", VERSION(3, 12)),
3511: ("Python 3.12a1", VERSION(3, 12)),
}
# Dictionary which associates the pyc signature (4-byte long string)
@ -411,13 +576,7 @@ class PythonCompiledFile(Parser):
if self["magic_string"].value != "\r\n":
return r"Wrong magic string (\r\n)"
version = self.getVersion()
if version >= 0x3030000 and self['magic_number'].value >= 3200:
offset = 12
else:
offset = 8
value = self.stream.readBits(offset * 8, 7, self.endian)
if value != ord(b'c'):
if self["content/bytecode"].value != "c":
return "First object bytecode is not code"
return True
@ -430,8 +589,23 @@ class PythonCompiledFile(Parser):
def createFields(self):
    """Yield the pyc header fields followed by the top-level object.

    Layout produced:
      * "magic_number" (16 bits) and the 2-character "magic_string";
      * for versions >= 0x30700A4 (PEP 552): a 32-bit flags word made of
        "use_hash", "checked" and 30 reserved bits;
      * either a 64-bit "hash" (hash-based pyc) or a 32-bit "timestamp",
        the latter followed by "filesize" for version >= 0x3030000 with a
        magic number >= 3200;
      * the "content" object.
    """
    yield UInt16(self, "magic_number", "Magic number")
    yield String(self, "magic_string", 2, r"Magic string \r\n", charset="ASCII")
    version = self.getVersion()

    # PEP 552: Deterministic pycs #31650 (Python 3.7a4); magic=3392
    # The flags word comes directly after the magic, before the
    # timestamp/hash, so nothing else may be yielded ahead of it.
    if version >= 0x30700A4:
        yield Bit(self, "use_hash", "Is hash based?")
        yield Bit(self, "checked")
        yield NullBits(self, "reserved", 30)
        use_hash = self['use_hash'].value
    else:
        use_hash = False

    if use_hash:
        yield UInt64(self, "hash", "SipHash hash of the source file")
    else:
        # The timestamp is emitted exactly once, and only for pycs that
        # are not hash-based; the source size belongs to the same variant.
        yield TimestampUnix32(self, "timestamp", "Timestamp modulo 2**32")
        if version >= 0x3030000 and self['magic_number'].value >= 3200:
            yield UInt32(self, "filesize", "Size of the Python source file (.py) modulo 2**32")

    yield Object(self, "content")

View file

@ -244,7 +244,7 @@ class PacketElement(FieldSet):
yield Bits(self, "sync[]", 4) # =2, or 3 if has_dts=True
yield Timestamp(self, "pts")
if self["has_dts"].value:
if not(self["has_pts"].value):
if not self["has_pts"].value:
raise ParserError("Invalid PTS/DTS values")
yield Bits(self, "sync[]", 4) # =1
yield Timestamp(self, "dts")

View file

@ -125,7 +125,7 @@ class PatternMatching:
item = RegexPattern(regex, user)
if item.regex.maxLength() is None:
raise ValueError(
"Regular expression with no maximum size has forbidden")
"Regular expression with no maximum size is forbidden")
self.regex_patterns.append(item)
self._need_commit = True

View file

@ -1,3 +1,3 @@
#!/usr/bin/env python
#!/usr/bin/env python3
from hachoir.subfile.main import main
main()

View file

@ -85,7 +85,7 @@ def main():
stream = FileInputStream(filename)
with stream:
subfile = SearchSubfile(stream, values.offset, values.size)
subfile.verbose = not(values.quiet)
subfile.verbose = not values.quiet
subfile.debug = values.debug
if output:
subfile.setOutput(output)

View file

@ -95,7 +95,7 @@ class SearchSubfile:
print("[!] Memory error!", file=stderr)
self.mainFooter()
self.stream.close()
return not(main_error)
return (not main_error)
def mainHeader(self):
# Fix slice size if needed
@ -149,7 +149,7 @@ class SearchSubfile:
if parser.content_size is not None:
text += " size=%s (%s)" % (parser.content_size //
8, humanFilesize(parser.content_size // 8))
if not(parser.content_size) or parser.content_size // 8 < FILE_MAX_SIZE:
if not parser.content_size or parser.content_size // 8 < FILE_MAX_SIZE:
text += ": " + parser.description
else:
text += ": " + parser.__class__.__name__