Update hachoir 3.1.2 (f739b43) → 3.2.0 (38d759f).

2024-11-15 01:15:05 +00:00 · 2023-10-08 00:04:41 +01:00 · 2023-10-08 00:04:41 +01:00 · cbde47c95c
commit cbde47c95c
parent ecd70f546f
124 changed files with 1639 additions and 482 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -2,6 +2,7 @@

 * Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1)
 * Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495)
+* Update hachoir 3.1.2 (f739b43) to 3.2.0 (38d759f)


 ### 3.30.1 (2023-10-02 22:50:00 UTC)
--- a/lib/hachoir/init.py
+++ b/lib/hachoir/init.py
@ -1,2 +1,2 @@
-VERSION = (3, 1, 2)
+VERSION = (3, 2, 0)
 __version__ = ".".join(map(str, VERSION))
--- a/lib/hachoir/core/bits.py
+++ b/lib/hachoir/core/bits.py
@ -4,7 +4,7 @@ string, number, hexadecimal, etc.
 """

 from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
-from struct import calcsize, unpack, error as struct_error
+from struct import calcsize, error as struct_error


 def swap16(value):
@ -292,20 +292,11 @@ def str2long(data, endian):
    >>> str2long(b"\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
    True
    """
-    assert 1 <= len(data) <= 32   # arbitrary limit: 256 bits
-    try:
-        return unpack(_struct_format[endian][len(data)], data)[0]
-    except KeyError:
-        pass
-
-    assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
-    shift = 0
-    value = 0
-    if endian is BIG_ENDIAN:
-        data = reversed(data)
-    elif endian is MIDDLE_ENDIAN:
-        data = reversed(strswapmid(data))
-    for byte in data:
-        value += (byte << shift)
-        shift += 8
-    return value
+    if endian == LITTLE_ENDIAN:
+        return int.from_bytes(data, "little")
+    elif endian == BIG_ENDIAN:
+        return int.from_bytes(data, "big")
+    elif endian == MIDDLE_ENDIAN:
+        return int.from_bytes(strswapmid(data), "big")
+    else:
+        raise ValueError("Invalid endian %s" % (endian,))
--- a/lib/hachoir/field/string_field.py
+++ b/lib/hachoir/field/string_field.py
@ -244,7 +244,7 @@ class GenericString(Bytes):
                and err.end == len(text) \
                and self._charset == "UTF-16-LE":
            try:
-                text = str(text + "\0", self._charset, "strict")
+                text = str(text + b"\0", self._charset, "strict")
                self.warning(
                    "Fix truncated %s string: add missing nul byte" % self._charset)
                return text
--- a/lib/hachoir/field/timestamp.py
+++ b/lib/hachoir/field/timestamp.py
@ -61,7 +61,7 @@ class TimeDateMSDOS32(FieldSet):

    def createValue(self):
        return datetime(
-            1980 + self["year"].value, self["month"].value, self["day"].value,
+            1980 + self["year"].value, self["month"].value or 1, self["day"].value or 1,
            self["hour"].value, self["minute"].value, 2 * self["second"].value)

    def createDisplay(self):
--- a/lib/hachoir/metadata/main.py
+++ b/lib/hachoir/metadata/main.py
@ -124,7 +124,7 @@ def processFile(values, filename,


 def processFiles(values, filenames, display=True):
-    human = not(values.raw)
+    human = not values.raw
    ok = True
    priority = int(values.level) * 100 + 99
    display_filename = (1 < len(filenames))
--- a/lib/hachoir/parser/archive/init.py
+++ b/lib/hachoir/parser/archive/init.py
@ -1,5 +1,6 @@
 from hachoir.parser.archive.ace import AceFile  # noqa
 from hachoir.parser.archive.ar import ArchiveFile  # noqa
+from hachoir.parser.archive.arj import ArjParser  # noqa
 from hachoir.parser.archive.bomstore import BomFile  # noqa
 from hachoir.parser.archive.bzip2_parser import Bzip2Parser  # noqa
 from hachoir.parser.archive.cab import CabFile  # noqa
--- a/lib/hachoir/parser/archive/arj.py
+++ b/lib/hachoir/parser/archive/arj.py
@ -0,0 +1,155 @@
+"""
+ARJ archive file parser
+
+https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt
+"""
+
+from hachoir.core.endian import LITTLE_ENDIAN
+from hachoir.field import (FieldSet, ParserError,
+                           CString, Enum, RawBytes,
+                           UInt8, UInt16, UInt32,
+                           Bytes)
+from hachoir.parser import Parser
+
+# Display names for the "host_os" header byte (wrapped in an Enum by
+# BaseBlock._header_start_fields).
+HOST_OS = {
+    0: "MSDOS",
+    1: "PRIMOS",
+    2: "UNIX",
+    3: "AMIGA",
+    4: "MACDOS",
+    5: "OS/2",
+    6: "APPLE GS",
+    7: "ATARI ST",
+    8: "NEXT",
+    9: "VAX VMS",
+    10: "WIN95",
+    11: "WIN32",
+}
+
+# Display names for the "file_type" byte of both the main header and the
+# per-file headers.
+FILE_TYPE = {
+    0: "BINARY",
+    1: "TEXT",
+    2: "COMMENT",
+    3: "DIRECTORY",
+    4: "VOLUME",
+    5: "CHAPTER",
+}
+
+# Two-byte signature checked at the start of the stream and of every header.
+MAGIC = b"\x60\xEA"
+
+
+class BaseBlock(FieldSet):
+    """
+    Fields shared by the ARJ main header and the per-file headers.
+
+    Subclasses yield from _header_start_fields(), then their own fixed
+    fields, then yield from _header_end_fields().
+    """
+
+    @property
+    def isEmpty(self):
+        """True when basic_header_size is zero (the end-of-archive marker)."""
+        return self["basic_header_size"].value == 0
+
+    def _header_start_fields(self):
+        """
+        Yield the magic, the basic header size and, unless the block is
+        empty, the leading bytes common to every header.
+
+        Raises ParserError when the magic does not match MAGIC.
+        """
+        yield Bytes(self, "magic", len(MAGIC))
+        if self["magic"].value != MAGIC:
+            raise ParserError("Wrong header magic")
+        yield UInt16(self, "basic_header_size", "zero if end of archive")
+        if not self.isEmpty:
+            yield UInt8(self, "first_hdr_size")
+            yield UInt8(self, "archiver_version")
+            yield UInt8(self, "min_archiver_version")
+            yield Enum(UInt8(self, "host_os"), HOST_OS)
+            yield UInt8(self, "arj_flags")
+
+    def _header_end_fields(self):
+        """
+        Yield the trailing part of a header: last_chapter, optional
+        reserved padding, file name, comment, header CRC and the chain of
+        extended headers (terminated by a zero size).
+        """
+        yield UInt8(self, "last_chapter")
+        # The file name starts first_hdr_size bytes after the first_hdr_size
+        # field itself; anything between here and there is skipped as raw bytes.
+        fhs = self["first_hdr_size"]
+        name_position = fhs.address // 8 + fhs.value
+        current_position = self["last_chapter"].address // 8 + 1
+        if name_position > current_position:
+            yield RawBytes(self, "reserved2", name_position - current_position)
+
+        yield CString(self, "filename", "File name", charset="ASCII")
+        yield CString(self, "comment", "Comment", charset="ASCII")
+        yield UInt32(self, "crc", "Header CRC")
+
+        i = 0
+        while not self.eof:
+            size_name = f"extended_header_size_{i}"
+            yield UInt16(self, size_name)
+            cur_size = self[size_name].value
+            if cur_size == 0:
+                break
+            # Indexed like its size/CRC siblings: a constant name would be
+            # reused (and collide) as soon as a second extended header exists.
+            yield RawBytes(self, f"extended_header_data_{i}", cur_size)
+            yield UInt32(self, f"extended_header_crc_{i}")
+            i += 1
+
+    def validate(self):
+        """Return True when the stream starts with MAGIC, else an error string."""
+        if self.stream.readBytes(0, 2) != MAGIC:
+            return "Invalid magic"
+        return True
+
+
+class Header(BaseBlock):
+    """Main ARJ header, the first block yielded by ArjParser."""
+
+    def createFields(self):
+        """Yield the common prefix, then the archive-level fields unless empty."""
+        yield from self._header_start_fields()
+        if self.isEmpty:
+            return
+        yield UInt8(self, "security_version")
+        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
+        yield UInt8(self, "reserved")
+        for name in ("date_time_created", "date_time_modified",
+                     "archive_size", "security_envelope_file_position"):
+            yield UInt32(self, name)
+        for name in ("filespec_position", "security_envelope_data_len"):
+            yield UInt16(self, name)
+        yield UInt8(self, "encryption_version")
+        yield from self._header_end_fields()
+
+    def createDescription(self):
+        """Describe the header, quoting the stored file name when there is one."""
+        if not self.isEmpty:
+            return "Main header of '%s'" % self["filename"].value
+        return "Empty main header"
+
+
+class Block(BaseBlock):
+    """Per-file ARJ header followed by that file's compressed data."""
+
+    def createFields(self):
+        """Yield the common prefix, the file-level fields and the compressed payload."""
+        yield from self._header_start_fields()
+        if self.isEmpty:
+            return
+        yield UInt8(self, "method")
+        yield Enum(UInt8(self, "file_type"), FILE_TYPE)
+        yield UInt8(self, "reserved")
+        for name in ("date_time_modified", "compressed_size",
+                     "original_size", "original_file_crc"):
+            yield UInt32(self, name)
+        for name in ("filespec_position", "file_access_mode"):
+            yield UInt16(self, name)
+        yield UInt8(self, "first_chapter")
+        yield from self._header_end_fields()
+        # The payload is only present when the header announces a size.
+        data_len = self["compressed_size"].value
+        if data_len > 0:
+            yield RawBytes(self, "compressed_data", data_len)
+
+    def createDescription(self):
+        """Describe the block, quoting the stored file name when there is one."""
+        if not self.isEmpty:
+            return "File header of '%s'" % self["filename"].value
+        return "Empty file header"
+
+
+class ArjParser(Parser):
+    """ARJ archive parser: one main Header followed by file Blocks."""
+    endian = LITTLE_ENDIAN
+    PARSER_TAGS = {
+        "id": "arj",
+        "category": "archive",
+        "file_ext": ("arj",),
+        "min_size": 4 * 8,
+        "description": "ARJ archive"
+    }
+
+    def validate(self):
+        """Return True when the stream starts with MAGIC, else an error string."""
+        signature = self.stream.readBytes(0, 2)
+        return True if signature == MAGIC else "Invalid magic"
+
+    def createFields(self):
+        """Yield the main header, then file blocks up to EOF or an empty block."""
+        yield Header(self, "header")
+        if self["header"].isEmpty:
+            return
+        while not self.eof:
+            entry = Block(self, "file_header[]")
+            yield entry
+            # An empty block is the last one to be parsed.
+            if entry.isEmpty:
+                return
--- a/lib/hachoir/parser/archive/lzx.py
+++ b/lib/hachoir/parser/archive/lzx.py
@ -13,6 +13,7 @@ from hachoir.field import (FieldSet,
 from hachoir.core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN
 from hachoir.core.tools import paddingSize
 from hachoir.parser.archive.zlib import build_tree, HuffmanCode, extend_data
+import struct


 class LZXPreTreeEncodedTree(FieldSet):
@ -146,6 +147,8 @@ class LZXBlock(FieldSet):
        self.window_size = self.WINDOW_SIZE[self.compression_level]
        self.block_type = self["block_type"].value
        curlen = len(self.parent.uncompressed_data)
+        intel_started = False  # Do we perform Intel jump fixups on this block?
+
        if self.block_type in (1, 2):  # Verbatim or aligned offset block
            if self.block_type == 2:
                for i in range(8):
@ -156,6 +159,8 @@ class LZXBlock(FieldSet):
            yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
            main_tree = build_tree(
                self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
+            if self["main_tree_start"].lengths[0xE8]:
+                intel_started = True
            yield LZXPreTreeEncodedTree(self, "length_tree", 249)
            length_tree = build_tree(self["length_tree"].lengths)
            current_decoded_size = 0
@ -169,7 +174,7 @@ class LZXBlock(FieldSet):
                    field._description = "Literal value %r" % chr(
                        field.realvalue)
                    current_decoded_size += 1
-                    self.parent.uncompressed_data += chr(field.realvalue)
+                    self.parent._lzx_window.append(field.realvalue)
                    yield field
                    continue
                position_header, length_header = divmod(
@ -243,8 +248,7 @@ class LZXBlock(FieldSet):
                    self.parent.r2 = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
-                self.parent.uncompressed_data = extend_data(
-                    self.parent.uncompressed_data, length, position)
+                extend_data(self.parent._lzx_window, length, position)
                current_decoded_size += length
        elif self.block_type == 3:  # Uncompressed block
            padding = paddingSize(self.address + self.current_size, 16)
@ -253,6 +257,7 @@ class LZXBlock(FieldSet):
            else:
                yield PaddingBits(self, "padding[]", 16)
            self.endian = LITTLE_ENDIAN
+            intel_started = True  # apparently intel fixup may be needed on uncompressed blocks?
            yield UInt32(self, "r[]", "New value of R0")
            yield UInt32(self, "r[]", "New value of R1")
            yield UInt32(self, "r[]", "New value of R2")
@ -260,18 +265,50 @@ class LZXBlock(FieldSet):
            self.parent.r1 = self["r[1]"].value
            self.parent.r2 = self["r[2]"].value
            yield RawBytes(self, "data", self.uncompressed_size)
-            self.parent.uncompressed_data += self["data"].value
+            self.parent._lzx_window += self["data"].value
            if self["block_size"].value % 2:
                yield PaddingBits(self, "padding", 8)
        else:
            raise ParserError("Unknown block type %d!" % self.block_type)

+        # Fixup Intel jumps if necessary (fixups are only applied to the final output, not to the LZX window)
+        self.parent.uncompressed_data += self.parent._lzx_window[-self.uncompressed_size:]
+        self.parent._lzx_window = self.parent._lzx_window[-(1 << self.root.compr_level):]
+
+        if (
+            intel_started
+            and self.parent["filesize_indicator"].value
+            and self.parent["filesize"].value > 0
+        ):
+            # Note that we're decoding a block-at-a-time instead of a frame-at-a-time,
+            # so we need to handle the frame boundaries carefully.
+            filesize = self.parent["filesize"].value
+            start_pos = max(0, curlen - 10)  # We may need to correct something from the last block
+            end_pos = len(self.parent.uncompressed_data) - 10
+            while 1:
+                jmp_pos = self.parent.uncompressed_data.find(b"\xE8", start_pos, end_pos)
+                if jmp_pos == -1:
+                    break
+                if (jmp_pos % 32768) >= (32768 - 10):
+                    # jumps at the end of frames are not fixed up
+                    start_pos = jmp_pos + 1
+                    continue
+                abs_off, = struct.unpack("<i", self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5])
+                if -jmp_pos <= abs_off < filesize:
+                    if abs_off < 0:
+                        rel_off = abs_off + filesize
+                    else:
+                        rel_off = abs_off - jmp_pos
+                    self.parent.uncompressed_data[jmp_pos + 1:jmp_pos + 5] = struct.pack("<i", rel_off)
+                start_pos = jmp_pos + 5
+

 class LZXStream(Parser):
    endian = MIDDLE_ENDIAN

    def createFields(self):
-        self.uncompressed_data = ""
+        self.uncompressed_data = bytearray()
+        self._lzx_window = bytearray()
        self.r0 = 1
        self.r1 = 1
        self.r2 = 1
@ -291,6 +328,6 @@ class LZXStream(Parser):
 def lzx_decompress(stream, window_bits):
    data = LZXStream(stream)
    data.compr_level = window_bits
-    for unused in data:
+    for _ in data:
        pass
    return data.uncompressed_data
--- a/lib/hachoir/parser/archive/zlib.py
+++ b/lib/hachoir/parser/archive/zlib.py
@ -14,13 +14,13 @@ from hachoir.core.text_handler import textHandler, hexadecimal
 from hachoir.core.tools import paddingSize, alignValue


-def extend_data(data, length, offset):
-    """Extend data using a length and an offset."""
+def extend_data(data: bytearray, length, offset):
+    """Extend data using a length and an offset, LZ-style."""
    if length >= offset:
        new_data = data[-offset:] * (alignValue(length, offset) // offset)
-        return data + new_data[:length]
+        data += new_data[:length]
    else:
-        return data + data[-offset:-offset + length]
+        data += data[-offset:-offset + length]


 def build_tree(lengths):
@ -136,9 +136,9 @@ class DeflateBlock(FieldSet):
    CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9,
                         6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]

-    def __init__(self, parent, name, uncomp_data="", *args, **kwargs):
+    def __init__(self, parent, name, uncomp_data=b"", *args, **kwargs):
        FieldSet.__init__(self, parent, name, *args, **kwargs)
-        self.uncomp_data = uncomp_data
+        self.uncomp_data = bytearray(uncomp_data)

    def createFields(self):
        yield Bit(self, "final", "Is this the final block?")  # BFINAL
@ -227,7 +227,7 @@ class DeflateBlock(FieldSet):
                field._description = "Literal Code %r (Huffman Code %i)" % (
                    chr(value), field.value)
                yield field
-                self.uncomp_data += chr(value)
+                self.uncomp_data.append(value)
            if value == 256:
                field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value
                yield field
@ -267,15 +267,14 @@ class DeflateBlock(FieldSet):
                    extrafield._description = "Distance Extra Bits (%i), total length %i" % (
                        extrafield.value, distance)
                    yield extrafield
-                self.uncomp_data = extend_data(
-                    self.uncomp_data, length, distance)
+                extend_data(self.uncomp_data, length, distance)


 class DeflateData(GenericFieldSet):
    endian = LITTLE_ENDIAN

    def createFields(self):
-        uncomp_data = ""
+        uncomp_data = bytearray()
        blk = DeflateBlock(self, "compressed_block[]", uncomp_data)
        yield blk
        uncomp_data = blk.uncomp_data
@ -326,11 +325,11 @@ class ZlibData(Parser):
        yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal)


-def zlib_inflate(stream, wbits=None, prevdata=""):
+def zlib_inflate(stream, wbits=None):
    if wbits is None or wbits >= 0:
        return ZlibData(stream)["data"].uncompressed_data
    else:
        data = DeflateData(None, "root", stream, "", stream.askSize(None))
-        for unused in data:
+        for _ in data:
            pass
        return data.uncompressed_data
--- a/lib/hachoir/parser/audio/id3.py
+++ b/lib/hachoir/parser/audio/id3.py
@ -451,7 +451,7 @@ class ID3_Chunk(FieldSet):

        if size:
            cls = None
-            if not(is_compressed):
+            if not is_compressed:
                tag = self["tag"].value
                if tag in ID3_Chunk.handler:
                    cls = ID3_Chunk.handler[tag]
--- a/lib/hachoir/parser/container/mp4.py
+++ b/lib/hachoir/parser/container/mp4.py
@ -1312,7 +1312,7 @@ class MP4File(Parser):
        if size < 8:
            return "Invalid first atom size"
        tag = self.stream.readBytes(4 * 8, 4)
-        if tag not in (b"ftyp", b"moov", b"free"):
+        if tag not in (b"ftyp", b"moov", b"free", b"skip"):
            return "Unknown MOV file type"
        return True

--- a/lib/hachoir/parser/file_system/ext2.py
+++ b/lib/hachoir/parser/file_system/ext2.py
@ -240,11 +240,13 @@ class Inode(FieldSet):
        return out

    def is_fast_symlink(self):
-        self.seekByte(4 * 15 + 4)
-        acl = UInt32(self, "file_acl")
+        acl_addr = self.absolute_address + self.current_size
+        # skip 15 blocks + version field
+        acl_addr += (4 * 15 + 4) * 8
+        acl = self.stream.readBits(acl_addr, 32, self.endian)

        b = 0
-        if acl.value > 0:
+        if acl > 0:
            b = (2 << self["/superblock/log_block_size"].value)

        return (self['blocks'].value - b == 0)
--- a/lib/hachoir/parser/guess.py
+++ b/lib/hachoir/parser/guess.py
@ -139,4 +139,7 @@ def createParser(filename, real_filename=None, tags=None):
    if not tags:
        tags = []
    stream = FileInputStream(filename, real_filename, tags=tags)
-    return guessParser(stream)
+    guess = guessParser(stream)
+    if guess is None:
+        stream.close()
+    return guess
--- a/lib/hachoir/parser/image/jpeg.py
+++ b/lib/hachoir/parser/image/jpeg.py
@ -387,7 +387,10 @@ class JpegImageData(FieldSet):
            end = self.stream.searchBytes(b"\xff", start, MAX_FILESIZE * 8)
            if end is None:
                # this is a bad sign, since it means there is no terminator
-                # we ignore this; it likely means a truncated image
+                # this likely means a truncated image:
+                # set the size to the remaining length of the stream
+                # to avoid being forced to parse subfields to calculate size
+                self._size = self.stream._size - self.absolute_address
                break
            if self.stream.readBytes(end, 2) == b'\xff\x00':
                # padding: false alarm
--- a/lib/hachoir/parser/image/png.py
+++ b/lib/hachoir/parser/image/png.py
@ -45,7 +45,7 @@ UNIT_NAME = {1: "Meter"}
 COMPRESSION_NAME = {
    0: "deflate"  # with 32K sliding window
 }
-MAX_CHUNK_SIZE = 5 * 1024 * 1024  # Maximum chunk size (5 MB)
+MAX_CHUNK_SIZE = 64 * 1024 * 1024  # Maximum chunk size heuristic (64 MB)


 def headerParse(parent):
--- a/lib/hachoir/parser/image/wmf.py
+++ b/lib/hachoir/parser/image/wmf.py
@ -597,7 +597,7 @@ class WMF_File(Parser):
            yield UInt32(self, "max_record_size", "The size of largest record in 16-bit words")
            yield UInt16(self, "nb_params", "Not Used (always 0)")

-        while not(self.eof):
+        while not self.eof:
            yield Function(self, "func[]")

    def isEMF(self):
--- a/lib/hachoir/parser/misc/init.py
+++ b/lib/hachoir/parser/misc/init.py
@ -16,3 +16,4 @@ from hachoir.parser.misc.word_doc import WordDocumentParser  # noqa
 from hachoir.parser.misc.word_2 import Word2DocumentParser  # noqa
 from hachoir.parser.misc.mstask import MSTaskFile  # noqa
 from hachoir.parser.misc.mapsforge_map import MapsforgeMapFile  # noqa
+from hachoir.parser.misc.fit import FITFile  # noqa
--- a/lib/hachoir/parser/misc/fit.py
+++ b/lib/hachoir/parser/misc/fit.py
@ -0,0 +1,173 @@
+"""
+Garmin FIT file format parser.
+
+Author: Sebastien Ponce <sebastien.ponce@cern.ch>
+"""
+
+from hachoir.parser import Parser
+from hachoir.field import FieldSet, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, RawBytes, Bit, Bits, Bytes, String, Float32, Float64
+from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN
+
+# Maps the "type" byte of a FieldDefinition to the hachoir field class that
+# DataMessage instantiates for the corresponding value.
+field_types = {
+    0: UInt8,      # enum
+    1: Int8,       # signed int of 8 bits
+    2: UInt8,      # unsigned int of 8 bits
+    131: Int16,    # signed int of 16 bits
+    132: UInt16,   # unsigned int of 16 bits
+    133: Int32,    # signed int of 32 bits
+    134: UInt32,   # unsigned int of 32 bits
+    7: String,     # string
+    136: Float32,  # float
+    137: Float64,  # double
+    10: UInt8,     # unsigned int of 8 bits with 0 as invalid value
+    139: UInt16,   # unsigned int of 16 bits with 0 as invalid value
+    140: UInt32,   # unsigned int of 32 bits with 0 as invalid value
+    13: Bytes,     # bytes
+    142: Int64,    # signed int of 64 bits
+    143: UInt64,   # unsigned int of 64 bits
+    144: UInt64    # unsigned int of 64 bits with 0 as invalid value
+}
+
+
+class Header(FieldSet):
+    """File header at the start of a FIT file, always read little-endian."""
+    endian = LITTLE_ENDIAN
+
+    def createFields(self):
+        """Yield size, protocol, profile, data size, data type and header CRC."""
+        yield UInt8(self, "size", description="Header size")
+        yield UInt8(self, "protocol", description="Protocol version")
+        yield UInt16(self, "profile", description="Profile version")
+        yield UInt32(self, "datasize", description="Data size")
+        yield RawBytes(self, "datatype", 4)
+        yield UInt16(self, "crc", description="CRC of first 11 bytes or 0x0")
+
+    def createDescription(self):
+        """Describe the header together with the announced data size."""
+        data_size = self["datasize"].value
+        return "Header of fit file. Data size is %d" % data_size
+
+
+class NormalRecordHeader(FieldSet):
+    """One-byte record header: four flag bits followed by a 4-bit message type."""
+
+    def createFields(self):
+        """Yield the four single-bit flags, then the 4-bit msgType."""
+        flag_bits = (
+            ("normal", "Normal header (0)"),
+            ("type", "Message type (0 data, 1 definition"),
+            ("typespecific", "0"),
+            ("reserved", "0"),
+        )
+        for name, text in flag_bits:
+            yield Bit(self, name, text)
+        yield Bits(self, "msgType", 4, description="Message type")
+
+    def createDescription(self):
+        """Say whether the "type" bit marks a definition or a data message."""
+        kind = "definition" if self["type"].value else "data"
+        return "Record header, this is a %s message" % kind
+
+
+class FieldDefinition(FieldSet):
+    """Three-byte entry of a definition message: number, size and base type."""
+
+    def createFields(self):
+        """Yield the three UInt8 members in stream order."""
+        members = (
+            ("number", "Field definition number"),
+            ("size", "Size in bytes"),
+            ("type", "Base type"),
+        )
+        for name, text in members:
+            yield UInt8(self, name, text)
+
+    def createDescription(self):
+        """Describe the entry by its field number and byte size."""
+        number = self["number"].value
+        size = self["size"].value
+        return "Field Definition. Number %d, Size %d" % (number, size)
+
+
+class DefinitionMessage(FieldSet):
+    """Definition record: header, byte order, message number and field list."""
+
+    def createFields(self):
+        """Yield the fixed members, switch byte order, then one entry per field."""
+        yield NormalRecordHeader(self, "RecordHeader")
+        yield UInt8(self, "reserved", "Reserved (0)")
+        yield UInt8(self, "architecture", "Architecture (0 little, 1 big endian")
+        # Everything after the architecture byte is read in the order it selects.
+        if self["architecture"].value:
+            self.endian = BIG_ENDIAN
+        else:
+            self.endian = LITTLE_ENDIAN
+        yield UInt16(self, "msgNumber", "Message Number")
+        yield UInt8(self, "nbFields", "Number of fields")
+        for _ in range(self["nbFields"].value):
+            yield FieldDefinition(self, "fieldDefinition[]")
+
+    def createDescription(self):
+        """Describe the message by its number of field definitions."""
+        field_count = self["nbFields"].value
+        return "Definition Message. Contains %d fields" % field_count
+
+
+class DataMessage(FieldSet):
+    """
+    Data record: a record header followed by values laid out according to
+    the DefinitionMessage stored in parent.msgDefs for the same msgType.
+    """
+
+    def createFields(self):
+        """
+        Yield the record header, then one field (or an array of fields) per
+        entry of the matching definition message.
+
+        A KeyError is raised by the lookups when no definition was registered
+        for this msgType or when the base type is not in field_types.
+        """
+        hdr = NormalRecordHeader(self, "RecordHeader")
+        yield hdr
+        msgDef = self.parent.msgDefs[hdr["msgType"].value]
+        # Byte order comes from the definition, so set it once for all values.
+        self.endian = BIG_ENDIAN if msgDef["architecture"].value else LITTLE_ENDIAN
+        for n in range(msgDef["nbFields"].value):
+            desc = msgDef["fieldDefinition[%d]" % n]
+            typ = field_types[desc["type"].value]
+            size = desc["size"].value
+            if typ in (String, Bytes):
+                # Variable-length types take the byte length as third argument.
+                yield typ(self, "field%d" % n, size)
+            elif typ.static_size // 8 == size:
+                # Fixed-size types take (parent, name, description): passing
+                # the byte size here would install it as the description.
+                yield typ(self, "field%d" % n)
+            else:
+                # Declared size differs from the base type's size: emit as
+                # many whole elements as fit.
+                for _ in range(size * 8 // typ.static_size):
+                    yield typ(self, "field%d[]" % n)
+
+    def createDescription(self):
+        """Return the constant description of a data record."""
+        return "Data Message"
+
+
+class TimeStamp(FieldSet):
+    """One-byte header split into a flag bit, a 3-bit type and a 4-bit offset."""
+
+    def createFields(self):
+        """Yield the timestamp flag, the message type and the time offset."""
+        yield Bit(self, "timestamp", description="TimeStamp (1)")
+        for name, width, text in (("msgType", 3, "Message type"),
+                                  ("time", 4, "TimeOffset")):
+            yield Bits(self, name, width, description=text)
+
+    def createDescription(self):
+        """Return the constant description of this header."""
+        return "TimeStamp"
+
+
+class CRC(FieldSet):
+    """Trailing 16-bit checksum yielded after the last record."""
+
+    def createFields(self):
+        """Yield the single UInt16 checksum field."""
+        yield UInt16(self, "crc", description="CRC")
+
+    def createDescription(self):
+        """Return the constant description of this field set."""
+        return "CRC"
+
+
+class FITFile(Parser):
+    """
+    Garmin FIT parser: a Header, a run of definition/data/timestamp records
+    and a trailing CRC.
+    """
+    endian = BIG_ENDIAN
+    PARSER_TAGS = {
+        "id": "fit",
+        "category": "misc",
+        "file_ext": ("fit",),
+        "mime": ("application/fit",),
+        "min_size": 14 * 8,
+        "description": "Garmin binary fit format"
+    }
+
+    def __init__(self, *args, **kwargs):
+        Parser.__init__(self, *args, **kwargs)
+        # Latest DefinitionMessage seen per msgType; DataMessage reads it
+        # through self.parent.msgDefs.
+        self.msgDefs = {}
+
+    def validate(self):
+        """Return True when bytes 8..11 are b'.FIT', else an error string."""
+        s = self.stream.readBytes(0, 12)
+        if s[8:12] != b'.FIT':
+            return "Invalid header %d %d %d %d" % tuple(s[8:12])
+        return True
+
+    def createFields(self):
+        """Yield the header, every record of the data section, then the CRC."""
+        header = Header(self, "header")
+        yield header
+        # NOTE(review): the FIT SDK describes datasize as the length of the
+        # record section only (header and CRC excluded), so the records end
+        # that many bytes after the header rather than after the file start.
+        data_end = header.size + header["datasize"].value * 8
+        while self.current_size < data_end:
+            # The two leading bits of a record header select the record kind.
+            b = self.stream.readBits(self.absolute_address + self.current_size, 2, self.endian)
+            if b == 1:
+                defMsg = DefinitionMessage(self, "definition[]")
+                # Register the definition so later data records can find it.
+                self.msgDefs[defMsg["RecordHeader"]["msgType"].value] = defMsg
+                yield defMsg
+            elif b == 0:
+                yield DataMessage(self, "data[]")
+            else:
+                yield TimeStamp(self, "timestamp[]")
+        yield CRC(self, "crc")
--- a/lib/hachoir/parser/misc/mapsforge_map.py
+++ b/lib/hachoir/parser/misc/mapsforge_map.py
@ -41,7 +41,7 @@ class UIntVbe(Field):
            size += 1
            assert size < 100, "UIntVBE is too large"

-            if not(haveMoreData):
+            if not haveMoreData:
                break

        self._size = size * 8
@ -71,7 +71,7 @@ class IntVbe(Field):
            size += 1
            assert size < 100, "IntVBE is too large"

-            if not(haveMoreData):
+            if not haveMoreData:
                break

        if isNegative:
--- a/lib/hachoir/parser/misc/pdf.py
+++ b/lib/hachoir/parser/misc/pdf.py
@ -44,7 +44,7 @@ def getElementEnd(s, limit=b' ', offset=0):


 class PDFNumber(Field):
-    LIMITS = [b'[', b'/', b'\x0D', b']']
+    LIMITS = [b'[', b'/', b'\x0A', b'\x0D', b'>', b']']
    """
    sprintf("%i") or sprinf("%.?f")
    """
@ -81,18 +81,18 @@ class PDFString(Field):

    def __init__(self, parent, name, desc=None):
        Field.__init__(self, parent, name, description=desc)
-        val = ""
+        val = bytearray()
        count = 1
        off = 1
        while not parent.eof:
            char = parent.stream.readBytes(self.absolute_address + 8 * off, 1)
            # Non-ASCII
-            if not char.isalpha() or char == '\\':
+            if not char.isalpha() or char == b'\\':
                off += 1
                continue
-            if char == '(':
+            if char == b'(':
                count += 1
-            if char == ')':
+            if char == b')':
                count -= 1
            # Parenthesis block = 0 => end of string
            if count == 0:
@ -101,13 +101,15 @@ class PDFString(Field):

            # Add it to the string
            val += char
+            off += 1

+        val = bytes(val)
        self._size = 8 * off
        self.createValue = lambda: val


 class PDFName(Field):
-    LIMITS = [b'[', b'/', b'<', b']']
+    LIMITS = [b'[', b'/', b'<', b'>', b']']
    """
    String starting with '/', where characters may be written using their
    ASCII code (exemple: '#20' would be ' '
@ -145,7 +147,7 @@ class PDFID(Field):

    def __init__(self, parent, name, desc=None):
        Field.__init__(self, parent, name, description=desc)
-        self._size = 8 * getElementEnd(parent, '>')
+        self._size = 8 * getElementEnd(parent, b'>')
        self.createValue = lambda: parent.stream.readBytes(
            self.absolute_address + 8, (self._size // 8) - 1)

@ -254,7 +256,7 @@ def parsePDFType(s):
    else:
        # First parse size
        size = getElementEnd(s)
-        for limit in ['/', '>', '<']:
+        for limit in [b'/', b'>', b'<']:
            other_size = getElementEnd(s, limit)
            if other_size is not None:
                other_size -= 1
@ -424,7 +426,7 @@ class Catalog(FieldSet):
            new_length = getElementEnd(self, limit)
            if length is None or (new_length is not None and new_length - len(limit) < length):
                length = new_length - len(limit)
-        yield String(self, "object", length, strip=' ')
+        yield String(self, "object", length, strip=' \n')
        if self.stream.readBytes(self.absolute_address + self.current_size, 2) == b'<<':
            yield PDFDictionary(self, "key_list")
        # End of catalog: this one has "endobj"
@ -441,9 +443,9 @@ class Trailer(FieldSet):
        yield RawBytes(self, "marker", len(self.MAGIC))
        yield WhiteSpace(self, "sep[]")
        yield String(self, "start_attribute_marker", 2)
+        yield WhiteSpace(self, "sep[]")
        addr = self.absolute_address + self.current_size
        while self.stream.readBytes(addr, 2) != b'>>':
-            yield WhiteSpace(self, "sep[]")
            t = PDFName(self, "type[]")
            yield t
            name = t.value.decode()
@ -462,6 +464,7 @@ class Trailer(FieldSet):
                yield PDFDictionary(self, "decrypt")
            else:
                raise ParserError("Don't know trailer type '%s'" % name)
+            yield WhiteSpace(self, "sep[]")
            addr = self.absolute_address + self.current_size
        yield String(self, "end_attribute_marker", 2)
        yield LineEnd(self, "line_end[]")
--- a/lib/hachoir/parser/misc/ttf.py
+++ b/lib/hachoir/parser/misc/ttf.py
@ -2,6 +2,8 @@
 TrueType Font parser.

 Documents:
+ - "The OpenType Specification"
+   https://docs.microsoft.com/en-us/typography/opentype/spec/
 - "An Introduction to TrueType Fonts: A look inside the TTF format"
   written by "NRSI: Computers & Writing Systems"
   http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter08
@ -11,11 +13,26 @@ Creation date: 2007-02-08
 """

 from hachoir.parser import Parser
-from hachoir.field import (FieldSet, ParserError,
-                               UInt16, UInt32, Bit, Bits,
-                               PaddingBits, NullBytes,
-                               String, RawBytes, Bytes, Enum,
-                               TimestampMac32)
+from hachoir.field import (
+    FieldSet,
+    ParserError,
+    UInt8,
+    UInt16,
+    UInt24,
+    UInt32,
+    Int16,
+    Bit,
+    Bits,
+    PaddingBits,
+    NullBytes,
+    String,
+    RawBytes,
+    Bytes,
+    Enum,
+    TimestampMac32,
+    GenericVector,
+    PascalString8,
+)
 from hachoir.core.endian import BIG_ENDIAN
 from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler

@ -69,11 +86,65 @@ CHARSET_MAP = {
    3: {1: "UTF-16-BE"},
 }

+PERMISSIONS = {
+    0: "Installable embedding",
+    2: "Restricted License embedding",
+    4: "Preview & Print embedding",
+    8: "Editable embedding",
+}

-class TableHeader(FieldSet):
+FWORD = Int16
+UFWORD = UInt16
+
+
+class Tag(String):
+    def __init__(self, parent, name, description=None):
+        String.__init__(self, parent, name, 4, description)
+
+
+class Version16Dot16(FieldSet):
+    static_size = 32

    def createFields(self):
-        yield String(self, "tag", 4)
+        yield UInt16(self, "major")
+        yield UInt16(self, "minor")
+
+    def createValue(self):
+        return float("%u.%x" % (self["major"].value, self["minor"].value))
+
+
+class Fixed(FieldSet):
+    # 32-bit signed fixed-point number (16.16), used e.g. for italicAngle
+    # and variation axis coordinates, both of which may be negative.
+    def createFields(self):
+        # The high half carries the sign (two's complement); the low half is
+        # always an unsigned count of 1/65536 units added on top of it.
+        yield Int16(self, "int_part")
+        yield UInt16(self, "float_part")
+
+    def createValue(self):
+        return self["int_part"].value + float(self["float_part"].value) / 65536
+
+
+class Tuple(FieldSet):
+    def __init__(self, parent, name, axisCount):
+        super().__init__(parent, name, description="Tuple Record")
+        self.axisCount = axisCount
+
+    def createFields(self):
+        for _ in range(self.axisCount):
+            yield (Fixed(self, "coordinate[]"))
+
+
+class F2DOT14(FieldSet):
+    static_size = 16
+
+    def createFields(self):
+        yield Int16(self, "int_part")
+
+    def createValue(self):
+        return self["int_part"].value / 16384
+
+
+class TableHeader(FieldSet):
+    def createFields(self):
+        yield Tag(self, "tag")
        yield textHandler(UInt32(self, "checksum"), hexadecimal)
        yield UInt32(self, "offset")
        yield filesizeHandler(UInt32(self, "size"))
@ -83,7 +154,6 @@ class TableHeader(FieldSet):


 class NameHeader(FieldSet):
-
    def createFields(self):
        yield Enum(UInt16(self, "platformID"), PLATFORM_NAME)
        yield UInt16(self, "encodingID")
@ -162,17 +232,273 @@ def parseFontHeader(self):
    yield UInt16(self, "glyph_format", "(=0)")


+class AxisValueMap(FieldSet):
+    static_size = 32
+
+    def createFields(self):
+        yield F2DOT14(self, "fromCoordinate")
+        yield F2DOT14(self, "toCoordinate")
+
+
+class SegmentMaps(FieldSet):
+    def createFields(self):
+        yield UInt16(
+            self, "positionMapCount", "The number of correspondence pairs for this axis"
+        )
+        for _ in range(self["positionMapCount"].value):
+            yield (AxisValueMap(self, "axisValueMaps[]"))
+
+
+def parseAvar(self):
+    yield UInt16(self, "majorVersion", "Major version")
+    yield UInt16(self, "minorVersion", "Minor version")
+    yield PaddingBits(self, "reserved[]", 16)
+    yield UInt16(self, "axisCount", "The number of variation axes for this font")
+    for _ in range(self["axisCount"].value):
+        yield (SegmentMaps(self, "segmentMaps[]"))
+
+
+class VariationAxisRecord(FieldSet):
+    def createFields(self):
+        yield Tag(self, "axisTag", "Tag identifying the design variation for the axis")
+        yield Fixed(self, "minValue", "The minimum coordinate value for the axis")
+        yield Fixed(self, "defaultValue", "The default coordinate value for the axis")
+        yield Fixed(self, "maxValue", "The maximum coordinate value for the axis")
+        yield PaddingBits(self, "reservedFlags", 15)
+        yield Bit(
+            self, "hidden", "The axis should not be exposed directly in user interfaces"
+        )
+        yield UInt16(
+            self,
+            "axisNameID",
+            "The name ID for entries in the 'name' table that provide a display name for this axis",
+        )
+
+
+class InstanceRecord(FieldSet):
+    def __init__(self, parent, name, axisCount, hasPSNameID=False):
+        super().__init__(parent, name, description="Instance record")
+        self.axisCount = axisCount
+        self.hasPSNameID = hasPSNameID
+
+    def createFields(self):
+        yield UInt16(
+            self, "subfamilyNameID", "Name ID for subfamily names for this instance"
+        )
+        yield PaddingBits(self, "reservedFlags", 16)
+        yield Tuple(self, "coordinates", axisCount=self.axisCount)
+        if self.hasPSNameID:
+            yield UInt16(
+                self,
+                "postScriptNameID",
+                "Name ID for PostScript names for this instance",
+            )
+
+
+def parseFvar(self):
+    # Parse the 'fvar' table: header, then the axis records, then the
+    # named-instance records.
+    yield UInt16(self, "majorVersion", "Major version")
+    yield UInt16(self, "minorVersion", "Minor version")
+    yield UInt16(
+        self, "axisArrayOffset", "Offset to the start of the VariationAxisRecord array."
+    )
+    yield PaddingBits(self, "reserved[]", 16)
+    yield UInt16(self, "axisCount", "The number of variation axes for this font")
+    yield UInt16(self, "axisSize", "The size in bytes of each VariationAxisRecord")
+    yield UInt16(self, "instanceCount", "The number of named instances for this font")
+    yield UInt16(self, "instanceSize", "The size in bytes of each InstanceRecord")
+    # The header above is 16 bytes long; skip any gap before the axis array.
+    if self["axisArrayOffset"].value > 16:
+        yield PaddingBits(self, "padding", 8 * (self["axisArrayOffset"].value - 16))
+    for _ in range(self["axisCount"].value):
+        yield (VariationAxisRecord(self, "axes[]"))
+    # An InstanceRecord is subfamilyNameID (2) + flags (2) + one 4-byte Fixed
+    # coordinate per axis, optionally followed by postScriptNameID (2).
+    size_with_ps_name = 4 * self["axisCount"].value + 6
+    for _ in range(self["instanceCount"].value):
+        yield (
+            InstanceRecord(
+                self,
+                "instances[]",
+                axisCount=self["axisCount"].value,
+                hasPSNameID=(self["instanceSize"].value == size_with_ps_name),
+            )
+        )
+
+
+class EncodingRecord(FieldSet):
+    static_size = 64
+
+    def createFields(self):
+        yield Enum(UInt16(self, "platformID"), PLATFORM_NAME)
+        yield UInt16(self, "encodingID")
+        self.offset = UInt32(self, "subtableOffset")
+        yield self.offset
+
+
+class CmapTable0(FieldSet):
+    def createFields(self):
+        yield UInt16(self, "format", "Table format")
+        yield UInt16(self, "length", "Length in bytes")
+        yield UInt16(self, "language", "Language ID")
+        yield GenericVector(self, "mapping", 256, UInt8)
+
+
+class CmapTable4(FieldSet):
+    # cmap subtable format 4: segment mapping to delta values.
+    def createFields(self):
+        yield UInt16(self, "format", "Table format")
+        yield UInt16(self, "length", "Length in bytes")
+        yield UInt16(self, "language", "Language ID")
+        yield UInt16(self, "segCountX2", "Twice the number of segments")
+        segments = self["segCountX2"].value // 2
+        yield UInt16(self, "searchRange")
+        yield UInt16(self, "entrySelector")
+        yield UInt16(self, "rangeShift")
+        yield GenericVector(self, "endCode", segments, UInt16)
+        yield PaddingBits(self, "reserved[]", 16)
+        yield GenericVector(self, "startCode", segments, UInt16)
+        yield GenericVector(self, "idDelta", segments, Int16)
+        yield GenericVector(self, "idRangeOffsets", segments, UInt16)
+        # Whatever is left of "length" is the glyph index array. Keep the
+        # element count an integer, and ignore a length shorter than the
+        # fields already read instead of asking for a negative count.
+        remainder = (self["length"].value - self.current_size // 8) // 2
+        if remainder > 0:
+            yield GenericVector(self, "glyphIdArray", remainder, UInt16)
+
+
+class CmapTable6(FieldSet):
+    def createFields(self):
+        yield UInt16(self, "format", "Table format")
+        yield UInt16(self, "length", "Length in bytes")
+        yield UInt16(self, "language", "Language ID")
+        yield UInt16(self, "firstCode", "First character code of subrange")
+        yield UInt16(self, "entryCount", "Number of character codes in subrange")
+        yield GenericVector(self, "glyphIdArray", self["entryCount"].value, UInt16)
+
+
+class SequentialMapGroup(FieldSet):
+    # One (start, end, first glyph) run of a format 12 cmap subtable.
+    def createFields(self):
+        yield UInt32(self, "startCharCode", "First character code in this group")
+        yield UInt32(self, "endCharCode", "Last character code in this group")
+        yield UInt32(
+            self,
+            "startGlyphID",
+            "Glyph index corresponding to the starting character code",
+        )
+
+
+class CmapTable12(FieldSet):
+    def createFields(self):
+        yield UInt16(self, "format", "Table format")
+        yield PaddingBits(self, "reserved[]", 16)
+        yield UInt32(self, "length", "Length in bytes")
+        yield UInt32(self, "language", "Language ID")
+        yield UInt32(self, "numGroups", "Number of groupings which follow")
+        for i in range(self["numGroups"].value):
+            yield SequentialMapGroup(self, "mapgroup[]")
+
+
+class VariationSelector(FieldSet):
+    def createFields(self):
+        yield UInt24(self, "varSelector", "Variation selector")
+        yield UInt32(self, "defaultUVSOffset", "Offset to default UVS table")
+        yield UInt32(self, "nonDefaultUVSOffset", "Offset to non-default UVS table")
+
+
+class CmapTable14(FieldSet):
+    def createFields(self):
+        yield UInt16(self, "format", "Table format")
+        yield UInt32(self, "length", "Length in bytes")
+        yield UInt32(
+            self, "numVarSelectorRecords", "Number of variation selector records"
+        )
+        for i in range(self["numVarSelectorRecords"].value):
+            yield VariationSelector(self, "variationSelector[]")
+
+
+def parseCmap(self):
+    yield UInt16(self, "version")
+    numTables = UInt16(self, "numTables", "Number of encoding tables")
+    yield numTables
+    encodingRecords = []
+    for index in range(numTables.value):
+        entry = EncodingRecord(self, "encodingRecords[]")
+        yield entry
+        encodingRecords.append(entry)
+    encodingRecords.sort(key=lambda field: field["subtableOffset"].value)
+    last = None
+    for er in encodingRecords:
+        offset = er["subtableOffset"].value
+        if last and last == offset:
+            continue
+        last = offset
+
+        # Add padding if any
+        padding = self.seekByte(offset, relative=True, null=False)
+        if padding:
+            yield padding
+        format = UInt16(self, "format").value
+        if format == 0:
+            yield CmapTable0(self, "cmap table format 0")
+        elif format == 4:
+            yield CmapTable4(self, "cmap table format 4")
+        elif format == 6:
+            yield CmapTable6(self, "cmap table format 6")
+        elif format == 12:
+            yield CmapTable12(self, "cmap table format 12")
+        elif format == 14:
+            yield CmapTable14(self, "cmap table format 14")
+
+
+class SignatureRecord(FieldSet):
+    def createFields(self):
+        # All three members are 32 bits wide in the OpenType DSIG table.
+        yield UInt32(self, "format", "Table format")
+        yield UInt32(self, "length", "Length of signature")
+        yield UInt32(self, "signatureBlockOffset", "Offset to signature block")
+
+
+class SignatureBlock(FieldSet):
+    def createFields(self):
+        yield PaddingBits(self, "reserved[]", 32)
+        yield UInt32(
+            self,
+            "length",
+            "Length (in bytes) of the PKCS#7 packet in the signature field",
+        )
+        yield String(self, "signature", self["length"].value, "Signature block")
+
+
+def parseDSIG(self):
+    # Parse the 'DSIG' table: header, signature records, then the signature
+    # blocks the records point to (visited in offset order).
+    yield UInt32(self, "version")
+    yield UInt16(self, "numSignatures", "Number of signatures in the table")
+    # "flags" is a 16-bit word and only its lowest bit is defined. This
+    # assumes bit fields are read most-significant bit first, the same
+    # layout VariationAxisRecord uses for its "hidden" flag.
+    yield PaddingBits(self, "reserved[]", 15)
+    yield Bit(self, "flag", "Cannot be resigned")
+    entries = []
+    for i in range(self["numSignatures"].value):
+        record = SignatureRecord(self, "signatureRecords[]")
+        entries.append(record)
+        yield record
+    entries.sort(key=lambda field: field["signatureBlockOffset"].value)
+    last = None
+    for entry in entries:
+        offset = entry["signatureBlockOffset"].value
+        # Several records may share one block; parse it only once.
+        if last and last == offset:
+            continue
+        last = offset
+        # Add padding if any
+        padding = self.seekByte(offset, relative=True, null=False)
+        if padding:
+            yield padding
+        yield SignatureBlock(self, "signatureBlocks[]")
+
+    padding = (self.size - self.current_size) // 8
+    if padding:
+        yield NullBytes(self, "padding_end", padding)
+
+
 def parseNames(self):
    # Read header
    yield UInt16(self, "format")
    if self["format"].value != 0:
-        raise ParserError("TTF (names): Invalid format (%u)" %
-                          self["format"].value)
+        raise ParserError("TTF (names): Invalid format (%u)" % self["format"].value)
    yield UInt16(self, "count")
    yield UInt16(self, "offset")
    if MAX_NAME_COUNT < self["count"].value:
-        raise ParserError("Invalid number of names (%s)"
-                          % self["count"].value)
+        raise ParserError("Invalid number of names (%s)" % self["count"].value)

    # Read name index
    entries = []
@ -208,17 +534,210 @@ def parseNames(self):
        # Read value
        size = entry["length"].value
        if size:
-            yield String(self, "value[]", size, entry.description, charset=entry.getCharset())
+            yield String(
+                self, "value[]", size, entry.description, charset=entry.getCharset()
+            )

    padding = (self.size - self.current_size) // 8
    if padding:
        yield NullBytes(self, "padding_end", padding)


+def parseMaxp(self):
+    # Read header
+    yield Version16Dot16(self, "format", "format version")
+    yield UInt16(self, "numGlyphs", "Number of glyphs")
+    if self["format"].value >= 1:
+        yield UInt16(self, "maxPoints", "Maximum points in a non-composite glyph")
+        yield UInt16(self, "maxContours", "Maximum contours in a non-composite glyph")
+        yield UInt16(self, "maxCompositePoints", "Maximum points in a composite glyph")
+        yield UInt16(
+            self, "maxCompositeContours", "Maximum contours in a composite glyph"
+        )
+        yield UInt16(self, "maxZones", "Do instructions use the twilight zone?")
+        yield UInt16(self, "maxTwilightPoints", "Maximum points used in Z0")
+        yield UInt16(self, "maxStorage", "Number of Storage Area locations")
+        yield UInt16(self, "maxFunctionDefs", "Number of function definitions")
+        yield UInt16(self, "maxInstructionDefs", "Number of instruction definitions")
+        yield UInt16(self, "maxStackElements", "Maximum stack depth")
+        yield UInt16(
+            self, "maxSizeOfInstructions", "Maximum byte count for glyph instructions"
+        )
+        yield UInt16(
+            self,
+            "maxComponentElements",
+            "Maximum number of components at glyph top level",
+        )
+        yield UInt16(self, "maxComponentDepth", "Maximum level of recursion")
+
+
+def parseHhea(self):
+    yield UInt16(self, "majorVersion", "Major version")
+    yield UInt16(self, "minorVersion", "Minor version")
+    yield FWORD(self, "ascender", "Typographic ascent")
+    yield FWORD(self, "descender", "Typographic descent")
+    yield FWORD(self, "lineGap", "Typographic linegap")
+    yield UFWORD(self, "advanceWidthMax", "Maximum advance width")
+    yield FWORD(self, "minLeftSideBearing", "Minimum left sidebearing value")
+    yield FWORD(self, "minRightSideBearing", "Minimum right sidebearing value")
+    yield FWORD(self, "xMaxExtent", "Maximum X extent")
+    yield Int16(self, "caretSlopeRise", "Caret slope rise")
+    yield Int16(self, "caretSlopeRun", "Caret slope run")
+    yield Int16(self, "caretOffset", "Caret offset")
+    yield GenericVector(self, "reserved", 4, Int16)
+    yield Int16(self, "metricDataFormat", "Metric data format")
+    yield UInt16(self, "numberOfHMetrics", "Number of horizontal metrics")
+
+
+class fsType(FieldSet):
+    def createFields(self):
+        yield Enum(Bits(self, "usage_permissions", 4), PERMISSIONS)
+        yield PaddingBits(self, "reserved[]", 4)
+        yield Bit(self, "no_subsetting", "Font may not be subsetted prior to embedding")
+        yield Bit(
+            self,
+            "bitmap_embedding",
+            "Only bitmaps contained in the font may be embedded",
+        )
+        yield PaddingBits(self, "reserved[]", 6)
+
+
+def parseOS2(self):
+    yield UInt16(self, "version", "Table version")
+    yield Int16(self, "xAvgCharWidth")
+    yield UInt16(self, "usWeightClass")
+    yield UInt16(self, "usWidthClass")
+    yield fsType(self, "fsType")
+    yield Int16(self, "ySubscriptXSize")
+    yield Int16(self, "ySubscriptYSize")
+    yield Int16(self, "ySubscriptXOffset")
+    yield Int16(self, "ySubscriptYOffset")
+    yield Int16(self, "ySuperscriptXSize")
+    yield Int16(self, "ySuperscriptYSize")
+    yield Int16(self, "ySuperscriptXOffset")
+    yield Int16(self, "ySuperscriptYOffset")
+    yield Int16(self, "yStrikeoutSize")
+    yield Int16(self, "yStrikeoutPosition")
+    yield Int16(self, "sFamilyClass")
+    yield GenericVector(self, "panose", 10, UInt8)
+    yield UInt32(self, "ulUnicodeRange1")
+    yield UInt32(self, "ulUnicodeRange2")
+    yield UInt32(self, "ulUnicodeRange3")
+    yield UInt32(self, "ulUnicodeRange4")
+    yield Tag(self, "achVendID", "Vendor ID")
+    yield UInt16(self, "fsSelection")
+    yield UInt16(self, "usFirstCharIndex")
+    yield UInt16(self, "usLastCharIndex")
+    yield Int16(self, "sTypoAscender")
+    yield Int16(self, "sTypoDescender")
+    yield Int16(self, "sTypoLineGap")
+    yield UInt16(self, "usWinAscent")
+    yield UInt16(self, "usWinDescent")
+    if self["version"].value >= 1:
+        yield UInt32(self, "ulCodePageRange1")
+        yield UInt32(self, "ulCodePageRange2")
+    if self["version"].value >= 2:
+        yield Int16(self, "sxHeight")
+        yield Int16(self, "sCapHeight")
+        yield UInt16(self, "usDefaultChar")
+        yield UInt16(self, "usBreakChar")
+        yield UInt16(self, "usMaxContext")
+    if self["version"].value >= 5:
+        yield UInt16(self, "usLowerOpticalPointSize")
+        yield UInt16(self, "usUpperOpticalPointSize")
+
+
+def parsePost(self):
+    # Parse the 'post' table: a fixed header, then version-specific glyph
+    # name data (formats 2.0 and 2.5 only).
+    yield Version16Dot16(self, "version", "Table version")
+    yield Fixed(
+        self,
+        "italicAngle",
+        "Italic angle in counter-clockwise degrees from the vertical.",
+    )
+    yield FWORD(self, "underlinePosition", "Top of underline to baseline")
+    yield FWORD(self, "underlineThickness", "Suggested underline thickness")
+    yield UInt32(self, "isFixedPitch", "Is the font fixed pitch?")
+    yield UInt32(self, "minMemType42", "Minimum memory usage (OpenType)")
+    yield UInt32(self, "maxMemType42", "Maximum memory usage (OpenType)")
+    yield UInt32(self, "minMemType1", "Minimum memory usage (Type 1)")
+    yield UInt32(self, "maxMemType1", "Maximum memory usage (Type 1)")
+    if self["version"].value == 2.0:
+        yield UInt16(self, "numGlyphs")
+        indices = GenericVector(
+            self,
+            "Array of indices into the string data",
+            self["numGlyphs"].value,
+            UInt16,
+            "glyphNameIndex",
+        )
+        yield indices
+        # Indices below 258 refer to the standard glyph names; every other
+        # glyph has its own Pascal string stored after the index array.
+        for gid, index in enumerate(indices):
+            if index.value >= 258:
+                yield PascalString8(self, "glyphname[%i]" % gid)
+    elif self["version"].value == 2.5:
+        # Format 2.5 stores one signed byte per glyph, not a 16-bit index.
+        from hachoir.field import Int8
+
+        yield UInt16(self, "numGlyphs")
+        indices = GenericVector(
+            self,
+            "Difference between graphic index and standard order of glyph",
+            self["numGlyphs"].value,
+            Int8,
+            "offset",
+        )
+        yield indices
+
+
+# This is work-in-progress until I work out good ways to do random-access on offsets
+parseScriptList = (
+    parseFeatureList
+) = parseLookupList = parseFeatureVariationsTable = lambda x: None
+
+
+def parseGSUB(self):
+    # Parse the 'GSUB' header and expose the space between consecutive
+    # subtable offsets as raw bytes (the subtable parsers are still stubs).
+    yield UInt16(self, "majorVersion", "Major version")
+    yield UInt16(self, "minorVersion", "Minor version")
+    SUBTABLES = [
+        ("script list", parseScriptList),
+        ("feature list", parseFeatureList),
+        ("lookup list", parseLookupList),
+    ]
+    offsets = []
+    for description, parser in SUBTABLES:
+        # "script list" -> "ScriptList" -> field name "scriptList"
+        name = description.title().replace(" ", "")
+        offset = UInt16(
+            self, name[0].lower() + name[1:], "Offset to %s table" % description
+        )
+        yield offset
+        offsets.append((offset.value, parser))
+    # The version fields of this table are the ones yielded above; "min_ver"
+    # is not a field of this field set.
+    if self["minorVersion"].value == 1:
+        offset = UInt32(
+            self, "featureVariationsOffset", "Offset to feature variations table"
+        )
+        yield offset
+        offsets.append((offset.value, parseFeatureVariationsTable))
+
+    # An offset of 0 means the subtable is absent; seeking to it would try
+    # to move backwards over the header that was just read.
+    offsets = sorted(
+        (entry for entry in offsets if entry[0]), key=lambda entry: entry[0]
+    )
+    if not offsets:
+        return
+    padding = self.seekByte(offsets[0][0], null=True)
+    if padding:
+        yield padding
+    lastOffset = offsets[0][0]
+    for offset, parser in offsets[1:]:
+        # yield parser(self)
+        # Skip subtables sharing an offset: a raw field needs a positive
+        # size. The "[]" suffix keeps repeated field names distinct, as the
+        # other repeated fields in this file do.
+        if offset > lastOffset:
+            yield RawBytes(self, "content[]", offset - lastOffset)
+            lastOffset = offset
+
+
 class Table(FieldSet):
    TAG_INFO = {
+        "DSIG": ("DSIG", "Digital Signature", parseDSIG),
+        "GSUB": ("GSUB", "Glyph Substitutions", parseGSUB),
+        "avar": ("avar", "Axis variation table", parseAvar),
+        "cmap": ("cmap", "Character to Glyph Index Mapping", parseCmap),
+        "fvar": ("fvar", "Font variations table", parseFvar),
        "head": ("header", "Font header", parseFontHeader),
+        "hhea": ("hhea", "Horizontal Header", parseHhea),
+        "maxp": ("maxp", "Maximum Profile", parseMaxp),
        "name": ("names", "Names", parseNames),
+        "OS/2": ("OS_2", "OS/2 and Windows Metrics", parseOS2),
+        "post": ("post", "PostScript", parsePost),
    }

    def __init__(self, parent, name, table, **kw):
@ -251,10 +770,15 @@ class TrueTypeFontFile(Parser):
    }

    def validate(self):
-        if self["maj_ver"].value != 1:
-            return "Invalid major version (%u)" % self["maj_ver"].value
-        if self["min_ver"].value != 0:
-            return "Invalid minor version (%u)" % self["min_ver"].value
+        if self["maj_ver"].value == 1 and self["min_ver"].value == 0:
+            pass
+        elif self["maj_ver"].value == 0x4F54 and self["min_ver"].value == 0x544F:
+            pass
+        else:
+            return "Invalid version (%u.%u)" % (
+                self["maj_ver"].value,
+                self["min_ver"].value,
+            )
        if not (MIN_NB_TABLE <= self["nb_table"].value <= MAX_NB_TABLE):
            return "Invalid number of table (%u)" % self["nb_table"].value
        return True
--- a/lib/hachoir/parser/parser.py
+++ b/lib/hachoir/parser/parser.py
@ -13,7 +13,7 @@ class HachoirParser(object):
    """
    A parser is the root of all other fields. It create first level of fields
    and have special attributes and methods:
-    - tags: dictionnary with keys:
+    - tags: dictionary with keys:
      - "file_ext": classical file extensions (string or tuple of strings) ;
      - "mime": MIME type(s) (string or tuple of strings) ;
      - "description": String describing the parser.
--- a/lib/hachoir/parser/program/exe.py
+++ b/lib/hachoir/parser/program/exe.py
@ -19,7 +19,7 @@ from hachoir.parser.program.exe_ne import NE_Header
 from hachoir.parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader
 from hachoir.parser.program.exe_res import PE_Resource, NE_VersionInfoNode

-MAX_NB_SECTION = 50
+MAX_NB_SECTION = 100


 class MSDosHeader(FieldSet):
--- a/lib/hachoir/parser/program/exe_pe.py
+++ b/lib/hachoir/parser/program/exe_pe.py
@ -1,5 +1,5 @@
 from hachoir.field import (FieldSet, ParserError,
-                               Bit, UInt8, UInt16, UInt32, TimestampUnix32,
+                           Bit, UInt8, UInt16, UInt32, UInt64, TimestampUnix32,
                           Bytes, String, Enum,
                           PaddingBytes, PaddingBits, NullBytes, NullBits)
 from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
@ -175,10 +175,13 @@ class PE_OptHeader(FieldSet):
    }

    def createFields(self):
-        yield UInt16(self, "signature", "PE optional header signature (0x010b)")
-        # TODO: Support PE32+ (signature=0x020b)
-        if self["signature"].value != 0x010b:
+        yield UInt16(self, "signature", "PE optional header signature (0x010b | 0x020b)")
+
+        if self["signature"].value != 0x010b and self["signature"].value != 0x020b:
            raise ParserError("Invalid PE optional header signature")
+        is_pe32plus = self["signature"].value == 0x020b
+        VarUInt = UInt64 if is_pe32plus else UInt32
+
        yield UInt8(self, "maj_lnk_ver", "Major linker version")
        yield UInt8(self, "min_lnk_ver", "Minor linker version")
        yield filesizeHandler(UInt32(self, "size_code", "Size of code"))
@ -186,8 +189,9 @@ class PE_OptHeader(FieldSet):
        yield filesizeHandler(UInt32(self, "size_uninit_data", "Size of uninitialized data"))
        yield textHandler(UInt32(self, "entry_point", "Address (RVA) of the code entry point"), hexadecimal)
        yield textHandler(UInt32(self, "base_code", "Base (RVA) of code"), hexadecimal)
+        if not is_pe32plus:
            yield textHandler(UInt32(self, "base_data", "Base (RVA) of data"), hexadecimal)
-        yield textHandler(UInt32(self, "image_base", "Image base (RVA)"), hexadecimal)
+        yield textHandler(VarUInt(self, "image_base", "Image base (RVA)"), hexadecimal)
        yield filesizeHandler(UInt32(self, "sect_align", "Section alignment"))
        yield filesizeHandler(UInt32(self, "file_align", "File alignment"))
        yield UInt16(self, "maj_os_ver", "Major OS version")
@ -202,10 +206,10 @@ class PE_OptHeader(FieldSet):
        yield textHandler(UInt32(self, "checksum"), hexadecimal)
        yield Enum(UInt16(self, "subsystem"), self.SUBSYSTEM_NAME)
        yield UInt16(self, "dll_flags")
-        yield filesizeHandler(UInt32(self, "size_stack_reserve"))
-        yield filesizeHandler(UInt32(self, "size_stack_commit"))
-        yield filesizeHandler(UInt32(self, "size_heap_reserve"))
-        yield filesizeHandler(UInt32(self, "size_heap_commit"))
+        yield filesizeHandler(VarUInt(self, "size_stack_reserve"))
+        yield filesizeHandler(VarUInt(self, "size_stack_commit"))
+        yield filesizeHandler(VarUInt(self, "size_heap_reserve"))
+        yield filesizeHandler(VarUInt(self, "size_heap_commit"))
        yield UInt32(self, "loader_flags")
        yield UInt32(self, "nb_directory", "Number of RVA and sizes")
        for index in range(self["nb_directory"].value):
--- a/lib/hachoir/parser/program/java.py
+++ b/lib/hachoir/parser/program/java.py
@ -435,6 +435,19 @@ class OpcodeSpecial_invokeinterface(JavaOpcode):
        return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["count"].value, self["zero"].value)


+class OpcodeSpecial_invokedynamic(JavaOpcode):
+    OPSIZE = 5
+
+    def createFields(self):
+        yield UInt8(self, "opcode")
+        yield CPIndex(self, "index")
+        yield UInt8(self, "zero1", "Must be zero.")
+        yield UInt8(self, "zero2", "Must be zero.")
+
+    def createDisplay(self):
+        return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["zero1"].value, self["zero2"].value)
+
+
 class OpcodeSpecial_newarray(JavaOpcode):
    OPSIZE = 2

@ -659,6 +672,7 @@ class JavaBytecode(FieldSet):
        0x98: ("dcmpg", OpcodeNoArgs, "compares two doubles. Stack: value1, value2 -> result"),
        0x99: ("ifeq", OpcodeShortJump, "if 'value' is 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
        0x9a: ("ifne", OpcodeShortJump, "if 'value' is not 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
+        0x9b: ("iflt", OpcodeShortJump, "if 'value' is less than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
        0x9c: ("ifge", OpcodeShortJump, "if 'value' is greater than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
        0x9d: ("ifgt", OpcodeShortJump, "if 'value' is greater than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
        0x9e: ("ifle", OpcodeShortJump, "if 'value' is less than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
@ -689,7 +703,7 @@ class JavaBytecode(FieldSet):
        0xb7: ("invokespecial", OpcodeCPIndex, "invoke instance method on object 'objectref', where the method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
        0xb8: ("invokestatic", OpcodeCPIndex, "invoke a static method, where the method is identified by method reference <argument> in the constant pool. Stack: [arg1, arg2, ...] ->"),
        0xb9: ("invokeinterface", OpcodeSpecial_invokeinterface, "invokes an interface method on object 'objectref', where the interface method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
-        0xba: ("xxxunusedxxx", OpcodeNoArgs, "this opcode is reserved for historical reasons. Stack: "),
+        0xba: ("invokedynamic", OpcodeSpecial_invokedynamic, "invokes a dynamically-computed call site, where the bootstrap method is identified by <argument> in constant pool. Stack: [arg1, arg2, ...] -> "),
        0xbb: ("new", OpcodeCPIndex, "creates new object of type identified by class reference <argument> in constant pool. Stack: -> objectref"),
        0xbc: ("newarray", OpcodeSpecial_newarray, "creates new array with 'count' elements of primitive type given in the argument. Stack: count -> arrayref"),
        0xbd: ("anewarray", OpcodeCPIndex, "creates a new array of references of length 'count' and component type identified by the class reference <argument> in the constant pool. Stack: count -> arrayref"),
@ -762,6 +776,33 @@ class CPInfo(FieldSet):
        elif self.constant_type == "NameAndType":
            yield CPIndex(self, "name_index", target_types="Utf8")
            yield CPIndex(self, "descriptor_index", target_types="Utf8")
+        elif self.constant_type == "MethodHandle":
+            refkind_map = {
+                1: ("getField", "Fieldref"),
+                2: ("getStatic", "Fieldref"),
+                3: ("putField", "Fieldref"),
+                4: ("putStatic", "Fieldref"),
+                5: ("invokeVirtual", "Methodref"),
+                6: ("invokeStatic", ("Methodref", "InterfaceMethodref")),
+                7: ("invokeSpecial", ("Methodref", "InterfaceMethodref")),
+                8: ("newInvokeSpecial", "Methodref"),
+                9: ("invokeInterface", "InterfaceMethodref"),
+            }
+            yield Enum(UInt8(self, "reference_kind"), {k: v[0] for k, v in refkind_map.items()})
+            target_types = refkind_map[self["reference_kind"].value][1]
+            yield CPIndex(self, "reference_index", target_types=target_types)
+        elif self.constant_type == "MethodType":
+            yield CPIndex(self, "descriptor_index", target_types="Utf8")
+        elif self.constant_type == "Dynamic":
+            yield UInt16(self, "bootstrap_method_attr_index")
+            yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
+        elif self.constant_type == "InvokeDynamic":
+            yield UInt16(self, "bootstrap_method_attr_index")
+            yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
+        elif self.constant_type == "Module":
+            yield CPIndex(self, "name_index", target_types="Utf8")
+        elif self.constant_type == "Package":
+            yield CPIndex(self, "name_index", target_types="Utf8")
        else:
            raise ParserError("Not a valid constant pool element type: "
                              + self["tag"].value)
@ -785,6 +826,21 @@ class CPInfo(FieldSet):
        elif self.constant_type == "NameAndType":
            return (self["descriptor_index"].rawvalue(),
                    self["name_index"].rawvalue())
+        elif self.constant_type == "MethodHandle":
+            return (self["reference_kind"].display,
+                    self["reference_index"].rawvalue())
+        elif self.constant_type == "MethodType":
+            return self["descriptor_index"].rawvalue()
+        elif self.constant_type == "Dynamic":
+            return (self["bootstrap_method_attr_index"].value,
+                    self["name_and_type_index"].rawvalue())
+        elif self.constant_type == "InvokeDynamic":
+            return (self["bootstrap_method_attr_index"].value,
+                    self["name_and_type_index"].rawvalue())
+        elif self.constant_type == "Module":
+            return self["name_index"].rawvalue()
+        elif self.constant_type == "Package":
+            return self["name_index"].rawvalue()
        else:
            # FIXME: Return "<error>" instead of raising an exception?
            raise ParserError("Not a valid constant pool element type: "
@ -811,6 +867,24 @@ class CPInfo(FieldSet):
        elif self.constant_type == "NameAndType":
            descriptor, name = self.rawvalue()
            return parse_any_descriptor(descriptor, name=name)
+        elif self.constant_type == "MethodHandle":
+            return "%s(%s)" % (self["reference_kind"].display, self["reference_index"].str())
+        elif self.constant_type == "MethodType":
+            return self["descriptor_index"].str()
+        elif self.constant_type == "Dynamic":
+            return "%d, %s" % (
+                self["bootstrap_method_attr_index"].value,
+                self["name_and_type_index"].str()
+            )
+        elif self.constant_type == "InvokeDynamic":
+            return "%d, %s" % (
+                self["bootstrap_method_attr_index"].value,
+                self["name_and_type_index"].str()
+            )
+        elif self.constant_type == "Module":
+            return self["name_index"].str()
+        elif self.constant_type == "Package":
+            return self["name_index"].str()
        else:
            # FIXME: Return "<error>" instead of raising an exception?
            raise ParserError("Not a valid constant pool element type: "
@ -1192,6 +1266,12 @@ class JavaCompiledClassFile(Parser):
        "50.0": "JDK 1.6",
        "51.0": "JDK 1.7",
        "52.0": "JDK 1.8",
+        "53.0": "JDK 9",
+        "54.0": "JDK 10",
+        "55.0": "JDK 11",
+        "56.0": "JDK 12",
+        "57.0": "JDK 13",
+        "58.0": "JDK 14",
    }

    # Constants go here since they will probably depend on the detected format
@ -1208,7 +1288,13 @@ class JavaCompiledClassFile(Parser):
        9: "Fieldref",
        10: "Methodref",
        11: "InterfaceMethodref",
-        12: "NameAndType"
+        12: "NameAndType",
+        15: "MethodHandle",
+        16: "MethodType",
+        17: "Dynamic",
+        18: "InvokeDynamic",
+        19: "Module",
+        20: "Package",
    }

    def validate(self):
--- a/lib/hachoir/parser/program/python.py
+++ b/lib/hachoir/parser/program/python.py
@ -10,13 +10,16 @@ Creation: 25 march 2005
 """

 from hachoir.parser import Parser
-from hachoir.field import (FieldSet, UInt8,
-                               UInt16, Int32, UInt32, Int64, ParserError, Float64,
+from hachoir.field import (
+    Field, FieldSet, UInt8,
+    UInt16, Int32, UInt32, Int64, UInt64,
+    ParserError, Float64,
    Character, RawBytes, PascalString8, TimestampUnix32,
-                               Bit, String)
+    Bit, String, NullBits)
 from hachoir.core.endian import LITTLE_ENDIAN
 from hachoir.core.bits import long2raw
 from hachoir.core.text_handler import textHandler, hexadecimal
+from hachoir.core import config

 DISASSEMBLE = False

@ -51,6 +54,12 @@ def parseString(parent):
        disassembleBytecode(parent["text"])


+def createStringValue(parent):
+    if parent.name == "lnotab":
+        return "<lnotab>"
+    return parent["text"]
+
+
 def parseStringRef(parent):
    yield textHandler(UInt32(parent, "ref"), hexadecimal)

@ -58,6 +67,13 @@ def parseStringRef(parent):
 def createStringRefDesc(parent):
    return "String ref: %s" % parent["ref"].display

+
+def createStringRefValue(parent):
+    value = parent["ref"].value
+    if hasattr(parent.root, 'string_table') and 0 <= value < len(parent.root.string_table):
+        return parent.root.string_table[value]
+    return None
+
 # --- Integers ---


@ -69,17 +85,37 @@ def parseInt64(parent):
    yield Int64(parent, "value")


+def createIntValue(parent):
+    return parent["value"]
+
+
 def parseLong(parent):
    yield Int32(parent, "digit_count")
    for index in range(abs(parent["digit_count"].value)):
        yield UInt16(parent, "digit[]")


+def createLongValue(parent):
+    is_negative = parent["digit_count"].value < 0
+    count = abs(parent["digit_count"].value)
+    total = 0
+    for index in range(count - 1, -1, -1):
+        total <<= 15
+        total += parent["digit[%u]" % index].value
+    if is_negative:
+        total = -total
+    return total
+
+
 # --- Float and complex ---
 def parseFloat(parent):
    yield PascalString8(parent, "value")


+def createFloatValue(parent):
+    return float(parent["value"].value)
+
+
 def parseBinaryFloat(parent):
    yield Float64(parent, "value")

@ -94,6 +130,12 @@ def parseBinaryComplex(parent):
    yield Float64(parent, "complex")


+def createComplexValue(parent):
+    return complex(
+        float(parent["real"].value),
+        float(parent["complex"].value))
+
+
 # --- Tuple and list ---
 def parseTuple(parent):
    yield UInt32(parent, "count", "Item count")
@ -119,6 +161,12 @@ def createTupleDesc(parent):
    return "%s: %s" % (parent.code_info[2], items)


+def tupleValueCreator(constructor):
+    def createTupleValue(parent):
+        return constructor([v.value for v in parent.array("item")])
+    return createTupleValue
+
+
 # --- Dict ---
 def parseDict(parent):
    """
@ -139,26 +187,58 @@ def createDictDesc(parent):
    return "Dict: %s" % ("%s keys" % parent.count)


+def createDictValue(parent):
+    return {k.value: v.value for k, v in zip(parent.array("key"), parent.array("value"))}
+
+
 def parseRef(parent):
    yield UInt32(parent, "n", "Reference")


+def createRefDesc(parent):
+    value = parent["n"].value
+    if hasattr(parent.root, 'object_table') and 0 <= value < len(parent.root.object_table):
+        return 'Reference: %s' % parent.root.object_table[value].description
+    else:
+        return 'Reference: %d' % value
+
+
+def createRefValue(parent):
+    value = parent["n"].value
+    if hasattr(parent.root, 'object_table') and 0 <= value < len(parent.root.object_table):
+        return parent.root.object_table[value]
+    else:
+        return None
+
+
+def parseASCII(parent):
+    size = UInt32(parent, "len", "Number of ASCII characters")
+    yield size
+    if size.value:
+        yield String(parent, "text", size.value, "String content", charset="ASCII")
+
+
 def parseShortASCII(parent):
    size = UInt8(parent, "len", "Number of ASCII characters")
    yield size
+    if size.value:
        yield String(parent, "text", size.value, "String content", charset="ASCII")

 # --- Code ---


 def parseCode(parent):
-    if 0x3000000 <= parent.root.getVersion():
+    version = parent.root.getVersion()
+    if 0x3000000 <= version:
        yield UInt32(parent, "arg_count", "Argument count")
+        if 0x3080000 <= version:
+            yield UInt32(parent, "posonlyargcount", "Positional only argument count")
        yield UInt32(parent, "kwonlyargcount", "Keyword only argument count")
+        if version < 0x30B0000:
            yield UInt32(parent, "nb_locals", "Number of local variables")
        yield UInt32(parent, "stack_size", "Stack size")
        yield UInt32(parent, "flags")
-    elif 0x2030000 <= parent.root.getVersion():
+    elif 0x2030000 <= version:
        yield UInt32(parent, "arg_count", "Argument count")
        yield UInt32(parent, "nb_locals", "Number of local variables")
        yield UInt32(parent, "stack_size", "Stack size")
@ -168,54 +248,70 @@ def parseCode(parent):
        yield UInt16(parent, "nb_locals", "Number of local variables")
        yield UInt16(parent, "stack_size", "Stack size")
        yield UInt16(parent, "flags")
+
    yield Object(parent, "compiled_code")
    yield Object(parent, "consts")
    yield Object(parent, "names")
+    if 0x30B0000 <= version:
+        yield Object(parent, "co_localsplusnames")
+        yield Object(parent, "co_localspluskinds")
+    else:
        yield Object(parent, "varnames")
-    if 0x2000000 <= parent.root.getVersion():
+        if 0x2000000 <= version:
            yield Object(parent, "freevars")
            yield Object(parent, "cellvars")
+
    yield Object(parent, "filename")
    yield Object(parent, "name")
-    if 0x2030000 <= parent.root.getVersion():
+    if 0x30B0000 <= version:
+        yield Object(parent, "qualname")
+
+    if 0x2030000 <= version:
        yield UInt32(parent, "firstlineno", "First line number")
    else:
        yield UInt16(parent, "firstlineno", "First line number")
+    if 0x30A0000 <= version:
+        yield Object(parent, "linetable")
+        if 0x30B0000 <= version:
+            yield Object(parent, "exceptiontable")
+    else:
        yield Object(parent, "lnotab")


 class Object(FieldSet):
    bytecode_info = {
        # Don't contains any data
-        '0': ("null", None, "NULL", None),
-        'N': ("none", None, "None", None),
-        'F': ("false", None, "False", None),
-        'T': ("true", None, "True", None),
-        'S': ("stop_iter", None, "StopIter", None),
-        '.': ("ellipsis", None, "ELLIPSIS", None),
-        '?': ("unknown", None, "Unknown", None),
+        '0': ("null", None, "NULL", None, None),
+        'N': ("none", None, "None", None, lambda parent: None),
+        'F': ("false", None, "False", None, lambda parent: False),
+        'T': ("true", None, "True", None, lambda parent: True),
+        'S': ("stop_iter", None, "StopIter", None, None),
+        '.': ("ellipsis", None, "ELLIPSIS", None, lambda parent: ...),
+        '?': ("unknown", None, "Unknown", None, None),

-        'i': ("int32", parseInt32, "Int32", None),
-        'I': ("int64", parseInt64, "Int64", None),
-        'f': ("float", parseFloat, "Float", None),
-        'g': ("bin_float", parseBinaryFloat, "Binary float", None),
-        'x': ("complex", parseComplex, "Complex", None),
-        'y': ("bin_complex", parseBinaryComplex, "Binary complex", None),
-        'l': ("long", parseLong, "Long", None),
-        's': ("string", parseString, "String", None),
-        't': ("interned", parseString, "Interned", None),
-        'u': ("unicode", parseString, "Unicode", None),
-        'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc),
-        '(': ("tuple", parseTuple, "Tuple", createTupleDesc),
-        ')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc),
-        '[': ("list", parseTuple, "List", createTupleDesc),
-        '<': ("set", parseTuple, "Set", createTupleDesc),
-        '>': ("frozenset", parseTuple, "Frozen set", createTupleDesc),
-        '{': ("dict", parseDict, "Dict", createDictDesc),
-        'c': ("code", parseCode, "Code", None),
-        'r': ("ref", parseRef, "Reference", None),
-        'z': ("short_ascii", parseShortASCII, "Short ASCII", None),
-        'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None),
+        'i': ("int32", parseInt32, "Int32", None, createIntValue),
+        'I': ("int64", parseInt64, "Int64", None, createIntValue),
+        'f': ("float", parseFloat, "Float", None, createFloatValue),
+        'g': ("bin_float", parseBinaryFloat, "Binary float", None, createFloatValue),
+        'x': ("complex", parseComplex, "Complex", None, createComplexValue),
+        'y': ("bin_complex", parseBinaryComplex, "Binary complex", None, createComplexValue),
+        'l': ("long", parseLong, "Long", None, createLongValue),
+        's': ("string", parseString, "String", None, createStringValue),
+        't': ("interned", parseString, "Interned", None, createStringValue),
+        'u': ("unicode", parseString, "Unicode", None, createStringValue),
+        'R': ("string_ref", parseStringRef, "String ref", createStringRefDesc, createStringRefValue),
+        '(': ("tuple", parseTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
+        ')': ("small_tuple", parseSmallTuple, "Tuple", createTupleDesc, tupleValueCreator(tuple)),
+        '[': ("list", parseTuple, "List", createTupleDesc, tupleValueCreator(list)),
+        '<': ("set", parseTuple, "Set", createTupleDesc, tupleValueCreator(set)),
+        '>': ("frozenset", parseTuple, "Frozen set", createTupleDesc, tupleValueCreator(frozenset)),
+        '{': ("dict", parseDict, "Dict", createDictDesc, createDictValue),
+        'c': ("code", parseCode, "Code", None, None),
+        'r': ("ref", parseRef, "Reference", createRefDesc, createRefValue),
+        'a': ("ascii", parseASCII, "ASCII", None, createStringValue),
+        'A': ("ascii_interned", parseASCII, "ASCII interned", None, createStringValue),
+        'z': ("short_ascii", parseShortASCII, "Short ASCII", None, createStringValue),
+        'Z': ("short_ascii_interned", parseShortASCII, "Short ASCII interned", None, createStringValue),
    }

    def __init__(self, parent, name, **kw):
@ -227,64 +323,40 @@ class Object(FieldSet):
        self.code_info = self.bytecode_info[code]
        if not name:
            self._name = self.code_info[0]
-        if code == "l":
-            self.createValue = self.createValueLong
-        elif code in ("i", "I", "f", "g"):
-            self.createValue = lambda: self["value"].value
-        elif code == "T":
-            self.createValue = lambda: True
-        elif code == "F":
-            self.createValue = lambda: False
-        elif code in ("x", "y"):
-            self.createValue = self.createValueComplex
-        elif code in ("s", "t", "u"):
-            self.createValue = self.createValueString
-            self.createDisplay = self.createDisplayString
-            if code == 't':
+        if code in ("t", "A", "Z"):
            if not hasattr(self.root, 'string_table'):
                self.root.string_table = []
            self.root.string_table.append(self)
-        elif code == 'R':
-            if hasattr(self.root, 'string_table'):
-                self.createValue = self.createValueStringRef

-    def createValueString(self):
-        if "text" in self:
-            return self["text"].value
+    def createValue(self):
+        create = self.code_info[4]
+        if create:
+            res = create(self)
+            if isinstance(res, Field):
+                return res.value
            else:
-            return ""
+                return res
+        return None

-    def createDisplayString(self):
-        if "text" in self:
-            return self["text"].display
-        else:
-            return "(empty)"
-
-    def createValueLong(self):
-        is_negative = self["digit_count"].value < 0
-        count = abs(self["digit_count"].value)
-        total = 0
-        for index in range(count - 1, -1, -1):
-            total <<= 15
-            total += self["digit[%u]" % index].value
-        if is_negative:
-            total = -total
-        return total
-
-    def createValueStringRef(self):
-        return self.root.string_table[self['ref'].value].value
-
-    def createDisplayStringRef(self):
-        return self.root.string_table[self['ref'].value].display
-
-    def createValueComplex(self):
-        return complex(
-            float(self["real"].value),
-            float(self["complex"].value))
+    def createDisplay(self):
+        create = self.code_info[4]
+        if create:
+            res = create(self)
+            if isinstance(res, Field):
+                return res.display
+            res = repr(res)
+            if len(res) >= config.max_string_length:
+                res = res[:config.max_string_length] + "..."
+            return res
+        return None

    def createFields(self):
        yield BytecodeChar(self, "bytecode", "Bytecode")
        yield Bit(self, "flag_ref", "Is a reference?")
+        if self["flag_ref"].value:
+            if not hasattr(self.root, 'object_table'):
+                self.root.object_table = []
+            self.root.object_table.append(self)
        parser = self.code_info[1]
        if parser:
            yield from parser(self)
@ -301,6 +373,16 @@ class BytecodeChar(Character):
    static_size = 7


+PY_RELEASE_LEVEL_ALPHA = 0xA
+PY_RELEASE_LEVEL_FINAL = 0xF
+
+
+def VERSION(major, minor, release_level=PY_RELEASE_LEVEL_FINAL, serial=0):
+    micro = 0
+    return ((major << 24) + (minor << 16) + (micro << 8)
+            + (release_level << 4) + (serial << 0))
+
+
 class PythonCompiledFile(Parser):
    PARSER_TAGS = {
        "id": "python",
@ -394,7 +476,90 @@ class PythonCompiledFile(Parser):
        3377: ("Python 3.6b1 ", 0x3060000),
        3378: ("Python 3.6b2 ", 0x3060000),
        3379: ("Python 3.6rc1", 0x3060000),
-        3390: ("Python 3.7a0 ", 0x3070000),
+        3390: ("Python 3.7a1", 0x30700A1),
+        3391: ("Python 3.7a2", 0x30700A2),
+        3392: ("Python 3.7a4", 0x30700A4),
+        3393: ("Python 3.7b1", 0x30700B1),
+        3394: ("Python 3.7b5", 0x30700B5),
+        3400: ("Python 3.8a1", VERSION(3, 8)),
+        3401: ("Python 3.8a1", VERSION(3, 8)),
+        3410: ("Python 3.8a1", VERSION(3, 8)),
+        3411: ("Python 3.8b2", VERSION(3, 8)),
+        3412: ("Python 3.8b2", VERSION(3, 8)),
+        3413: ("Python 3.8b4", VERSION(3, 8)),
+        3420: ("Python 3.9a0", VERSION(3, 9)),
+        3421: ("Python 3.9a0", VERSION(3, 9)),
+        3422: ("Python 3.9a0", VERSION(3, 9)),
+        3423: ("Python 3.9a2", VERSION(3, 9)),
+        3424: ("Python 3.9a2", VERSION(3, 9)),
+        3425: ("Python 3.9a2", VERSION(3, 9)),
+        3430: ("Python 3.10a1", VERSION(3, 10)),
+        3431: ("Python 3.10a1", VERSION(3, 10)),
+        3432: ("Python 3.10a2", VERSION(3, 10)),
+        3433: ("Python 3.10a2", VERSION(3, 10)),
+        3434: ("Python 3.10a6", VERSION(3, 10)),
+        3435: ("Python 3.10a7", VERSION(3, 10)),
+        3436: ("Python 3.10b1", VERSION(3, 10)),
+        3437: ("Python 3.10b1", VERSION(3, 10)),
+        3438: ("Python 3.10b1", VERSION(3, 10)),
+        3439: ("Python 3.10b1", VERSION(3, 10)),
+        3450: ("Python 3.11a1", VERSION(3, 11)),
+        3451: ("Python 3.11a1", VERSION(3, 11)),
+        3452: ("Python 3.11a1", VERSION(3, 11)),
+        3453: ("Python 3.11a1", VERSION(3, 11)),
+        3454: ("Python 3.11a1", VERSION(3, 11)),
+        3455: ("Python 3.11a1", VERSION(3, 11)),
+        3456: ("Python 3.11a1", VERSION(3, 11)),
+        3457: ("Python 3.11a1", VERSION(3, 11)),
+        3458: ("Python 3.11a1", VERSION(3, 11)),
+        3459: ("Python 3.11a1", VERSION(3, 11)),
+        3460: ("Python 3.11a1", VERSION(3, 11)),
+        3461: ("Python 3.11a1", VERSION(3, 11)),
+        3462: ("Python 3.11a2", VERSION(3, 11)),
+        3463: ("Python 3.11a3", VERSION(3, 11)),
+        3464: ("Python 3.11a3", VERSION(3, 11)),
+        3465: ("Python 3.11a3", VERSION(3, 11)),
+        3466: ("Python 3.11a4", VERSION(3, 11)),
+        3467: ("Python 3.11a4", VERSION(3, 11)),
+        3468: ("Python 3.11a4", VERSION(3, 11)),
+        3469: ("Python 3.11a4", VERSION(3, 11)),
+        3470: ("Python 3.11a4", VERSION(3, 11)),
+        3471: ("Python 3.11a4", VERSION(3, 11)),
+        3472: ("Python 3.11a4", VERSION(3, 11)),
+        3473: ("Python 3.11a4", VERSION(3, 11)),
+        3474: ("Python 3.11a4", VERSION(3, 11)),
+        3475: ("Python 3.11a5", VERSION(3, 11)),
+        3476: ("Python 3.11a5", VERSION(3, 11)),
+        3477: ("Python 3.11a5", VERSION(3, 11)),
+        3478: ("Python 3.11a5", VERSION(3, 11)),
+        3479: ("Python 3.11a5", VERSION(3, 11)),
+        3480: ("Python 3.11a5", VERSION(3, 11)),
+        3481: ("Python 3.11a5", VERSION(3, 11)),
+        3482: ("Python 3.11a5", VERSION(3, 11)),
+        3483: ("Python 3.11a5", VERSION(3, 11)),
+        3484: ("Python 3.11a5", VERSION(3, 11)),
+        3485: ("Python 3.11a5", VERSION(3, 11)),
+        3486: ("Python 3.11a6", VERSION(3, 11)),
+        3487: ("Python 3.11a6", VERSION(3, 11)),
+        3488: ("Python 3.11a6", VERSION(3, 11)),
+        3489: ("Python 3.11a6", VERSION(3, 11)),
+        3490: ("Python 3.11a6", VERSION(3, 11)),
+        3491: ("Python 3.11a6", VERSION(3, 11)),
+        3492: ("Python 3.11a7", VERSION(3, 11)),
+        3493: ("Python 3.11a7", VERSION(3, 11)),
+        3494: ("Python 3.11a7", VERSION(3, 11)),
+        3500: ("Python 3.12a1", VERSION(3, 12)),
+        3501: ("Python 3.12a1", VERSION(3, 12)),
+        3502: ("Python 3.12a1", VERSION(3, 12)),
+        3503: ("Python 3.12a1", VERSION(3, 12)),
+        3504: ("Python 3.12a1", VERSION(3, 12)),
+        3505: ("Python 3.12a1", VERSION(3, 12)),
+        3506: ("Python 3.12a1", VERSION(3, 12)),
+        3507: ("Python 3.12a1", VERSION(3, 12)),
+        3508: ("Python 3.12a1", VERSION(3, 12)),
+        3509: ("Python 3.12a1", VERSION(3, 12)),
+        3510: ("Python 3.12a1", VERSION(3, 12)),
+        3511: ("Python 3.12a1", VERSION(3, 12)),
    }

    # Dictionnary which associate the pyc signature (4-byte long string)
@ -411,13 +576,7 @@ class PythonCompiledFile(Parser):
        if self["magic_string"].value != "\r\n":
            return r"Wrong magic string (\r\n)"

-        version = self.getVersion()
-        if version >= 0x3030000 and self['magic_number'].value >= 3200:
-            offset = 12
-        else:
-            offset = 8
-        value = self.stream.readBits(offset * 8, 7, self.endian)
-        if value != ord(b'c'):
+        if self["content/bytecode"].value != "c":
            return "First object bytecode is not code"
        return True

@ -430,8 +589,23 @@ class PythonCompiledFile(Parser):
    def createFields(self):
        yield UInt16(self, "magic_number", "Magic number")
        yield String(self, "magic_string", 2, r"Magic string \r\n", charset="ASCII")
-        yield TimestampUnix32(self, "timestamp", "Timestamp")
+
        version = self.getVersion()
+
+        # PEP 552: Deterministic pycs #31650 (Python 3.7a4); magic=3392
+        if version >= 0x30700A4:
+            yield Bit(self, "use_hash", "Is hash based?")
+            yield Bit(self, "checked")
+            yield NullBits(self, "reserved", 30)
+            use_hash = self['use_hash'].value
+        else:
+            use_hash = False
+
+        if use_hash:
+            yield UInt64(self, "hash", "SipHash hash of the source file")
+        else:
+            yield TimestampUnix32(self, "timestamp", "Timestamp modulo 2**32")
            if version >= 0x3030000 and self['magic_number'].value >= 3200:
                yield UInt32(self, "filesize", "Size of the Python source file (.py) modulo 2**32")
+
        yield Object(self, "content")
--- a/lib/hachoir/parser/video/mpeg_video.py
+++ b/lib/hachoir/parser/video/mpeg_video.py
@ -244,7 +244,7 @@ class PacketElement(FieldSet):
            yield Bits(self, "sync[]", 4)  # =2, or 3 if has_dts=True
            yield Timestamp(self, "pts")
        if self["has_dts"].value:
-            if not(self["has_pts"].value):
+            if not self["has_pts"].value:
                raise ParserError("Invalid PTS/DTS values")
            yield Bits(self, "sync[]", 4)  # =1
            yield Timestamp(self, "dts")
--- a/lib/hachoir/regex/pattern.py
+++ b/lib/hachoir/regex/pattern.py
@ -125,7 +125,7 @@ class PatternMatching:
        item = RegexPattern(regex, user)
        if item.regex.maxLength() is None:
            raise ValueError(
-                "Regular expression with no maximum size has forbidden")
+                "Regular expression with no maximum size is forbidden")
        self.regex_patterns.append(item)
        self._need_commit = True

--- a/lib/hachoir/subfile/main.py
+++ b/lib/hachoir/subfile/main.py
@ -1,3 +1,3 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 from hachoir.subfile.main import main
 main()
--- a/lib/hachoir/subfile/main.py
+++ b/lib/hachoir/subfile/main.py
@ -85,7 +85,7 @@ def main():
    stream = FileInputStream(filename)
    with stream:
        subfile = SearchSubfile(stream, values.offset, values.size)
-        subfile.verbose = not(values.quiet)
+        subfile.verbose = not values.quiet
        subfile.debug = values.debug
        if output:
            subfile.setOutput(output)
--- a/lib/hachoir/subfile/search.py
+++ b/lib/hachoir/subfile/search.py
@ -95,7 +95,7 @@ class SearchSubfile:
            print("[!] Memory error!", file=stderr)
        self.mainFooter()
        self.stream.close()
-        return not(main_error)
+        return (not main_error)

    def mainHeader(self):
        # Fix slice size if needed
@ -149,7 +149,7 @@ class SearchSubfile:
        if parser.content_size is not None:
            text += " size=%s (%s)" % (parser.content_size //
                                       8, humanFilesize(parser.content_size // 8))
-        if not(parser.content_size) or parser.content_size // 8 < FILE_MAX_SIZE:
+        if not parser.content_size or parser.content_size // 8 < FILE_MAX_SIZE:
            text += ": " + parser.description
        else:
            text += ": " + parser.__class__.__name__