SickGear/lib/hachoir/parser/archive/cab.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system-dependent external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong field order when parsing little-endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

258 lines
11 KiB
Python

"""
Microsoft Cabinet (CAB) archive.
Author: Victor Stinner, Robert Xiao
Creation date: 31 january 2007
- Microsoft Cabinet SDK
http://msdn2.microsoft.com/en-us/library/ms974336.aspx
"""
from __future__ import absolute_import
from hachoir.parser import Parser
from hachoir.field import (FieldSet, Enum,
CString, String,
UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits,
DateTimeMSDOS32, RawBytes, CustomFragment)
from hachoir.core.text_handler import textHandler, hexadecimal, filesizeHandler
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.core.tools import paddingSize
from hachoir.stream import StringInputStream
from hachoir.parser.archive.lzx import LZXStream, lzx_decompress
from hachoir.parser.archive.zlib import DeflateBlock
# Upper bound on "nb_folder" accepted by CabFile.validate().
MAX_NB_FOLDER = 30
# Compression method code for uncompressed data (see COMPRESSION_NAME).
COMPRESSION_NONE = 0
# Display names for the 4-bit "compr_method" field of a Folder.
COMPRESSION_NAME = {
0: "Uncompressed",
1: "Deflate",
2: "Quantum",
3: "LZX",
}
class Folder(FieldSet):
    """Folder record: data offset, block count and compression settings."""

    def createFields(self):
        """Yield the folder fields in on-disk order."""
        yield UInt32(self, "offset", "Offset to data (from file start)")
        yield UInt16(self, "data_blocks", "Number of data blocks which are in this cabinet")
        yield Enum(Bits(self, "compr_method", 4, "Compression method"), COMPRESSION_NAME)

        # The 12 bits after the method carry a 5-bit compression level for
        # Quantum (2) and LZX (3); for the other methods they are padding.
        method = self["compr_method"].value
        if method == 2 or method == 3:
            yield PaddingBits(self, "padding[]", 4)
            yield Bits(self, "compr_level", 5, "Compression level")
            yield PaddingBits(self, "padding[]", 3)
        else:
            yield PaddingBits(self, "padding[]", 12)

        # "reserved_folder_size" is only looked up when the header flag is set.
        if self["../flags/has_reserved"].value:
            reserved_bytes = self["../reserved_folder_size"].value
            if reserved_bytes:
                yield RawBytes(self, "reserved_folder", reserved_bytes, "Per-folder reserved area")

    def createDescription(self):
        """Return a one-line summary naming the compression method (and level)."""
        summary = "Folder: compression %s" % self["compr_method"].display
        if self["compr_method"].value not in [2, 3]:
            return summary
        # Quantum or LZX: append the level and the derived window size.
        level = self["compr_level"].value
        return summary + " (level %u: window size %u)" % (level, 2 ** level)
class CabFileAttributes(FieldSet):
    """16-bit attribute flags attached to a file entry."""

    def createFields(self):
        """Yield the attribute bits in on-disk order."""
        # (field class, constructor arguments following the parent), in bit order.
        layout = (
            (Bit, ("readonly",)),
            (Bit, ("hidden",)),
            (Bit, ("system",)),
            (Bits, ("reserved[]", 2)),
            (Bit, ("archive", "Has the file been modified since the last backup?")),
            (Bit, ("exec", "Run file after extraction?")),
            (Bit, ("name_is_utf", "Is the filename using UTF-8?")),
            (Bits, ("reserved[]", 8)),
        )
        for field_class, field_args in layout:
            # Each field is constructed only when it is about to be yielded.
            yield field_class(self, *field_args)
class File(FieldSet):
    """File entry: size, position inside its folder, timestamp, attributes, name."""

    def createFields(self):
        """Yield the file entry fields in on-disk order."""
        yield filesizeHandler(UInt32(self, "filesize", "Uncompressed file size"))
        yield UInt32(self, "folder_offset", "File offset in uncompressed folder")

        # Special index values flag folders shared with neighbouring cabinets.
        index_field = UInt16(self, "folder_index", "Containing folder ID (index)")
        last_folder_id = self["../nb_folder"].value - 1
        special_indexes = {
            0xFFFD: "Folder continued from previous cabinet (real folder ID = 0)",
            0xFFFE: "Folder continued to next cabinet (real folder ID = %i)" % last_folder_id,
            0xFFFF: "Folder spanning previous, current and next cabinets (real folder ID = 0)",
        }
        yield Enum(index_field, special_indexes)

        yield DateTimeMSDOS32(self, "timestamp")
        yield CabFileAttributes(self, "attributes")

        # The attribute bit selects the charset used to decode the name.
        name_charset = "UTF-8" if self["attributes/name_is_utf"].value else "ASCII"
        yield CString(self, "filename", charset=name_charset)

    def createDescription(self):
        """Return "File <name> (<size>)" built from the display values."""
        name_text = self["filename"].display
        size_text = self["filesize"].display
        return "File %s (%s)" % (name_text, size_text)
class Flags(FieldSet):
    """Cabinet header flags: three single-bit flags followed by 13 null bits."""
    static_size = 16

    def createFields(self):
        """Yield the three flag bits, then the null padding."""
        for flag_name in ("has_previous", "has_next", "has_reserved"):
            yield Bit(self, flag_name)
        yield NullBits(self, "padding", 13)
class DataBlock(FieldSet):
    """One data block of a folder: header, optional reserved area, payload."""

    def __init__(self, *args, **kwargs):
        FieldSet.__init__(self, *args, **kwargs)
        # The block size is fixed up front: payload plus the 8 header bytes
        # (crc32 + size + uncompressed_size), plus the reserved area when the
        # cabinet header declares one.
        byte_count = self["size"].value + 8
        if self["/flags/has_reserved"].value:
            byte_count += self["/reserved_data_size"].value
        self._size = byte_count * 8

    def createFields(self):
        """Yield the block header, then the payload according to the
        compression method of the owning folder."""
        yield textHandler(UInt32(self, "crc32"), hexadecimal)
        yield UInt16(self, "size")
        yield UInt16(self, "uncompressed_size", "If this is 0, this block is continued in a subsequent cabinet")

        # "reserved_data_size" is only looked up when the header flag is set.
        if self["/flags/has_reserved"].value:
            reserved_bytes = self["/reserved_data_size"].value
            if reserved_bytes:
                yield RawBytes(self, "reserved_data", reserved_bytes, "Per-datablock reserved area")

        owner = self.parent  # the FolderData collecting the uncompressed bytes
        method = owner.folder["compr_method"].value
        if method == 0:  # Uncompressed
            yield RawBytes(self, "data", self["size"].value, "Folder Data")
            owner.uncompressed_data += self["data"].value
        elif method == 1:  # MSZIP
            yield String(self, "mszip_signature", 2, "MSZIP Signature (CK)")
            # The data uncompressed so far is handed to the deflate block.
            yield DeflateBlock(self, "deflate_block", owner.uncompressed_data)
            pad_bits = paddingSize(self.current_size, 8)
            if pad_bits:
                yield PaddingBits(self, "padding[]", pad_bits)
            owner.uncompressed_data = self["deflate_block"].uncomp_data
        elif method == 2:  # Quantum
            yield RawBytes(self, "compr_data", self["size"].value, "Compressed Folder Data")
        elif method == 3:  # LZX
            # All LZX blocks of a folder share one fragment group, which is
            # remembered on the Folder fieldset as "lzx_group".
            cab_folder = owner.folder
            shared_group = getattr(cab_folder, "lzx_group", None)
            fragment = CustomFragment(self, "data", self["size"].value * 8, LZXStream, "LZX data fragment",
                                      shared_group)
            if shared_group is None:
                fragment.group.args["compr_level"] = cab_folder["compr_level"].value
            cab_folder.lzx_group = fragment.group
            yield fragment
class FolderParser(Parser):
    """Parser over a folder's uncompressed data: one raw field per file."""
    endian = LITTLE_ENDIAN

    def createFields(self):
        """Yield each file's bytes, ordered by offset inside the folder."""
        # NOTE(review): self.files is presumably supplied through the "args"
        # tag set up by FolderData - confirm against the Parser base class.
        def offset_of(entry):
            return entry["folder_offset"].value

        for entry in sorted(self.files, key=offset_of):
            gap = self.seekByte(offset_of(entry))
            if gap:
                yield gap
            yield RawBytes(self, "file[]", entry["filesize"].value, entry.description)
class FolderData(FieldSet):
    """Data blocks of one folder, plus a sub-stream over the uncompressed bytes."""

    def __init__(self, parent, name, folder, files, *args, **kwargs):
        """
        folder: the Folder fieldset describing this data.
        files: the File fieldsets handed on to FolderParser.
        """
        FieldSet.__init__(self, parent, name, *args, **kwargs)

        def createInputStream(cis, source=None, **options):
            stream = cis(source=source)
            # Extend the caller's tag list in place so that the sub-stream is
            # tagged with FolderParser and its file list.
            tag_list = options.setdefault("tags", [])
            tag_list.extend(stream.tags)
            tag_list.extend([
                ("class", FolderParser),
                ("args", {'files': files}),
            ])
            # Walk every block so that uncompressed_data is completely built.
            for _field in self:
                pass
            if folder["compr_method"].value == 3:  # LZX
                lzx_stream = self["block[0]/data"].getSubIStream()
                self.uncompressed_data = lzx_decompress(lzx_stream, folder["compr_level"].value)
            return StringInputStream(self.uncompressed_data, source=source, **options)

        self.setSubIStream(createInputStream)
        self.files = files
        self.folder = folder  # Folder fieldset

    def createFields(self):
        """Yield one fully parsed DataBlock per block declared by the folder."""
        self.uncompressed_data = ""
        block_count = self.folder["data_blocks"].value
        for _ in xrange(block_count):
            block = DataBlock(self, "block[]")
            # Parse the block completely before handing it over; parsing is
            # what feeds uncompressed_data.
            for _field in block:
                pass
            yield block
class CabFile(Parser):
    """Parser for Microsoft Cabinet (CAB) archives.

    Field layout: cabinet header, Folder entries, File entries, then one
    FolderData per folder located at the folder's declared offset.
    """
    endian = LITTLE_ENDIAN
    MAGIC = "MSCF"
    PARSER_TAGS = {
        "id": "cab",
        "category": "archive",
        "file_ext": ("cab",),
        "mime": (u"application/vnd.ms-cab-compressed",),
        "magic": ((MAGIC, 0),),
        "min_size": 1 * 8,  # header + file entry
        "description": "Microsoft Cabinet archive"
    }

    def validate(self):
        """Return True if the header looks like a supported cabinet,
        otherwise a string describing the problem."""
        if self.stream.readBytes(0, 4) != self.MAGIC:
            return "Invalid magic"
        if self["major_version"].value != 1 or self["minor_version"].value != 3:
            return "Unknown version (%i.%i)" % (self["major_version"].value, self["minor_version"].value)
        if not (1 <= self["nb_folder"].value <= MAX_NB_FOLDER):
            return "Invalid number of folder (%s)" % self["nb_folder"].value
        return True

    def createFields(self):
        """Yield the header, the folder and file tables, then the data of
        each folder in increasing offset order."""
        yield String(self, "magic", 4, "Magic (MSCF)", charset="ASCII")
        yield textHandler(UInt32(self, "hdr_checksum", "Header checksum (0 if not used)"), hexadecimal)
        yield filesizeHandler(UInt32(self, "filesize", "Cabinet file size"))
        yield textHandler(UInt32(self, "fld_checksum", "Folders checksum (0 if not used)"), hexadecimal)
        yield UInt32(self, "off_file", "Offset of first file")
        yield textHandler(UInt32(self, "files_checksum", "Files checksum (0 if not used)"), hexadecimal)
        yield UInt8(self, "minor_version", "Minor version (should be 3)")
        yield UInt8(self, "major_version", "Major version (should be 1)")
        yield UInt16(self, "nb_folder", "Number of folders")
        yield UInt16(self, "nb_files", "Number of files")
        yield Flags(self, "flags")
        yield UInt16(self, "setid")
        yield UInt16(self, "cabinet_serial", "Zero-based cabinet number")

        # The reserved-size fields only exist when the flag is set.
        if self["flags/has_reserved"].value:
            yield UInt16(self, "reserved_header_size", "Size of per-cabinet reserved area")
            yield UInt8(self, "reserved_folder_size", "Size of per-folder reserved area")
            yield UInt8(self, "reserved_data_size", "Size of per-datablock reserved area")
            if self["reserved_header_size"].value:
                yield RawBytes(self, "reserved_header", self["reserved_header_size"].value,
                               "Per-cabinet reserved area")
        if self["flags/has_previous"].value:
            yield CString(self, "previous_cabinet", "File name of previous cabinet", charset="ASCII")
            yield CString(self, "previous_disk", "Description of disk/media on which previous cabinet resides",
                          charset="ASCII")
        if self["flags/has_next"].value:
            yield CString(self, "next_cabinet", "File name of next cabinet", charset="ASCII")
            yield CString(self, "next_disk", "Description of disk/media on which next cabinet resides",
                          charset="ASCII")

        folders = []
        files = []
        for index in xrange(self["nb_folder"].value):
            folder = Folder(self, "folder[]")
            yield folder
            folders.append(folder)
        for index in xrange(self["nb_files"].value):
            entry = File(self, "file[]")
            yield entry
            files.append(entry)

        # Walk the folders by data offset while keeping their original index,
        # which is what the "folder_index" of a File refers to.
        folders = sorted(enumerate(folders), key=lambda x: x[1]["offset"].value)
        for position, (index, folder) in enumerate(folders):
            padding = self.seekByte(folder["offset"].value)
            if padding:
                yield padding

            # Collect this folder's files in a separate list. Rebinding
            # "files" here would discard the parsed entries and leave every
            # folder with an empty file list.
            folder_files = [entry for entry in files
                            if entry["folder_index"].value == index]

            # A folder's data runs up to the next folder's offset, or to the
            # end of the stream for the last one.
            if position + 1 == len(folders):
                size = (self.size // 8) - folder["offset"].value
            else:
                size = folders[position + 1][1]["offset"].value - folder["offset"].value
            yield FolderData(self, "folder_data[%i]" % index, folder, folder_files, size=size * 8)

        end = self.seekBit(self.size, "endraw")
        if end:
            yield end

    def createContentSize(self):
        """Return the cabinet size in bits, taken from the "filesize" header field."""
        return self["filesize"].value * 8