SickGear/lib/hachoir/parser/archive/rar.py

383 lines
13 KiB
Python
Raw Permalink Normal View History

"""
RAR parser
Status: can only read higher-level attructures
Author: Christophe Gisquet
"""
from hachoir.parser import Parser
from hachoir.field import (StaticFieldSet, FieldSet,
Bit, Bits, Enum,
UInt8, UInt16, UInt32, UInt64,
String, TimeDateMSDOS32,
NullBytes, NullBits, RawBytes)
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
from hachoir.core.endian import LITTLE_ENDIAN
from hachoir.parser.common.msdos import MSDOSFileAttr32
MAX_FILESIZE = 1000 * 1024 * 1024
BLOCK_NAME = {
0x72: "Marker",
0x73: "Archive",
0x74: "File",
0x75: "Comment",
0x76: "Extra info",
0x77: "Subblock",
0x78: "Recovery record",
0x79: "Archive authenticity",
0x7A: "New-format subblock",
0x7B: "Archive end",
}
COMPRESSION_NAME = {
0x30: "Storing",
0x31: "Fastest compression",
0x32: "Fast compression",
0x33: "Normal compression",
0x34: "Good compression",
0x35: "Best compression"
}
OS_MSDOS = 0
OS_WIN32 = 2
OS_NAME = {
0: "MS DOS",
1: "OS/2",
2: "Win32",
3: "Unix",
}
DICTIONARY_SIZE = {
0: "Dictionary size 64 Kb",
1: "Dictionary size 128 Kb",
2: "Dictionary size 256 Kb",
3: "Dictionary size 512 Kb",
4: "Dictionary size 1024 Kb",
7: "File is a directory",
}
def formatRARVersion(field):
"""
Decodes the RAR version stored on 1 byte
"""
return "%u.%u" % divmod(field.value, 10)
def commonFlags(s):
yield Bit(s, "has_added_size", "Additional field indicating additional size")
yield Bit(s, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
class ArchiveFlags(StaticFieldSet):
format = (
(Bit, "vol", "Archive volume"),
(Bit, "has_comment", "Whether there is a comment"),
(Bit, "is_locked", "Archive volume"),
(Bit, "is_solid", "Whether files can be extracted separately"),
(Bit, "new_numbering", "New numbering, or compressed comment"), # From unrar
(Bit, "has_authenticity_information",
"The integrity/authenticity of the archive can be checked"),
(Bit, "is_protected", "The integrity/authenticity of the archive can be checked"),
(Bit, "is_passworded", "Needs a password to be decrypted"),
(Bit, "is_first_vol", "Whether it is the first volume"),
(Bit, "is_encrypted", "Whether the encryption version is present"),
(NullBits, "internal", 6, "Reserved for 'internal use'")
)
def archiveFlags(s):
yield ArchiveFlags(s, "flags", "Archiver block flags")
def archiveHeader(s):
yield NullBytes(s, "reserved[]", 2, "Reserved word")
yield NullBytes(s, "reserved[]", 4, "Reserved dword")
def commentHeader(s):
yield filesizeHandler(UInt16(s, "total_size", "Comment header size + comment size"))
yield filesizeHandler(UInt16(s, "uncompressed_size", "Uncompressed comment size"))
yield UInt8(s, "required_version", "RAR version needed to extract comment")
yield UInt8(s, "packing_method", "Comment packing method")
yield UInt16(s, "comment_crc16", "Comment CRC")
def commentBody(s):
size = s["total_size"].value - s.current_size
if size > 0:
yield RawBytes(s, "comment_data", size, "Compressed comment data")
def signatureHeader(s):
yield TimeDateMSDOS32(s, "creation_time")
yield filesizeHandler(UInt16(s, "arc_name_size"))
yield filesizeHandler(UInt16(s, "user_name_size"))
def recoveryHeader(s):
yield filesizeHandler(UInt32(s, "total_size"))
yield textHandler(UInt8(s, "version"), hexadecimal)
yield UInt16(s, "rec_sectors")
yield UInt32(s, "total_blocks")
yield RawBytes(s, "mark", 8)
def avInfoHeader(s):
yield filesizeHandler(UInt16(s, "total_size", "Total block size"))
yield UInt8(s, "version", "Version needed to decompress", handler=hexadecimal)
yield UInt8(s, "method", "Compression method", handler=hexadecimal)
yield UInt8(s, "av_version", "Version for AV", handler=hexadecimal)
yield UInt32(s, "av_crc", "AV info CRC32", handler=hexadecimal)
def avInfoBody(s):
size = s["total_size"].value - s.current_size
if size > 0:
yield RawBytes(s, "av_info_data", size, "AV info")
class FileFlags(FieldSet):
static_size = 16
def createFields(self):
yield Bit(self, "continued_from", "File continued from previous volume")
yield Bit(self, "continued_in", "File continued in next volume")
yield Bit(self, "is_encrypted", "File encrypted with password")
yield Bit(self, "has_comment", "File comment present")
yield Bit(self, "is_solid", "Information from previous files is used (solid flag)")
# The 3 following lines are what blocks more staticity
yield Enum(Bits(self, "dictionary_size", 3, "Dictionary size"), DICTIONARY_SIZE)
for bit in commonFlags(self):
yield bit
yield Bit(self, "is_large", "file64 operations needed")
yield Bit(self, "is_unicode", "Filename also encoded using Unicode")
yield Bit(self, "has_salt", "Has salt for encryption")
yield Bit(self, "uses_file_version", "File versioning is used")
yield Bit(self, "has_ext_time", "Extra time ??")
yield Bit(self, "has_ext_flags", "Extra flag ??")
def fileFlags(s):
yield FileFlags(s, "flags", "File block flags")
class ExtTime(FieldSet):
def createFields(self):
yield textHandler(UInt16(self, "time_flags", "Flags for extended time"), hexadecimal)
flags = self["time_flags"].value
for index in range(4):
rmode = flags >> ((3 - index) * 4)
if rmode & 8:
if index:
yield TimeDateMSDOS32(self, "dos_time[]", "DOS Time")
if rmode & 3:
yield RawBytes(self, "remainder[]", rmode & 3, "Time remainder")
def specialHeader(s, is_file):
yield filesizeHandler(UInt32(s, "compressed_size", "Compressed size (bytes)"))
yield filesizeHandler(UInt32(s, "uncompressed_size", "Uncompressed size (bytes)"))
yield Enum(UInt8(s, "host_os", "Operating system used for archiving"), OS_NAME)
yield textHandler(UInt32(s, "crc32", "File CRC32"), hexadecimal)
yield TimeDateMSDOS32(s, "ftime", "Date and time (MS DOS format)")
yield textHandler(UInt8(s, "version", "RAR version needed to extract file"), formatRARVersion)
yield Enum(UInt8(s, "method", "Packing method"), COMPRESSION_NAME)
yield filesizeHandler(UInt16(s, "filename_length", "File name size"))
if s["host_os"].value in (OS_MSDOS, OS_WIN32):
yield MSDOSFileAttr32(s, "file_attr", "File attributes")
else:
yield textHandler(UInt32(s, "file_attr", "File attributes"), hexadecimal)
# Start additional field from unrar
if s["flags/is_large"].value:
yield filesizeHandler(UInt64(s, "large_size", "Extended 64bits filesize"))
# End additional field
size = s["filename_length"].value
if size > 0:
if s["flags/is_unicode"].value:
charset = "UTF-8"
else:
charset = "ISO-8859-15"
yield String(s, "filename", size, "Filename", charset=charset)
# Start additional fields from unrar - file only
if is_file:
if s["flags/has_salt"].value:
yield textHandler(UInt8(s, "salt", "Salt"), hexadecimal)
if s["flags/has_ext_time"].value:
yield ExtTime(s, "extra_time", "Extra time info")
def fileHeader(s):
return specialHeader(s, True)
def fileBody(s):
# File compressed data
size = s["compressed_size"].value
if s["flags/is_large"].value:
size += s["large_size"].value
if size > 0:
yield RawBytes(s, "compressed_data", size, "File compressed data")
def fileDescription(s):
return "File entry: %s (%s)" % \
(s["filename"].display, s["compressed_size"].display)
def newSubHeader(s):
return specialHeader(s, False)
class EndFlags(StaticFieldSet):
format = (
(Bit, "has_next_vol", "Whether there is another next volume"),
(Bit, "has_data_crc", "Whether a CRC value is present"),
(Bit, "rev_space"),
(Bit, "has_vol_number", "Whether the volume number is present"),
(Bits, "unused[]", 4),
(Bit, "has_added_size", "Additional field indicating additional size"),
(Bit, "is_ignorable",
"Old versions of RAR should ignore this block when copying data"),
(Bits, "unused[]", 6),
)
def endFlags(s):
yield EndFlags(s, "flags", "End block flags")
class BlockFlags(FieldSet):
static_size = 16
def createFields(self):
yield textHandler(Bits(self, "unused[]", 8, "Unused flag bits"), hexadecimal)
yield Bit(self, "has_added_size", "Additional field indicating additional size")
yield Bit(self, "is_ignorable", "Old versions of RAR should ignore this block when copying data")
yield Bits(self, "unused[]", 6)
class Block(FieldSet):
BLOCK_INFO = {
# None means 'use default function'
0x72: ("marker", "Archive header", None, None, None),
0x73: ("archive_start", "Archive info", archiveFlags, archiveHeader, None),
0x74: ("file[]", fileDescription, fileFlags, fileHeader, fileBody),
0x75: ("comment[]", "Stray comment", None, commentHeader, commentBody),
0x76: ("av_info[]", "Extra information", None, avInfoHeader, avInfoBody),
0x77: ("sub_block[]", "Stray subblock", None, newSubHeader, fileBody),
0x78: ("recovery[]", "Recovery block", None, recoveryHeader, None),
0x79: ("signature", "Signature block", None, signatureHeader, None),
0x7A: ("new_sub_block[]", "Stray new-format subblock", fileFlags,
newSubHeader, fileBody),
0x7B: ("archive_end", "Archive end block", endFlags, None, None),
}
def __init__(self, parent, name):
FieldSet.__init__(self, parent, name)
t = self["block_type"].value
if t in self.BLOCK_INFO:
self._name, desc, parseFlags, parseHeader, parseBody = self.BLOCK_INFO[
t]
if callable(desc):
self.createDescription = lambda: desc(self)
elif desc:
self._description = desc
if parseFlags:
self.parseFlags = lambda: parseFlags(self)
if parseHeader:
self.parseHeader = lambda: parseHeader(self)
if parseBody:
self.parseBody = lambda: parseBody(self)
else:
self.info("Processing as unknown block of type %u" % t)
self._size = 8 * self["block_size"].value
if t == 0x74 or t == 0x7A:
self._size += 8 * self["compressed_size"].value
if "is_large" in self["flags"] and self["flags/is_large"].value:
self._size += 8 * self["large_size"].value
elif "has_added_size" in self:
self._size += 8 * self["added_size"].value
# TODO: check if any other member is needed here
def createFields(self):
yield textHandler(UInt16(self, "crc16", "Block CRC16"), hexadecimal)
yield textHandler(UInt8(self, "block_type", "Block type"), hexadecimal)
# Parse flags
yield from self.parseFlags()
# Get block size
yield filesizeHandler(UInt16(self, "block_size", "Block size"))
# Parse remaining header
yield from self.parseHeader()
# Finish header with stuff of unknow size
size = self["block_size"].value - (self.current_size // 8)
if size > 0:
yield RawBytes(self, "unknown", size, "Unknow data (UInt32 probably)")
# Parse body
yield from self.parseBody()
def createDescription(self):
return "Block entry: %s" % self["type"].display
def parseFlags(self):
yield BlockFlags(self, "flags", "Block header flags")
def parseHeader(self):
if "has_added_size" in self["flags"] and \
self["flags/has_added_size"].value:
yield filesizeHandler(UInt32(self, "added_size",
"Supplementary block size"))
def parseBody(self):
"""
Parse what is left of the block
"""
size = self["block_size"].value - (self.current_size // 8)
if "has_added_size" in self["flags"] and self["flags/has_added_size"].value:
size += self["added_size"].value
if size > 0:
yield RawBytes(self, "body", size, "Body data")
class RarFile(Parser):
MAGIC = b"Rar!\x1A\x07\x00"
PARSER_TAGS = {
"id": "rar",
"category": "archive",
"file_ext": ("rar",),
"mime": ("application/x-rar-compressed", ),
"min_size": 7 * 8,
"magic": ((MAGIC, 0),),
"description": "Roshal archive (RAR)",
}
endian = LITTLE_ENDIAN
def validate(self):
magic = self.MAGIC
if self.stream.readBytes(0, len(magic)) != magic:
return "Invalid magic"
return True
def createFields(self):
while not self.eof:
yield Block(self, "block[]")
def createContentSize(self):
start = 0
end = MAX_FILESIZE * 8
pos = self.stream.searchBytes(
b"\xC4\x3D\x7B\x00\x40\x07\x00", start, end)
if pos is not None:
return pos + 7 * 8
return None