mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-25 02:33:43 +00:00
980e05cc99
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014). Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/* Remove metadata/qt* PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5). Basic Support for XMP Packets. tga: improvements to adhere more closely to the spec. pdf: slightly improved parsing. rar: fix TypeError on unknown block types. Add MacRoman win32 codepage. tiff/exif: support SubIFDs and tiled images. Add method to export metadata in dictionary. mpeg_video: don't attempt to parse Stream past length. mpeg_video: parse ESCR correctly, add SCR value. Change centralise CustomFragments. field: don't set parser class if class is None, to enable autodetect. field: add value/display for CustomFragment. parser: inline warning to enable tracebacks in debug mode. Fix empty bytestrings in makePrintable. Fix contentSize in jpeg.py to account for image_data blocks. Fix the ELF parser. Enhance the AR archive parser. elf parser: fix wrong wrong fields order in parsing little endian section flags. elf parser: add s390 as a machine type. Flesh out mp4 parser. PORT: Version 2.0a1 (inline with 3.0a1). Major refactoring and PEP8. Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams. metadata: get comment from ZIP. Support for InputIOStream.read(0). Fix sizeGe when size is None. Remove unused new_seekable_field_set file. Remove parser Mapsforge .map. Remove parser Parallel Realities Starfighter .pak files. sevenzip: fix for newer archives. java: update access flags and modifiers for Java 1.7 and update description text for most recent Java. Support ustar prefix field in tar archives. Remove file_system* parsers. Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*. Remove program parsers macho, nds, prc. Support non-8bit Character subclasses. Python parser supports Python 3.7. 
Enhance mpeg_ts parser to support MTS/M2TS. Support for creation date in tiff. Change don't hardcode errno constant. PORT: 1.9.1 Internal Only: The following are legacy reference to upstream commit messages. Relevant changes up to b0a115f8. Use integer division. Replace HACHOIR_ERRORS with Exception. Fix metadata.Data: make it sortable. Import fixes from e7de492. PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad). Replace hachoir.core.field with hachoir.field Replace hachoir.core.stream with hachoir.stream Remove the compatibility module for PY1.5 to PY2.5. metadata: support TIFF picture. metadata: fix string normalization. metadata: fix datetime regex Fix hachoir bug #57. FileFromInputStream: fix comparison between None and an int. InputIOStream: open the file in binary mode.
412 lines
14 KiB
Python
412 lines
14 KiB
Python
"""
|
|
RAR parser
|
|
|
|
Status: can only read higher-level structures
|
|
Author: Christophe Gisquet
|
|
"""
|
|
|
|
from hachoir.parser import Parser
|
|
from hachoir.field import (StaticFieldSet, FieldSet,
|
|
Bit, Bits, Enum,
|
|
UInt8, UInt16, UInt32, UInt64,
|
|
String, TimeDateMSDOS32,
|
|
NullBytes, NullBits, RawBytes)
|
|
from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
|
|
from hachoir.core.endian import LITTLE_ENDIAN
|
|
from hachoir.parser.common.msdos import MSDOSFileAttr32
|
|
from datetime import timedelta
|
|
|
|
# Upper bound, in bytes, used when scanning the stream for the end marker.
MAX_FILESIZE = 1000 * 1024 * 1024

# Human-readable names of the block types, keyed by the block type byte.
BLOCK_NAME = {
    0x72: "Marker",
    0x73: "Archive",
    0x74: "File",
    0x75: "Comment",
    0x76: "Extra info",
    0x77: "Subblock",
    0x78: "Recovery record",
    0x79: "Archive authenticity",
    0x7A: "New-format subblock",
    0x7B: "Archive end",
}

# Display names of the packing methods, keyed by the method byte.
COMPRESSION_NAME = {
    0x30: "Storing",
    0x31: "Fastest compression",
    0x32: "Fast compression",
    0x33: "Normal compression",
    0x34: "Good compression",
    0x35: "Best compression",
}

# Host operating system identifiers and their display names.
OS_MSDOS = 0
OS_WIN32 = 2
OS_NAME = {
    OS_MSDOS: "MS DOS",
    1: "OS/2",
    OS_WIN32: "Win32",
    3: "Unix",
}

# Display names for the 3-bit "dictionary_size" value of the file flags.
DICTIONARY_SIZE = {
    0: "Dictionary size 64 Kb",
    1: "Dictionary size 128 Kb",
    2: "Dictionary size 256 Kb",
    3: "Dictionary size 512 Kb",
    4: "Dictionary size 1024 Kb",
    7: "File is a directory",
}
|
|
|
|
|
|
def formatRARVersion(field):
    """
    Format the one-byte RAR version of *field* as "major.minor".

    The stored value is split with a divmod by 10: the quotient is the
    major number and the remainder the minor number.
    """
    major, minor = divmod(field.value, 10)
    return "%u.%u" % (major, minor)
|
|
|
|
|
|
def markerFlags(s):
    """
    Yield the flags of the marker block as a single 16-bit integer field.
    """
    marker = UInt16(s, "flags", "Marker flags, always 0x1a21")
    yield marker
|
|
|
|
|
|
# Field specifications (class, name, description) appended to the flag
# layouts of the different block types.
commonFlags = (
    (
        Bit,
        "is_ignorable",
        "Old versions of RAR should ignore this block when copying data",
    ),
    (
        Bit,
        "has_added_size",
        "Additional field indicating additional size",
    ),
)
|
|
|
|
|
|
class ArchiveFlags(StaticFieldSet):
    """
    Flag word of the archive header block.

    Ten single-bit flags, four reserved bits and the two flags shared by
    all blocks (commonFlags).
    """
    format = (
        (Bit, "vol", "Archive volume"),
        (Bit, "has_comment", "Whether there is a comment"),
        # The description used to be a copy of the "vol" one.
        (Bit, "is_locked", "Whether the archive is locked"),
        (Bit, "is_solid", "Whether files can be extracted separately"),
        (Bit, "new_numbering", "New numbering, or compressed comment"),  # From unrar
        (Bit, "has_authenticity_information", "The integrity/authenticity of the archive can be checked"),
        # The description used to be a copy of the authenticity one.
        (Bit, "is_protected", "Whether a recovery record is present"),
        (Bit, "is_passworded", "Needs a password to be decrypted"),
        (Bit, "is_first_vol", "Whether it is the first volume"),
        (Bit, "is_encrypted", "Whether the encryption version is present"),
        (NullBits, "internal", 4, "Reserved for 'internal use'"),
    ) + commonFlags
|
|
|
|
|
|
def archiveFlags(s):
    """
    Yield the flag field set of the archive header block.
    """
    flags = ArchiveFlags(s, "flags", "Archiver block flags")
    yield flags
|
|
|
|
|
|
def archiveHeader(s):
    """
    Yield the two reserved areas of the archive header: 2 then 4 null bytes.
    """
    for byte_count, description in ((2, "Reserved word"), (4, "Reserved dword")):
        yield NullBytes(s, "reserved[]", byte_count, description)
|
|
|
|
|
|
def commentHeader(s):
    """
    Yield the header fields of a comment block: two 16-bit sizes, the
    required version, the packing method and the comment CRC.
    """
    total = UInt16(s, "total_size", "Comment header size + comment size")
    yield filesizeHandler(total)

    unpacked = UInt16(s, "uncompressed_size", "Uncompressed comment size")
    yield filesizeHandler(unpacked)

    for name, description in (
        ("required_version", "RAR version needed to extract comment"),
        ("packing_method", "Comment packing method"),
    ):
        yield UInt8(s, name, description)

    yield UInt16(s, "comment_crc16", "Comment CRC")
|
|
|
|
|
|
def commentBody(s):
    """
    Yield the compressed comment data that follows the comment header.

    The remaining size is "total_size" (a byte count) minus what has
    already been parsed. current_size is expressed in bits, so it is
    converted to bytes before the subtraction, as the other size
    computations of this parser do. Nothing is yielded when no data is left.
    """
    parsed_bytes = s.current_size // 8
    size = s["total_size"].value - parsed_bytes
    if size > 0:
        yield RawBytes(s, "comment_data", size, "Compressed comment data")
|
|
|
|
|
|
def signatureHeader(s):
    """
    Yield the header fields of a signature block: the creation time
    followed by two 16-bit size fields.
    """
    yield TimeDateMSDOS32(s, "creation_time")
    for size_name in ("arc_name_size", "user_name_size"):
        yield filesizeHandler(UInt16(s, size_name))
|
|
|
|
|
|
def recoveryHeader(s):
    """
    Yield the header fields of a recovery block: total size, version,
    sector count, block count and an 8-byte mark.
    """
    total = UInt32(s, "total_size")
    yield filesizeHandler(total)

    version = UInt8(s, "version")
    yield textHandler(version, hexadecimal)

    yield UInt16(s, "rec_sectors")
    yield UInt32(s, "total_blocks")
    yield RawBytes(s, "mark", 8)
|
|
|
|
|
|
def avInfoHeader(s):
    """
    Yield the header fields of an "extra information" (AV info) block.

    Hexadecimal display is attached with textHandler(), the same way the
    other headers of this parser do it, instead of passing a "handler"
    keyword to the integer field constructors.
    """
    yield filesizeHandler(UInt16(s, "total_size", "Total block size"))
    for name, description in (
        ("version", "Version needed to decompress"),
        ("method", "Compression method"),
        ("av_version", "Version for AV"),
    ):
        yield textHandler(UInt8(s, name, description), hexadecimal)
    yield textHandler(UInt32(s, "av_crc", "AV info CRC32"), hexadecimal)
|
|
|
|
|
|
def avInfoBody(s):
    """
    Yield the AV info data that follows the AV info header.

    The remaining size is "total_size" (a byte count) minus what has
    already been parsed. current_size is expressed in bits, so it is
    converted to bytes before the subtraction, as the other size
    computations of this parser do. Nothing is yielded when no data is left.
    """
    parsed_bytes = s.current_size // 8
    size = s["total_size"].value - parsed_bytes
    if size > 0:
        yield RawBytes(s, "av_info_data", size, "AV info")
|
|
|
|
|
|
class FileFlags(FieldSet):
    """
    16-bit flag word of a file block.

    Unlike the other flag sets it is not a StaticFieldSet because of the
    enumerated 3-bit dictionary size in the middle of the single-bit flags.
    """
    static_size = 16

    def createFields(self):
        leading_bits = (
            ("continued_from", "File continued from previous volume"),
            ("continued_in", "File continued in next volume"),
            ("is_encrypted", "File encrypted with password"),
            ("has_comment", "File comment present"),
            ("is_solid", "Information from previous files is used (solid flag)"),
        )
        for bit_name, bit_desc in leading_bits:
            yield Bit(self, bit_name, bit_desc)

        # This enumerated field is what prevents a purely static layout
        dictionary = Bits(self, "dictionary_size", 3, "Dictionary size")
        yield Enum(dictionary, DICTIONARY_SIZE)

        trailing_bits = (
            ("is_large", "file64 operations needed"),
            ("is_unicode", "Filename also encoded using Unicode"),
            ("has_salt", "Has salt for encryption"),
            ("uses_file_version", "File versioning is used"),
            ("has_ext_time", "Extra time info present"),
            ("has_ext_flags", "Extra flag ??"),
        )
        for bit_name, bit_desc in trailing_bits:
            yield Bit(self, bit_name, bit_desc)

        # Flags shared by every block type: (class, name, description)
        for spec in commonFlags:
            field_class = spec[0]
            yield field_class(self, *spec[1:])
|
|
|
|
|
|
def fileFlags(s):
    """
    Yield the flag field set of a file block.
    """
    flags = FileFlags(s, "flags", "File block flags")
    yield flags
|
|
|
|
|
|
class ExtTimeFlags(FieldSet):
    """
    16-bit flag word of the extended time record.

    For each of arctime, atime, ctime and mtime (in that order) it holds a
    2-bit byte count, a "one second" bit and a "present" bit.
    """
    static_size = 16

    def createFields(self):
        for stamp in ('arctime', 'atime', 'ctime', 'mtime'):
            yield Bits(self, stamp + "_count", 2, "Number of %s bytes" % stamp)
            yield Bit(self, stamp + "_onesec", "Add one second to the timestamp?")
            yield Bit(self, stamp + "_present", "Is %s extra time present?" % stamp)
|
|
|
|
|
|
class ExtTime(FieldSet):
    """
    Extended time record of a file block.

    It starts with the time flags. For every timestamp flagged as present
    it then holds a DOS timestamp (except for mtime, whose base time is the
    "ftime" field of the parent) and an optional remainder of
    "count" bytes.
    """

    def createFields(self):
        yield ExtTimeFlags(self, "time_flags")
        for stamp in ('mtime', 'ctime', 'atime', 'arctime'):
            if not self['time_flags/%s_present' % stamp].value:
                continue
            # mtime has no DOS timestamp of its own in this record
            if stamp != 'mtime':
                yield TimeDateMSDOS32(self, stamp, "%s DOS timestamp" % stamp)
            byte_count = self['time_flags/%s_count' % stamp].value
            if byte_count:
                yield Bits(self, stamp + "_remainder", 8 * byte_count,
                           "%s extra precision time (in 100ns increments)" % stamp)

    def createDescription(self):
        pieces = []
        for stamp in ('mtime', 'ctime', 'atime', 'arctime'):
            if not self['time_flags/%s_present' % stamp].value:
                continue

            # Base time: the parent's "ftime" for mtime, own field otherwise
            base_path = '../ftime' if stamp == 'mtime' else stamp
            basetime = self[base_path].value

            # Corrections added on top of the base time
            delta = timedelta()
            if self['time_flags/%s_onesec' % stamp].value:
                delta += timedelta(seconds=1)
            remainder_name = stamp + '_remainder'
            if remainder_name in self:
                # The remainder counts 100ns units: divide by 10 for microseconds
                delta += timedelta(microseconds=self[remainder_name].value / 10.0)

            pieces.append('%s=%s' % (stamp, basetime + delta))

        if not pieces:
            return 'Time extension'
        return 'Time extension' + ': ' + ', '.join(pieces)
|
|
|
|
|
|
def specialHeader(s, is_file):
    """
    Yield the header fields shared by file blocks and subblocks.

    When is_file is true, the file-only trailing fields (encryption salt
    and extended time) are yielded as well when the flags announce them.
    """
    for size_name, size_desc in (
        ("compressed_size", "Compressed size (bytes)"),
        ("uncompressed_size", "Uncompressed size (bytes)"),
    ):
        yield filesizeHandler(UInt32(s, size_name, size_desc))

    host_os = UInt8(s, "host_os", "Operating system used for archiving")
    yield Enum(host_os, OS_NAME)

    crc = UInt32(s, "crc32", "File CRC32")
    yield textHandler(crc, hexadecimal)

    yield TimeDateMSDOS32(s, "ftime", "Date and time (MS DOS format)")

    version = UInt8(s, "version", "RAR version needed to extract file")
    yield textHandler(version, formatRARVersion)

    method = UInt8(s, "method", "Packing method")
    yield Enum(method, COMPRESSION_NAME)

    name_size = UInt16(s, "filename_length", "File name size")
    yield filesizeHandler(name_size)

    # The attributes are decoded only for DOS-like hosts, shown raw otherwise
    if s["host_os"].value not in (OS_MSDOS, OS_WIN32):
        yield textHandler(UInt32(s, "file_attr", "File attributes"), hexadecimal)
    else:
        yield MSDOSFileAttr32(s, "file_attr", "File attributes")

    # Additional field from unrar
    if s["flags/is_large"].value:
        yield filesizeHandler(UInt64(s, "large_size", "Extended 64bits filesize"))

    name_length = s["filename_length"].value
    if name_length > 0:
        charset = "UTF-8" if s["flags/is_unicode"].value else "ISO-8859-15"
        yield String(s, "filename", name_length, "Filename", charset=charset)

    # Additional fields from unrar - file only
    if not is_file:
        return
    if s["flags/has_salt"].value:
        yield RawBytes(s, "salt", 8, "Encryption salt to increase security")
    if s["flags/has_ext_time"].value:
        yield ExtTime(s, "extra_time")
|
|
|
|
|
|
def fileHeader(s):
    """
    Return the header field generator of a file block, file-only fields
    included.
    """
    return specialHeader(s, is_file=True)
|
|
|
|
|
|
def fileBody(s):
    """
    Yield the compressed data of a file block, if any.

    The size is "compressed_size", plus "large_size" when the is_large
    flag is set.
    """
    data_size = s["compressed_size"].value
    if s["flags/is_large"].value:
        data_size = data_size + s["large_size"].value
    if data_size <= 0:
        return
    yield RawBytes(s, "compressed_data", data_size, "File compressed data")
|
|
|
|
|
|
def fileDescription(tag):
    """
    Build a description callback for file-like blocks.

    The returned function takes a block and returns
    "<tag>: <filename display> (<compressed size display>)".
    """
    def _fileDescription(s):
        name = s["filename"].display
        packed = s["compressed_size"].display
        return "%s: %s (%s)" % (tag, name, packed)

    return _fileDescription
|
|
|
|
|
|
def newSubHeader(s):
    """
    Return the header field generator of a subblock, without the
    file-only fields.
    """
    return specialHeader(s, is_file=False)
|
|
|
|
|
|
class EndFlags(StaticFieldSet):
    """
    Flag word of the archive end block: four single-bit flags, ten unused
    bits and the two flags shared by all blocks (commonFlags).
    """
    format = (
        (Bit, "has_next_vol", "Whether there is another next volume"),
        (Bit, "has_data_crc", "Whether a CRC value is present"),
        (Bit, "rev_space"),
        (Bit, "has_vol_number", "Whether the volume number is present"),
        (NullBits, "unused[]", 10),
    ) + commonFlags
|
|
|
|
|
|
def endFlags(s):
    """
    Yield the flag field set of the archive end block.
    """
    flags = EndFlags(s, "flags", "End block flags")
    yield flags
|
|
|
|
|
|
class BlockFlags(StaticFieldSet):
    """
    Default flag word of a block: fourteen unused bits followed by the two
    flags shared by all blocks (commonFlags).
    """
    static_size = 16

    format = ((NullBits, "unused[]", 14),) + commonFlags
|
|
|
|
|
|
class Block(FieldSet):
    """
    One block of a RAR archive.

    Every block starts with a CRC16, a type byte, a flag word and a 16-bit
    block size. BLOCK_INFO maps the known block types to the functions that
    parse their flags, remaining header and body; block types that are not
    listed are parsed with the default parseFlags/parseHeader/parseBody
    methods of this class.
    """

    # type: (field name, description or description callback,
    #        flags parser, header parser, body parser)
    BLOCK_INFO = {
        # None means 'use default function'
        0x72: ("marker", "File format marker", markerFlags, None, None),
        0x73: ("archive_start", "Archive info", archiveFlags, archiveHeader, None),
        0x74: ("file[]", fileDescription("File entry"), fileFlags, fileHeader, fileBody),
        0x75: ("comment[]", "Comment", None, commentHeader, commentBody),
        0x76: ("av_info[]", "Extra information", None, avInfoHeader, avInfoBody),
        0x77: ("sub_block[]", fileDescription("Subblock"), None, newSubHeader, fileBody),
        0x78: ("recovery[]", "Recovery block", None, recoveryHeader, None),
        0x79: ("signature", "Signature block", None, signatureHeader, None),
        0x7A: ("sub_block[]", fileDescription("New-format subblock"), fileFlags,
               newSubHeader, fileBody),
        0x7B: ("archive_end", "Archive end block", endFlags, None, None),
    }

    def __init__(self, parent, name):
        FieldSet.__init__(self, parent, name)
        t = self["block_type"].value
        if t in self.BLOCK_INFO:
            self._name, desc, parseFlags, parseHeader, parseBody = self.BLOCK_INFO[t]
            if callable(desc):
                self.createDescription = lambda: desc(self)
            elif desc:
                self._description = desc
            # The parsers must be installed before "block_size" is read
            # below, because reading it runs the flags parser.
            if parseFlags:
                self.parseFlags = lambda: parseFlags(self)
            if parseHeader:
                self.parseHeader = lambda: parseHeader(self)
            if parseBody:
                self.parseBody = lambda: parseBody(self)
        else:
            self.info("Processing as unknown block of type %u" % t)

        # Block size in bits: the header, plus the data that follows it
        self._size = 8 * self["block_size"].value
        if t == 0x74 or t == 0x7A:
            self._size += 8 * self["compressed_size"].value
            if "is_large" in self["flags"] and self["flags/is_large"].value:
                self._size += 8 * self["large_size"].value
        elif ("has_added_size" in self["flags"]
              and self["flags/has_added_size"].value
              and "added_size" in self):
            # The flag lives inside the "flags" field set, not in the block
            # itself. "added_size" is only present when the header parser
            # of this block type yields it, hence the last check.
            self._size += 8 * self["added_size"].value
        # TODO: check if any other member is needed here

    def createFields(self):
        yield textHandler(UInt16(self, "crc16", "Block CRC16"), hexadecimal)
        yield textHandler(UInt8(self, "block_type", "Block type"), hexadecimal)

        # Parse flags
        for field in self.parseFlags():
            yield field

        # Get block size
        yield filesizeHandler(UInt16(self, "block_size", "Block size"))

        # Parse remaining header
        for field in self.parseHeader():
            yield field

        # Finish header with data of unknown size (current_size is in bits)
        size = self["block_size"].value - (self.current_size // 8)
        if size > 0:
            yield RawBytes(self, "unknown", size, "Unknow data (UInt32 probably)")

        # Parse body
        for field in self.parseBody():
            yield field

    def createDescription(self):
        """
        Default description, used for block types missing from BLOCK_INFO.
        """
        # The type field is named "block_type" (see createFields)
        return "Block entry: %s" % self["block_type"].display

    def parseFlags(self):
        """
        Default flags parser: a generic BlockFlags field set.
        """
        yield BlockFlags(self, "flags", "Block header flags")

    def parseHeader(self):
        """
        Default header parser: the optional "added_size" field.
        """
        if "has_added_size" in self["flags"] and \
                self["flags/has_added_size"].value:
            yield filesizeHandler(UInt32(self, "added_size",
                                         "Supplementary block size"))

    def parseBody(self):
        """
        Parse what is left of the block
        """
        size = self["block_size"].value - (self.current_size // 8)
        if "has_added_size" in self["flags"] and self["flags/has_added_size"].value:
            size += self["added_size"].value
        if size > 0:
            yield RawBytes(self, "body", size, "Body data")
|
|
|
|
|
|
class RarFile(Parser):
    """
    Parser of RAR archives: a sequence of Block fields read until the end
    of the stream.
    """
    # Byte string: the stream is compared and searched as bytes, and a text
    # literal would never match under Python 3.
    MAGIC = b"Rar!\x1A\x07\x00"
    PARSER_TAGS = {
        "id": "rar",
        "category": "archive",
        "file_ext": ("rar",),
        "mime": (u"application/x-rar-compressed",),
        "min_size": 7 * 8,
        "magic": ((MAGIC, 0),),
        "description": "Roshal archive (RAR)",
    }
    endian = LITTLE_ENDIAN

    def validate(self):
        """
        Return True when the stream starts with MAGIC, an error message
        otherwise.
        """
        magic = self.MAGIC
        if self.stream.readBytes(0, len(magic)) != magic:
            return "Invalid magic"
        return True

    def createFields(self):
        while not self.eof:
            yield Block(self, "block[]")

    def createContentSize(self):
        """
        Return the content size in bits, found by searching the 7-byte end
        pattern within the first MAX_FILESIZE bytes, or None when the
        pattern is not found.
        """
        start = 0
        end = MAX_FILESIZE * 8
        pos = self.stream.searchBytes(b"\xC4\x3D\x7B\x00\x40\x07\x00", start, end)
        if pos is not None:
            # The returned size includes the 7 bytes of the pattern itself
            return pos + 7 * 8
        return None
|