SickGear/lib/hachoir/stream/input.py
JackDandy 980e05cc99 Change Hachoir: current upstream no longer supports PY2, so backport their PY3 code to prevent a need for system-dependent external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong field order when parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy references to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex (fixes hachoir bug #57).
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00


from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir.core.error import info
from hachoir.core.log import Logger
from hachoir.core.bits import str2long
from hachoir.core.i18n import getTerminalCharset, _
from hachoir.core.tools import lowerBound, alignValue
from errno import ESPIPE
from weakref import ref as weakref_ref
from hachoir.stream import StreamError


class InputStreamError(StreamError):
    pass


class ReadStreamError(InputStreamError):

    def __init__(self, size, address, got=None):
        self.size = size
        self.address = address
        self.got = got
        if self.got is not None:
            msg = _("Can't read %u bits at address %u (got %u bits)") % (self.size, self.address, self.got)
        else:
            msg = _("Can't read %u bits at address %u") % (self.size, self.address)
        InputStreamError.__init__(self, msg)


class NullStreamError(InputStreamError):

    def __init__(self, source):
        self.source = source
        msg = _("Input size is null (source='%s')!") % self.source
        InputStreamError.__init__(self, msg)
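
# Illustrative only (not part of the module): reading past the end of a
# stream raises ReadStreamError carrying the request and what was actually
# available, all expressed in bits.
#
#     try:
#         StringInputStream("AB").readBytes(0, 4)   # ask for 4 bytes of 2
#     except ReadStreamError, err:
#         print err.size, err.address, err.got      # -> 32 0 16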


class FileFromInputStream:
    _offset = 0
    _from_end = False

    def __init__(self, stream):
        self.stream = stream
        self._setSize(stream.askSize(self))

    def _setSize(self, size):
        if size is None:
            self._size = size
        elif size % 8:
            raise InputStreamError("Invalid size")
        else:
            self._size = size // 8

    def tell(self):
        if self._from_end:
            while self._size is None:
                self.stream._feed(max(self.stream._current_size << 1, 1 << 16))
            self._from_end = False
            self._offset += self._size
        return self._offset

    def seek(self, pos, whence=0):
        if whence == 0:
            self._from_end = False
            self._offset = pos
        elif whence == 1:
            self._offset += pos
        elif whence == 2:
            self._from_end = True
            self._offset = pos
        else:
            raise ValueError("seek() second argument must be 0, 1 or 2")

    def read(self, size=None):
        def read(address, size):
            shift, data, missing = self.stream.read(8 * address, 8 * size)
            if shift:
                raise InputStreamError("TODO: handle non-byte-aligned data")
            return data
        if self._size or size is not None and not self._from_end:
            # We don't want self.tell() to read anything
            # and the size must be known if we read until the end.
            pos = self.tell()
            if size is None or (self._size is not None and self._size < pos + size):
                size = self._size - pos
            if size <= 0:
                return ''
            data = read(pos, size)
            self._offset += len(data)
            return data
        elif self._from_end:
            # TODO: not tested
            max_size = - self._offset
            if size is None or max_size < size:
                size = max_size
            if size <= 0:
                return ''
            data = '', ''
            self._offset = max(0, self.stream._current_size // 8 + self._offset)
            self._from_end = False
            bs = max(max_size, 1 << 16)
            while True:
                d = read(self._offset, bs)
                data = data[1], d
                self._offset += len(d)
                if self._size:
                    bs = self._size - self._offset
                    if not bs:
                        data = data[0] + data[1]
                        d = len(data) - max_size
                        return data[d:d + size]
        else:
            # TODO: not tested
            data = []
            size = 1 << 16
            while True:
                d = read(self._offset, size)
                data.append(d)
                self._offset += len(d)
                if self._size:
                    size = self._size - self._offset
                    if not size:
                        return ''.join(data)
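
# Illustrative only: InputStream.file() (defined below) wraps a stream in the
# FileFromInputStream adapter above, so byte-oriented callers get the usual
# tell/seek/read interface.
#
#     stream = StringInputStream("hachoir")
#     fileobj = stream.file()            # FileFromInputStream(stream)
#     fileobj.seek(2)
#     fileobj.read(3)                    # -> 'cho'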


class InputStream(Logger):
    _set_size = None
    _current_size = 0

    def __init__(self, source=None, size=None, packets=None, **args):
        self.source = source
        self._size = size  # in bits
        if size == 0:
            raise NullStreamError(source)
        self.tags = tuple(args.get("tags", tuple()))
        self.packets = packets

    def close(self):
        raise NotImplementedError

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()

    def askSize(self, client):
        if self._size != self._current_size:
            if self._set_size is None:
                self._set_size = []
            self._set_size.append(weakref_ref(client))
        return self._size

    def _setSize(self, size=None):
        assert self._size is None or self._current_size <= self._size
        if self._size != self._current_size:
            self._size = self._current_size
            if not self._size:
                raise NullStreamError(self.source)
            if self._set_size:
                for client in self._set_size:
                    client = client()
                    if client:
                        client._setSize(self._size)
                del self._set_size

    size = property(lambda self: self._size, doc="Size of the stream in bits")
    checked = property(lambda self: self._size == self._current_size)

    def sizeGe(self, size, const=False):
        if self._current_size >= size:
            return True
        elif self._size is not None and 0 < self._size < size:
            return False
        else:
            return not const and not self._feed(size)

    def _feed(self, size):
        return self.read(size - 1, 1)[2]

    def read(self, address, size):
        """
        Read 'size' bits at position 'address' (in bits)
        from the beginning of the stream.
        """
        raise NotImplementedError

    def readBits(self, address, nbits, endian):
        assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
        if endian is MIDDLE_ENDIAN:
            # read an aligned chunk of words
            wordaddr, remainder = divmod(address, 16)
            wordnbits = alignValue(remainder + nbits, 16)
            _, data, missing = self.read(wordaddr * 16, wordnbits)
            shift = remainder
        else:
            shift, data, missing = self.read(address, nbits)
        if missing:
            raise ReadStreamError(nbits, address)
        value = str2long(data, endian)
        if endian in (BIG_ENDIAN, MIDDLE_ENDIAN):
            value >>= len(data) * 8 - shift - nbits
        else:
            value >>= shift
        return value & (1 << nbits) - 1

    def readInteger(self, address, signed, nbits, endian):
        """ Read an integer number """
        value = self.readBits(address, nbits, endian)
        # Signed number. Example with nbits=8:
        # if 128 <= value: value -= 256
        if signed and (1 << (nbits - 1)) <= value:
            value -= (1 << nbits)
        return value

    def readBytes(self, address, nb_bytes):
        shift, data, missing = self.read(address, 8 * nb_bytes)
        if shift:
            raise InputStreamError("TODO: handle non-byte-aligned data")
        if missing:
            raise ReadStreamError(8 * nb_bytes, address)
        return data

    def searchBytesLength(self, needle, include_needle,
                          start_address=0, end_address=None):
        """
        If include_needle is True, add its length to the result.
        Returns None if the needle can't be found.
        """
        pos = self.searchBytes(needle, start_address, end_address)
        if pos is None:
            return None
        length = (pos - start_address) // 8
        if include_needle:
            length += len(needle)
        return length

    def searchBytes(self, needle, start_address=0, end_address=None):
        """
        Search some bytes in [start_address; end_address[. Addresses must
        be byte-aligned. Returns the address of the needle if found,
        else None.
        """
        if start_address % 8:
            raise InputStreamError("Unable to search bytes with address with bit granularity")
        length = len(needle)
        size = max(3 * length, 4096)
        buffer = ''
        if self._size and (end_address is None or self._size < end_address):
            end_address = self._size
        while True:
            if end_address is not None:
                todo = (end_address - start_address) >> 3
                if todo < size:
                    if todo <= 0:
                        return None
                    size = todo
            data = self.readBytes(start_address, size)
            if end_address is None and self._size:
                end_address = self._size
                size = (end_address - start_address) >> 3
                assert size > 0
                data = data[:size]
            start_address += 8 * size
            buffer = buffer[len(buffer) - length + 1:] + data
            found = buffer.find(needle)
            if found >= 0:
                return start_address + (found - len(buffer)) * 8

    def file(self):
        return FileFromInputStream(self)
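
# A minimal sketch of the bit-addressed API above (illustrative only, using
# StringInputStream which is defined further down): addresses and sizes are
# given in bits, so byte offsets are multiplied by 8.
#
#     stream = StringInputStream("\x12\xf0ABCD")
#     stream.readBytes(0, 2)                        # -> '\x12\xf0'
#     stream.readBits(0, 16, BIG_ENDIAN)            # -> 0x12f0
#     stream.readInteger(8, True, 8, BIG_ENDIAN)    # -> -16 (0xf0 as signed)
#     stream.searchBytes("AB")                      # -> 16, the bit address of "AB"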


class InputPipe(object):
    """
    InputPipe makes input streams seekable by caching a certain
    amount of data. The memory usage may be unlimited in worst cases.

    A function (set_size) is called when the size of the stream is known.

    InputPipe sees the input stream as an array of blocks of
    size = (2 ^ self.buffer_size) and self.buffers maps to this array.
    It also maintains a circular ordered list of non-discarded blocks,
    sorted by access time.

    Each element of self.buffers is an array of 3 elements:
     * self.buffers[i][0] is the data.
       len(self.buffers[i][0]) == 1 << self.buffer_size
       (except at the end: the length may be smaller)
     * self.buffers[i][1] is the index of a more recently used block
     * self.buffers[i][2] is the opposite of self.buffers[i][1],
       in order to have a double-linked list.

    For any discarded block, self.buffers[i] = None

    self.last is the index of the most recently accessed block.
    self.first is the first (= smallest index) non-discarded block.

    How InputPipe discards blocks:
     * Just before returning from the read method.
     * Only if there are more than self.buffer_nb_min blocks in memory.
     * While self.buffers[self.first] is the least recently used block.

    Property: There is no hole in self.buffers, except at the beginning.
    """
    buffer_nb_min = 256
    buffer_size = 16
    last = None
    size = None

    def __init__(self, input, set_size=None):
        self._input = input
        self.first = self.address = 0
        self.buffers = []
        self.set_size = set_size

    current_size = property(lambda self: len(self.buffers) << self.buffer_size)

    def _append(self, data):
        if self.last is None:
            self.last = next = prev = 0
        else:
            prev = self.last
            last = self.buffers[prev]
            next = last[1]
            self.last = self.buffers[next][2] = last[1] = len(self.buffers)
        self.buffers.append([data, next, prev])

    def _get(self, index):
        if index >= len(self.buffers):
            return ''
        buf = self.buffers[index]
        if buf is None:
            raise InputStreamError(_("Error: Buffers too small. Can't seek backward."))
        if self.last != index:
            next = buf[1]
            prev = buf[2]
            self.buffers[next][2] = prev
            self.buffers[prev][1] = next
            first = self.buffers[self.last][1]
            buf[1] = first
            buf[2] = self.last
            self.buffers[first][2] = index
            self.buffers[self.last][1] = index
            self.last = index
        return buf[0]

    def _flush(self):
        lim = len(self.buffers) - self.buffer_nb_min
        while self.first < lim:
            buf = self.buffers[self.first]
            if buf[2] != self.last:
                break
            info("Discarding buffer %u." % self.first)
            self.buffers[self.last][1] = buf[1]
            self.buffers[buf[1]][2] = self.last
            self.buffers[self.first] = None
            self.first += 1

    def seek(self, address):
        assert 0 <= address
        self.address = address

    def read(self, size):
        end = self.address + size
        for i in xrange(len(self.buffers), (end >> self.buffer_size) + 1):
            data = self._input.read(1 << self.buffer_size)
            if len(data) < 1 << self.buffer_size:
                self.size = (len(self.buffers) << self.buffer_size) + len(data)
                if self.set_size:
                    self.set_size(self.size)
                if data:
                    self._append(data)
                break
            self._append(data)
        block, offset = divmod(self.address, 1 << self.buffer_size)
        data = ''.join(self._get(index)
                       for index in xrange(block, (end - 1 >> self.buffer_size) + 1)
                       )[offset:offset + size]
        self._flush()
        self.address += len(data)
        return data
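
# With the defaults above (buffer_size = 16, buffer_nb_min = 256) each block
# holds 64 KiB and _flush() only starts discarding the least recently used
# blocks once more than 256 of them (about 16 MiB) are cached; seeking back
# to a discarded block raises the InputStreamError seen in _get().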


class InputIOStream(InputStream):

    def __init__(self, input, size=None, **args):
        if not hasattr(input, "seek"):
            if size is None:
                input = InputPipe(input, self._setSize)
            else:
                input = InputPipe(input)
        elif size is None:
            try:
                input.seek(0, 2)
                size = input.tell() * 8
            except IOError, err:
                if err.errno == ESPIPE:
                    input = InputPipe(input, self._setSize)
                else:
                    charset = getTerminalCharset()
                    errmsg = unicode(str(err), charset)
                    source = args.get("source", "<inputio:%r>" % input)
                    raise InputStreamError(_("Unable to get size of %s: %s") % (source, errmsg))
        self._input = input
        InputStream.__init__(self, size=size, **args)

    def close(self):
        self._input.close()

    def __current_size(self):
        if self._size:
            return self._size
        if self._input.size:
            return 8 * self._input.size
        return 8 * self._input.current_size
    _current_size = property(__current_size)

    def read(self, address, size):
        if not size:
            return 0, '', False
        assert size > 0
        _size = self._size
        address, shift = divmod(address, 8)
        self._input.seek(address)
        size = (size + shift + 7) >> 3
        data = self._input.read(size)
        got = len(data)
        missing = size != got
        if missing and _size == self._size:
            raise ReadStreamError(8 * size, 8 * address, 8 * got)
        return shift, data, missing

    def file(self):
        if hasattr(self._input, "fileno"):
            from os import dup, fdopen
            new_fd = dup(self._input.fileno())
            new_file = fdopen(new_fd, "rb")
            new_file.seek(0)
            return new_file
        return InputStream.file(self)
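
# A minimal usage sketch (the file name is hypothetical): seekable objects are
# used directly, while unseekable inputs such as pipes fall back to the
# InputPipe cache above so the resulting stream is still seekable.
#
#     with InputIOStream(open("sample.bin", "rb"), source="file:sample.bin") as stream:
#         magic = stream.readBytes(0, 4)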


class StringInputStream(InputStream):

    def __init__(self, data, source="<string>", **args):
        self.data = data
        InputStream.__init__(self, source=source, size=8 * len(data), **args)
        self._current_size = self._size

    def close(self):
        pass

    def read(self, address, size):
        address, shift = divmod(address, 8)
        size = (size + shift + 7) >> 3
        data = self.data[address:address + size]
        got = len(data)
        if got != size:
            raise ReadStreamError(8 * size, 8 * address, 8 * got)
        return shift, data, False
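
# A minimal usage sketch: StringInputStream serves reads straight from an
# in-memory string, so the size is known up front (stream.checked is True and
# stream.size == 8 * len(data)).
#
#     stream = StringInputStream("RIFF\x10\x00\x00\x00")
#     stream.readBytes(0, 4)                            # -> 'RIFF'
#     stream.readInteger(32, False, 32, LITTLE_ENDIAN)  # -> 16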


class InputSubStream(InputStream):

    def __init__(self, stream, offset, size=None, source=None, **args):
        if offset is None:
            offset = 0
        if size is None and stream.size is not None:
            size = stream.size - offset
        if size <= 0:
            raise ValueError("InputSubStream: offset is outside input stream")
        self.stream = stream
        self._offset = offset
        if source is None:
            source = "<substream input=%s offset=%s size=%s>" % (
                stream.source, offset, size)
        InputStream.__init__(self, source=source, size=size, **args)
        self.stream.askSize(self)

    _current_size = property(lambda self: min(
        self._size, max(0, self.stream._current_size - self._offset)))

    def close(self):
        self.stream = None

    def read(self, address, size):
        return self.stream.read(self._offset + address, size)
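
# A minimal usage sketch: offsets and sizes are given in bits, so carving a
# 16 byte window that starts at byte 4 of an existing parent stream looks like
#
#     sub = InputSubStream(stream, 8 * 4, 8 * 16)
#     sub.readBytes(0, 16)      # bytes 4..19 of the parent stream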


def InputFieldStream(field, **args):
    if not field.parent:
        return field.stream
    stream = field.parent.stream
    args["size"] = field.size
    args.setdefault("source", stream.source + field.path)
    return InputSubStream(stream, field.absolute_address, **args)
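
# FragmentedStream below stitches a chain of fragment fields into one
# continuous stream: it follows field.next from fragment to fragment, records
# each payload as an (offset, absolute address, size) entry in self.fragments,
# and serves reads that may span fragment boundaries from the parent stream.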


class FragmentedStream(InputStream):

    def __init__(self, field, **args):
        self.stream = field.parent.stream
        data = field.getData()
        self.fragments = [(0, data.absolute_address, data.size)]
        self.next = field.next
        args.setdefault("source", "%s%s" % (self.stream.source, field.path))
        InputStream.__init__(self, **args)
        if not self.next:
            self._current_size = data.size
            self._setSize()

    def close(self):
        self.stream = None

    def _feed(self, end):
        if self._current_size < end:
            if self.checked:
                raise ReadStreamError(end - self._size, self._size)
            a, fa, fs = self.fragments[-1]
            while self.stream.sizeGe(fa + min(fs, end - a)):
                a += fs
                f = self.next
                if a >= end:
                    self._current_size = end
                    if a == end and not f:
                        self._setSize()
                    return False
                if f:
                    self.next = f.next
                    f = f.getData()
                if not f:
                    self._current_size = a
                    self._setSize()
                    return True
                fa = f.absolute_address
                fs = f.size
                self.fragments += [(a, fa, fs)]
            self._current_size = a + max(0, self.stream.size - fa)
            self._setSize()
            return True
        return False

    def read(self, address, size):
        assert size > 0
        missing = self._feed(address + size)
        if missing:
            size = self._size - address
            if size <= 0:
                return 0, '', True
        d = []
        i = lowerBound(self.fragments, lambda x: x[0] <= address)
        a, fa, fs = self.fragments[i - 1]
        a -= address
        fa -= a
        fs += a
        s = None
        while True:
            n = min(fs, size)
            u, v, w = self.stream.read(fa, n)
            assert not w
            if s is None:
                s = u
            else:
                assert not u
            d += [v]
            size -= n
            if not size:
                return s, ''.join(d), missing
            a, fa, fs = self.fragments[i]
            i += 1


class ConcatStream(InputStream):
    # TODO: concatenate any number of streams of any type

    def __init__(self, streams, **args):
        if len(streams) > 2 or not streams[0].checked:
            raise NotImplementedError
        self.__size0 = streams[0].size
        size1 = streams[1].askSize(self)
        if size1 is not None:
            args["size"] = self.__size0 + size1
        self.__streams = streams
        InputStream.__init__(self, **args)

    _current_size = property(lambda self: self.__size0 + self.__streams[1]._current_size)

    def close(self):
        self.__streams = None

    def read(self, address, size):
        _size = self._size
        s = self.__size0 - address
        shift, data, missing = None, '', False
        if s > 0:
            s = min(size, s)
            shift, data, w = self.__streams[0].read(address, s)
            assert not w
            a, s = 0, size - s
        else:
            a, s = -s, size
        if s:
            u, v, missing = self.__streams[1].read(a, s)
            if missing and _size == self._size:
                raise ReadStreamError(s, a)
            if shift is None:
                shift = u
            else:
                assert not u
            data += v
        return shift, data, missing
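
# A minimal usage sketch: ConcatStream presents two streams back to back; the
# first one must already be fully known (checked), as enforced in __init__.
#
#     both = ConcatStream([StringInputStream("AB"), StringInputStream("CD")])
#     both.readBytes(0, 4)      # -> 'ABCD'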