mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-21 00:53:37 +00:00
980e05cc99
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014). Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/* Remove metadata/qt* PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5). Basic Support for XMP Packets. tga: improvements to adhere more closely to the spec. pdf: slightly improved parsing. rar: fix TypeError on unknown block types. Add MacRoman win32 codepage. tiff/exif: support SubIFDs and tiled images. Add method to export metadata in dictionary. mpeg_video: don't attempt to parse Stream past length. mpeg_video: parse ESCR correctly, add SCR value. Change centralise CustomFragments. field: don't set parser class if class is None, to enable autodetect. field: add value/display for CustomFragment. parser: inline warning to enable tracebacks in debug mode. Fix empty bytestrings in makePrintable. Fix contentSize in jpeg.py to account for image_data blocks. Fix the ELF parser. Enhance the AR archive parser. elf parser: fix wrong wrong fields order in parsing little endian section flags. elf parser: add s390 as a machine type. Flesh out mp4 parser. PORT: Version 2.0a1 (inline with 3.0a1). Major refactoring and PEP8. Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams. metadata: get comment from ZIP. Support for InputIOStream.read(0). Fix sizeGe when size is None. Remove unused new_seekable_field_set file. Remove parser Mapsforge .map. Remove parser Parallel Realities Starfighter .pak files. sevenzip: fix for newer archives. java: update access flags and modifiers for Java 1.7 and update description text for most recent Java. Support ustar prefix field in tar archives. Remove file_system* parsers. Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*. Remove program parsers macho, nds, prc. Support non-8bit Character subclasses. Python parser supports Python 3.7. 
Enhance mpeg_ts parser to support MTS/M2TS. Support for creation date in tiff. Change don't hardcode errno constant. PORT: 1.9.1 Internal Only: The following are legacy reference to upstream commit messages. Relevant changes up to b0a115f8. Use integer division. Replace HACHOIR_ERRORS with Exception. Fix metadata.Data: make it sortable. Import fixes from e7de492. PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad). Replace hachoir.core.field with hachoir.field Replace hachoir.core.stream with hachoir.stream Remove the compatibility module for PY1.5 to PY2.5. metadata: support TIFF picture. metadata: fix string normalization. metadata: fix datetime regex Fix hachoir bug #57. FileFromInputStream: fix comparison between None and an int. InputIOStream: open the file in binary mode.
320 lines
8 KiB
Python
320 lines
8 KiB
Python
"""
|
|
Utilities to convert integers and binary strings to binary (number), binary
|
|
string, number, hexadecimal, etc.
|
|
"""
|
|
|
|
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
|
|
from itertools import chain, repeat
|
|
from struct import calcsize, unpack, error as struct_error
|
|
|
|
|
|
def swap16(value):
    """
    Exchange the two bytes of a 16 bits integer (big endian <-> little
    endian).

    The low byte is moved to the high position and the remaining upper part
    is shifted down by eight bits. The upper part is not masked, so the
    input is expected to fit in 16 bits.

    >>> "%x" % swap16(0x1234)
    '3412'
    """
    low_byte = value & 0xFF
    upper_part = value >> 8
    return (low_byte << 8) | upper_part
|
|
|
|
|
|
def swap32(value):
    """
    Reverse the byte order of a 32 bits integer (big endian <-> little
    endian).

    The argument is first converted with int(); only its lowest four bytes
    contribute to the result.

    >>> "%x" % swap32(0x12345678)
    '78563412'
    """
    number = int(value)
    # Extract the four bytes, least significant first.
    byte0 = number & 0xFF
    byte1 = (number >> 8) & 0xFF
    byte2 = (number >> 16) & 0xFF
    byte3 = (number >> 24) & 0xFF
    # Reassemble them in the opposite order.
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3
|
|
|
|
|
|
def arrswapmid(data):
    r"""
    Swap every pair of adjacent items of a sequence, which converts an
    array of characters from middle-endian to big-endian and vice-versa.

    The sequence must have an even length (checked with an assertion).
    A new list is returned; the input is left untouched.

    >>> arrswapmid("badcfehg")
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    """
    assert len(data) % 2 == 0
    swapped = []
    # Walk the (even index, odd index) pairs and emit each pair reversed.
    for first, second in zip(data[0::2], data[1::2]):
        swapped.append(second)
        swapped.append(first)
    return swapped
|
|
|
|
|
|
def strswapmid(data):
    r"""
    Convert raw data from middle-endian to big-endian and vice-versa by
    swapping every pair of adjacent characters.

    The work is delegated to arrswapmid(); its list result is joined back
    into a single string.

    >>> strswapmid("badcfehg")
    'abcdefgh'
    """
    swapped_characters = arrswapmid(data)
    return ''.join(swapped_characters)
|
|
|
|
|
|
def bin2long(text, endian):
    """
    Convert a binary number written in a string into an integer.

    Characters other than "0" and "1" are skipped. With BIG_ENDIAN the
    first digit is the most significant one; otherwise the digit order is
    reversed before the number is computed. At least one binary digit is
    required (checked with an assertion).

    >>> bin2long("110", BIG_ENDIAN)
    6
    >>> bin2long("110", LITTLE_ENDIAN)
    3
    >>> bin2long("11 00", LITTLE_ENDIAN)
    3
    """
    assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
    zero = ord("0")
    digits = [ord(symbol) - zero for symbol in text if symbol in "01"]
    if endian is not BIG_ENDIAN:
        digits.reverse()
    assert 0 < len(digits)
    total = 0
    for digit in digits:
        # Each digit is 0 or 1: shift the accumulator and append the digit.
        total = (total << 1) | digit
    return total
|
|
|
|
|
|
def str2hex(value, prefix="", glue=u"", format="%02X"):
    r"""
    Convert a binary string to hexadecimal (base 16).

    - value: data to convert; its items may be one-character strings or
      integers (as yielded when iterating a bytearray, or bytes on
      Python 3)
    - prefix: text placed before the first byte; when it is not empty it is
      joined to the bytes with the same glue as every other item
    - glue: separator inserted between items
    - format: "%"-style format applied to the numeric value of each item

    >>> str2hex("ABC") == u'414243'
    True
    >>> str2hex("\xF0\xAF", glue=" ") == u'F0 AF'
    True
    >>> str2hex("ABC", prefix="0x") == u'0x414243'
    True
    >>> str2hex("ABC", format=r"\x%02X") == u'\\x41\\x42\\x43'
    True
    """
    try:
        text_type = unicode  # Python 2: promote byte strings to unicode
    except NameError:
        text_type = str  # no ``unicode`` builtin: str is the text type
    if isinstance(glue, str):
        glue = text_type(glue)
    pieces = [prefix] if 0 < len(prefix) else []
    for item in value:
        # Integer items are already byte values; characters need ord().
        code = item if isinstance(item, int) else ord(item)
        pieces.append(format % code)
    return glue.join(pieces)
|
|
|
|
|
|
def countBits(value):
    """
    Count number of bits needed to store a (positive) integer number.

    Zero still needs one bit. Negative values are rejected with an
    assertion.

    >>> countBits(0)
    1
    >>> countBits(1000)
    10
    >>> countBits(44100)
    16
    >>> countBits(18446744073709551615)
    64
    """
    assert 0 <= value
    # int.bit_length() returns 0 for zero, but one bit is still required
    # to store that value.
    return max(value.bit_length(), 1)
|
|
|
|
|
|
def byte2bin(number, classic_mode=True):
    """
    Convert a byte (integer in 0..255 range) to a string of eight binary
    digits.

    If classic_mode is true (default value), the most significant bit is
    written first; otherwise the least significant bit comes first. Only
    the eight lowest bits of the number are examined.

    >>> byte2bin(10)
    '00001010'
    >>> byte2bin(10, False)
    '01010000'
    """
    if classic_mode:
        shifts = range(7, -1, -1)
    else:
        shifts = range(8)
    return "".join(
        "1" if number & (1 << shift) else "0"
        for shift in shifts)
|
|
|
|
|
|
def long2raw(value, endian, size=None):
    r"""
    Convert a non-negative number to a raw string, one character per byte.

    - value: number to convert (negative values are rejected with an
      assertion); zero is written as a single nul byte
    - endian: LITTLE_ENDIAN, BIG_ENDIAN or MIDDLE_ENDIAN
    - size: if given, nul bytes are added to fill the result to size bytes
      (a number needing more than size bytes is not truncated)

    For MIDDLE_ENDIAN the big endian form (padding included) has every pair
    of adjacent bytes swapped by arrswapmid(), which requires an even
    number of bytes.

    >>> long2raw(0x1219, BIG_ENDIAN)
    '\x12\x19'
    >>> long2raw(0x1219, BIG_ENDIAN, 4)   # 32 bits
    '\x00\x00\x12\x19'
    >>> long2raw(0x1219, LITTLE_ENDIAN, 4)   # 32 bits
    '\x19\x12\x00\x00'
    >>> long2raw(0x0a0b0c0d, MIDDLE_ENDIAN) == "\x0b\x0a\x0d\x0c"
    True
    """
    assert 0 <= value
    assert endian in (LITTLE_ENDIAN, BIG_ENDIAN, MIDDLE_ENDIAN)

    # Split the number into bytes, least significant first. "not digits"
    # guarantees at least one byte so that zero is not encoded as an
    # empty string.
    digits = []
    while value != 0 or not digits:
        digits.append(chr(value & 0xFF))
        value >>= 8

    padding = max(size - len(digits), 0) if size else 0

    if endian is LITTLE_ENDIAN:
        raw = digits + ["\0"] * padding
    else:
        # Big endian layout: padding first, then most significant byte.
        raw = ["\0"] * padding + digits[::-1]
        if endian is MIDDLE_ENDIAN:
            # arrswapmid() needs a real sequence (it calls len() and
            # slices), hence the list built above.
            raw = arrswapmid(raw)
    return "".join(raw)
|
|
|
|
|
|
def long2bin(size, value, endian, classic_mode=False):
    """
    Convert a number into bits (in a string), written as groups of eight
    digits separated by a space:
     - size: size in bits of the number
     - value: positive (or nul) number
     - endian: BIG_ENDIAN or LITTLE_ENDIAN. Bits are produced starting
       from the least significant one; with LITTLE_ENDIAN the whole
       sequence is reversed before it is split into groups
     - classic_mode (default: False): reverse each packet of 8 bits

    >>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN)
    '10100000 10010000'
    >>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN, True)
    '00000101 00001001'
    >>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN)
    '00001001 00000101'
    >>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN, True)
    '10010000 10100000'
    """
    assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
    assert 0 <= value

    # One digit per requested bit, least significant bit first.
    text = "".join(
        "1" if (value >> shift) & 1 else "0"
        for shift in range(size))
    if endian is LITTLE_ENDIAN:
        text = text[::-1]

    # Split into packets of 8 digits; the last packet may be shorter when
    # size is not a multiple of 8.
    groups = [text[start:start + 8] for start in range(0, len(text), 8)]
    if classic_mode:
        groups = [group[::-1] for group in groups]
    return " ".join(groups)
|
|
|
|
|
|
def str2bin(value, classic_mode=True):
    r"""
    Convert a binary string to binary digits: each character becomes a
    group of eight digits (see byte2bin()), groups being separated by a
    space.

    classic_mode is passed unchanged to byte2bin() and selects the bit
    order inside each group.

    >>> str2bin("\x03\xFF")
    '00000011 11111111'
    >>> str2bin("\x03\xFF", False)
    '11000000 11111111'
    """
    groups = [byte2bin(ord(character), classic_mode) for character in value]
    return " ".join(groups)
|
|
|
|
|
|
def _createStructFormat():
    """
    Create a dictionary endian => {size in bytes => struct format} used
    by str2long() to convert raw data to a positive integer.

    Sizes are computed from the explicit-endian ("standard size") form of
    each type code, which is the form stored in the table, so the key
    always matches the number of bytes that unpack() will expect for that
    format. When two codes have the same size, the later one in "BHILQ"
    wins.
    """
    table = {
        BIG_ENDIAN: {},
        LITTLE_ENDIAN: {},
    }
    for type_code in "BHILQ":
        big_format = '>%s' % type_code
        little_format = '<%s' % type_code
        try:
            # '>' and '<' share the same standard sizes.
            size = calcsize(big_format)
        except struct_error:
            # Type code not supported by this interpreter: leave it out of
            # the table (str2long() has a generic fallback).
            continue
        table[BIG_ENDIAN][size] = big_format
        table[LITTLE_ENDIAN][size] = little_format
    return table
|
|
|
|
|
|
# Module-level lookup table built by _createStructFormat() when the module
# is loaded; str2long() indexes it by endian, then by data length in bytes.
_struct_format = _createStructFormat()
|
|
|
|
|
|
def str2long(data, endian):
    r"""
    Convert a raw data (type 'str') into a long integer.

    - data: 1 to 32 characters, one per byte (checked with an assertion)
    - endian: BIG_ENDIAN, LITTLE_ENDIAN or MIDDLE_ENDIAN

    When the _struct_format table has an entry for this endian and length,
    the value is decoded with struct.unpack(); otherwise the bytes are
    summed one by one.

    >>> chr(str2long('*', BIG_ENDIAN))
    '*'
    >>> str2long("\x00\x01\x02\x03", BIG_ENDIAN) == 0x10203
    True
    >>> str2long("\x2a\x10", LITTLE_ENDIAN) == 0x102a
    True
    >>> str2long("\xff\x14\x2a\x10", BIG_ENDIAN) == 0xff142a10
    True
    >>> str2long("\x00\x01\x02\x03", LITTLE_ENDIAN) == 0x3020100
    True
    >>> str2long("\xff\x14\x2a\x10\xab\x00\xd9\x0e", BIG_ENDIAN) == 0xff142a10ab00d90e
    True
    >>> str2long("\xff\xff\xff\xff\xff\xff\xff\xff", BIG_ENDIAN) == (2**64-1)
    True
    >>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
    True
    """
    length = len(data)
    assert 1 <= length <= 32   # arbitrary limit: 256 bits

    # Fast path: a struct format exists for this (endian, length) pair.
    try:
        struct_code = _struct_format[endian][length]
    except KeyError:
        struct_code = None
    if struct_code is not None:
        return unpack(struct_code, data)[0]

    # Generic path: order the bytes least significant first, then add
    # each one at its own 8-bit position.
    assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
    if endian is BIG_ENDIAN:
        ordered = reversed(data)
    elif endian is MIDDLE_ENDIAN:
        ordered = reversed(strswapmid(data))
    else:
        ordered = data
    total = 0
    for position, character in enumerate(ordered):
        total += ord(character) << (8 * position)
    return total
|