SickGear/lib/hachoir/core/bits.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong wrong fields order in parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

320 lines
8 KiB
Python

"""
Utilities to convert integers and binary strings to binary (number), binary
string, number, hexadecimal, etc.
"""
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from itertools import chain, repeat
from struct import calcsize, unpack, error as struct_error
def swap16(value):
"""
Swap byte between big and little endian of a 16 bits integer.
>>> "%x" % swap16(0x1234)
'3412'
"""
return (value & 0xFF) << 8 | (value >> 8)
def swap32(value):
"""
Swap byte between big and little endian of a 32 bits integer.
>>> "%x" % swap32(0x12345678)
'78563412'
"""
value = int(value)
return (((value & 0x000000FF) << 24)
| ((value & 0x0000FF00) << 8)
| ((value & 0x00FF0000) >> 8)
| ((value & 0xFF000000) >> 24))
def arrswapmid(data):
r"""
Convert an array of characters from middle-endian to big-endian and
vice-versa.
>>> arrswapmid("badcfehg")
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
"""
assert len(data) % 2 == 0
ret = [''] * len(data)
ret[1::2] = data[0::2]
ret[0::2] = data[1::2]
return ret
def strswapmid(data):
r"""
Convert raw data from middle-endian to big-endian and vice-versa.
>>> strswapmid("badcfehg")
'abcdefgh'
"""
return ''.join(arrswapmid(data))
def bin2long(text, endian):
"""
Convert binary number written in a string into an integer.
Skip characters differents than "0" and "1".
>>> bin2long("110", BIG_ENDIAN)
6
>>> bin2long("110", LITTLE_ENDIAN)
3
>>> bin2long("11 00", LITTLE_ENDIAN)
3
"""
assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
bits = [(ord(character) - ord("0"))
for character in text if character in "01"]
if endian is not BIG_ENDIAN:
bits = bits[::-1]
size = len(bits)
assert 0 < size
value = 0
for bit in bits:
value *= 2
value += bit
return value
def str2hex(value, prefix="", glue=u"", format="%02X"):
r"""
Convert binary string in hexadecimal (base 16).
>>> str2hex("ABC")
u'414243'
>>> str2hex("\xF0\xAF", glue=" ")
u'F0 AF'
>>> str2hex("ABC", prefix="0x")
u'0x414243'
>>> str2hex("ABC", format=r"\x%02X")
u'\\x41\\x42\\x43'
"""
if isinstance(glue, str):
glue = unicode(glue)
if 0 < len(prefix):
text = [prefix]
else:
text = []
for character in value:
text.append(format % ord(character))
return glue.join(text)
def countBits(value):
"""
Count number of bits needed to store a (positive) integer number.
>>> countBits(0)
1
>>> countBits(1000)
10
>>> countBits(44100)
16
>>> countBits(18446744073709551615)
64
"""
assert 0 <= value
count = 1
bits = 1
while (1 << bits) <= value:
count += bits
value >>= bits
bits <<= 1
while 2 <= value:
if bits != 1:
bits >>= 1
else:
bits -= 1
while (1 << bits) <= value:
count += bits
value >>= bits
return count
def byte2bin(number, classic_mode=True):
"""
Convert a byte (integer in 0..255 range) to a binary string.
If classic_mode is true (default value), reverse bits.
>>> byte2bin(10)
'00001010'
>>> byte2bin(10, False)
'01010000'
"""
text = ""
for i in range(0, 8):
if classic_mode:
mask = 1 << (7 - i)
else:
mask = 1 << i
if (number & mask) == mask:
text += "1"
else:
text += "0"
return text
def long2raw(value, endian, size=None):
r"""
Convert a number (positive and not nul) to a raw string.
If size is given, add nul bytes to fill to size bytes.
>>> long2raw(0x1219, BIG_ENDIAN)
'\x12\x19'
>>> long2raw(0x1219, BIG_ENDIAN, 4) # 32 bits
'\x00\x00\x12\x19'
>>> long2raw(0x1219, LITTLE_ENDIAN, 4) # 32 bits
'\x19\x12\x00\x00'
"""
assert (not size and 0 < value) or (0 <= value)
assert endian in (LITTLE_ENDIAN, BIG_ENDIAN, MIDDLE_ENDIAN)
text = []
while (value != 0 or text == ""):
byte = value % 256
text.append(chr(byte))
value >>= 8
if size:
need = max(size - len(text), 0)
else:
need = 0
if need:
if endian is LITTLE_ENDIAN:
text = chain(text, repeat("\0", need))
else:
text = chain(repeat("\0", need), reversed(text))
else:
if endian is not LITTLE_ENDIAN:
text = reversed(text)
if endian is MIDDLE_ENDIAN:
text = arrswapmid(text)
return "".join(text)
def long2bin(size, value, endian, classic_mode=False):
"""
Convert a number into bits (in a string):
- size: size in bits of the number
- value: positive (or nul) number
- endian: BIG_ENDIAN (most important bit first)
or LITTLE_ENDIAN (least important bit first)
- classic_mode (default: False): reverse each packet of 8 bits
>>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN)
'10100000 10010000'
>>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN, True)
'00000101 00001001'
>>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN)
'00001001 00000101'
>>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN, True)
'10010000 10100000'
"""
text = ""
assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
assert 0 <= value
for index in xrange(size):
if (value & 1) == 1:
text += "1"
else:
text += "0"
value >>= 1
if endian is LITTLE_ENDIAN:
text = text[::-1]
result = ""
while len(text) != 0:
if len(result) != 0:
result += " "
if classic_mode:
result += text[7::-1]
else:
result += text[:8]
text = text[8:]
return result
def str2bin(value, classic_mode=True):
r"""
Convert binary string to binary numbers.
If classic_mode is true (default value), reverse bits.
>>> str2bin("\x03\xFF")
'00000011 11111111'
>>> str2bin("\x03\xFF", False)
'11000000 11111111'
"""
text = ""
for character in value:
if text != "":
text += " "
byte = ord(character)
text += byte2bin(byte, classic_mode)
return text
def _createStructFormat():
"""
Create a dictionnary (endian, size_byte) => struct format used
by str2long() to convert raw data to positive integer.
"""
format = {
BIG_ENDIAN: {},
LITTLE_ENDIAN: {},
}
for struct_format in "BHILQ":
try:
size = calcsize(struct_format)
format[BIG_ENDIAN][size] = '>%s' % struct_format
format[LITTLE_ENDIAN][size] = '<%s' % struct_format
except struct_error:
pass
return format
_struct_format = _createStructFormat()
def str2long(data, endian):
r"""
Convert a raw data (type 'str') into a long integer.
>>> chr(str2long('*', BIG_ENDIAN))
'*'
>>> str2long("\x00\x01\x02\x03", BIG_ENDIAN) == 0x10203
True
>>> str2long("\x2a\x10", LITTLE_ENDIAN) == 0x102a
True
>>> str2long("\xff\x14\x2a\x10", BIG_ENDIAN) == 0xff142a10
True
>>> str2long("\x00\x01\x02\x03", LITTLE_ENDIAN) == 0x3020100
True
>>> str2long("\xff\x14\x2a\x10\xab\x00\xd9\x0e", BIG_ENDIAN) == 0xff142a10ab00d90e
True
>>> str2long("\xff\xff\xff\xff\xff\xff\xff\xff", BIG_ENDIAN) == (2**64-1)
True
>>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True
"""
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
try:
return unpack(_struct_format[endian][len(data)], data)[0]
except KeyError:
pass
assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
shift = 0
value = 0
if endian is BIG_ENDIAN:
data = reversed(data)
elif endian is MIDDLE_ENDIAN:
data = reversed(strswapmid(data))
for character in data:
byte = ord(character)
value += (byte << shift)
shift += 8
return value