""" Utilities to convert integers and binary strings to binary (number), binary string, number, hexadecimal, etc. """ from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN from hachoir_core.compatibility import reversed from itertools import chain, repeat from struct import calcsize, unpack, error as struct_error def swap16(value): """ Swap byte between big and little endian of a 16 bits integer. >>> "%x" % swap16(0x1234) '3412' """ return (value & 0xFF) << 8 | (value >> 8) def swap32(value): """ Swap byte between big and little endian of a 32 bits integer. >>> "%x" % swap32(0x12345678) '78563412' """ value = long(value) return ((value & 0x000000FFL) << 24) \ | ((value & 0x0000FF00L) << 8) \ | ((value & 0x00FF0000L) >> 8) \ | ((value & 0xFF000000L) >> 24) def arrswapmid(data): r""" Convert an array of characters from middle-endian to big-endian and vice-versa. >>> arrswapmid("badcfehg") ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] """ assert len(data)%2 == 0 ret = ['']*len(data) ret[1::2] = data[0::2] ret[0::2] = data[1::2] return ret def strswapmid(data): r""" Convert raw data from middle-endian to big-endian and vice-versa. >>> strswapmid("badcfehg") 'abcdefgh' """ return ''.join(arrswapmid(data)) def bin2long(text, endian): """ Convert binary number written in a string into an integer. Skip characters differents than "0" and "1". >>> bin2long("110", BIG_ENDIAN) 6 >>> bin2long("110", LITTLE_ENDIAN) 3 >>> bin2long("11 00", LITTLE_ENDIAN) 3 """ assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) bits = [ (ord(character)-ord("0")) \ for character in text if character in "01" ] if endian is not BIG_ENDIAN: bits = bits[::-1] size = len(bits) assert 0 < size value = 0 for bit in bits: value *= 2 value += bit return value def str2hex(value, prefix="", glue=u"", format="%02X"): r""" Convert binary string in hexadecimal (base 16). >>> str2hex("ABC") u'414243' >>> str2hex("\xF0\xAF", glue=" ") u'F0 AF' >>> str2hex("ABC", prefix="0x") u'0x414243' >>> str2hex("ABC", format=r"\x%02X") u'\\x41\\x42\\x43' """ if isinstance(glue, str): glue = unicode(glue) if 0 < len(prefix): text = [prefix] else: text = [] for character in value: text.append(format % ord(character)) return glue.join(text) def countBits(value): """ Count number of bits needed to store a (positive) integer number. >>> countBits(0) 1 >>> countBits(1000) 10 >>> countBits(44100) 16 >>> countBits(18446744073709551615) 64 """ assert 0 <= value count = 1 bits = 1 while (1 << bits) <= value: count += bits value >>= bits bits <<= 1 while 2 <= value: if bits != 1: bits >>= 1 else: bits -= 1 while (1 << bits) <= value: count += bits value >>= bits return count def byte2bin(number, classic_mode=True): """ Convert a byte (integer in 0..255 range) to a binary string. If classic_mode is true (default value), reverse bits. >>> byte2bin(10) '00001010' >>> byte2bin(10, False) '01010000' """ text = "" for i in range(0, 8): if classic_mode: mask = 1 << (7-i) else: mask = 1 << i if (number & mask) == mask: text += "1" else: text += "0" return text def long2raw(value, endian, size=None): r""" Convert a number (positive and not nul) to a raw string. If size is given, add nul bytes to fill to size bytes. >>> long2raw(0x1219, BIG_ENDIAN) '\x12\x19' >>> long2raw(0x1219, BIG_ENDIAN, 4) # 32 bits '\x00\x00\x12\x19' >>> long2raw(0x1219, LITTLE_ENDIAN, 4) # 32 bits '\x19\x12\x00\x00' """ assert (not size and 0 < value) or (0 <= value) assert endian in (LITTLE_ENDIAN, BIG_ENDIAN, MIDDLE_ENDIAN) text = [] while (value != 0 or text == ""): byte = value % 256 text.append( chr(byte) ) value >>= 8 if size: need = max(size - len(text), 0) else: need = 0 if need: if endian is LITTLE_ENDIAN: text = chain(text, repeat("\0", need)) else: text = chain(repeat("\0", need), reversed(text)) else: if endian is not LITTLE_ENDIAN: text = reversed(text) if endian is MIDDLE_ENDIAN: text = arrswapmid(text) return "".join(text) def long2bin(size, value, endian, classic_mode=False): """ Convert a number into bits (in a string): - size: size in bits of the number - value: positive (or nul) number - endian: BIG_ENDIAN (most important bit first) or LITTLE_ENDIAN (least important bit first) - classic_mode (default: False): reverse each packet of 8 bits >>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN) '10100000 10010000' >>> long2bin(16, 1+4 + (1+8)*256, BIG_ENDIAN, True) '00000101 00001001' >>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN) '00001001 00000101' >>> long2bin(16, 1+4 + (1+8)*256, LITTLE_ENDIAN, True) '10010000 10100000' """ text = "" assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) assert 0 <= value for index in xrange(size): if (value & 1) == 1: text += "1" else: text += "0" value >>= 1 if endian is LITTLE_ENDIAN: text = text[::-1] result = "" while len(text) != 0: if len(result) != 0: result += " " if classic_mode: result += text[7::-1] else: result += text[:8] text = text[8:] return result def str2bin(value, classic_mode=True): r""" Convert binary string to binary numbers. If classic_mode is true (default value), reverse bits. >>> str2bin("\x03\xFF") '00000011 11111111' >>> str2bin("\x03\xFF", False) '11000000 11111111' """ text = "" for character in value: if text != "": text += " " byte = ord(character) text += byte2bin(byte, classic_mode) return text def _createStructFormat(): """ Create a dictionnary (endian, size_byte) => struct format used by str2long() to convert raw data to positive integer. """ format = { BIG_ENDIAN: {}, LITTLE_ENDIAN: {}, } for struct_format in "BHILQ": try: size = calcsize(struct_format) format[BIG_ENDIAN][size] = '>%s' % struct_format format[LITTLE_ENDIAN][size] = '<%s' % struct_format except struct_error: pass return format _struct_format = _createStructFormat() def str2long(data, endian): r""" Convert a raw data (type 'str') into a long integer. >>> chr(str2long('*', BIG_ENDIAN)) '*' >>> str2long("\x00\x01\x02\x03", BIG_ENDIAN) == 0x10203 True >>> str2long("\x2a\x10", LITTLE_ENDIAN) == 0x102a True >>> str2long("\xff\x14\x2a\x10", BIG_ENDIAN) == 0xff142a10 True >>> str2long("\x00\x01\x02\x03", LITTLE_ENDIAN) == 0x3020100 True >>> str2long("\xff\x14\x2a\x10\xab\x00\xd9\x0e", BIG_ENDIAN) == 0xff142a10ab00d90e True >>> str2long("\xff\xff\xff\xff\xff\xff\xff\xff", BIG_ENDIAN) == (2**64-1) True >>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d True """ assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits try: return unpack(_struct_format[endian][len(data)], data)[0] except KeyError: pass assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) shift = 0 value = 0 if endian is BIG_ENDIAN: data = reversed(data) elif endian is MIDDLE_ENDIAN: data = reversed(strswapmid(data)) for character in data: byte = ord(character) value += (byte << shift) shift += 8 return value