SickGear/lib/hachoir/core/tools.py

"""
Various utilities.
"""

import re
import stat
from datetime import datetime, timedelta, MAXYEAR
from warnings import warn


def deprecated(comment=None):
    """
    Decorator marking a function as deprecated: every call to the decorated
    function emits a DeprecationWarning and then runs the function normally.

    It can be applied bare or with an explanatory comment: ::

       @deprecated
       def oldfunc(): ...

       @deprecated("use newfunc()!")
       def oldfunc2(): ...

    'comment' is either None, a string appended to the warning message, or
    (bare form) the function being decorated.

    Code from: http://code.activestate.com/recipes/391367/
    """
    # Imported locally so the block does not depend on a module-level import.
    from functools import wraps

    def _deprecated(func):
        @wraps(func)
        def newFunc(*args, **kwargs):
            message = "Call to deprecated function %s" % func.__name__
            if comment:
                message += ": " + comment
            # stacklevel=2 attributes the warning to the caller's line
            warn(message, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return newFunc

    if callable(comment):
        # Bare "@deprecated": the decorated function arrives as 'comment'.
        func, comment = comment, None
        return _deprecated(func)
    return _deprecated


def paddingSize(value, align):
    """
    Return how much must be added to 'value' to reach the next multiple
    of 'align' (0 when 'value' is already a multiple).

    >>> paddingSize(31, 4)
    1
    >>> paddingSize(32, 4)
    0
    >>> paddingSize(33, 4)
    3

    Note: (value + paddingSize(value, align)) == alignValue(value, align)
    """
    remainder = value % align
    return align - remainder if remainder else 0


def alignValue(value, align):
    """
    Round 'value' up to the nearest multiple of 'align'; a value that is
    already a multiple is returned unchanged.

    >>> alignValue(31, 4)
    32
    >>> alignValue(32, 4)
    32
    >>> alignValue(33, 4)
    36

    Note: alignValue(value, align) == (value + paddingSize(value, align))
    """
    remainder = value % align
    if not remainder:
        return value
    return value + align - remainder


def timedelta2seconds(delta):
    """
    Convert a datetime.timedelta() object to a number of seconds
    (floating point number).

    >>> timedelta2seconds(timedelta(seconds=2, microseconds=40000))
    2.04
    >>> timedelta2seconds(timedelta(minutes=1, milliseconds=250))
    60.25
    """
    fraction = delta.microseconds / 1000000.0
    day_seconds = delta.days * 60 * 60 * 24
    return fraction + delta.seconds + day_seconds


def humanDurationNanosec(nsec):
    """
    Format a duration given in nanoseconds as human readable text.

    Values below one microsecond, one millisecond and one second are shown
    in nsec, usec and ms respectively; anything longer is handed over to
    humanDuration() as a whole number of milliseconds.

    >>> humanDurationNanosec(60417893)
    '60.42 ms'
    """
    # Below one microsecond: plain nanoseconds
    if nsec < 1000:
        return "%u nsec" % nsec

    # Below one millisecond: fractional microseconds
    microseconds, nano_rest = divmod(nsec, 1000)
    if microseconds < 1000:
        return "%.2f usec" % (microseconds + float(nano_rest) / 1000)

    # Below one second: fractional milliseconds
    milliseconds, micro_rest = divmod(microseconds, 1000)
    if milliseconds < 1000:
        return "%.2f ms" % (milliseconds + float(micro_rest) / 1000)

    # One second or more
    return humanDuration(milliseconds)


def humanDuration(delta):
    """
    Format a duration as human readable text.

    'delta' is either a datetime.timedelta or a number of milliseconds.
    Only the three most significant non-zero units are kept, written from
    the largest to the smallest; an empty duration gives '0 ms'.

    >>> humanDuration(0)
    '0 ms'
    >>> humanDuration(213)
    '213 ms'
    >>> humanDuration(4213)
    '4 sec 213 ms'
    >>> humanDuration(6402309)
    '1 hours 46 min 42 sec'
    """
    if not isinstance(delta, timedelta):
        delta = timedelta(microseconds=delta * 1000)

    # Split the timedelta into units (a year is counted as 365 days)
    years, days = divmod(delta.days, 365)
    total_minutes, seconds = divmod(delta.seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    milliseconds = delta.microseconds // 1000

    # Largest unit first; units whose amount is zero are dropped
    units = (
        ("%u years", years),
        ("%u days", days),
        ("%u hours", hours),
        ("%u min", minutes),
        ("%u sec", seconds),
        ("%u ms", milliseconds),
    )
    parts = [template % amount for template, amount in units if amount]
    if not parts:
        return "0 ms"
    return " ".join(parts[:3])


def humanFilesize(size):
    """
    Format a file size given in bytes as human readable text.

    Sizes below 10000 are written in bytes. Larger sizes are scaled by
    powers of 1024 (1 KB is 1024 bytes, 1 MB is 1024 KB, etc.) and shown
    with one decimal; anything that does not fit below 1024 TB is written
    as a whole number of TB.

    >>> humanFilesize(1)
    '1 bytes'
    >>> humanFilesize(790)
    '790 bytes'
    >>> humanFilesize(256960)
    '250.9 KB'
    """
    if size < 10000:
        return "%u bytes" % size
    amount = float(size)
    for suffix in ("KB", "MB", "GB", "TB"):
        amount /= 1024
        if amount < 1024:
            return "%.1f %s" % (amount, suffix)
    # Larger than every unit: stay on the last one, without decimals
    return "%u %s" % (amount, suffix)


def humanBitSize(size):
    """
    Format a size given in bits as human readable text.

    Sizes below 1000 are written in bits. Larger sizes are scaled by powers
    of 1000 (1 Kbit is 1000 bits, 1 Mbit is 1000 Kbit, etc.) and shown with
    one decimal; anything that does not fit below 1000 Tbit is written as a
    whole number of Tbit.

    >>> humanBitSize(1)
    '1 bits'
    >>> humanBitSize(790)
    '790 bits'
    >>> humanBitSize(256960)
    '257.0 Kbit'
    """
    if size < 1000:
        return "%u bits" % size
    amount = float(size)
    for suffix in ("Kbit", "Mbit", "Gbit", "Tbit"):
        amount /= 1000
        if amount < 1000:
            return "%.1f %s" % (amount, suffix)
    # Larger than every unit: stay on the last one, without decimals
    return "%u %s" % (amount, suffix)


def humanBitRate(size):
    """
    Format a bit rate as human readable text: the size is rendered by
    humanBitSize() and suffixed with "/sec".

    >>> humanBitRate(790)
    '790 bits/sec'
    >>> humanBitRate(256960)
    '257.0 Kbit/sec'
    """
    return humanBitSize(size) + "/sec"


def humanFrequency(hertz):
    """
    Format a frequency given in hertz as human readable text.

    Frequencies below 1000 are written in Hz. Larger ones are scaled by
    powers of 1000 (1 kHz is 1000 Hz, 1 MHz is 1000 kHz, etc.) and shown
    with one decimal; anything that does not fit below 1000 THz is written
    as the unrounded float value in THz.

    >>> humanFrequency(790)
    '790 Hz'
    >>> humanFrequency(629469)
    '629.5 kHz'
    """
    if hertz < 1000:
        return "%u Hz" % hertz
    amount = float(hertz)
    for suffix in ("kHz", "MHz", "GHz", "THz"):
        amount /= 1000
        if amount < 1000:
            return "%.1f %s" % (amount, suffix)
    # Larger than every unit: stay on the last one
    return "%s %s" % (amount, suffix)


# Matches one ASCII control character (0x00-0x1f) or DEL (0x7f)
regex_control_code = re.compile(r"([\x00-\x1f\x7f])")

# Printable escape sequence for each of the 128 ASCII codes, indexed by code:
# a short named escape where one is listed below, "\xNN" otherwise.
controlchars = tuple(map(
    {
        # Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character)
        # Same reason to not use octal syntax ("\1")
        ord("\n"): r"\n",
        ord("\r"): r"\r",
        ord("\t"): r"\t",
        ord("\a"): r"\a",
        ord("\b"): r"\b",
    }.get,
    range(128),
    ('\\x%02x' % code for code in range(128)),
))


def makePrintable(data, charset, quote=None, smart=True):
    r"""
    Return 'data' as a text string that is safe to print in 'charset'.

    Control characters are replaced with their escape sequence. Bytes input
    is decoded as ISO-8859-1 and always rendered with the ASCII charset, so
    every byte above 127 is backslash-escaped; for text input, characters
    that 'charset' cannot encode are backslash-escaped.

    If 'quote' is set, the result is wrapped in it (occurrences of a ' or "
    quote character inside the text are escaped) and empty data is shown as
    '(empty)'. If 'smart' is true, escapes such as \x00 and \x01 are
    shortened to \0 and \1.

    Examples with Unicode:
    >>> aged = "âgé"
    >>> repr(aged)  # text type is 'unicode'
    "'âgé'"
    >>> makePrintable(b"abc\0", "UTF-8")
    'abc\\0'
    >>> makePrintable(aged, "latin1")
    '\xe2g\xe9'
    >>> makePrintable(aged, "latin1", quote='"')
    '"\xe2g\xe9"'

    Examples with string encoded in latin1:
    >>> aged_latin = "âgé".encode("latin1")
    >>> repr(aged_latin)  # text type is 'bytes'
    "b'\\xe2g\\xe9'"
    >>> makePrintable(aged_latin, "latin1")
    '\\xe2g\\xe9'
    >>> makePrintable("", "latin1")
    ''
    >>> makePrintable("a", "latin1", quote='"')
    '"a"'
    >>> makePrintable("", "latin1", quote='"')
    '(empty)'
    >>> makePrintable("abc", "latin1", quote="'")
    "'abc'"

    Control codes:
    >>> makePrintable("\0\x03\x0a\x10 \x7f", "latin1")
    '\\0\\3\\n\\x10 \\x7f'

    Quote character may also be escaped (only ' and "):
    >>> print(makePrintable("a\"b", "latin-1", quote='"'))
    "a\"b"
    >>> print(makePrintable("a\"b", "latin-1", quote="'"))
    'a"b'
    >>> print(makePrintable("a'b", "latin-1", quote="'"))
    'a\'b'
    """
    if not data:
        text = "(empty)" if quote else ""
    else:
        if isinstance(data, str):
            text = data
        else:
            # Raw bytes: one character per byte, rendered as pure ASCII
            text = str(data, "ISO-8859-1")
            charset = "ASCII"
        text = regex_control_code.sub(
            lambda match: controlchars[ord(match.group(1))], text)
        if quote:
            if quote in "\"'":
                text = text.replace(quote, '\\' + quote)
            text = quote + text + quote

    # Anything the charset cannot represent becomes a backslash escape
    encoded = text.encode(charset, "backslashreplace")
    if smart:
        # Replace \x00\x01 by \0\1
        encoded = re.sub(br"\\x0([0-7])(?=[^0-7]|$)", br"\\\1", encoded)
    return str(encoded, charset)


def makeUnicode(text):
    r"""
    Convert 'text' to a printable Unicode string.

    Byte strings (type 'bytes') are decoded with charset ISO-8859-1. Any
    other non-text object is converted with str(), falling back to repr()
    if str() raises UnicodeError. Control characters in the result are
    replaced with their escape sequence, and escapes such as \x00 are
    shortened to \0.

    >>> makeUnicode('abc\0d')
    'abc\\0d'
    >>> makeUnicode('a\xe9')
    'a\xe9'
    """
    if isinstance(text, bytes):
        text = str(text, "ISO-8859-1")
    elif not isinstance(text, str):
        try:
            text = str(text)
        except UnicodeError:
            # Retrying the same str() call cannot help: use repr() directly
            text = repr(text)
    text = regex_control_code.sub(
        lambda regs: controlchars[ord(regs.group(1))], text)
    # Replace \x00\x01 by \0\1
    text = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", text)
    return text


def binarySearch(seq, cmp_func):
    """
    Look for a value in the sorted sequence 'seq' using binary search.
    Returns the index of the value, or None if it is not found.

    'seq' has to be sorted in ascending order according to the
    comparison function.

    'cmp_func', prototype func(x), is called on candidate items and tells
    where to look next:
    - strictly positive result: search forward (higher indexes) ;
    - strictly negative result: search backward (lower indexes) ;
    - otherwise (zero): the item is the searched value.

    >>> # Search number 5 (search forward)
    ... binarySearch([0, 4, 5, 10], lambda x: 5-x)
    2
    >>> # Backward search
    ... binarySearch([10, 5, 4, 0], lambda x: x-5)
    1
    """
    low, high = 0, len(seq)
    while low < high:
        middle = (low + high) // 2
        order = cmp_func(seq[middle])
        if order > 0:
            low = middle + 1
        elif order < 0:
            high = middle
        else:
            return middle
    return None


def lowerBound(seq, cmp_func):
    """
    Binary search for a partition point of 'seq'.

    'cmp_func', prototype func(x), is called on items of 'seq'. When all
    items for which it returns a true value come before all items for which
    it returns a false value, the result is the index of the first "false"
    item (len(seq) if there is none).

    >>> lowerBound([1, 3, 5, 7], lambda x: x < 5)
    2
    >>> lowerBound([1, 3, 5, 7], lambda x: x < 100)
    4
    """
    low, high = 0, len(seq)
    while low < high:
        middle = low + ((high - low) >> 1)
        if cmp_func(seq[middle]):
            low = middle + 1
        else:
            high = middle
    return low


def _ftypelet(mode):
    """
    Return the one-letter file type shown in front of the permission bits:
    '-' for a regular file or a mode without any file type bits, '?' for
    an unrecognized type.
    """
    if not stat.S_IFMT(mode):
        return '-'
    type_letters = (
        (stat.S_ISREG, '-'),
        (stat.S_ISBLK, 'b'),
        (stat.S_ISCHR, 'c'),
        (stat.S_ISDIR, 'd'),
        (stat.S_ISFIFO, 'p'),
        (stat.S_ISLNK, 'l'),
        (stat.S_ISSOCK, 's'),
    )
    for is_type, letter in type_letters:
        if is_type(mode):
            return letter
    return '?'


def humanUnixAttributes(mode):
    """
    Convert Unix file attributes (or "file mode") to a string made of the
    symbolic form followed by the octal value in parentheses.

    Original source code:
    http://cvs.savannah.gnu.org/viewcvs/coreutils/lib/filemode.c?root=coreutils

    >>> humanUnixAttributes(0o644)
    '-rw-r--r-- (644)'
    >>> humanUnixAttributes(0o2755)
    '-rwxr-sr-x (2755)'
    """
    # File type, then the nine permission bits from highest to lowest
    letters = [_ftypelet(mode)]
    for offset, letter in enumerate("rwxrwxrwx"):
        letters.append(letter if mode & (1 << (8 - offset)) else '-')

    # setuid / setgid / sticky replace the matching execute letter:
    # lower case when execute is set, upper case otherwise
    special_bits = (
        (stat.S_ISUID, 3, 's', 'S'),
        (stat.S_ISGID, 6, 's', 'S'),
        (stat.S_ISVTX, 9, 't', 'T'),
    )
    for flag, position, with_exec, without_exec in special_bits:
        if mode & flag:
            executable = letters[position] == 'x'
            letters[position] = with_exec if executable else without_exec
    return "%s (%o)" % (''.join(letters), mode)


def createDict(data, index):
    """
    Build a new dictionary from a dictionary key=>values: each key is kept
    and mapped to item number 'index' of its values.

    >>> data={10: ("dix", 100, "a"), 20: ("vingt", 200, "b")}
    >>> createDict(data, 0)
    {10: 'dix', 20: 'vingt'}
    >>> createDict(data, 2)
    {10: 'a', 20: 'b'}
    """
    return {key: row[index] for key, row in data.items()}


# Start of UNIX timestamp (Epoch): 1st January 1970 at 00:00
UNIX_TIMESTAMP_T0 = datetime(1970, 1, 1)


def timestampUNIX(value):
    """
    Convert a UNIX (32-bit) timestamp to a datetime object. The timestamp
    is the number of seconds since the 1st January 1970 at 00:00. Maximum
    value is 2147483647: 19 january 2038 at 03:14:07.

    Raises TypeError if value is neither an integer nor a float, and
    ValueError if it is outside 0..2147483647.

    >>> timestampUNIX(0)
    datetime.datetime(1970, 1, 1, 0, 0)
    >>> timestampUNIX(1154175644)
    datetime.datetime(2006, 7, 29, 12, 20, 44)
    >>> timestampUNIX(1154175644.37)
    datetime.datetime(2006, 7, 29, 12, 20, 44, 370000)
    >>> timestampUNIX(2147483647)
    datetime.datetime(2038, 1, 19, 3, 14, 7)
    """
    if not isinstance(value, (float, int)):
        raise TypeError("timestampUNIX(): an integer or float is required")
    if 0 <= value <= 2147483647:
        return UNIX_TIMESTAMP_T0 + timedelta(seconds=value)
    raise ValueError("timestampUNIX(): value have to be in 0..2147483647")


# Start of Macintosh timestamp: 1st January 1904 at 00:00
MAC_TIMESTAMP_T0 = datetime(1904, 1, 1)


def timestampMac32(value):
    """
    Convert a Mac (32-bit) timestamp to a datetime object. The timestamp
    is the number of seconds since the 1st January 1904 (to 2040).

    Raises TypeError if value is neither an integer nor a float. A value
    outside 0..4294967295 does not raise: an "invalid Mac timestamp (...)"
    string is returned instead of a datetime.

    >>> timestampMac32(0)
    datetime.datetime(1904, 1, 1, 0, 0)
    >>> timestampMac32(2843043290)
    datetime.datetime(1994, 2, 2, 14, 14, 50)
    """
    if not isinstance(value, (float, int)):
        raise TypeError("an integer or float is required")
    if 0 <= value <= 4294967295:
        return MAC_TIMESTAMP_T0 + timedelta(seconds=value)
    return "invalid Mac timestamp (%s)" % value


def durationWin64(value):
    """
    Convert a Windows 64-bit duration to a datetime.timedelta. The duration
    is a number of 100ns ticks. See also timestampWin64().

    The result is rounded to the nearest microsecond (ties go to the even
    microsecond). Integer values are converted with exact integer
    arithmetic, so large tick counts keep full microsecond precision.

    Raises TypeError if value is neither an integer nor a float, and
    ValueError if it is negative.

    >>> str(durationWin64(1072580000))
    '0:01:47.258000'
    >>> str(durationWin64(2146280000))
    '0:03:34.628000'
    """
    if not isinstance(value, (float, int)):
        raise TypeError("an integer or float is required")
    if value < 0:
        raise ValueError("value have to be a positive or nul integer")
    if isinstance(value, int):
        # "value / 10" would go through a float, which cannot represent
        # every integer above 2**53: divide exactly and round half to even,
        # the rounding timedelta() itself applies to float microseconds.
        microseconds, ticks = divmod(value, 10)
        if ticks > 5 or (ticks == 5 and microseconds % 2):
            microseconds += 1
        return timedelta(microseconds=microseconds)
    return timedelta(microseconds=value / 10)


def durationMillisWin64(value):
    """
    Convert a Windows 64-bit duration expressed as a number of milliseconds
    to a datetime.timedelta.

    Raises TypeError if value is neither an integer nor a float, and
    ValueError if it is negative.

    >>> str(durationMillisWin64(107258))
    '0:01:47.258000'
    >>> str(durationMillisWin64(214628))
    '0:03:34.628000'
    """
    if not isinstance(value, (float, int)):
        raise TypeError("an integer or float is required")
    if value < 0:
        raise ValueError("value have to be a positive or nul integer")
    microseconds = value * 1000
    return timedelta(microseconds=microseconds)


# Start of 64-bit Windows timestamp: 1st January 1601 at 00:00
WIN64_TIMESTAMP_T0 = datetime(1601, 1, 1, 0, 0, 0)


def timestampWin64(value):
    """
    Convert a Windows 64-bit timestamp to a datetime object. The timestamp
    is a 64-bit number which represents the number of 100ns since the
    1st January 1601 at 00:00. See also durationWin64(), which performs the
    conversion of the tick count (and the validation of value).

    Raises ValueError if the date cannot be represented (OverflowError
    during the conversion).

    >>> timestampWin64(0)
    datetime.datetime(1601, 1, 1, 0, 0)
    >>> timestampWin64(127840491566710000)
    datetime.datetime(2006, 2, 10, 12, 45, 56, 671000)
    """
    try:
        elapsed = durationWin64(value)
        moment = WIN64_TIMESTAMP_T0 + elapsed
    except OverflowError:
        message = "date newer than year %s (value=%s)" % (MAXYEAR, value)
        raise ValueError(message)
    return moment


# Start of 60-bit UUID timestamp: 15 October 1582 at 00:00
UUID60_TIMESTAMP_T0 = datetime(1582, 10, 15, 0, 0, 0)


def timestampUUID60(value):
    """
    Convert a UUID 60-bit timestamp to a datetime object. The timestamp is
    a 60-bit number which represents the number of 100ns since the
    15 October 1582 at 00:00.

    The result is rounded to the nearest microsecond (ties go to the even
    microsecond). Integer values are converted with exact integer
    arithmetic, so large tick counts keep full microsecond precision.

    Raises TypeError if value is neither an integer nor a float, and
    ValueError if it is negative or if the date cannot be represented.

    >>> timestampUUID60(0)
    datetime.datetime(1582, 10, 15, 0, 0)
    >>> timestampUUID60(130435676263032368)
    datetime.datetime(1996, 2, 14, 5, 13, 46, 303237)
    """
    if not isinstance(value, (float, int)):
        raise TypeError("an integer or float is required")
    if value < 0:
        raise ValueError("value have to be a positive or nul integer")
    if isinstance(value, int):
        # "value / 10" would go through a float, which cannot represent
        # every integer above 2**53: divide exactly and round half to even,
        # the rounding timedelta() itself applies to float microseconds.
        microseconds, ticks = divmod(value, 10)
        if ticks > 5 or (ticks == 5 and microseconds % 2):
            microseconds += 1
    else:
        microseconds = value / 10
    try:
        return UUID60_TIMESTAMP_T0 + timedelta(microseconds=microseconds)
    except OverflowError:
        raise ValueError("timestampUUID60() overflow (value=%s)" % value)


def humanDatetime(value, strip_microsecond=True):
    """
    Format a timestamp as text: ISO format with a space instead of the 'T'
    separator. Unless 'strip_microsecond' is false, everything from the
    first "." on is removed.

    >>> humanDatetime( datetime(2006, 7, 29, 12, 20, 44) )
    '2006-07-29 12:20:44'
    >>> humanDatetime( datetime(2003, 6, 30, 16, 0, 5, 370000) )
    '2003-06-30 16:00:05'
    >>> humanDatetime( datetime(2003, 6, 30, 16, 0, 5, 370000), False )
    '2003-06-30 16:00:05.370000'
    """
    text = str(value.isoformat()).replace('T', ' ')
    if strip_microsecond:
        # partition() leaves the text untouched when there is no "."
        text = text.partition(".")[0]
    return text


# One or more consecutive Unix newlines
NEWLINES_REGEX = re.compile("\n+")


def normalizeNewline(text):
    r"""
    Convert Windows ("\r\n") and Mac ("\r") newlines to Unix newlines
    ("\n"), then collapse each run of consecutive newlines into one.

    >>> normalizeNewline('a\r\nb')
    'a\nb'
    >>> normalizeNewline('a\r\rb')
    'a\nb'
    >>> normalizeNewline('a\n\nb')
    'a\nb'
    """
    # "\r\n" has to be handled first, otherwise it would count as two newlines
    unix_text = text.replace("\r\n", "\n").replace("\r", "\n")
    return NEWLINES_REGEX.sub("\n", unix_text)