SickGear/lib/hachoir/core/i18n.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong wrong fields order in parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

225 lines
6.1 KiB
Python

# -*- coding: UTF-8 -*-
"""
Functions to manage internationalisation (i18n):
- initLocale(): setup locales and install Unicode compatible stdout and
stderr ;
- getTerminalCharset(): guess terminal charset ;
- gettext(text) translate a string to current language. The function always
returns Unicode string. You can also use the alias: _() ;
- ngettext(singular, plural, count): translate a sentence with singular and
plural form. The function always returns Unicode string.
WARNING: Loading this module indirectly calls initLocale() which sets
locale LC_ALL to ''. This is needed to get user preferred locale
settings.
"""
import hachoir.core.config as config
import hachoir.core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
def _getTerminalCharset():
"""
Function used by getTerminalCharset() to get terminal charset.
@see getTerminalCharset()
"""
# (1) Try locale.getpreferredencoding()
try:
charset = locale.getpreferredencoding()
if charset:
return charset
except (locale.Error, AttributeError):
pass
# (2) Try locale.nl_langinfo(CODESET)
try:
charset = locale.nl_langinfo(locale.CODESET)
if charset:
return charset
except (locale.Error, AttributeError):
pass
# (3) Try sys.stdout.encoding
if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:
return sys.stdout.encoding
# (4) Otherwise, returns "ASCII"
return "ASCII"
def getTerminalCharset():
"""
Guess terminal charset using differents tests:
1. Try locale.getpreferredencoding()
2. Try locale.nl_langinfo(CODESET)
3. Try sys.stdout.encoding
4. Otherwise, returns "ASCII"
WARNING: Call initLocale() before calling this function.
"""
try:
return getTerminalCharset.value
except AttributeError:
getTerminalCharset.value = _getTerminalCharset()
return getTerminalCharset.value
class UnicodeStdout(object):
def __init__(self, old_device, charset):
self.device = old_device
self.charset = charset
def flush(self):
self.device.flush()
def write(self, text):
if isinstance(text, unicode):
text = text.encode(self.charset, 'replace')
self.device.write(text)
def writelines(self, lines):
for text in lines:
self.write(text)
def initLocale():
# Only initialize locale once
if initLocale.is_done:
return getTerminalCharset()
initLocale.is_done = True
# Setup locales
try:
locale.setlocale(locale.LC_ALL, "")
except (locale.Error, IOError):
pass
# Get the terminal charset
charset = getTerminalCharset()
# UnicodeStdout conflicts with the readline module
if config.unicode_stdout and ('readline' not in sys.modules):
# Replace stdout and stderr by unicode objet supporting unicode string
sys.stdout = UnicodeStdout(sys.stdout, charset)
sys.stderr = UnicodeStdout(sys.stderr, charset)
return charset
initLocale.is_done = False
def _dummy_gettext(text):
return unicode(text)
def _dummy_ngettext(singular, plural, count):
if 1 < abs(count) or not count:
return unicode(plural)
else:
return unicode(singular)
def _initGettext():
charset = initLocale()
# Try to load gettext module
if config.use_i18n:
try:
import gettext
ok = True
except ImportError:
ok = False
else:
ok = False
# gettext is not available or not needed: use dummy gettext functions
if not ok:
return (_dummy_gettext, _dummy_ngettext)
# Gettext variables
package = 'hachoir'
locale_dir = path.join(path.dirname(__file__), "..", "locale")
# Initialize gettext module
gettext.bindtextdomain(package, locale_dir)
gettext.textdomain(package)
translate = gettext.gettext
ngettext = gettext.ngettext
# TODO: translate_unicode lambda function really sucks!
# => find native function to do that
unicode_gettext = lambda text: \
unicode(translate(text), charset)
unicode_ngettext = lambda singular, plural, count: \
unicode(ngettext(singular, plural, count), charset)
return (unicode_gettext, unicode_ngettext)
UTF_BOMS = (
(BOM_UTF8, "UTF-8"),
(BOM_UTF16_LE, "UTF-16-LE"),
(BOM_UTF16_BE, "UTF-16-BE"),
)
# Set of valid characters for specific charset
CHARSET_CHARACTERS = (
# U+00E0: LATIN SMALL LETTER A WITH GRAVE
(set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),
(set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),
(set(u"©®".encode("MacRoman")), "MacRoman"),
(set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)
def guessBytesCharset(bytes, default=None):
r"""
>>> guessBytesCharset("abc")
'ASCII'
>>> guessBytesCharset("\xEF\xBB\xBFabc")
'UTF-8'
>>> guessBytesCharset("abc\xC3\xA9")
'UTF-8'
>>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
'MacRoman'
>>> guessBytesCharset("\xE9l\xE9phant")
'ISO-8859-1'
>>> guessBytesCharset("100 \xA4")
'ISO-8859-15'
>>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
'ISO-8859-7'
"""
# Check for UTF BOM
for bom_bytes, charset in UTF_BOMS:
if bytes.startswith(bom_bytes):
return charset
# Pure ASCII?
try:
text = unicode(bytes, 'ASCII', 'strict')
return 'ASCII'
except UnicodeDecodeError:
pass
# Valid UTF-8?
try:
text = unicode(bytes, 'UTF-8', 'strict')
return 'UTF-8'
except UnicodeDecodeError:
pass
# Create a set of non-ASCII characters
non_ascii_set = set(byte for byte in bytes if ord(byte) >= 128)
for characters, charset in CHARSET_CHARACTERS:
if characters.issuperset(non_ascii_set):
return charset
return default
# Initialize _(), gettext() and ngettext() functions
gettext, ngettext = _initGettext()
_ = gettext