mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-01 00:43:37 +00:00
980e05cc99
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014). Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/* Remove metadata/qt* PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5). Basic Support for XMP Packets. tga: improvements to adhere more closely to the spec. pdf: slightly improved parsing. rar: fix TypeError on unknown block types. Add MacRoman win32 codepage. tiff/exif: support SubIFDs and tiled images. Add method to export metadata in dictionary. mpeg_video: don't attempt to parse Stream past length. mpeg_video: parse ESCR correctly, add SCR value. Change centralise CustomFragments. field: don't set parser class if class is None, to enable autodetect. field: add value/display for CustomFragment. parser: inline warning to enable tracebacks in debug mode. Fix empty bytestrings in makePrintable. Fix contentSize in jpeg.py to account for image_data blocks. Fix the ELF parser. Enhance the AR archive parser. elf parser: fix wrong wrong fields order in parsing little endian section flags. elf parser: add s390 as a machine type. Flesh out mp4 parser. PORT: Version 2.0a1 (inline with 3.0a1). Major refactoring and PEP8. Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams. metadata: get comment from ZIP. Support for InputIOStream.read(0). Fix sizeGe when size is None. Remove unused new_seekable_field_set file. Remove parser Mapsforge .map. Remove parser Parallel Realities Starfighter .pak files. sevenzip: fix for newer archives. java: update access flags and modifiers for Java 1.7 and update description text for most recent Java. Support ustar prefix field in tar archives. Remove file_system* parsers. Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*. Remove program parsers macho, nds, prc. Support non-8bit Character subclasses. Python parser supports Python 3.7. 
Enhance mpeg_ts parser to support MTS/M2TS. Support for creation date in tiff. Change don't hardcode errno constant. PORT: 1.9.1 Internal Only: The following are legacy reference to upstream commit messages. Relevant changes up to b0a115f8. Use integer division. Replace HACHOIR_ERRORS with Exception. Fix metadata.Data: make it sortable. Import fixes from e7de492. PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad). Replace hachoir.core.field with hachoir.field Replace hachoir.core.stream with hachoir.stream Remove the compatibility module for PY1.5 to PY2.5. metadata: support TIFF picture. metadata: fix string normalization. metadata: fix datetime regex Fix hachoir bug #57. FileFromInputStream: fix comparison between None and an int. InputIOStream: open the file in binary mode.
225 lines
6.1 KiB
Python
# -*- coding: UTF-8 -*-
"""
Functions to manage internationalisation (i18n):

- initLocale(): set up locales and install Unicode-compatible stdout and
  stderr;
- getTerminalCharset(): guess the terminal charset;
- gettext(text): translate a string to the current language. The function
  always returns a Unicode string. You can also use the alias: _();
- ngettext(singular, plural, count): translate a sentence with singular and
  plural forms. The function always returns a Unicode string.

WARNING: Loading this module indirectly calls initLocale(), which sets
locale LC_ALL to ''. This is needed to get the user's preferred locale
settings.
"""

import locale
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
from os import path

import hachoir.core
import hachoir.core.config as config


def _getTerminalCharset():
|
|
"""
|
|
Function used by getTerminalCharset() to get terminal charset.
|
|
|
|
@see getTerminalCharset()
|
|
"""
|
|
# (1) Try locale.getpreferredencoding()
|
|
try:
|
|
charset = locale.getpreferredencoding()
|
|
if charset:
|
|
return charset
|
|
except (locale.Error, AttributeError):
|
|
pass
|
|
|
|
# (2) Try locale.nl_langinfo(CODESET)
|
|
try:
|
|
charset = locale.nl_langinfo(locale.CODESET)
|
|
if charset:
|
|
return charset
|
|
except (locale.Error, AttributeError):
|
|
pass
|
|
|
|
# (3) Try sys.stdout.encoding
|
|
if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:
|
|
return sys.stdout.encoding
|
|
|
|
# (4) Otherwise, returns "ASCII"
|
|
return "ASCII"
|
|
|
|
|
|
def getTerminalCharset():
    """
    Guess terminal charset using differents tests:
    1. Try locale.getpreferredencoding()
    2. Try locale.nl_langinfo(CODESET)
    3. Try sys.stdout.encoding
    4. Otherwise, returns "ASCII"

    WARNING: Call initLocale() before calling this function.
    """
    # The probe is done once; the result is memoized as a function
    # attribute so later calls are just a lookup.
    if not hasattr(getTerminalCharset, "value"):
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value


class UnicodeStdout(object):
    """
    Wrapper around a byte-oriented output stream: unicode text written to
    it is first encoded to *charset* (with 'replace' error handling),
    byte strings pass through untouched.
    """

    def __init__(self, old_device, charset):
        # The wrapped stream and the charset used to encode unicode text.
        self.device = old_device
        self.charset = charset

    def flush(self):
        # Delegate flushing to the wrapped stream.
        self.device.flush()

    def write(self, text):
        # Encode unicode (Python 2 type) to the terminal charset before
        # handing it to the underlying byte stream.
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        # Route every line through write() so each one gets the same
        # encoding treatment.
        for line in lines:
            self.write(line)


def initLocale():
    """
    Initialise locale settings (LC_ALL from the user's environment) and,
    unless disabled or conflicting with readline, wrap sys.stdout and
    sys.stderr with UnicodeStdout.

    Returns the terminal charset. Only the first call performs the setup;
    later calls just return the charset again.
    """
    if initLocale.is_done:
        return getTerminalCharset()
    initLocale.is_done = True

    # Honour the user's environment (LANG, LC_*); ignore setup failures.
    try:
        locale.setlocale(locale.LC_ALL, "")
    except (locale.Error, IOError):
        pass

    charset = getTerminalCharset()

    # The readline module interacts with stdout directly, so the Unicode
    # wrapper conflicts with it.
    wrap_streams = config.unicode_stdout and ('readline' not in sys.modules)
    if wrap_streams:
        # Replace stdout and stderr by objects supporting unicode strings
        sys.stdout = UnicodeStdout(sys.stdout, charset)
        sys.stderr = UnicodeStdout(sys.stderr, charset)
    return charset


# Tracks whether initLocale() has already done its one-time setup.
initLocale.is_done = False


def _dummy_gettext(text):
    """Fallback for gettext() when i18n is disabled or unavailable:
    return the text untranslated, converted to unicode."""
    return unicode(text)


def _dummy_ngettext(singular, plural, count):
    """Fallback for ngettext() when i18n is disabled or unavailable:
    pick the singular or plural form, untranslated, as unicode."""
    # Singular only for a count of exactly one (or minus one);
    # zero and larger magnitudes use the plural form.
    if count and abs(count) <= 1:
        return unicode(singular)
    return unicode(plural)


def _initGettext():
    """
    Build the (gettext, ngettext) pair of translation functions.

    When i18n is disabled in the configuration or the gettext module is
    unavailable, returns the dummy pass-through functions; otherwise
    returns wrappers that decode gettext's byte strings to unicode using
    the terminal charset.
    """
    charset = initLocale()

    # Load the gettext module only when i18n is enabled.
    have_gettext = False
    if config.use_i18n:
        try:
            import gettext
            have_gettext = True
        except ImportError:
            pass

    # gettext is not available or not needed: use dummy gettext functions
    if not have_gettext:
        return (_dummy_gettext, _dummy_ngettext)

    # Bind the 'hachoir' domain to the locale directory shipped next to
    # the package.
    package = 'hachoir'
    locale_dir = path.join(path.dirname(__file__), "..", "locale")
    gettext.bindtextdomain(package, locale_dir)
    gettext.textdomain(package)
    translate = gettext.gettext
    ngettext = gettext.ngettext

    # Wrappers decoding gettext's byte-string results to unicode.
    def unicode_gettext(text):
        return unicode(translate(text), charset)

    def unicode_ngettext(singular, plural, count):
        return unicode(ngettext(singular, plural, count), charset)

    return (unicode_gettext, unicode_ngettext)


# Byte-order marks mapped to the charset they announce.
UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)

# Set of valid characters for specific charset
# (characteristic non-ASCII bytes used by guessBytesCharset()).
CHARSET_CHARACTERS = (
    # U+00E0: LATIN SMALL LETTER A WITH GRAVE
    (set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),
    (set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),
    (set(u"©®".encode("MacRoman")), "MacRoman"),
    (set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)


def guessBytesCharset(bytes, default=None):
    r"""
    Guess the charset of a byte string: BOM first, then strict decoding
    attempts, then comparison of its non-ASCII bytes against known
    charset alphabets; *default* when nothing matches.

    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # A UTF byte-order mark pins the charset immediately.
    for marker, charset in UTF_BOMS:
        if bytes.startswith(marker):
            return charset

    # Try strict decodings, most restrictive first: a clean decode means
    # the codec name is the answer.
    for codec in ('ASCII', 'UTF-8'):
        try:
            unicode(bytes, codec, 'strict')
        except UnicodeDecodeError:
            continue
        return codec

    # Compare the set of non-ASCII bytes against known charset alphabets.
    high_bytes = set(octet for octet in bytes if ord(octet) >= 128)
    for alphabet, charset in CHARSET_CHARACTERS:
        if alphabet.issuperset(high_bytes):
            return charset
    return default


# Initialize _(), gettext() and ngettext() functions at import time.
# This indirectly calls initLocale() — see the module docstring warning.
gettext, ngettext = _initGettext()
# Conventional short alias for gettext().
_ = gettext