mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-11 05:33:37 +00:00
0d9fbc1ad7
This version of SickBeard uses both TVDB and TVRage to search for and gather its series data, giving you access to shows that you could not download before because you were limited to what TheTVDB had to offer. This edition is also based on the code used in our XEM edition, so it comes with scene numbering support as well as all the other features of the XEM edition. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy!
214 lines
6.1 KiB
Python
214 lines
6.1 KiB
Python
# -*- coding: UTF-8 -*-
|
|
"""
|
|
Functions to manage internationalisation (i18n):
|
|
- initLocale(): setup locales and install Unicode compatible stdout and
|
|
stderr ;
|
|
- getTerminalCharset(): guess terminal charset ;
|
|
- gettext(text) translate a string to current language. The function always
|
|
returns Unicode string. You can also use the alias: _() ;
|
|
- ngettext(singular, plural, count): translate a sentence with singular and
|
|
plural form. The function always returns Unicode string.
|
|
|
|
WARNING: Loading this module indirectly calls initLocale() which sets
|
|
locale LC_ALL to ''. This is needed to get user preferred locale
|
|
settings.
|
|
"""
|
|
|
|
import lib.hachoir_core.config as config
|
|
import lib.hachoir_core
|
|
import locale
|
|
from os import path
|
|
import sys
|
|
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
|
|
|
|
def _getTerminalCharset():
|
|
"""
|
|
Function used by getTerminalCharset() to get terminal charset.
|
|
|
|
@see getTerminalCharset()
|
|
"""
|
|
# (1) Try locale.getpreferredencoding()
|
|
try:
|
|
charset = locale.getpreferredencoding()
|
|
if charset:
|
|
return charset
|
|
except (locale.Error, AttributeError):
|
|
pass
|
|
|
|
# (2) Try locale.nl_langinfo(CODESET)
|
|
try:
|
|
charset = locale.nl_langinfo(locale.CODESET)
|
|
if charset:
|
|
return charset
|
|
except (locale.Error, AttributeError):
|
|
pass
|
|
|
|
# (3) Try sys.stdout.encoding
|
|
if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:
|
|
return sys.stdout.encoding
|
|
|
|
# (4) Otherwise, returns "ASCII"
|
|
return "ASCII"
|
|
|
|
def getTerminalCharset():
    """
    Guess terminal charset using different tests:
     1. Try locale.getpreferredencoding()
     2. Try locale.nl_langinfo(CODESET)
     3. Try sys.stdout.encoding
     4. Otherwise, returns "ASCII"

    The guess is computed once by _getTerminalCharset() and then cached in
    the function attribute getTerminalCharset.value.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        # First call: run the real detection and remember its answer
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value
|
|
|
|
class UnicodeStdout(object):
    """
    File-like wrapper which encodes Unicode strings before writing them to
    the wrapped device.

    initLocale() installs instances of this class as sys.stdout and
    sys.stderr. Attributes that are not defined here (isatty(), fileno(),
    encoding, closed, ...) are forwarded to the wrapped device, so code
    expecting a regular stream object keeps working after the replacement.
    """

    def __init__(self, old_device, charset):
        """
        @param old_device: stream object receiving the encoded output
        @param charset: name of the charset used to encode Unicode strings
        """
        self.device = old_device
        self.charset = charset

    def __getattr__(self, name):
        """
        Forward unknown attributes to the wrapped device.

        Only called when the normal attribute lookup fails. The wrapper's
        own attributes are never forwarded: this avoids an infinite
        recursion if they are requested before __init__() has set them.
        """
        if name in ("device", "charset"):
            raise AttributeError(name)
        return getattr(self.device, name)

    def flush(self):
        """Flush the wrapped device."""
        self.device.flush()

    def write(self, text):
        """
        Write text to the wrapped device. Unicode strings are first encoded
        to self.charset; characters which cannot be encoded are replaced
        ('replace' error handler) instead of raising an error.
        """
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        """Write each item of lines using write()."""
        for text in lines:
            self.write(text)
|
|
|
|
def initLocale():
    """
    Set up the locale and, if enabled, Unicode-aware stdout and stderr.

    The setup is done by the first call only:
     - set locale LC_ALL to "" (failures are ignored);
     - if config.unicode_stdout is set and the readline module is not
       loaded, replace sys.stdout and sys.stderr by UnicodeStdout objects.

    Returns the terminal charset (see getTerminalCharset()), on the first
    call as well as on later ones.
    """
    # Only initialize locale once
    first_call = not initLocale.is_done
    initLocale.is_done = True

    if first_call:
        # Setup locales
        try:
            locale.setlocale(locale.LC_ALL, "")
        except (locale.Error, IOError):
            pass

    # Get the terminal charset
    charset = getTerminalCharset()

    # UnicodeStdout conflicts with the readline module
    if first_call and config.unicode_stdout and 'readline' not in sys.modules:
        # Replace stdout and stderr by objects supporting Unicode strings
        for stream_name in ("stdout", "stderr"):
            wrapped = UnicodeStdout(getattr(sys, stream_name), charset)
            setattr(sys, stream_name, wrapped)
    return charset

# Flag set by the first call to initLocale()
initLocale.is_done = False
|
|
|
|
def _dummy_gettext(text):
|
|
return unicode(text)
|
|
|
|
def _dummy_ngettext(singular, plural, count):
|
|
if 1 < abs(count) or not count:
|
|
return unicode(plural)
|
|
else:
|
|
return unicode(singular)
|
|
|
|
def _initGettext():
    """
    Build the translation functions used by this module.

    Calls initLocale() first. Returns a (gettext, ngettext) pair of
    functions which always return Unicode strings:
     - the dummy functions (_dummy_gettext, _dummy_ngettext) if
       config.use_i18n is false or if the gettext module cannot be
       imported;
     - otherwise wrappers around gettext.gettext() and gettext.ngettext()
       bound to the lib.hachoir_core.PACKAGE domain, with catalogs searched
       in the "locale" directory next to this module's parent directory.
    """
    charset = initLocale()

    # Try to load gettext module
    if config.use_i18n:
        try:
            import gettext
            ok = True
        except ImportError:
            ok = False
    else:
        ok = False

    # gettext is not available or not needed: use dummy gettext functions
    if not ok:
        return (_dummy_gettext, _dummy_ngettext)

    # Gettext variables
    package = lib.hachoir_core.PACKAGE
    locale_dir = path.join(path.dirname(__file__), "..", "locale")

    # Initialize gettext module
    gettext.bindtextdomain(package, locale_dir)
    gettext.textdomain(package)
    translate = gettext.gettext
    ngettext = gettext.ngettext

    def to_unicode(message):
        # gettext hands the message back unchanged when it has no
        # translation for it, so a Unicode message may come back as Unicode.
        # unicode(u"...", charset) would raise TypeError ("decoding Unicode
        # is not supported"): only byte strings are decoded.
        if isinstance(message, unicode):
            return message
        return unicode(message, charset)

    # TODO: find a native gettext function returning Unicode strings
    # instead of decoding the result with the terminal charset
    def unicode_gettext(text):
        return to_unicode(translate(text))

    def unicode_ngettext(singular, plural, count):
        return to_unicode(ngettext(singular, plural, count))

    return (unicode_gettext, unicode_ngettext)
|
|
|
|
# Byte order marks recognized by guessBytesCharset(), as (BOM, charset) pairs
UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)

# Set of valid characters for specific charset: (set of bytes, charset)
# pairs, each set holding the sample characters encoded to that charset.
# guessBytesCharset() tests the entries in this order.
CHARSET_CHARACTERS = tuple(
    (set(sample.encode(charset)), charset)
    for sample, charset in (
        # U+00E0: LATIN SMALL LETTER A WITH GRAVE
        (u"©®éêè\xE0ç", "ISO-8859-1"),
        (u"©®éêè\xE0ç€", "ISO-8859-15"),
        (u"©®", "MacRoman"),
        (u"εδηιθκμοΡσςυΈί", "ISO-8859-7"),
    )
)
|
|
|
|
def guessBytesCharset(bytes, default=None):
    r"""
    Guess the charset of a byte string. The tests are done in this order:
     1. a UTF byte order mark at the start (see UTF_BOMS);
     2. the string is pure ASCII;
     3. the string is valid UTF-8;
     4. all bytes >= 128 belong to the character set of one entry of
        CHARSET_CHARACTERS (first matching entry wins).

    Returns the charset name, or default if no test matches.

    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # Check for UTF BOM
    for marker, bom_charset in UTF_BOMS:
        if bytes.startswith(marker):
            return bom_charset

    # Pure ASCII, then valid UTF-8? The decoded text itself is not needed,
    # only whether the strict decoding succeeds.
    for strict_charset in ('ASCII', 'UTF-8'):
        try:
            unicode(bytes, strict_charset, 'strict')
        except UnicodeDecodeError:
            continue
        return strict_charset

    # Create a set of non-ASCII characters
    high_bytes = set()
    for byte in bytes:
        if ord(byte) >= 128:
            high_bytes.add(byte)

    # First charset whose known characters cover all of them
    for known_characters, known_charset in CHARSET_CHARACTERS:
        if high_bytes.issubset(known_characters):
            return known_charset
    return default
|
|
|
|
# Initialize _(), gettext() and ngettext() functions.
# This runs when the module is loaded: _initGettext() calls initLocale() and
# returns the (gettext, ngettext) pair of functions, either the real gettext
# wrappers or the dummy fallbacks. _() is an alias of gettext().
gettext, ngettext = _initGettext()
_ = gettext
|
|
|