Merge pull request #608 from JackDandy/feature/UpdateUnidecode

Update unidecode library 0.04.11 to 0.04.18 (fd57cbf).
This commit is contained in:
JackDandy 2016-01-12 02:36:48 +00:00
commit 81bd0ab4d9
16 changed files with 854 additions and 334 deletions

View file

@ -1,5 +1,6 @@
### 0.12.0 (2016-xx-xx xx:xx:xx UTC)
* Update unidecode library 0.04.11 to 0.04.18 (fd57cbf)
* Update xmltodict library 0.9.2 (579a005) to 0.9.2 (eac0031)

View file

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# vi:tabstop=4:expandtab:sw=4
"""Transliterate Unicode text into plain 7-bit ASCII.
Example usage:
@ -18,19 +19,53 @@ from sys import version_info
Cache = {}
def unidecode(string):
def _warn_if_not_unicode(string):
if version_info[0] < 3 and not isinstance(string, unicode):
warnings.warn( "Argument %r is not an unicode object. "
"Passing an encoded string will likely have "
"unexpected results." % (type(string),),
RuntimeWarning, 2)
def unidecode_expect_ascii(string):
"""Transliterate an Unicode object into an ASCII string
>>> unidecode(u"\u5317\u4EB0")
"Bei Jing "
This function first tries to convert the string using ASCII codec.
If it fails (because of non-ASCII characters), it falls back to
transliteration using the character tables.
This is approx. five times faster if the string only contains ASCII
characters, but slightly slower than using unidecode directly if non-ASCII
chars are present.
"""
_warn_if_not_unicode(string)
try:
bytestring = string.encode('ASCII')
except UnicodeEncodeError:
return _unidecode(string)
if version_info[0] >= 3:
return string
else:
return bytestring
def unidecode_expect_nonascii(string):
"""Transliterate an Unicode object into an ASCII string
>>> unidecode(u"\u5317\u4EB0")
"Bei Jing "
"""
if version_info[0] < 3 and not isinstance(string, unicode):
warnings.warn( "Argument %r is not an unicode object. "
"Passing an encoded string will likely have "
"unexpected results." % (type(string),),
RuntimeWarning, 2)
_warn_if_not_unicode(string)
return _unidecode(string)
unidecode = unidecode_expect_ascii
def _unidecode(string):
retval = []
for char in string:
@ -43,6 +78,11 @@ def unidecode(string):
if codepoint > 0xeffff:
continue # Characters in Private Use Area and above are ignored
if 0xd800 <= codepoint <= 0xdfff:
warnings.warn( "Surrogate character %r will be ignored. "
"You might be using a narrow Python build." % (char,),
RuntimeWarning, 2)
section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits
@ -50,7 +90,7 @@ def unidecode(string):
table = Cache[section]
except KeyError:
try:
mod = __import__('unidecode.x%03x'%(section), [], [], ['data'])
mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
except ImportError:
Cache[section] = None
continue # No match: ignore this character and carry on.

58
lib/unidecode/util.py Normal file
View file

@ -0,0 +1,58 @@
# vim:ts=4 sw=4 expandtab softtabstop=4
from __future__ import print_function
import optparse
import locale
import os
import sys
import warnings
from unidecode import unidecode
PY3 = sys.version_info[0] >= 3
def fatal(msg):
sys.stderr.write(msg + "\n")
sys.exit(1)
def main():
default_encoding = locale.getpreferredencoding()
parser = optparse.OptionParser('%prog [options] [FILE]',
description="Transliterate Unicode text into ASCII. FILE is path to file to transliterate. "
"Standard input is used if FILE is omitted and -c is not specified.")
parser.add_option('-e', '--encoding', metavar='ENCODING', default=default_encoding,
help='Specify an encoding (default is %s)' % (default_encoding,))
parser.add_option('-c', metavar='TEXT', dest='text',
help='Transliterate TEXT instead of FILE')
options, args = parser.parse_args()
encoding = options.encoding
if args:
if options.text:
fatal("Can't use both FILE and -c option")
else:
with open(args[0], 'rb') as f:
stream = f.read()
elif options.text:
if PY3:
stream = os.fsencode(options.text)
else:
stream = options.text
# add a newline to the string if it comes from the
# command line so that the result is printed nicely
# on the console.
stream += '\n'.encode('ascii')
else:
if PY3:
stream = sys.stdin.buffer.read()
else:
stream = sys.stdin.read()
try:
stream = stream.decode(encoding)
except UnicodeDecodeError as e:
fatal('Unable to decode input: %s, start: %d, end: %d' % (e.reason, e.start, e.end))
sys.stdout.write(unidecode(stream))

View file

@ -1,132 +1,15 @@
data = (
'\x00', # 0x00
'\x01', # 0x01
'\x02', # 0x02
'\x03', # 0x03
'\x04', # 0x04
'\x05', # 0x05
'\x06', # 0x06
'\x07', # 0x07
'\x08', # 0x08
'\x09', # 0x09
'\x0a', # 0x0a
'\x0b', # 0x0b
'\x0c', # 0x0c
'\x0d', # 0x0d
'\x0e', # 0x0e
'\x0f', # 0x0f
'\x10', # 0x10
'\x11', # 0x11
'\x12', # 0x12
'\x13', # 0x13
'\x14', # 0x14
'\x15', # 0x15
'\x16', # 0x16
'\x17', # 0x17
'\x18', # 0x18
'\x19', # 0x19
'\x1a', # 0x1a
'\x1b', # 0x1b
'\x1c', # 0x1c
'\x1d', # 0x1d
'\x1e', # 0x1e
'\x1f', # 0x1f
' ', # 0x20
'!', # 0x21
'"', # 0x22
'#', # 0x23
'$', # 0x24
'%', # 0x25
'&', # 0x26
'\'', # 0x27
'(', # 0x28
')', # 0x29
'*', # 0x2a
'+', # 0x2b
',', # 0x2c
'-', # 0x2d
'.', # 0x2e
'/', # 0x2f
'0', # 0x30
'1', # 0x31
'2', # 0x32
'3', # 0x33
'4', # 0x34
'5', # 0x35
'6', # 0x36
'7', # 0x37
'8', # 0x38
'9', # 0x39
':', # 0x3a
';', # 0x3b
'<', # 0x3c
'=', # 0x3d
'>', # 0x3e
'?', # 0x3f
'@', # 0x40
'A', # 0x41
'B', # 0x42
'C', # 0x43
'D', # 0x44
'E', # 0x45
'F', # 0x46
'G', # 0x47
'H', # 0x48
'I', # 0x49
'J', # 0x4a
'K', # 0x4b
'L', # 0x4c
'M', # 0x4d
'N', # 0x4e
'O', # 0x4f
'P', # 0x50
'Q', # 0x51
'R', # 0x52
'S', # 0x53
'T', # 0x54
'U', # 0x55
'V', # 0x56
'W', # 0x57
'X', # 0x58
'Y', # 0x59
'Z', # 0x5a
']', # 0x5b
'\\', # 0x5c
']', # 0x5d
'^', # 0x5e
'_', # 0x5f
'`', # 0x60
'a', # 0x61
'b', # 0x62
'c', # 0x63
'd', # 0x64
'e', # 0x65
'f', # 0x66
'g', # 0x67
'h', # 0x68
'i', # 0x69
'j', # 0x6a
'k', # 0x6b
'l', # 0x6c
'm', # 0x6d
'n', # 0x6e
'o', # 0x6f
'p', # 0x70
'q', # 0x71
'r', # 0x72
's', # 0x73
't', # 0x74
'u', # 0x75
'v', # 0x76
'w', # 0x77
'x', # 0x78
'y', # 0x79
'z', # 0x7a
'{', # 0x7b
'|', # 0x7c
'}', # 0x7d
'~', # 0x7e
'', # 0x7f
# Code points u+007f and below are equivalent to ASCII and are handled by a
# special case in the code. Hence they are not present in this table.
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', # 0x80
'', # 0x81
'', # 0x82
@ -162,7 +45,10 @@ data = (
' ', # 0xa0
'!', # 0xa1
'C/', # 0xa2
# Not "GBP" - Pound Sign is used for more than just British Pounds.
'PS', # 0xa3
'$?', # 0xa4
'Y=', # 0xa5
'|', # 0xa6
@ -177,8 +63,11 @@ data = (
'-', # 0xaf
'deg', # 0xb0
'+-', # 0xb1
# These might be combined with other superscript digits (u+2070 - u+2079)
'2', # 0xb2
'3', # 0xb3
'\'', # 0xb4
'u', # 0xb5
'P', # 0xb6
@ -195,7 +84,10 @@ data = (
'A', # 0xc1
'A', # 0xc2
'A', # 0xc3
# Not "AE" - used in languages other than German
'A', # 0xc4
'A', # 0xc5
'AE', # 0xc6
'C', # 0xc7
@ -213,13 +105,19 @@ data = (
'O', # 0xd3
'O', # 0xd4
'O', # 0xd5
# Not "OE" - used in languages other than German
'O', # 0xd6
'x', # 0xd7
'O', # 0xd8
'U', # 0xd9
'U', # 0xda
'U', # 0xdb
# Not "UE" - used in languages other than German
'U', # 0xdc
'Y', # 0xdd
'Th', # 0xde
'ss', # 0xdf
@ -227,7 +125,10 @@ data = (
'a', # 0xe1
'a', # 0xe2
'a', # 0xe3
# Not "ae" - used in languages other than German
'a', # 0xe4
'a', # 0xe5
'ae', # 0xe6
'c', # 0xe7
@ -245,13 +146,19 @@ data = (
'o', # 0xf3
'o', # 0xf4
'o', # 0xf5
# Not "oe" - used in languages other than German
'o', # 0xf6
'/', # 0xf7
'o', # 0xf8
'u', # 0xf9
'u', # 0xfa
'u', # 0xfb
# Not "ue" - used in languages other than German
'u', # 0xfc
'y', # 0xfd
'th', # 0xfe
'y', # 0xff

View file

@ -136,7 +136,7 @@ data = (
'f', # 0x86
'ew', # 0x87
'[?]', # 0x88
'.', # 0x89
':', # 0x89
'-', # 0x8a
'[?]', # 0x8b
'[?]', # 0x8c
@ -191,7 +191,7 @@ data = (
'', # 0xbd
'', # 0xbe
'', # 0xbf
'', # 0xc0
'|', # 0xc0
'', # 0xc1
'', # 0xc2
':', # 0xc3

View file

@ -70,23 +70,23 @@ data = (
'/', # 0x44
'-[', # 0x45
']-', # 0x46
'[?]', # 0x47
'??', # 0x47
'?!', # 0x48
'!?', # 0x49
'7', # 0x4a
'PP', # 0x4b
'(]', # 0x4c
'[)', # 0x4d
'[?]', # 0x4e
'*', # 0x4e
'[?]', # 0x4f
'[?]', # 0x50
'[?]', # 0x51
'[?]', # 0x52
'[?]', # 0x53
'%', # 0x52
'~', # 0x53
'[?]', # 0x54
'[?]', # 0x55
'[?]', # 0x56
'[?]', # 0x57
"''''", # 0x57
'[?]', # 0x58
'[?]', # 0x59
'[?]', # 0x5a
@ -95,7 +95,7 @@ data = (
'[?]', # 0x5d
'[?]', # 0x5e
'[?]', # 0x5f
'[?]', # 0x60
'', # 0x60
'[?]', # 0x61
'[?]', # 0x62
'[?]', # 0x63
@ -171,7 +171,7 @@ data = (
'W', # 0xa9
'NS', # 0xaa
'D', # 0xab
'EU', # 0xac
'EUR', # 0xac
'K', # 0xad
'T', # 0xae
'Dr', # 0xaf
@ -228,7 +228,7 @@ data = (
'', # 0xe2
'', # 0xe3
'[?]', # 0xe4
'[?]', # 0xe5
'', # 0xe5
'[?]', # 0xe6
'[?]', # 0xe7
'[?]', # 0xe8

View file

@ -1,7 +1,7 @@
data = (
'', # 0x00
'', # 0x01
'', # 0x02
'C', # 0x02
'', # 0x03
'', # 0x04
'', # 0x05
@ -12,7 +12,7 @@ data = (
'', # 0x0a
'', # 0x0b
'', # 0x0c
'', # 0x0d
'H', # 0x0d
'', # 0x0e
'', # 0x0f
'', # 0x10
@ -20,22 +20,22 @@ data = (
'', # 0x12
'', # 0x13
'', # 0x14
'', # 0x15
'N', # 0x15
'', # 0x16
'', # 0x17
'', # 0x18
'', # 0x19
'', # 0x1a
'P', # 0x19
'Q', # 0x1a
'', # 0x1b
'', # 0x1c
'', # 0x1d
'R', # 0x1d
'', # 0x1e
'', # 0x1f
'', # 0x20
'', # 0x21
'', # 0x22
'(sm)', # 0x20
'TEL', # 0x21
'(tm)', # 0x22
'', # 0x23
'', # 0x24
'Z', # 0x24
'', # 0x25
'', # 0x26
'', # 0x27
@ -45,12 +45,12 @@ data = (
'A', # 0x2b
'', # 0x2c
'', # 0x2d
'', # 0x2e
'', # 0x2f
'', # 0x30
'', # 0x31
'e', # 0x2e
'e', # 0x2f
'E', # 0x30
'F', # 0x31
'F', # 0x32
'', # 0x33
'M', # 0x33
'', # 0x34
'', # 0x35
'', # 0x36
@ -58,21 +58,21 @@ data = (
'', # 0x38
'', # 0x39
'', # 0x3a
'[?]', # 0x3b
'[?]', # 0x3c
'[?]', # 0x3d
'[?]', # 0x3e
'[?]', # 0x3f
'FAX', # 0x3b
'', # 0x3c
'', # 0x3d
'', # 0x3e
'', # 0x3f
'[?]', # 0x40
'[?]', # 0x41
'[?]', # 0x42
'[?]', # 0x43
'[?]', # 0x44
'[?]', # 0x45
'[?]', # 0x46
'[?]', # 0x47
'[?]', # 0x48
'[?]', # 0x49
'D', # 0x45
'd', # 0x46
'e', # 0x47
'i', # 0x48
'j', # 0x49
'[?]', # 0x4a
'[?]', # 0x4b
'[?]', # 0x4c

View file

@ -17,12 +17,12 @@ data = (
'[?]', # 0x0f
'[?]', # 0x10
'[?]', # 0x11
'[?]', # 0x12
'-', # 0x12
'[?]', # 0x13
'[?]', # 0x14
'[?]', # 0x15
'[?]', # 0x16
'[?]', # 0x17
'/', # 0x15
'\\', # 0x16
'*', # 0x17
'[?]', # 0x18
'[?]', # 0x19
'[?]', # 0x1a
@ -34,7 +34,7 @@ data = (
'[?]', # 0x20
'[?]', # 0x21
'[?]', # 0x22
'[?]', # 0x23
'|', # 0x23
'[?]', # 0x24
'[?]', # 0x25
'[?]', # 0x26
@ -53,13 +53,13 @@ data = (
'[?]', # 0x33
'[?]', # 0x34
'[?]', # 0x35
'[?]', # 0x36
':', # 0x36
'[?]', # 0x37
'[?]', # 0x38
'[?]', # 0x39
'[?]', # 0x3a
'[?]', # 0x3b
'[?]', # 0x3c
'~', # 0x3c
'[?]', # 0x3d
'[?]', # 0x3e
'[?]', # 0x3f
@ -99,10 +99,10 @@ data = (
'[?]', # 0x61
'[?]', # 0x62
'[?]', # 0x63
'[?]', # 0x64
'[?]', # 0x65
'[?]', # 0x66
'[?]', # 0x67
'<=', # 0x64
'>=', # 0x65
'<=', # 0x66
'>=', # 0x67
'[?]', # 0x68
'[?]', # 0x69
'[?]', # 0x6a

View file

@ -2,7 +2,7 @@ data = (
'[?]', # 0x00
'[?]', # 0x01
'[?]', # 0x02
'[?]', # 0x03
'^', # 0x03
'[?]', # 0x04
'[?]', # 0x05
'[?]', # 0x06
@ -40,8 +40,8 @@ data = (
'[?]', # 0x26
'[?]', # 0x27
'[?]', # 0x28
'[?]', # 0x29
'[?]', # 0x2a
'<', # 0x29
'> ', # 0x2a
'[?]', # 0x2b
'[?]', # 0x2c
'[?]', # 0x2d

View file

@ -95,118 +95,118 @@ data = (
'[?]', # 0x5d
'[?]', # 0x5e
'[?]', # 0x5f
'', # 0x60
'', # 0x61
'', # 0x62
'', # 0x63
'', # 0x64
'', # 0x65
'', # 0x66
'', # 0x67
'', # 0x68
'', # 0x69
'', # 0x6a
'', # 0x6b
'', # 0x6c
'', # 0x6d
'', # 0x6e
'', # 0x6f
'', # 0x70
'', # 0x71
'', # 0x72
'', # 0x73
'', # 0x74
'', # 0x75
'', # 0x76
'', # 0x77
'', # 0x78
'', # 0x79
'', # 0x7a
'', # 0x7b
'', # 0x7c
'', # 0x7d
'', # 0x7e
'', # 0x7f
'', # 0x80
'', # 0x81
'', # 0x82
'', # 0x83
'', # 0x84
'', # 0x85
'', # 0x86
'', # 0x87
'', # 0x88
'', # 0x89
'', # 0x8a
'', # 0x8b
'', # 0x8c
'', # 0x8d
'', # 0x8e
'', # 0x8f
'', # 0x90
'', # 0x91
'', # 0x92
'', # 0x93
'', # 0x94
'', # 0x95
'', # 0x96
'', # 0x97
'', # 0x98
'', # 0x99
'', # 0x9a
'', # 0x9b
'', # 0x9c
'', # 0x9d
'', # 0x9e
'', # 0x9f
'', # 0xa0
'', # 0xa1
'', # 0xa2
'', # 0xa3
'', # 0xa4
'', # 0xa5
'', # 0xa6
'', # 0xa7
'', # 0xa8
'', # 0xa9
'', # 0xaa
'', # 0xab
'', # 0xac
'', # 0xad
'', # 0xae
'', # 0xaf
'', # 0xb0
'', # 0xb1
'', # 0xb2
'', # 0xb3
'', # 0xb4
'', # 0xb5
'', # 0xb6
'', # 0xb7
'', # 0xb8
'', # 0xb9
'', # 0xba
'', # 0xbb
'', # 0xbc
'', # 0xbd
'', # 0xbe
'', # 0xbf
'', # 0xc0
'', # 0xc1
'', # 0xc2
'', # 0xc3
'', # 0xc4
'', # 0xc5
'', # 0xc6
'', # 0xc7
'', # 0xc8
'', # 0xc9
'', # 0xca
'', # 0xcb
'', # 0xcc
'', # 0xcd
'', # 0xce
'', # 0xcf
'1', # 0x60
'2', # 0x61
'3', # 0x62
'4', # 0x63
'5', # 0x64
'6', # 0x65
'7', # 0x66
'8', # 0x67
'9', # 0x68
'10', # 0x69
'11', # 0x6a
'12', # 0x6b
'13', # 0x6c
'14', # 0x6d
'15', # 0x6e
'16', # 0x6f
'17', # 0x70
'18', # 0x71
'19', # 0x72
'20', # 0x73
'(1)', # 0x74
'(2)', # 0x75
'(3)', # 0x76
'(4)', # 0x77
'(5)', # 0x78
'(6)', # 0x79
'(7)', # 0x7a
'(8)', # 0x7b
'(9)', # 0x7c
'(10)', # 0x7d
'(11)', # 0x7e
'(12)', # 0x7f
'(13)', # 0x80
'(14)', # 0x81
'(15)', # 0x82
'(16)', # 0x83
'(17)', # 0x84
'(18)', # 0x85
'(19)', # 0x86
'(20)', # 0x87
'1.', # 0x88
'2.', # 0x89
'3.', # 0x8a
'4.', # 0x8b
'5.', # 0x8c
'6.', # 0x8d
'7.', # 0x8e
'8.', # 0x8f
'9.', # 0x90
'10.', # 0x91
'11.', # 0x92
'12.', # 0x93
'13.', # 0x94
'14.', # 0x95
'15.', # 0x96
'16.', # 0x97
'17.', # 0x98
'18.', # 0x99
'19.', # 0x9a
'20.', # 0x9b
'(a)', # 0x9c
'(b)', # 0x9d
'(c)', # 0x9e
'(d)', # 0x9f
'(e)', # 0xa0
'(f)', # 0xa1
'(g)', # 0xa2
'(h)', # 0xa3
'(i)', # 0xa4
'(j)', # 0xa5
'(k)', # 0xa6
'(l)', # 0xa7
'(m)', # 0xa8
'(n)', # 0xa9
'(o)', # 0xaa
'(p)', # 0xab
'(q)', # 0xac
'(r)', # 0xad
'(s)', # 0xae
'(t)', # 0xaf
'(u)', # 0xb0
'(v)', # 0xb1
'(w)', # 0xb2
'(x)', # 0xb3
'(y)', # 0xb4
'(z)', # 0xb5
'a', # 0xb6
'b', # 0xb7
'c', # 0xb8
'd', # 0xb9
'e', # 0xba
'f', # 0xbb
'g', # 0xbc
'h', # 0xbd
'i', # 0xbe
'j', # 0xbf
'k', # 0xc0
'l', # 0xc1
'm', # 0xc2
'n', # 0xc3
'o', # 0xc4
'p', # 0xc5
'q', # 0xc6
'r', # 0xc7
's', # 0xc8
't', # 0xc9
'u', # 0xca
'v', # 0xcb
'w', # 0xcc
'x', # 0xcd
'y', # 0xce
'z', # 0xcf
'a', # 0xd0
'b', # 0xd1
'c', # 0xd2

View file

@ -110,7 +110,7 @@ data = (
'', # 0x6c
'', # 0x6d
'', # 0x6e
'', # 0x6f
'#', # 0x6f
'', # 0x70
'', # 0x71
'[?]', # 0x72

View file

@ -48,7 +48,7 @@ data = (
'', # 0x2e
'', # 0x2f
'', # 0x30
'', # 0x31
'*', # 0x31
'', # 0x32
'', # 0x33
'', # 0x34
@ -87,7 +87,7 @@ data = (
'', # 0x55
'', # 0x56
'', # 0x57
'', # 0x58
'|', # 0x58
'', # 0x59
'', # 0x5a
'', # 0x5b
@ -97,7 +97,7 @@ data = (
'[?]', # 0x5f
'[?]', # 0x60
'', # 0x61
'', # 0x62
'!', # 0x62
'', # 0x63
'', # 0x64
'', # 0x65
@ -229,10 +229,10 @@ data = (
'[?]', # 0xe3
'[?]', # 0xe4
'[?]', # 0xe5
'[?]', # 0xe6
'[', # 0xe6
'[?]', # 0xe7
'[?]', # 0xe8
'[?]', # 0xe9
'<', # 0xe8
'> ', # 0xe9
'[?]', # 0xea
'[?]', # 0xeb
'[?]', # 0xec

257
lib/unidecode/x029.py Normal file
View file

@ -0,0 +1,257 @@
data = (
'', # 0x00
'', # 0x01
'', # 0x02
'', # 0x03
'', # 0x04
'', # 0x05
'', # 0x06
'', # 0x07
'', # 0x08
'', # 0x09
'', # 0x0a
'', # 0x0b
'', # 0x0c
'', # 0x0d
'', # 0x0e
'', # 0x0f
'', # 0x10
'', # 0x11
'', # 0x12
'', # 0x13
'', # 0x14
'', # 0x15
'', # 0x16
'', # 0x17
'', # 0x18
'', # 0x19
'', # 0x1a
'', # 0x1b
'', # 0x1c
'', # 0x1d
'', # 0x1e
'', # 0x1f
'', # 0x20
'', # 0x21
'', # 0x22
'', # 0x23
'', # 0x24
'', # 0x25
'', # 0x26
'', # 0x27
'', # 0x28
'', # 0x29
'', # 0x2a
'', # 0x2b
'', # 0x2c
'', # 0x2d
'', # 0x2e
'', # 0x2f
'', # 0x30
'', # 0x31
'', # 0x32
'', # 0x33
'', # 0x34
'', # 0x35
'', # 0x36
'', # 0x37
'', # 0x38
'', # 0x39
'', # 0x3a
'', # 0x3b
'', # 0x3c
'', # 0x3d
'', # 0x3e
'', # 0x3f
'', # 0x40
'', # 0x41
'', # 0x42
'', # 0x43
'', # 0x44
'', # 0x45
'', # 0x46
'', # 0x47
'', # 0x48
'', # 0x49
'', # 0x4a
'', # 0x4b
'', # 0x4c
'', # 0x4d
'', # 0x4e
'', # 0x4f
'', # 0x50
'', # 0x51
'', # 0x52
'', # 0x53
'', # 0x54
'', # 0x55
'', # 0x56
'', # 0x57
'', # 0x58
'', # 0x59
'', # 0x5a
'', # 0x5b
'', # 0x5c
'', # 0x5d
'', # 0x5e
'', # 0x5f
'', # 0x60
'', # 0x61
'', # 0x62
'', # 0x63
'', # 0x64
'', # 0x65
'', # 0x66
'', # 0x67
'', # 0x68
'', # 0x69
'', # 0x6a
'', # 0x6b
'', # 0x6c
'', # 0x6d
'', # 0x6e
'', # 0x6f
'', # 0x70
'', # 0x71
'', # 0x72
'', # 0x73
'', # 0x74
'', # 0x75
'', # 0x76
'', # 0x77
'', # 0x78
'', # 0x79
'', # 0x7a
'', # 0x7b
'', # 0x7c
'', # 0x7d
'', # 0x7e
'', # 0x7f
'', # 0x80
'', # 0x81
'', # 0x82
'{', # 0x83
'} ', # 0x84
'', # 0x85
'', # 0x86
'', # 0x87
'', # 0x88
'', # 0x89
'', # 0x8a
'', # 0x8b
'', # 0x8c
'', # 0x8d
'', # 0x8e
'', # 0x8f
'', # 0x90
'', # 0x91
'', # 0x92
'', # 0x93
'', # 0x94
'', # 0x95
'', # 0x96
'', # 0x97
'', # 0x98
'', # 0x99
'', # 0x9a
'', # 0x9b
'', # 0x9c
'', # 0x9d
'', # 0x9e
'', # 0x9f
'', # 0xa0
'', # 0xa1
'', # 0xa2
'', # 0xa3
'', # 0xa4
'', # 0xa5
'', # 0xa6
'', # 0xa7
'', # 0xa8
'', # 0xa9
'', # 0xaa
'', # 0xab
'', # 0xac
'', # 0xad
'', # 0xae
'', # 0xaf
'', # 0xb0
'', # 0xb1
'', # 0xb2
'', # 0xb3
'', # 0xb4
'', # 0xb5
'', # 0xb6
'', # 0xb7
'', # 0xb8
'', # 0xb9
'', # 0xba
'', # 0xbb
'', # 0xbc
'', # 0xbd
'', # 0xbe
'', # 0xbf
'', # 0xc0
'', # 0xc1
'', # 0xc2
'', # 0xc3
'', # 0xc4
'', # 0xc5
'', # 0xc6
'', # 0xc7
'', # 0xc8
'', # 0xc9
'', # 0xca
'', # 0xcb
'', # 0xcc
'', # 0xcd
'', # 0xce
'', # 0xcf
'', # 0xd0
'', # 0xd1
'', # 0xd2
'', # 0xd3
'', # 0xd4
'', # 0xd5
'', # 0xd6
'', # 0xd7
'', # 0xd8
'', # 0xd9
'', # 0xda
'', # 0xdb
'', # 0xdc
'', # 0xdd
'', # 0xde
'', # 0xdf
'', # 0xe0
'', # 0xe1
'', # 0xe2
'', # 0xe3
'', # 0xe4
'', # 0xe5
'', # 0xe6
'', # 0xe7
'', # 0xe8
'', # 0xe9
'', # 0xea
'', # 0xeb
'', # 0xec
'', # 0xed
'', # 0xee
'', # 0xef
'', # 0xf0
'', # 0xf1
'', # 0xf2
'', # 0xf3
'', # 0xf4
'', # 0xf5
'', # 0xf6
'', # 0xf7
'', # 0xf8
'', # 0xf9
'', # 0xfa
'', # 0xfb
'', # 0xfc
'', # 0xfd
'', # 0xfe
)

257
lib/unidecode/x02a.py Normal file
View file

@ -0,0 +1,257 @@
data = (
'', # 0x00
'', # 0x01
'', # 0x02
'', # 0x03
'', # 0x04
'', # 0x05
'', # 0x06
'', # 0x07
'', # 0x08
'', # 0x09
'', # 0x0a
'', # 0x0b
'', # 0x0c
'', # 0x0d
'', # 0x0e
'', # 0x0f
'', # 0x10
'', # 0x11
'', # 0x12
'', # 0x13
'', # 0x14
'', # 0x15
'', # 0x16
'', # 0x17
'', # 0x18
'', # 0x19
'', # 0x1a
'', # 0x1b
'', # 0x1c
'', # 0x1d
'', # 0x1e
'', # 0x1f
'', # 0x20
'', # 0x21
'', # 0x22
'', # 0x23
'', # 0x24
'', # 0x25
'', # 0x26
'', # 0x27
'', # 0x28
'', # 0x29
'', # 0x2a
'', # 0x2b
'', # 0x2c
'', # 0x2d
'', # 0x2e
'', # 0x2f
'', # 0x30
'', # 0x31
'', # 0x32
'', # 0x33
'', # 0x34
'', # 0x35
'', # 0x36
'', # 0x37
'', # 0x38
'', # 0x39
'', # 0x3a
'', # 0x3b
'', # 0x3c
'', # 0x3d
'', # 0x3e
'', # 0x3f
'', # 0x40
'', # 0x41
'', # 0x42
'', # 0x43
'', # 0x44
'', # 0x45
'', # 0x46
'', # 0x47
'', # 0x48
'', # 0x49
'', # 0x4a
'', # 0x4b
'', # 0x4c
'', # 0x4d
'', # 0x4e
'', # 0x4f
'', # 0x50
'', # 0x51
'', # 0x52
'', # 0x53
'', # 0x54
'', # 0x55
'', # 0x56
'', # 0x57
'', # 0x58
'', # 0x59
'', # 0x5a
'', # 0x5b
'', # 0x5c
'', # 0x5d
'', # 0x5e
'', # 0x5f
'', # 0x60
'', # 0x61
'', # 0x62
'', # 0x63
'', # 0x64
'', # 0x65
'', # 0x66
'', # 0x67
'', # 0x68
'', # 0x69
'', # 0x6a
'', # 0x6b
'', # 0x6c
'', # 0x6d
'', # 0x6e
'', # 0x6f
'', # 0x70
'', # 0x71
'', # 0x72
'', # 0x73
'::=', # 0x74
'==', # 0x75
'===', # 0x76
'', # 0x77
'', # 0x78
'', # 0x79
'', # 0x7a
'', # 0x7b
'', # 0x7c
'', # 0x7d
'', # 0x7e
'', # 0x7f
'', # 0x80
'', # 0x81
'', # 0x82
'', # 0x83
'', # 0x84
'', # 0x85
'', # 0x86
'', # 0x87
'', # 0x88
'', # 0x89
'', # 0x8a
'', # 0x8b
'', # 0x8c
'', # 0x8d
'', # 0x8e
'', # 0x8f
'', # 0x90
'', # 0x91
'', # 0x92
'', # 0x93
'', # 0x94
'', # 0x95
'', # 0x96
'', # 0x97
'', # 0x98
'', # 0x99
'', # 0x9a
'', # 0x9b
'', # 0x9c
'', # 0x9d
'', # 0x9e
'', # 0x9f
'', # 0xa0
'', # 0xa1
'', # 0xa2
'', # 0xa3
'', # 0xa4
'', # 0xa5
'', # 0xa6
'', # 0xa7
'', # 0xa8
'', # 0xa9
'', # 0xaa
'', # 0xab
'', # 0xac
'', # 0xad
'', # 0xae
'', # 0xaf
'', # 0xb0
'', # 0xb1
'', # 0xb2
'', # 0xb3
'', # 0xb4
'', # 0xb5
'', # 0xb6
'', # 0xb7
'', # 0xb8
'', # 0xb9
'', # 0xba
'', # 0xbb
'', # 0xbc
'', # 0xbd
'', # 0xbe
'', # 0xbf
'', # 0xc0
'', # 0xc1
'', # 0xc2
'', # 0xc3
'', # 0xc4
'', # 0xc5
'', # 0xc6
'', # 0xc7
'', # 0xc8
'', # 0xc9
'', # 0xca
'', # 0xcb
'', # 0xcc
'', # 0xcd
'', # 0xce
'', # 0xcf
'', # 0xd0
'', # 0xd1
'', # 0xd2
'', # 0xd3
'', # 0xd4
'', # 0xd5
'', # 0xd6
'', # 0xd7
'', # 0xd8
'', # 0xd9
'', # 0xda
'', # 0xdb
'', # 0xdc
'', # 0xdd
'', # 0xde
'', # 0xdf
'', # 0xe0
'', # 0xe1
'', # 0xe2
'', # 0xe3
'', # 0xe4
'', # 0xe5
'', # 0xe6
'', # 0xe7
'', # 0xe8
'', # 0xe9
'', # 0xea
'', # 0xeb
'', # 0xec
'', # 0xed
'', # 0xee
'', # 0xef
'', # 0xf0
'', # 0xf1
'', # 0xf2
'', # 0xf3
'', # 0xf4
'', # 0xf5
'', # 0xf6
'', # 0xf7
'', # 0xf8
'', # 0xf9
'', # 0xfa
'', # 0xfb
'', # 0xfc
'', # 0xfd
'', # 0xfe
)

View file

@ -80,21 +80,21 @@ data = (
'[?]', # 0x4e
'[?]', # 0x4f
'[?]', # 0x50
'[?]', # 0x51
'[?]', # 0x52
'[?]', # 0x53
'[?]', # 0x54
'[?]', # 0x55
'[?]', # 0x56
'[?]', # 0x57
'[?]', # 0x58
'[?]', # 0x59
'[?]', # 0x5a
'[?]', # 0x5b
'[?]', # 0x5c
'[?]', # 0x5d
'[?]', # 0x5e
'[?]', # 0x5f
'21', # 0x51
'22', # 0x52
'23', # 0x53
'24', # 0x54
'25', # 0x55
'26', # 0x56
'27', # 0x57
'28', # 0x58
'29', # 0x59
'30', # 0x5a
'31', # 0x5b
'32', # 0x5c
'33', # 0x5d
'34', # 0x5e
'35', # 0x5f
'(g)', # 0x60
'(n)', # 0x61
'(d)', # 0x62
@ -176,21 +176,21 @@ data = (
'(Zi) ', # 0xae
'(Xie) ', # 0xaf
'(Ye) ', # 0xb0
'[?]', # 0xb1
'[?]', # 0xb2
'[?]', # 0xb3
'[?]', # 0xb4
'[?]', # 0xb5
'[?]', # 0xb6
'[?]', # 0xb7
'[?]', # 0xb8
'[?]', # 0xb9
'[?]', # 0xba
'[?]', # 0xbb
'[?]', # 0xbc
'[?]', # 0xbd
'[?]', # 0xbe
'[?]', # 0xbf
'36', # 0xb1
'37', # 0xb2
'38', # 0xb3
'39', # 0xb4
'40', # 0xb5
'41', # 0xb6
'42', # 0xb7
'43', # 0xb8
'44', # 0xb9
'45', # 0xba
'46', # 0xbb
'47', # 0xbc
'48', # 0xbd
'49', # 0xbe
'50', # 0xbf
'1M', # 0xc0
'2M', # 0xc1
'3M', # 0xc2

View file

@ -1,5 +1,5 @@
data = (
'[?] ', # 0x00
'Yi ', # 0x00
'Ding ', # 0x01
'Kao ', # 0x02
'Qi ', # 0x03