From 2fd30411e6780cd713e9cc8f5c46bee808020956 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 12 Jan 2016 02:01:42 +0000 Subject: [PATCH] Update unidecode library 0.04.11 to 0.04.18 (fd57cbf). --- CHANGES.md | 1 + lib/unidecode/__init__.py | 54 ++++++-- lib/unidecode/util.py | 58 +++++++++ lib/unidecode/x000.py | 163 ++++++------------------ lib/unidecode/x005.py | 4 +- lib/unidecode/x020.py | 16 +-- lib/unidecode/x021.py | 50 ++++---- lib/unidecode/x022.py | 22 ++-- lib/unidecode/x023.py | 6 +- lib/unidecode/x024.py | 224 ++++++++++++++++----------------- lib/unidecode/x026.py | 2 +- lib/unidecode/x027.py | 12 +- lib/unidecode/x029.py | 257 ++++++++++++++++++++++++++++++++++++++ lib/unidecode/x02a.py | 257 ++++++++++++++++++++++++++++++++++++++ lib/unidecode/x032.py | 60 ++++----- lib/unidecode/x04e.py | 2 +- 16 files changed, 854 insertions(+), 334 deletions(-) create mode 100644 lib/unidecode/util.py create mode 100644 lib/unidecode/x029.py create mode 100644 lib/unidecode/x02a.py diff --git a/CHANGES.md b/CHANGES.md index 6faac1b8..28badf83 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,6 @@ ### 0.12.0 (2016-xx-xx xx:xx:xx UTC) +* Update unidecode library 0.04.11 to 0.04.18 (fd57cbf) * Update xmltodict library 0.9.2 (579a005) to 0.9.2 (eac0031) diff --git a/lib/unidecode/__init__.py b/lib/unidecode/__init__.py index 82eb5a3f..3b68de4c 100644 --- a/lib/unidecode/__init__.py +++ b/lib/unidecode/__init__.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# vi:tabstop=4:expandtab:sw=4 """Transliterate Unicode text into plain 7-bit ASCII. Example usage: @@ -18,19 +19,53 @@ from sys import version_info Cache = {} -def unidecode(string): + +def _warn_if_not_unicode(string): + if version_info[0] < 3 and not isinstance(string, unicode): + warnings.warn( "Argument %r is not an unicode object. " + "Passing an encoded string will likely have " + "unexpected results." % (type(string),), + RuntimeWarning, 2) + + +def unidecode_expect_ascii(string): + """Transliterate an Unicode object into an ASCII string + + >>> unidecode(u"\u5317\u4EB0") + "Bei Jing " + + This function first tries to convert the string using ASCII codec. + If it fails (because of non-ASCII characters), it falls back to + transliteration using the character tables. + + This is approx. five times faster if the string only contains ASCII + characters, but slightly slower than using unidecode directly if non-ASCII + chars are present. + """ + + _warn_if_not_unicode(string) + try: + bytestring = string.encode('ASCII') + except UnicodeEncodeError: + return _unidecode(string) + if version_info[0] >= 3: + return string + else: + return bytestring + +def unidecode_expect_nonascii(string): """Transliterate an Unicode object into an ASCII string >>> unidecode(u"\u5317\u4EB0") "Bei Jing " """ - if version_info[0] < 3 and not isinstance(string, unicode): - warnings.warn( "Argument %r is not an unicode object. " - "Passing an encoded string will likely have " - "unexpected results." % (type(string),), - RuntimeWarning, 2) + _warn_if_not_unicode(string) + return _unidecode(string) +unidecode = unidecode_expect_ascii + +def _unidecode(string): retval = [] for char in string: @@ -43,6 +78,11 @@ def unidecode(string): if codepoint > 0xeffff: continue # Characters in Private Use Area and above are ignored + if 0xd800 <= codepoint <= 0xdfff: + warnings.warn( "Surrogate character %r will be ignored. " + "You might be using a narrow Python build." % (char,), + RuntimeWarning, 2) + section = codepoint >> 8 # Chop off the last two hex digits position = codepoint % 256 # Last two hex digits @@ -50,7 +90,7 @@ def unidecode(string): table = Cache[section] except KeyError: try: - mod = __import__('unidecode.x%03x'%(section), [], [], ['data']) + mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data']) except ImportError: Cache[section] = None continue # No match: ignore this character and carry on. diff --git a/lib/unidecode/util.py b/lib/unidecode/util.py new file mode 100644 index 00000000..477280d1 --- /dev/null +++ b/lib/unidecode/util.py @@ -0,0 +1,58 @@ +# vim:ts=4 sw=4 expandtab softtabstop=4 +from __future__ import print_function +import optparse +import locale +import os +import sys +import warnings + +from unidecode import unidecode + +PY3 = sys.version_info[0] >= 3 + +def fatal(msg): + sys.stderr.write(msg + "\n") + sys.exit(1) + +def main(): + default_encoding = locale.getpreferredencoding() + + parser = optparse.OptionParser('%prog [options] [FILE]', + description="Transliterate Unicode text into ASCII. FILE is path to file to transliterate. " + "Standard input is used if FILE is omitted and -c is not specified.") + parser.add_option('-e', '--encoding', metavar='ENCODING', default=default_encoding, + help='Specify an encoding (default is %s)' % (default_encoding,)) + parser.add_option('-c', metavar='TEXT', dest='text', + help='Transliterate TEXT instead of FILE') + + options, args = parser.parse_args() + + encoding = options.encoding + + if args: + if options.text: + fatal("Can't use both FILE and -c option") + else: + with open(args[0], 'rb') as f: + stream = f.read() + elif options.text: + if PY3: + stream = os.fsencode(options.text) + else: + stream = options.text + # add a newline to the string if it comes from the + # command line so that the result is printed nicely + # on the console. + stream += '\n'.encode('ascii') + else: + if PY3: + stream = sys.stdin.buffer.read() + else: + stream = sys.stdin.read() + + try: + stream = stream.decode(encoding) + except UnicodeDecodeError as e: + fatal('Unable to decode input: %s, start: %d, end: %d' % (e.reason, e.start, e.end)) + + sys.stdout.write(unidecode(stream)) diff --git a/lib/unidecode/x000.py b/lib/unidecode/x000.py index 6821df47..c3f8f515 100644 --- a/lib/unidecode/x000.py +++ b/lib/unidecode/x000.py @@ -1,132 +1,15 @@ data = ( -'\x00', # 0x00 -'\x01', # 0x01 -'\x02', # 0x02 -'\x03', # 0x03 -'\x04', # 0x04 -'\x05', # 0x05 -'\x06', # 0x06 -'\x07', # 0x07 -'\x08', # 0x08 -'\x09', # 0x09 -'\x0a', # 0x0a -'\x0b', # 0x0b -'\x0c', # 0x0c -'\x0d', # 0x0d -'\x0e', # 0x0e -'\x0f', # 0x0f -'\x10', # 0x10 -'\x11', # 0x11 -'\x12', # 0x12 -'\x13', # 0x13 -'\x14', # 0x14 -'\x15', # 0x15 -'\x16', # 0x16 -'\x17', # 0x17 -'\x18', # 0x18 -'\x19', # 0x19 -'\x1a', # 0x1a -'\x1b', # 0x1b -'\x1c', # 0x1c -'\x1d', # 0x1d -'\x1e', # 0x1e -'\x1f', # 0x1f -' ', # 0x20 -'!', # 0x21 -'"', # 0x22 -'#', # 0x23 -'$', # 0x24 -'%', # 0x25 -'&', # 0x26 -'\'', # 0x27 -'(', # 0x28 -')', # 0x29 -'*', # 0x2a -'+', # 0x2b -',', # 0x2c -'-', # 0x2d -'.', # 0x2e -'/', # 0x2f -'0', # 0x30 -'1', # 0x31 -'2', # 0x32 -'3', # 0x33 -'4', # 0x34 -'5', # 0x35 -'6', # 0x36 -'7', # 0x37 -'8', # 0x38 -'9', # 0x39 -':', # 0x3a -';', # 0x3b -'<', # 0x3c -'=', # 0x3d -'>', # 0x3e -'?', # 0x3f -'@', # 0x40 -'A', # 0x41 -'B', # 0x42 -'C', # 0x43 -'D', # 0x44 -'E', # 0x45 -'F', # 0x46 -'G', # 0x47 -'H', # 0x48 -'I', # 0x49 -'J', # 0x4a -'K', # 0x4b -'L', # 0x4c -'M', # 0x4d -'N', # 0x4e -'O', # 0x4f -'P', # 0x50 -'Q', # 0x51 -'R', # 0x52 -'S', # 0x53 -'T', # 0x54 -'U', # 0x55 -'V', # 0x56 -'W', # 0x57 -'X', # 0x58 -'Y', # 0x59 -'Z', # 0x5a -']', # 0x5b -'\\', # 0x5c -']', # 0x5d -'^', # 0x5e -'_', # 0x5f -'`', # 0x60 -'a', # 0x61 -'b', # 0x62 -'c', # 0x63 -'d', # 0x64 -'e', # 0x65 -'f', # 0x66 -'g', # 0x67 -'h', # 0x68 -'i', # 0x69 -'j', # 0x6a -'k', # 0x6b -'l', # 0x6c -'m', # 0x6d -'n', # 0x6e -'o', # 0x6f -'p', # 0x70 -'q', # 0x71 -'r', # 0x72 -'s', # 0x73 -'t', # 0x74 -'u', # 0x75 -'v', # 0x76 -'w', # 0x77 -'x', # 0x78 -'y', # 0x79 -'z', # 0x7a -'{', # 0x7b -'|', # 0x7c -'}', # 0x7d -'~', # 0x7e -'', # 0x7f +# Code points u+007f and below are equivalent to ASCII and are handled by a +# special case in the code. Hence they are not present in this table. +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', + '', # 0x80 '', # 0x81 '', # 0x82 @@ -162,7 +45,10 @@ data = ( ' ', # 0xa0 '!', # 0xa1 'C/', # 0xa2 + +# Not "GBP" - Pound Sign is used for more than just British Pounds. 'PS', # 0xa3 + '$?', # 0xa4 'Y=', # 0xa5 '|', # 0xa6 @@ -177,8 +63,11 @@ data = ( '-', # 0xaf 'deg', # 0xb0 '+-', # 0xb1 + +# These might be combined with other superscript digits (u+2070 - u+2079) '2', # 0xb2 '3', # 0xb3 + '\'', # 0xb4 'u', # 0xb5 'P', # 0xb6 @@ -195,7 +84,10 @@ data = ( 'A', # 0xc1 'A', # 0xc2 'A', # 0xc3 + +# Not "AE" - used in languages other than German 'A', # 0xc4 + 'A', # 0xc5 'AE', # 0xc6 'C', # 0xc7 @@ -213,13 +105,19 @@ data = ( 'O', # 0xd3 'O', # 0xd4 'O', # 0xd5 + +# Not "OE" - used in languages other than German 'O', # 0xd6 + 'x', # 0xd7 'O', # 0xd8 'U', # 0xd9 'U', # 0xda 'U', # 0xdb + +# Not "UE" - used in languages other than German 'U', # 0xdc + 'Y', # 0xdd 'Th', # 0xde 'ss', # 0xdf @@ -227,7 +125,10 @@ data = ( 'a', # 0xe1 'a', # 0xe2 'a', # 0xe3 + +# Not "ae" - used in languages other than German 'a', # 0xe4 + 'a', # 0xe5 'ae', # 0xe6 'c', # 0xe7 @@ -245,13 +146,19 @@ data = ( 'o', # 0xf3 'o', # 0xf4 'o', # 0xf5 + +# Not "oe" - used in languages other than German 'o', # 0xf6 + '/', # 0xf7 'o', # 0xf8 'u', # 0xf9 'u', # 0xfa 'u', # 0xfb + +# Not "ue" - used in languages other than German 'u', # 0xfc + 'y', # 0xfd 'th', # 0xfe 'y', # 0xff diff --git a/lib/unidecode/x005.py b/lib/unidecode/x005.py index 738a9962..2913ffff 100644 --- a/lib/unidecode/x005.py +++ b/lib/unidecode/x005.py @@ -136,7 +136,7 @@ data = ( 'f', # 0x86 'ew', # 0x87 '[?]', # 0x88 -'.', # 0x89 +':', # 0x89 '-', # 0x8a '[?]', # 0x8b '[?]', # 0x8c @@ -191,7 +191,7 @@ data = ( '', # 0xbd '', # 0xbe '', # 0xbf -'', # 0xc0 +'|', # 0xc0 '', # 0xc1 '', # 0xc2 ':', # 0xc3 diff --git a/lib/unidecode/x020.py b/lib/unidecode/x020.py index cb6963e2..b6494730 100644 --- a/lib/unidecode/x020.py +++ b/lib/unidecode/x020.py @@ -70,23 +70,23 @@ data = ( '/', # 0x44 '-[', # 0x45 ']-', # 0x46 -'[?]', # 0x47 +'??', # 0x47 '?!', # 0x48 '!?', # 0x49 '7', # 0x4a 'PP', # 0x4b '(]', # 0x4c '[)', # 0x4d -'[?]', # 0x4e +'*', # 0x4e '[?]', # 0x4f '[?]', # 0x50 '[?]', # 0x51 -'[?]', # 0x52 -'[?]', # 0x53 +'%', # 0x52 +'~', # 0x53 '[?]', # 0x54 '[?]', # 0x55 '[?]', # 0x56 -'[?]', # 0x57 +"''''", # 0x57 '[?]', # 0x58 '[?]', # 0x59 '[?]', # 0x5a @@ -95,7 +95,7 @@ data = ( '[?]', # 0x5d '[?]', # 0x5e '[?]', # 0x5f -'[?]', # 0x60 +'', # 0x60 '[?]', # 0x61 '[?]', # 0x62 '[?]', # 0x63 @@ -171,7 +171,7 @@ data = ( 'W', # 0xa9 'NS', # 0xaa 'D', # 0xab -'EU', # 0xac +'EUR', # 0xac 'K', # 0xad 'T', # 0xae 'Dr', # 0xaf @@ -228,7 +228,7 @@ data = ( '', # 0xe2 '', # 0xe3 '[?]', # 0xe4 -'[?]', # 0xe5 +'', # 0xe5 '[?]', # 0xe6 '[?]', # 0xe7 '[?]', # 0xe8 diff --git a/lib/unidecode/x021.py b/lib/unidecode/x021.py index 0164cdb5..067d9bdc 100644 --- a/lib/unidecode/x021.py +++ b/lib/unidecode/x021.py @@ -1,7 +1,7 @@ data = ( '', # 0x00 '', # 0x01 -'', # 0x02 +'C', # 0x02 '', # 0x03 '', # 0x04 '', # 0x05 @@ -12,7 +12,7 @@ data = ( '', # 0x0a '', # 0x0b '', # 0x0c -'', # 0x0d +'H', # 0x0d '', # 0x0e '', # 0x0f '', # 0x10 @@ -20,22 +20,22 @@ data = ( '', # 0x12 '', # 0x13 '', # 0x14 -'', # 0x15 +'N', # 0x15 '', # 0x16 '', # 0x17 '', # 0x18 -'', # 0x19 -'', # 0x1a +'P', # 0x19 +'Q', # 0x1a '', # 0x1b '', # 0x1c -'', # 0x1d +'R', # 0x1d '', # 0x1e '', # 0x1f -'', # 0x20 -'', # 0x21 -'', # 0x22 +'(sm)', # 0x20 +'TEL', # 0x21 +'(tm)', # 0x22 '', # 0x23 -'', # 0x24 +'Z', # 0x24 '', # 0x25 '', # 0x26 '', # 0x27 @@ -45,12 +45,12 @@ data = ( 'A', # 0x2b '', # 0x2c '', # 0x2d -'', # 0x2e -'', # 0x2f -'', # 0x30 -'', # 0x31 +'e', # 0x2e +'e', # 0x2f +'E', # 0x30 +'F', # 0x31 'F', # 0x32 -'', # 0x33 +'M', # 0x33 '', # 0x34 '', # 0x35 '', # 0x36 @@ -58,21 +58,21 @@ data = ( '', # 0x38 '', # 0x39 '', # 0x3a -'[?]', # 0x3b -'[?]', # 0x3c -'[?]', # 0x3d -'[?]', # 0x3e -'[?]', # 0x3f +'FAX', # 0x3b +'', # 0x3c +'', # 0x3d +'', # 0x3e +'', # 0x3f '[?]', # 0x40 '[?]', # 0x41 '[?]', # 0x42 '[?]', # 0x43 '[?]', # 0x44 -'[?]', # 0x45 -'[?]', # 0x46 -'[?]', # 0x47 -'[?]', # 0x48 -'[?]', # 0x49 +'D', # 0x45 +'d', # 0x46 +'e', # 0x47 +'i', # 0x48 +'j', # 0x49 '[?]', # 0x4a '[?]', # 0x4b '[?]', # 0x4c diff --git a/lib/unidecode/x022.py b/lib/unidecode/x022.py index 2046a9b4..e38fb5cc 100644 --- a/lib/unidecode/x022.py +++ b/lib/unidecode/x022.py @@ -17,12 +17,12 @@ data = ( '[?]', # 0x0f '[?]', # 0x10 '[?]', # 0x11 -'[?]', # 0x12 +'-', # 0x12 '[?]', # 0x13 '[?]', # 0x14 -'[?]', # 0x15 -'[?]', # 0x16 -'[?]', # 0x17 +'/', # 0x15 +'\\', # 0x16 +'*', # 0x17 '[?]', # 0x18 '[?]', # 0x19 '[?]', # 0x1a @@ -34,7 +34,7 @@ data = ( '[?]', # 0x20 '[?]', # 0x21 '[?]', # 0x22 -'[?]', # 0x23 +'|', # 0x23 '[?]', # 0x24 '[?]', # 0x25 '[?]', # 0x26 @@ -53,13 +53,13 @@ data = ( '[?]', # 0x33 '[?]', # 0x34 '[?]', # 0x35 -'[?]', # 0x36 +':', # 0x36 '[?]', # 0x37 '[?]', # 0x38 '[?]', # 0x39 '[?]', # 0x3a '[?]', # 0x3b -'[?]', # 0x3c +'~', # 0x3c '[?]', # 0x3d '[?]', # 0x3e '[?]', # 0x3f @@ -99,10 +99,10 @@ data = ( '[?]', # 0x61 '[?]', # 0x62 '[?]', # 0x63 -'[?]', # 0x64 -'[?]', # 0x65 -'[?]', # 0x66 -'[?]', # 0x67 +'<=', # 0x64 +'>=', # 0x65 +'<=', # 0x66 +'>=', # 0x67 '[?]', # 0x68 '[?]', # 0x69 '[?]', # 0x6a diff --git a/lib/unidecode/x023.py b/lib/unidecode/x023.py index 2046a9b4..3c4462e2 100644 --- a/lib/unidecode/x023.py +++ b/lib/unidecode/x023.py @@ -2,7 +2,7 @@ data = ( '[?]', # 0x00 '[?]', # 0x01 '[?]', # 0x02 -'[?]', # 0x03 +'^', # 0x03 '[?]', # 0x04 '[?]', # 0x05 '[?]', # 0x06 @@ -40,8 +40,8 @@ data = ( '[?]', # 0x26 '[?]', # 0x27 '[?]', # 0x28 -'[?]', # 0x29 -'[?]', # 0x2a +'<', # 0x29 +'> ', # 0x2a '[?]', # 0x2b '[?]', # 0x2c '[?]', # 0x2d diff --git a/lib/unidecode/x024.py b/lib/unidecode/x024.py index 76d8c8cd..20b3c8f1 100644 --- a/lib/unidecode/x024.py +++ b/lib/unidecode/x024.py @@ -95,118 +95,118 @@ data = ( '[?]', # 0x5d '[?]', # 0x5e '[?]', # 0x5f -'', # 0x60 -'', # 0x61 -'', # 0x62 -'', # 0x63 -'', # 0x64 -'', # 0x65 -'', # 0x66 -'', # 0x67 -'', # 0x68 -'', # 0x69 -'', # 0x6a -'', # 0x6b -'', # 0x6c -'', # 0x6d -'', # 0x6e -'', # 0x6f -'', # 0x70 -'', # 0x71 -'', # 0x72 -'', # 0x73 -'', # 0x74 -'', # 0x75 -'', # 0x76 -'', # 0x77 -'', # 0x78 -'', # 0x79 -'', # 0x7a -'', # 0x7b -'', # 0x7c -'', # 0x7d -'', # 0x7e -'', # 0x7f -'', # 0x80 -'', # 0x81 -'', # 0x82 -'', # 0x83 -'', # 0x84 -'', # 0x85 -'', # 0x86 -'', # 0x87 -'', # 0x88 -'', # 0x89 -'', # 0x8a -'', # 0x8b -'', # 0x8c -'', # 0x8d -'', # 0x8e -'', # 0x8f -'', # 0x90 -'', # 0x91 -'', # 0x92 -'', # 0x93 -'', # 0x94 -'', # 0x95 -'', # 0x96 -'', # 0x97 -'', # 0x98 -'', # 0x99 -'', # 0x9a -'', # 0x9b -'', # 0x9c -'', # 0x9d -'', # 0x9e -'', # 0x9f -'', # 0xa0 -'', # 0xa1 -'', # 0xa2 -'', # 0xa3 -'', # 0xa4 -'', # 0xa5 -'', # 0xa6 -'', # 0xa7 -'', # 0xa8 -'', # 0xa9 -'', # 0xaa -'', # 0xab -'', # 0xac -'', # 0xad -'', # 0xae -'', # 0xaf -'', # 0xb0 -'', # 0xb1 -'', # 0xb2 -'', # 0xb3 -'', # 0xb4 -'', # 0xb5 -'', # 0xb6 -'', # 0xb7 -'', # 0xb8 -'', # 0xb9 -'', # 0xba -'', # 0xbb -'', # 0xbc -'', # 0xbd -'', # 0xbe -'', # 0xbf -'', # 0xc0 -'', # 0xc1 -'', # 0xc2 -'', # 0xc3 -'', # 0xc4 -'', # 0xc5 -'', # 0xc6 -'', # 0xc7 -'', # 0xc8 -'', # 0xc9 -'', # 0xca -'', # 0xcb -'', # 0xcc -'', # 0xcd -'', # 0xce -'', # 0xcf +'1', # 0x60 +'2', # 0x61 +'3', # 0x62 +'4', # 0x63 +'5', # 0x64 +'6', # 0x65 +'7', # 0x66 +'8', # 0x67 +'9', # 0x68 +'10', # 0x69 +'11', # 0x6a +'12', # 0x6b +'13', # 0x6c +'14', # 0x6d +'15', # 0x6e +'16', # 0x6f +'17', # 0x70 +'18', # 0x71 +'19', # 0x72 +'20', # 0x73 +'(1)', # 0x74 +'(2)', # 0x75 +'(3)', # 0x76 +'(4)', # 0x77 +'(5)', # 0x78 +'(6)', # 0x79 +'(7)', # 0x7a +'(8)', # 0x7b +'(9)', # 0x7c +'(10)', # 0x7d +'(11)', # 0x7e +'(12)', # 0x7f +'(13)', # 0x80 +'(14)', # 0x81 +'(15)', # 0x82 +'(16)', # 0x83 +'(17)', # 0x84 +'(18)', # 0x85 +'(19)', # 0x86 +'(20)', # 0x87 +'1.', # 0x88 +'2.', # 0x89 +'3.', # 0x8a +'4.', # 0x8b +'5.', # 0x8c +'6.', # 0x8d +'7.', # 0x8e +'8.', # 0x8f +'9.', # 0x90 +'10.', # 0x91 +'11.', # 0x92 +'12.', # 0x93 +'13.', # 0x94 +'14.', # 0x95 +'15.', # 0x96 +'16.', # 0x97 +'17.', # 0x98 +'18.', # 0x99 +'19.', # 0x9a +'20.', # 0x9b +'(a)', # 0x9c +'(b)', # 0x9d +'(c)', # 0x9e +'(d)', # 0x9f +'(e)', # 0xa0 +'(f)', # 0xa1 +'(g)', # 0xa2 +'(h)', # 0xa3 +'(i)', # 0xa4 +'(j)', # 0xa5 +'(k)', # 0xa6 +'(l)', # 0xa7 +'(m)', # 0xa8 +'(n)', # 0xa9 +'(o)', # 0xaa +'(p)', # 0xab +'(q)', # 0xac +'(r)', # 0xad +'(s)', # 0xae +'(t)', # 0xaf +'(u)', # 0xb0 +'(v)', # 0xb1 +'(w)', # 0xb2 +'(x)', # 0xb3 +'(y)', # 0xb4 +'(z)', # 0xb5 +'a', # 0xb6 +'b', # 0xb7 +'c', # 0xb8 +'d', # 0xb9 +'e', # 0xba +'f', # 0xbb +'g', # 0xbc +'h', # 0xbd +'i', # 0xbe +'j', # 0xbf +'k', # 0xc0 +'l', # 0xc1 +'m', # 0xc2 +'n', # 0xc3 +'o', # 0xc4 +'p', # 0xc5 +'q', # 0xc6 +'r', # 0xc7 +'s', # 0xc8 +'t', # 0xc9 +'u', # 0xca +'v', # 0xcb +'w', # 0xcc +'x', # 0xcd +'y', # 0xce +'z', # 0xcf 'a', # 0xd0 'b', # 0xd1 'c', # 0xd2 diff --git a/lib/unidecode/x026.py b/lib/unidecode/x026.py index bfb03a9a..c575472c 100644 --- a/lib/unidecode/x026.py +++ b/lib/unidecode/x026.py @@ -110,7 +110,7 @@ data = ( '', # 0x6c '', # 0x6d '', # 0x6e -'', # 0x6f +'#', # 0x6f '', # 0x70 '', # 0x71 '[?]', # 0x72 diff --git a/lib/unidecode/x027.py b/lib/unidecode/x027.py index 473cfc77..3c74c073 100644 --- a/lib/unidecode/x027.py +++ b/lib/unidecode/x027.py @@ -48,7 +48,7 @@ data = ( '', # 0x2e '', # 0x2f '', # 0x30 -'', # 0x31 +'*', # 0x31 '', # 0x32 '', # 0x33 '', # 0x34 @@ -87,7 +87,7 @@ data = ( '', # 0x55 '', # 0x56 '', # 0x57 -'', # 0x58 +'|', # 0x58 '', # 0x59 '', # 0x5a '', # 0x5b @@ -97,7 +97,7 @@ data = ( '[?]', # 0x5f '[?]', # 0x60 '', # 0x61 -'', # 0x62 +'!', # 0x62 '', # 0x63 '', # 0x64 '', # 0x65 @@ -229,10 +229,10 @@ data = ( '[?]', # 0xe3 '[?]', # 0xe4 '[?]', # 0xe5 -'[?]', # 0xe6 +'[', # 0xe6 '[?]', # 0xe7 -'[?]', # 0xe8 -'[?]', # 0xe9 +'<', # 0xe8 +'> ', # 0xe9 '[?]', # 0xea '[?]', # 0xeb '[?]', # 0xec diff --git a/lib/unidecode/x029.py b/lib/unidecode/x029.py new file mode 100644 index 00000000..c2df2548 --- /dev/null +++ b/lib/unidecode/x029.py @@ -0,0 +1,257 @@ +data = ( +'', # 0x00 +'', # 0x01 +'', # 0x02 +'', # 0x03 +'', # 0x04 +'', # 0x05 +'', # 0x06 +'', # 0x07 +'', # 0x08 +'', # 0x09 +'', # 0x0a +'', # 0x0b +'', # 0x0c +'', # 0x0d +'', # 0x0e +'', # 0x0f +'', # 0x10 +'', # 0x11 +'', # 0x12 +'', # 0x13 +'', # 0x14 +'', # 0x15 +'', # 0x16 +'', # 0x17 +'', # 0x18 +'', # 0x19 +'', # 0x1a +'', # 0x1b +'', # 0x1c +'', # 0x1d +'', # 0x1e +'', # 0x1f +'', # 0x20 +'', # 0x21 +'', # 0x22 +'', # 0x23 +'', # 0x24 +'', # 0x25 +'', # 0x26 +'', # 0x27 +'', # 0x28 +'', # 0x29 +'', # 0x2a +'', # 0x2b +'', # 0x2c +'', # 0x2d +'', # 0x2e +'', # 0x2f +'', # 0x30 +'', # 0x31 +'', # 0x32 +'', # 0x33 +'', # 0x34 +'', # 0x35 +'', # 0x36 +'', # 0x37 +'', # 0x38 +'', # 0x39 +'', # 0x3a +'', # 0x3b +'', # 0x3c +'', # 0x3d +'', # 0x3e +'', # 0x3f +'', # 0x40 +'', # 0x41 +'', # 0x42 +'', # 0x43 +'', # 0x44 +'', # 0x45 +'', # 0x46 +'', # 0x47 +'', # 0x48 +'', # 0x49 +'', # 0x4a +'', # 0x4b +'', # 0x4c +'', # 0x4d +'', # 0x4e +'', # 0x4f +'', # 0x50 +'', # 0x51 +'', # 0x52 +'', # 0x53 +'', # 0x54 +'', # 0x55 +'', # 0x56 +'', # 0x57 +'', # 0x58 +'', # 0x59 +'', # 0x5a +'', # 0x5b +'', # 0x5c +'', # 0x5d +'', # 0x5e +'', # 0x5f +'', # 0x60 +'', # 0x61 +'', # 0x62 +'', # 0x63 +'', # 0x64 +'', # 0x65 +'', # 0x66 +'', # 0x67 +'', # 0x68 +'', # 0x69 +'', # 0x6a +'', # 0x6b +'', # 0x6c +'', # 0x6d +'', # 0x6e +'', # 0x6f +'', # 0x70 +'', # 0x71 +'', # 0x72 +'', # 0x73 +'', # 0x74 +'', # 0x75 +'', # 0x76 +'', # 0x77 +'', # 0x78 +'', # 0x79 +'', # 0x7a +'', # 0x7b +'', # 0x7c +'', # 0x7d +'', # 0x7e +'', # 0x7f +'', # 0x80 +'', # 0x81 +'', # 0x82 +'{', # 0x83 +'} ', # 0x84 +'', # 0x85 +'', # 0x86 +'', # 0x87 +'', # 0x88 +'', # 0x89 +'', # 0x8a +'', # 0x8b +'', # 0x8c +'', # 0x8d +'', # 0x8e +'', # 0x8f +'', # 0x90 +'', # 0x91 +'', # 0x92 +'', # 0x93 +'', # 0x94 +'', # 0x95 +'', # 0x96 +'', # 0x97 +'', # 0x98 +'', # 0x99 +'', # 0x9a +'', # 0x9b +'', # 0x9c +'', # 0x9d +'', # 0x9e +'', # 0x9f +'', # 0xa0 +'', # 0xa1 +'', # 0xa2 +'', # 0xa3 +'', # 0xa4 +'', # 0xa5 +'', # 0xa6 +'', # 0xa7 +'', # 0xa8 +'', # 0xa9 +'', # 0xaa +'', # 0xab +'', # 0xac +'', # 0xad +'', # 0xae +'', # 0xaf +'', # 0xb0 +'', # 0xb1 +'', # 0xb2 +'', # 0xb3 +'', # 0xb4 +'', # 0xb5 +'', # 0xb6 +'', # 0xb7 +'', # 0xb8 +'', # 0xb9 +'', # 0xba +'', # 0xbb +'', # 0xbc +'', # 0xbd +'', # 0xbe +'', # 0xbf +'', # 0xc0 +'', # 0xc1 +'', # 0xc2 +'', # 0xc3 +'', # 0xc4 +'', # 0xc5 +'', # 0xc6 +'', # 0xc7 +'', # 0xc8 +'', # 0xc9 +'', # 0xca +'', # 0xcb +'', # 0xcc +'', # 0xcd +'', # 0xce +'', # 0xcf +'', # 0xd0 +'', # 0xd1 +'', # 0xd2 +'', # 0xd3 +'', # 0xd4 +'', # 0xd5 +'', # 0xd6 +'', # 0xd7 +'', # 0xd8 +'', # 0xd9 +'', # 0xda +'', # 0xdb +'', # 0xdc +'', # 0xdd +'', # 0xde +'', # 0xdf +'', # 0xe0 +'', # 0xe1 +'', # 0xe2 +'', # 0xe3 +'', # 0xe4 +'', # 0xe5 +'', # 0xe6 +'', # 0xe7 +'', # 0xe8 +'', # 0xe9 +'', # 0xea +'', # 0xeb +'', # 0xec +'', # 0xed +'', # 0xee +'', # 0xef +'', # 0xf0 +'', # 0xf1 +'', # 0xf2 +'', # 0xf3 +'', # 0xf4 +'', # 0xf5 +'', # 0xf6 +'', # 0xf7 +'', # 0xf8 +'', # 0xf9 +'', # 0xfa +'', # 0xfb +'', # 0xfc +'', # 0xfd +'', # 0xfe +) diff --git a/lib/unidecode/x02a.py b/lib/unidecode/x02a.py new file mode 100644 index 00000000..b832ef35 --- /dev/null +++ b/lib/unidecode/x02a.py @@ -0,0 +1,257 @@ +data = ( +'', # 0x00 +'', # 0x01 +'', # 0x02 +'', # 0x03 +'', # 0x04 +'', # 0x05 +'', # 0x06 +'', # 0x07 +'', # 0x08 +'', # 0x09 +'', # 0x0a +'', # 0x0b +'', # 0x0c +'', # 0x0d +'', # 0x0e +'', # 0x0f +'', # 0x10 +'', # 0x11 +'', # 0x12 +'', # 0x13 +'', # 0x14 +'', # 0x15 +'', # 0x16 +'', # 0x17 +'', # 0x18 +'', # 0x19 +'', # 0x1a +'', # 0x1b +'', # 0x1c +'', # 0x1d +'', # 0x1e +'', # 0x1f +'', # 0x20 +'', # 0x21 +'', # 0x22 +'', # 0x23 +'', # 0x24 +'', # 0x25 +'', # 0x26 +'', # 0x27 +'', # 0x28 +'', # 0x29 +'', # 0x2a +'', # 0x2b +'', # 0x2c +'', # 0x2d +'', # 0x2e +'', # 0x2f +'', # 0x30 +'', # 0x31 +'', # 0x32 +'', # 0x33 +'', # 0x34 +'', # 0x35 +'', # 0x36 +'', # 0x37 +'', # 0x38 +'', # 0x39 +'', # 0x3a +'', # 0x3b +'', # 0x3c +'', # 0x3d +'', # 0x3e +'', # 0x3f +'', # 0x40 +'', # 0x41 +'', # 0x42 +'', # 0x43 +'', # 0x44 +'', # 0x45 +'', # 0x46 +'', # 0x47 +'', # 0x48 +'', # 0x49 +'', # 0x4a +'', # 0x4b +'', # 0x4c +'', # 0x4d +'', # 0x4e +'', # 0x4f +'', # 0x50 +'', # 0x51 +'', # 0x52 +'', # 0x53 +'', # 0x54 +'', # 0x55 +'', # 0x56 +'', # 0x57 +'', # 0x58 +'', # 0x59 +'', # 0x5a +'', # 0x5b +'', # 0x5c +'', # 0x5d +'', # 0x5e +'', # 0x5f +'', # 0x60 +'', # 0x61 +'', # 0x62 +'', # 0x63 +'', # 0x64 +'', # 0x65 +'', # 0x66 +'', # 0x67 +'', # 0x68 +'', # 0x69 +'', # 0x6a +'', # 0x6b +'', # 0x6c +'', # 0x6d +'', # 0x6e +'', # 0x6f +'', # 0x70 +'', # 0x71 +'', # 0x72 +'', # 0x73 +'::=', # 0x74 +'==', # 0x75 +'===', # 0x76 +'', # 0x77 +'', # 0x78 +'', # 0x79 +'', # 0x7a +'', # 0x7b +'', # 0x7c +'', # 0x7d +'', # 0x7e +'', # 0x7f +'', # 0x80 +'', # 0x81 +'', # 0x82 +'', # 0x83 +'', # 0x84 +'', # 0x85 +'', # 0x86 +'', # 0x87 +'', # 0x88 +'', # 0x89 +'', # 0x8a +'', # 0x8b +'', # 0x8c +'', # 0x8d +'', # 0x8e +'', # 0x8f +'', # 0x90 +'', # 0x91 +'', # 0x92 +'', # 0x93 +'', # 0x94 +'', # 0x95 +'', # 0x96 +'', # 0x97 +'', # 0x98 +'', # 0x99 +'', # 0x9a +'', # 0x9b +'', # 0x9c +'', # 0x9d +'', # 0x9e +'', # 0x9f +'', # 0xa0 +'', # 0xa1 +'', # 0xa2 +'', # 0xa3 +'', # 0xa4 +'', # 0xa5 +'', # 0xa6 +'', # 0xa7 +'', # 0xa8 +'', # 0xa9 +'', # 0xaa +'', # 0xab +'', # 0xac +'', # 0xad +'', # 0xae +'', # 0xaf +'', # 0xb0 +'', # 0xb1 +'', # 0xb2 +'', # 0xb3 +'', # 0xb4 +'', # 0xb5 +'', # 0xb6 +'', # 0xb7 +'', # 0xb8 +'', # 0xb9 +'', # 0xba +'', # 0xbb +'', # 0xbc +'', # 0xbd +'', # 0xbe +'', # 0xbf +'', # 0xc0 +'', # 0xc1 +'', # 0xc2 +'', # 0xc3 +'', # 0xc4 +'', # 0xc5 +'', # 0xc6 +'', # 0xc7 +'', # 0xc8 +'', # 0xc9 +'', # 0xca +'', # 0xcb +'', # 0xcc +'', # 0xcd +'', # 0xce +'', # 0xcf +'', # 0xd0 +'', # 0xd1 +'', # 0xd2 +'', # 0xd3 +'', # 0xd4 +'', # 0xd5 +'', # 0xd6 +'', # 0xd7 +'', # 0xd8 +'', # 0xd9 +'', # 0xda +'', # 0xdb +'', # 0xdc +'', # 0xdd +'', # 0xde +'', # 0xdf +'', # 0xe0 +'', # 0xe1 +'', # 0xe2 +'', # 0xe3 +'', # 0xe4 +'', # 0xe5 +'', # 0xe6 +'', # 0xe7 +'', # 0xe8 +'', # 0xe9 +'', # 0xea +'', # 0xeb +'', # 0xec +'', # 0xed +'', # 0xee +'', # 0xef +'', # 0xf0 +'', # 0xf1 +'', # 0xf2 +'', # 0xf3 +'', # 0xf4 +'', # 0xf5 +'', # 0xf6 +'', # 0xf7 +'', # 0xf8 +'', # 0xf9 +'', # 0xfa +'', # 0xfb +'', # 0xfc +'', # 0xfd +'', # 0xfe +) diff --git a/lib/unidecode/x032.py b/lib/unidecode/x032.py index 3295a25c..30282d4a 100644 --- a/lib/unidecode/x032.py +++ b/lib/unidecode/x032.py @@ -80,21 +80,21 @@ data = ( '[?]', # 0x4e '[?]', # 0x4f '[?]', # 0x50 -'[?]', # 0x51 -'[?]', # 0x52 -'[?]', # 0x53 -'[?]', # 0x54 -'[?]', # 0x55 -'[?]', # 0x56 -'[?]', # 0x57 -'[?]', # 0x58 -'[?]', # 0x59 -'[?]', # 0x5a -'[?]', # 0x5b -'[?]', # 0x5c -'[?]', # 0x5d -'[?]', # 0x5e -'[?]', # 0x5f +'21', # 0x51 +'22', # 0x52 +'23', # 0x53 +'24', # 0x54 +'25', # 0x55 +'26', # 0x56 +'27', # 0x57 +'28', # 0x58 +'29', # 0x59 +'30', # 0x5a +'31', # 0x5b +'32', # 0x5c +'33', # 0x5d +'34', # 0x5e +'35', # 0x5f '(g)', # 0x60 '(n)', # 0x61 '(d)', # 0x62 @@ -176,21 +176,21 @@ data = ( '(Zi) ', # 0xae '(Xie) ', # 0xaf '(Ye) ', # 0xb0 -'[?]', # 0xb1 -'[?]', # 0xb2 -'[?]', # 0xb3 -'[?]', # 0xb4 -'[?]', # 0xb5 -'[?]', # 0xb6 -'[?]', # 0xb7 -'[?]', # 0xb8 -'[?]', # 0xb9 -'[?]', # 0xba -'[?]', # 0xbb -'[?]', # 0xbc -'[?]', # 0xbd -'[?]', # 0xbe -'[?]', # 0xbf +'36', # 0xb1 +'37', # 0xb2 +'38', # 0xb3 +'39', # 0xb4 +'40', # 0xb5 +'41', # 0xb6 +'42', # 0xb7 +'43', # 0xb8 +'44', # 0xb9 +'45', # 0xba +'46', # 0xbb +'47', # 0xbc +'48', # 0xbd +'49', # 0xbe +'50', # 0xbf '1M', # 0xc0 '2M', # 0xc1 '3M', # 0xc2 diff --git a/lib/unidecode/x04e.py b/lib/unidecode/x04e.py index e346f67b..b472b855 100644 --- a/lib/unidecode/x04e.py +++ b/lib/unidecode/x04e.py @@ -1,5 +1,5 @@ data = ( -'[?] ', # 0x00 +'Yi ', # 0x00 'Ding ', # 0x01 'Kao ', # 0x02 'Qi ', # 0x03