diff --git a/CHANGES.md b/CHANGES.md index 5b5d7825..60377395 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ * Update CacheControl 0.12.11 (c05ef9e) to 0.13.1 (783a338) * Update feedparser 6.0.10 (859ac57) to 6.0.10 (9865dec) * Update filelock 3.12.0 (b4713c9) to 3.12.4 (c1163ae) +* Update idna library 3.4 (37c7d9b) to 3.4 (cab054c) * Update Msgpack 1.0.5 (0516c2c) to 1.0.6 (e1d3d5d) * Update package resource API 67.5.1 (f51eccd) to 68.1.2 (1ef36f2) * Update soupsieve 2.3.2.post1 (792d566) to 2.4.1 (2e66beb) diff --git a/lib/idna/codec.py b/lib/idna/codec.py index 7a0558d4..eaeada58 100644 --- a/lib/idna/codec.py +++ b/lib/idna/codec.py @@ -1,7 +1,7 @@ from .core import encode, decode, alabel, ulabel, IDNAError import codecs import re -from typing import Tuple, Optional +from typing import Any, Tuple, Optional _unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]') @@ -26,24 +26,24 @@ class Codec(codecs.Codec): return decode(data), len(data) class IncrementalEncoder(codecs.BufferedIncrementalEncoder): - def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore + def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: if errors != 'strict': raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: - return "", 0 + return b'', 0 labels = _unicode_dots_re.split(data) - trailing_dot = '' + trailing_dot = b'' if labels: if not labels[-1]: - trailing_dot = '.' + trailing_dot = b'.' del labels[-1] elif not final: # Keep potentially unfinished label until the next call del labels[-1] if labels: - trailing_dot = '.' + trailing_dot = b'.' result = [] size = 0 @@ -54,18 +54,21 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): size += len(label) # Join with U+002E - result_str = '.'.join(result) + trailing_dot # type: ignore + result_bytes = b'.'.join(result) + trailing_dot size += len(trailing_dot) - return result_str, size + return result_bytes, size class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore + def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: if errors != 'strict': raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) if not data: return ('', 0) + if not isinstance(data, str): + data = str(data, 'ascii') + labels = _unicode_dots_re.split(data) trailing_dot = '' if labels: @@ -99,13 +102,11 @@ class StreamReader(Codec, codecs.StreamReader): pass -def getregentry(name: str) -> Optional[codecs.CodecInfo]: - if name != 'idna' and name != 'idna2008': +def search_function(name: str) -> Optional[codecs.CodecInfo]: + if name != 'idna2008': return None - - # Compatibility as a search_function for codecs.register() return codecs.CodecInfo( - name='idna2008', + name=name, encode=Codec().encode, # type: ignore decode=Codec().decode, # type: ignore incrementalencoder=IncrementalEncoder, @@ -114,4 +115,4 @@ def getregentry(name: str) -> Optional[codecs.CodecInfo]: streamreader=StreamReader, ) -codecs.register(getregentry) +codecs.register(search_function) diff --git a/lib/idna/core.py b/lib/idna/core.py index 4f300371..0bd89a3c 100644 --- a/lib/idna/core.py +++ b/lib/idna/core.py @@ -338,9 +338,9 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes: - if isinstance(s, (bytes, bytearray)): + if not isinstance(s, str): try: - s = s.decode('ascii') + s = str(s, 'ascii') except UnicodeDecodeError: raise IDNAError('should pass a unicode string to the function rather than a byte string.') if uts46: @@ -372,8 +372,8 @@ def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str: try: - if isinstance(s, (bytes, bytearray)): - s = s.decode('ascii') + if not isinstance(s, str): + s = str(s, 'ascii') except UnicodeDecodeError: raise IDNAError('Invalid ASCII in A-label') if uts46: diff --git a/lib/idna/idnadata.py b/lib/idna/idnadata.py index 67db4625..f9bc0d85 100644 --- a/lib/idna/idnadata.py +++ b/lib/idna/idnadata.py @@ -1834,7 +1834,6 @@ codepoint_classes = { 0xa7d50000a7d6, 0xa7d70000a7d8, 0xa7d90000a7da, - 0xa7f20000a7f5, 0xa7f60000a7f8, 0xa7fa0000a828, 0xa82c0000a82d, @@ -1907,9 +1906,7 @@ codepoint_classes = { 0x1060000010737, 0x1074000010756, 0x1076000010768, - 0x1078000010786, - 0x10787000107b1, - 0x107b2000107bb, + 0x1078000010781, 0x1080000010806, 0x1080800010809, 0x1080a00010836,