Merge pull request #616 from JackDandy/feature/UpdateChardet

Update chardet packages 2.3.0 (26982c5) to 2.3.0 (d7fae98).
This commit is contained in:
JackDandy 2016-01-12 03:17:29 +00:00
commit 27e3076d1f
5 changed files with 6 additions and 5 deletions

View file

@@ -10,6 +10,7 @@
* Update backports/ssl_match_hostname 3.4.0.2 to 3.5.0.1 (r18) * Update backports/ssl_match_hostname 3.4.0.2 to 3.5.0.1 (r18)
* Update cachecontrol library 0.11.2 to 0.11.5 * Update cachecontrol library 0.11.2 to 0.11.5
* Update Certifi to 2015.11.20.1 (385476b) * Update Certifi to 2015.11.20.1 (385476b)
* Update chardet packages 2.3.0 (26982c5) to 2.3.0 (d7fae98)
### 0.11.0 (2016-01-10 22:30:00 UTC) ### 0.11.0 (2016-01-10 22:30:00 UTC)

View file

@@ -131,7 +131,7 @@ class JapaneseContextAnalysis(object):
def reset(self): def reset(self):
self._total_rel = 0 # total sequence received self._total_rel = 0 # total sequence received
# category counters, each interger counts sequence in its category # category counters, each integer counts sequence in its category
self._rel_sample = [0] * self.NUM_OF_CATEGORY self._rel_sample = [0] * self.NUM_OF_CATEGORY
# if last byte in current buffer is not the last byte of a character, # if last byte in current buffer is not the last byte of a character,
# we need to know how many bytes to skip in next buffer # we need to know how many bytes to skip in next buffer

View file

@@ -323,7 +323,7 @@ GB2312_ST = (
# To be accurate, the length of class 6 can be either 2 or 4. # To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since # But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validing # it is used for frequency analysis only, and we are validating
# each code range there as well. So it is safe to set it to be # each code range there as well. So it is safe to set it to be
# 2 here. # 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

View file

@@ -103,7 +103,7 @@ class SingleByteCharSetProber(CharSetProber):
self._state = ProbingState.found_it self._state = ProbingState.found_it
elif cf < self.NEGATIVE_SHORTCUT_THRESHOLD: elif cf < self.NEGATIVE_SHORTCUT_THRESHOLD:
self.logger.debug('%s confidence = %s, below negative ' self.logger.debug('%s confidence = %s, below negative '
'shortcut threshhold %s', 'shortcut threshold %s',
self._model['charset_name'], cf, self._model['charset_name'], cf,
self.NEGATIVE_SHORTCUT_THRESHOLD) self.NEGATIVE_SHORTCUT_THRESHOLD)
self._state = ProbingState.not_me self._state = ProbingState.not_me

View file

@@ -29,7 +29,7 @@
Module containing the UniversalDetector detector class, which is the primary Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use. class a user of ``chardet`` should use.
:author: Mark Pilgrim (intial port to Python) :author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code) :author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0) :author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco :author: Ian Cordasco
@@ -224,7 +224,7 @@ class UniversalDetector(object):
return self.result return self.result
if self.logger.getEffectiveLevel() == logging.DEBUG: if self.logger.getEffectiveLevel() == logging.DEBUG:
self.logger.debug('no probers hit minimum threshhold') self.logger.debug('no probers hit minimum threshold')
for prober in self._charset_probers[0].probers: for prober in self._charset_probers[0].probers:
if not prober: if not prober:
continue continue