Merge branch 'release/0.16.0'

This commit is contained in:
JackDandy 2018-04-26 17:08:53 +01:00
commit 603b8d57b4
511 changed files with 56224 additions and 31429 deletions

View file

@ -1,4 +1,39 @@
### 0.15.14 (2018-04-20 12:00:00 UTC)
### 0.16.0 (2018-04-26 17:10:00 UTC)
* Change search show result 'exists in db' text into a link to display show page
* Change increase namecache size and fix deleting items from it when at capacity
* Change improve security with cross-site request forgery (xsrf) protection on web forms
* Change improve security by sending header flags httponly and secure with cookies
* Change improve security with DNS rebinding prevention, set "Allowed browser hostnames" at config/General/Web Interface
* Change improve test for creating self-signed SSL cert
* Change force restart when switching SSL on/off
* Change disable SSL cert verification for logins in pp-scripts
* Change hachoir targa and mpeg_ts mime parser tags so they validate
* Update backports/ssl_match_hostname 3.5.0.1 (r18) to 3.7.0.1 (r28)
* Update cachecontrol library 0.12.3 (db54c40) to 0.12.4 (bd94f7e)
* Update chardet packages 3.0.4 (9b8c5c2) to 4.0.0 (b3d867a)
* Update dateutil library 2.6.1 (2f3a160) to 2.7.2 (ff03c0f)
* Update feedparser library 5.2.1 (f1dd1bb) to 5.2.1 (5646f4c) - Uses the faster cchardet if installed
* Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo
* Update html5lib 0.99999999/1.0b9 (1a28d72) to 1.1-dev (e9ef538)
* Update IMDb 5.1 (r907) to 5.2.1dev20171113 (f640595)
* Update jquery.form plugin 3.51.0 to 4.2.2
* Update moment.js 2.17.1 to 2.21.0
* Update profilehooks 1.9.0 (de7d59b) to 1.10.0 (0ce1e29)
* Update Certifi 2017.07.27 (f808089) to 2018.01.18 (e225253)
* Update PySocks 1.6.5 (b4323df) to 1.6.8 (524ceb4)
* Update rarfile 3.0 (3e54b22) to 3.0 (2704344)
* Update Requests library 2.13.0 (fc54869) to 2.15.1 (282b01a)
* Update scandir 1.3 to 1.6 (c3592ee)
* Update SimpleJSON library 3.10.0 (c52efea) to 3.13.2 (6ffddbe)
* Update Six compatibility library 1.10.0 (r433) to 1.11.0 (68112f3)
* Update Tornado Web Server 5.0.1 (35a538f) to 5.1.dev1 (415f453)
* Update unidecode library 0.04.21 (e99b0e3) to 1.0.22 (81f938d)
* Update webencodings 0.5 (3970651) to 0.5.1 (fa2cb5d)
* Update xmltodict library 0.10.2 (375d3a6) to 0.11.0 (79ac9a4)
### 0.15.14 (2018-04-20 12:00:00 UTC)
* Change prefer modern html5lib over old to prevent display show issue on systems that fail to clean libs
* Change add un/pw for cookie support to improve SpeedCD torrent provider

View file

@ -11,8 +11,6 @@ Libs with customisations...
/lib/hachoir_parser/misc/torrent.py
/lib/lockfile/mkdirlockfile.py
/lib/pynma/pynma.py
/lib/requests/packages/urllib3/connectionpool.py
/lib/requests/packages/urllib3/util/ssl_.py
/lib/tmdb_api/tmdb_api.py
/lib/tornado
/lib/tvdb_api/tvdb_api.py

View file

@ -36,6 +36,8 @@ import warnings
warnings.filterwarnings('ignore', module=r'.*fuzzywuzzy.*')
warnings.filterwarnings('ignore', module=r'.*Cheetah.*')
warnings.filterwarnings('ignore', module=r'.*connectionpool.*', message='.*certificate verification.*')
warnings.filterwarnings('ignore', module=r'.*ssl_.*', message='.*SSLContext object.*')
if not (2, 7, 9) <= sys.version_info < (3, 0):
print('Python %s.%s.%s detected.' % sys.version_info[:3])
@ -55,7 +57,7 @@ try:
except ValueError:
print('Sorry, requires Python module Cheetah 2.1.0 or newer.')
sys.exit(1)
except:
except (StandardError, Exception):
print('The Python module Cheetah is required')
sys.exit(1)
@ -325,23 +327,25 @@ class SickGear(object):
print(u'Unable to find "%s", all settings will be default!' % sickbeard.CONFIG_FILE)
sickbeard.CFG = ConfigObj(sickbeard.CONFIG_FILE)
stack_size = None
try:
stack_size = int(sickbeard.CFG['General']['stack_size'])
except:
except (StandardError, Exception):
stack_size = None
if stack_size:
try:
threading.stack_size(stack_size)
except (StandardError, Exception) as e:
print('Stack Size %s not set: %s' % (stack_size, e.message))
except (StandardError, Exception) as er:
print('Stack Size %s not set: %s' % (stack_size, er.message))
# check all db versions
for d, min_v, max_v, base_v, mo in [
('failed.db', sickbeard.failed_db.MIN_DB_VERSION, sickbeard.failed_db.MAX_DB_VERSION, sickbeard.failed_db.TEST_BASE_VERSION, 'FailedDb'),
('cache.db', sickbeard.cache_db.MIN_DB_VERSION, sickbeard.cache_db.MAX_DB_VERSION, sickbeard.cache_db.TEST_BASE_VERSION, 'CacheDb'),
('sickbeard.db', sickbeard.mainDB.MIN_DB_VERSION, sickbeard.mainDB.MAX_DB_VERSION, sickbeard.mainDB.TEST_BASE_VERSION, 'MainDb')
('failed.db', sickbeard.failed_db.MIN_DB_VERSION, sickbeard.failed_db.MAX_DB_VERSION,
sickbeard.failed_db.TEST_BASE_VERSION, 'FailedDb'),
('cache.db', sickbeard.cache_db.MIN_DB_VERSION, sickbeard.cache_db.MAX_DB_VERSION,
sickbeard.cache_db.TEST_BASE_VERSION, 'CacheDb'),
('sickbeard.db', sickbeard.mainDB.MIN_DB_VERSION, sickbeard.mainDB.MAX_DB_VERSION,
sickbeard.mainDB.TEST_BASE_VERSION, 'MainDb')
]:
cur_db_version = db.DBConnection(d).checkDBVersion()
@ -407,19 +411,25 @@ class SickGear(object):
self.webhost = (('0.0.0.0', '::')[sickbeard.WEB_IPV6], '')[sickbeard.WEB_IPV64]
# web server options
self.web_options = {
'port': int(self.start_port),
'host': self.webhost,
'data_root': os.path.join(sickbeard.PROG_DIR, 'gui', sickbeard.GUI_NAME),
'web_root': sickbeard.WEB_ROOT,
'log_dir': self.log_dir,
'username': sickbeard.WEB_USERNAME,
'password': sickbeard.WEB_PASSWORD,
'enable_https': sickbeard.ENABLE_HTTPS,
'handle_reverse_proxy': sickbeard.HANDLE_REVERSE_PROXY,
'https_cert': os.path.join(sickbeard.PROG_DIR, sickbeard.HTTPS_CERT),
'https_key': os.path.join(sickbeard.PROG_DIR, sickbeard.HTTPS_KEY),
}
self.web_options = dict(
host=self.webhost,
port=int(self.start_port),
web_root=sickbeard.WEB_ROOT,
data_root=os.path.join(sickbeard.PROG_DIR, 'gui', sickbeard.GUI_NAME),
log_dir=self.log_dir,
username=sickbeard.WEB_USERNAME,
password=sickbeard.WEB_PASSWORD,
handle_reverse_proxy=sickbeard.HANDLE_REVERSE_PROXY,
enable_https=False,
https_cert=None,
https_key=None,
)
if sickbeard.ENABLE_HTTPS:
self.web_options.update(dict(
enable_https=sickbeard.ENABLE_HTTPS,
https_cert=os.path.join(sickbeard.PROG_DIR, sickbeard.HTTPS_CERT),
https_key=os.path.join(sickbeard.PROG_DIR, sickbeard.HTTPS_KEY)
))
# start web server
try:
@ -594,7 +604,7 @@ class SickGear(object):
# shutdown web server
if self.webserver:
logger.log('Shutting down Tornado')
self.webserver.shutDown()
self.webserver.shut_down()
try:
self.webserver.join(10)
except (StandardError, Exception):
@ -634,6 +644,7 @@ class SickGear(object):
def exit(code):
os._exit(code)
if __name__ == '__main__':
if sys.hexversion >= 0x020600F0:
freeze_support()

View file

@ -4,13 +4,18 @@ import os
import shutil
parent_dir = os.path.abspath(os.path.dirname(__file__))
cleaned_file = os.path.abspath(os.path.join(parent_dir, '.cleaned.tmp'))
if not os.path.isfile(cleaned_file):
cleaned_file = os.path.abspath(os.path.join(parent_dir, '.cleaned002.tmp'))
test = os.path.abspath(os.path.join(parent_dir, 'lib', 'hachoir_core'))
if not os.path.isfile(cleaned_file) or os.path.exists(test):
dead_dirs = [os.path.abspath(os.path.join(parent_dir, *d)) for d in [
('.cleaned.tmp',),
('tornado',),
('lib', 'feedcache'),
('lib', 'hachoir_core'), ('lib', 'hachoir_metadata'), ('lib', 'hachoir_parser'),
('lib', 'jsonrpclib'),
('lib', 'shove'),
('lib', 'trakt'),
('lib', 'tvrage_api'),
('lib', 'unrar2')
]]

View file

@ -63,7 +63,7 @@
# Send PostProcessing requests to SickGear
#
# PostProcessing-Script version: 1.3.
# PostProcessing-Script version: 1.5.
# <!--
# For more info and updates please visit forum topic at
# -->
@ -155,11 +155,15 @@ import locale
import os
import re
import sys
import warnings
__version__ = '1.3'
__version__ = '1.5'
verbose = 0 or 'yes' == os.environ.get('NZBPO_SG_VERBOSE', 'no')
warnings.filterwarnings('ignore', module=r'.*connectionpool.*', message='.*certificate verification.*')
warnings.filterwarnings('ignore', module=r'.*ssl_.*', message='.*SSLContext object.*')
# NZBGet exit codes for post-processing scripts (Queue-scripts don't have any special exit codes).
POSTPROCESS_SUCCESS, POSTPROCESS_ERROR, POSTPROCESS_NONE = 93, 94, 95
@ -485,7 +489,10 @@ def call_sickgear(nzb_name, dir_name, test=False):
s = requests.Session()
if username or password:
login = '%s%s:%s%s/login' % (protocol, host, port, webroot)
r = s.get(login, verify=False)
login_params = {'username': username, 'password': password}
if 401 == r.status_code and r.cookies.get('_xsrf'):
login_params['_xsrf'] = r.cookies.get('_xsrf')
s.post(login, data=login_params, stream=True, verify=False)
r = s.get(url, auth=(username, password), params=params, stream=True, verify=False, timeout=900)
except (StandardError, Exception):

View file

@ -23,11 +23,15 @@ from __future__ import with_statement
import os.path
import sys
import warnings
sickbeardPath = os.path.split(os.path.split(sys.argv[0])[0])[0]
sys.path.insert(1, os.path.join(sickbeardPath, 'lib'))
sys.path.insert(1, sickbeardPath)
warnings.filterwarnings('ignore', module=r'.*connectionpool.*', message='.*certificate verification.*')
warnings.filterwarnings('ignore', module=r'.*ssl_.*', message='.*SSLContext object.*')
try:
import requests
except ImportError:
@ -132,7 +136,12 @@ def processEpisode(dir_to_process, org_NZB_name=None, status=None):
try:
sess = requests.Session()
sess.post(login_url, data={'username': username, 'password': password}, stream=True, verify=False)
if username or password:
r = sess.get(login_url, verify=False)
login_params = {'username': username, 'password': password}
if 401 == r.status_code and r.cookies.get('_xsrf'):
login_params['_xsrf'] = r.cookies.get('_xsrf')
sess.post(login_url, data=login_params, stream=True, verify=False)
result = sess.get(url, params=params, stream=True, verify=False)
if result.status_code == 401:
print('Verify and use correct username and password in autoProcessTV.cfg')
@ -150,4 +159,4 @@ def processEpisode(dir_to_process, org_NZB_name=None, status=None):
if __name__ == '__main__':
print ('This module is supposed to be used as import in other scripts and not run standalone.')
print ('Use sabToSickBeard instead.')
sys.exit(1)
sys.exit(1)

View file

@ -5,12 +5,16 @@ import os
import time
import ConfigParser
import logging
import warnings
sickbeardPath = os.path.split(os.path.split(sys.argv[0])[0])[0]
sys.path.insert(1, os.path.join(sickbeardPath, 'lib'))
sys.path.insert(1, sickbeardPath)
configFilename = os.path.join(sickbeardPath, 'config.ini')
warnings.filterwarnings('ignore', module=r'.*connectionpool.*', message='.*certificate verification.*')
warnings.filterwarnings('ignore', module=r'.*ssl_.*', message='.*SSLContext object.*')
try:
import requests
except ImportError:
@ -181,7 +185,12 @@ def main():
try:
sess = requests.Session()
sess.post(login_url, data={'username': username, 'password': password}, stream=True, verify=False)
if username or password:
r = sess.get(login_url, verify=False)
login_params = {'username': username, 'password': password}
if 401 == r.status_code and r.cookies.get('_xsrf'):
login_params['_xsrf'] = r.cookies.get('_xsrf')
sess.post(login_url, data=login_params, stream=True, verify=False)
response = sess.get(url, auth=(username, password), params=params, verify=False, allow_redirects=False)
except Exception as e:
scriptlogger.error(u': Unknown exception raised when opening url: ' + str(e))

View file

@ -20,6 +20,7 @@
<div id="config-content">
<form id="configForm" action="saveAnime" method="post">
$xsrf_form_html
<div id="config-components">

View file

@ -42,6 +42,8 @@
<div id="config-content">
<form id="configForm" action="saveGeneral" method="post">
$xsrf_form_html
<div id="config-components">
<ul>
@ -589,6 +591,17 @@
</label>
</div>
<div class="field-pair">
<label for="allowed_hosts">
<span class="component-title">Allowed browser hostnames</span>
<span class="component-desc">
<input type="text" name="allowed_hosts" id="allowed-hosts" value="$sg_str('ALLOWED_HOSTS')" class="form-control input-sm input300">
<p>blank for insecure allow all</p>
<div class="clear-left"><p>whitelist names that browse the interface (e.g. $request_host, my_hostname)</p></div>
</span>
</label>
</div>
<input type="submit" class="btn config_submitter" value="Save Changes">
</fieldset>

View file

@ -38,6 +38,8 @@
<div id="config">
<div id="config-content">
<form id="configForm" action="$sbRoot/config/notifications/save_notifications" method="post">
$xsrf_form_html
<div id="config-components">
<ul>
<li><a href="#tabs-1">Home Theater / NAS</a></li>

View file

@ -30,6 +30,7 @@
<div id="config-content" class="linefix">
<form id="configForm" action="savePostProcessing" method="post">
$xsrf_form_html
<div id="config-components">
<ul>

View file

@ -67,6 +67,7 @@
<div id="config-content">
<form id="configForm" action="saveProviders" method="post">
$xsrf_form_html
<div id="config-components">
<ul>

View file

@ -30,6 +30,7 @@
<div id="config-content" class="linefix">
<form id="configForm" action="saveSearch" method="post">
$xsrf_form_html
<div id="config-components">
<ul>

View file

@ -49,6 +49,7 @@
<div id="config-content">
<form id="configForm" action="saveSubtitles" method="post">
$xsrf_form_html
<div id="config-components">
<ul>

View file

@ -54,6 +54,7 @@
<form action="editShow" method="post" id="editShow" style="width:894px">
<input type="hidden" name="show" id="show" value="$show.indexerid">
<input type="hidden" name="indexer" id="indexer" value="$show.indexer">
$xsrf_form_html
<div id="config-components">
<ul>

View file

@ -38,6 +38,7 @@ var config = { sortArticle: #echo ['!1','!0'][$sg_var('SORT_ARTICLE')]# }
<h3>Existing show folders</h3>
<form id="addShowForm" method="post" action="$sbRoot/home/addShows/addNewShow" accept-charset="utf-8">
$xsrf_form_html
<span#if $kwargs.get('hash_dir', None)# class="hide"#end if#>
<p>Tip: shows are added quicker when usable show nfo and xml metadata is found</p>

View file

@ -43,6 +43,7 @@
#end if
<form id="addShowForm"#if $kwargs.get('action')# class="fullwidth"#end if# method="post" action="$sbRoot/home/addShows/addNewShow" accept-charset="utf-8">
$xsrf_form_html
<fieldset class="sectionwrap step-one">
<legend class="legendStep"><p>#if $use_provided_info#Using known show information#else#Find show at TV info source#end if#</p></legend>

View file

@ -18,6 +18,7 @@
<form name="processForm" method="post" action="processEpisode">
<input type="hidden" id="type" name="type" value="manual">
$xsrf_form_html
<div id="postProcess" class="stepDiv">

View file

@ -30,7 +30,8 @@
<br />
<form id="addShowForm" method="post" action="$sbRoot/home/addShows/addRecommendedShow" accept-charset="utf-8">
$xsrf_form_html
<fieldset class="sectionwrap step-one">
<legend class="legendStep"><p>Select a recommended show</p></legend>
@ -69,4 +70,4 @@
</div>
#include $os.path.join($sickbeard.PROG_DIR,"gui/slick/interfaces/default/inc_bottom.tmpl")
#include $os.path.join($sickbeard.PROG_DIR,"gui/slick/interfaces/default/inc_bottom.tmpl")

View file

@ -44,9 +44,9 @@
#end if
</style>
</head>
<body><div class="login"><form action="" method="post"><div class="login-img center-block form-group"></div>
<body><div class="login"><form action="" method="post">$xsrf_form_html<div class="login-img center-block form-group"></div>
<div class="login-error"><div class="#if 'authfailed'==$resp then 'showme' else 'hide' #"><i class="error16"></i><span class="red-text">Authentication failed, please retry</span></div></div>
<div class="form-group input-group"><span class="input-group-addon"><i class="icons icons-user" style=""></i></span><input name="username" class="form-control" placeholder="Username" type="text" autofocus></div>
<div class="form-group input-group"><span class="input-group-addon"><i class="icons icons-lock" style=""></i></span><input name="password" class="form-control" placeholder="Password" type="password"></div>
<div class="form-group"><label for="remember_me" class="login-remember"><input id="remember_me" name="remember_me" type="checkbox" value="1" checked="checked"><span>Remember me</span></label><input class="btn pull-right" name="submit" type="submit" value="Login"></div>
</form></div></body></html>
</form></div></body></html>

View file

@ -83,6 +83,7 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name))
<h1 class="title">$title</h1>
#end if
<form name="bulkChangeForm" method="post" action="bulkChange">
$xsrf_form_html
<table id="bulkChangeTable" class="sickbeardTable tablesorter" cellspacing="1" border="0" cellpadding="0">
<thead>

View file

@ -62,6 +62,8 @@
<script type="text/javascript" src="$sbRoot/js/manageEpisodeStatuses.js?v=$sbPID"></script>
<form action="$sbRoot/manage/changeEpisodeStatuses" method="post">
$xsrf_form_html
<input type="hidden" id="oldStatus" name="oldStatus" value="$whichStatus">
<h3><span class="grey-text">$ep_count</span> episode#echo ('s', '')[1 == $ep_count]# marked <span class="grey-text">$statusStrings[$whichStatus].lower()</span> in <span class="grey-text">${len($sorted_show_ids)}</span> show#echo ('s', '')[1 == len($sorted_show_ids)]#</h3>

View file

@ -22,6 +22,8 @@
<script type="text/javascript" src="$sbRoot/js/massEdit.js?v=$sbPID"></script>
<form action="massEditSubmit" method="post">
$xsrf_form_html
<input type="hidden" name="toEdit" value="$showList">
<div class="optionWrapper">

View file

@ -45,6 +45,8 @@
<input type="hidden" id="selectSubLang" name="selectSubLang" value="$whichSubs">
<form action="$sbRoot/manage/downloadSubtitleMissed" method="post">
$xsrf_form_html
<h2>Episodes without $subsLanguage subtitles.</h2>
<br />
Download missed subtitles for selected episodes <input class="btn btn-inline" type="submit" value="Go" />

View file

@ -266,8 +266,10 @@ $(document).ready(function () {
});
function config_success(response) {
if (response == 'reload') {
if ('reload' == response) {
window.location.reload(true);
} else if ('restart' == response) {
window.location.href = sbRoot + $('a.restart').attr('href')
}
$('.config_submitter').each(function () {
$(this).removeAttr('disabled');

View file

@ -827,6 +827,7 @@ $(document).ready(function(){
return
}
$.post(sbRoot + '/home/save_show_email', {
_xsrf: Cookies.get('_xsrf'),
show: show,
emails: $('#show-email-list').val()},
function (data){

View file

@ -130,6 +130,7 @@ $(document).ready(function () {
$.SickGear.history.lastDeleteRecords = deleteRecords ? checked : '';
$.post($.SickGear.Root + '/history/watched',
{
_xsrf: Cookies.get('_xsrf'),
tvew_id: delArr.join('|'),
files: (deleteFiles ? '1' : ''),
records: (deleteRecords ? '1' : '')

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -85,7 +85,7 @@ $(document).ready(function () {
Aired: 8, Network: 9, Genre: 10, Overview: 11, RelSort: 12, DateSort: 13, AzSort: 14, ImgUrl: 15
};
$.each(data.results, function (index, item) {
attrs = (!0 === item[result.isInDB] ? ' disabled="disabled"' : (!0 === checked ? '' : ' checked'));
attrs = (!1 !== item[result.isInDB] ? ' disabled="disabled"' : (!0 === checked ? '' : ' checked'));
checked = (' checked' === attrs) ? !0 : checked;
rowType = (0 == row % 2 ? '' : ' alt');
row++;
@ -102,7 +102,7 @@ $(document).ready(function () {
srcState = [
null === item[result.SrcName] ? '' : item[result.SrcName],
!1 === item[result.isInDB] ? '' : '<span class="exists-db">exists in db</span>']
!1 === item[result.isInDB] ? '' : '<span class="exists-db"><a href="' + sbRoot + item[result.isInDB] + '" target="_blank">exists in db</a></span>']
.join(' - ').replace(/(^[\s-]+|[\s-]+$)/, '');
resultStr += '<div class="results-item' + rowType + '" data-indb="' + (!1 === item[result.isInDB] ? '' : '1') + '" data-sort-rel="' + item[result.RelSort] + '" data-sort-date="' + item[result.DateSort] + '" data-sort-az="' + item[result.AzSort] + '">'
+ '<input id="whichSeries" type="radio"'

View file

@ -1,51 +0,0 @@
Python License (Python-2.0)
Python License, Version 2 (Python-2.0)
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python
alone or in any derivative version, provided, however, that PSF's
License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
2001-2013 Python Software Foundation; All Rights Reserved" are retained in
Python alone or in any derivative version prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.

View file

@ -1,69 +0,0 @@
The ssl.match_hostname() function from Python 3.5
=================================================
The Secure Sockets Layer is only actually *secure*
if you check the hostname in the certificate returned
by the server to which you are connecting,
and verify that it matches to hostname
that you are trying to reach.
But the matching logic, defined in `RFC2818`_,
can be a bit tricky to implement on your own.
So the ``ssl`` package in the Standard Library of Python 3.2
and greater now includes a ``match_hostname()`` function
for performing this check instead of requiring every application
to implement the check separately.
This backport brings ``match_hostname()`` to users
of earlier versions of Python.
Simply make this distribution a dependency of your package,
and then use it like this::
from backports.ssl_match_hostname import match_hostname, CertificateError
[...]
sslsock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_SSLv23,
cert_reqs=ssl.CERT_REQUIRED, ca_certs=...)
try:
match_hostname(sslsock.getpeercert(), hostname)
except CertificateError, ce:
...
Brandon Craig Rhodes is merely the packager of this distribution;
the actual code inside comes from Python 3.5 with small changes for
portability.
Requirements
------------
* If you want to verify hosts match with certificates via ServerAltname
IPAddress fields, you need to install the `ipaddress module`_.
backports.ssl_match_hostname will continue to work without ipaddress but
will only be able to handle ServerAltName DNSName fields, not IPAddress.
System packagers (Linux distributions, et al) are encouraged to add
this as a hard dependency in their packages.
* If you need to use this on Python versions earlier than 2.6 you will need to
install the `ssl module`_. From Python 2.6 upwards ``ssl`` is included in
the Python Standard Library so you do not need to install it separately.
.. _`ipaddress module`:: https://pypi.python.org/pypi/ipaddress
.. _`ssl module`:: https://pypi.python.org/pypi/ssl
History
-------
* This function was introduced in python-3.2
* It was updated for python-3.4a1 for a CVE
(backports-ssl_match_hostname-3.4.0.1)
* It was updated from RFC2818 to RFC 6125 compliance in order to fix another
security flaw for python-3.3.3 and python-3.4a5
(backports-ssl_match_hostname-3.4.0.2)
* It was updated in python-3.5 to handle IPAddresses in ServerAltName fields
(something that backports.ssl_match_hostname will do if you also install the
ipaddress library from pypi).
.. _RFC2818: http://tools.ietf.org/html/rfc2818.html

View file

@ -1,82 +1,134 @@
"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
"""The match_hostname() function from Python 3.7.0, essential when using SSL."""
import re
import sys
import socket as _socket
# ipaddress has been backported to 2.6+ in pypi. If it is installed on the
# system, use it to handle IPAddress ServerAltnames (this was added in
# python-3.5) otherwise only do DNS matching. This allows
# backports.ssl_match_hostname to continue to be used all the way back to
# python-2.4.
try:
import ipaddress
except ImportError:
ipaddress = None
__version__ = '3.5.0.1'
# Divergence: Python-3.7+'s _ssl has this exception type but older Pythons do not
from _ssl import SSLCertVerificationError
CertificateError = SSLCertVerificationError
except:
class CertificateError(ValueError):
pass
class CertificateError(ValueError):
pass
__version__ = '3.7.0.1'
def _dnsname_match(dn, hostname, max_wildcards=1):
# Divergence: Added to deal with ipaddess as bytes on python2
def _to_text(obj):
if isinstance(obj, str) and sys.version_info < (3,):
obj = unicode(obj, encoding='ascii', errors='strict')
elif sys.version_info >= (3,) and isinstance(obj, bytes):
obj = str(obj, encoding='ascii', errors='strict')
return obj
def _to_bytes(obj):
if isinstance(obj, str) and sys.version_info >= (3,):
obj = bytes(obj, encoding='ascii', errors='strict')
elif sys.version_info < (3,) and isinstance(obj, unicode):
obj = obj.encode('ascii', 'strict')
return obj
def _dnsname_match(dn, hostname):
"""Matching according to RFC 6125, section 6.4.3
http://tools.ietf.org/html/rfc6125#section-6.4.3
- Hostnames are compared lower case.
- For IDNA, both dn and hostname must be encoded as IDN A-label (ACE).
- Partial wildcards like 'www*.example.org', multiple wildcards, sole
wildcard or wildcards in labels other then the left-most label are not
supported and a CertificateError is raised.
- A wildcard must match at least one character.
"""
pats = []
if not dn:
return False
# Ported from python3-syntax:
# leftmost, *remainder = dn.split(r'.')
parts = dn.split(r'.')
leftmost = parts[0]
remainder = parts[1:]
wildcards = leftmost.count('*')
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
# than one wildcard per fragment. A survey of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn))
wildcards = dn.count('*')
# speed up common case w/o wildcards
if not wildcards:
return dn.lower() == hostname.lower()
# RFC 6125, section 6.4.3, subitem 1.
# The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
if wildcards > 1:
# Divergence .format() to percent formatting for Python < 2.6
raise CertificateError(
"too many wildcards in certificate DNS name: %s" % repr(dn))
dn_leftmost, sep, dn_remainder = dn.partition('.')
if '*' in dn_remainder:
# Only match wildcard in leftmost segment.
# Divergence .format() to percent formatting for Python < 2.6
raise CertificateError(
"wildcard can only be present in the leftmost label: "
"%s." % repr(dn))
if not sep:
# no right side
# Divergence .format() to percent formatting for Python < 2.6
raise CertificateError(
"sole wildcard without additional labels are not support: "
"%s." % repr(dn))
if dn_leftmost != '*':
# no partial wildcard matching
# Divergence .format() to percent formatting for Python < 2.6
raise CertificateError(
"partial wildcards in leftmost label are not supported: "
"%s." % repr(dn))
hostname_leftmost, sep, hostname_remainder = hostname.partition('.')
if not hostname_leftmost or not sep:
# wildcard must match at least one char
return False
return dn_remainder.lower() == hostname_remainder.lower()
def _inet_paton(ipname):
"""Try to convert an IP address to packed binary form
Supports IPv4 addresses on all platforms and IPv6 on platforms with IPv6
support.
"""
# inet_aton() also accepts strings like '1'
# Divergence: We make sure we have native string type for all python versions
try:
b_ipname = _to_bytes(ipname)
except UnicodeError:
raise ValueError("%s must be an all-ascii string." % repr(ipname))
# Set ipname in native string format
if sys.version_info < (3,):
n_ipname = b_ipname
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
n_ipname = ipname
# add the remaining fragments, ignore any wildcards
for frag in remainder:
pats.append(re.escape(frag))
if n_ipname.count('.') == 3:
try:
return _socket.inet_aton(n_ipname)
# Divergence: OSError on late python3. socket.error earlier.
# Null bytes generate ValueError on python3(we want to raise
# ValueError anyway), TypeError # earlier
except (OSError, _socket.error, TypeError):
pass
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
return pat.match(hostname)
try:
return _socket.inet_pton(_socket.AF_INET6, n_ipname)
# Divergence: OSError on late python3. socket.error earlier.
# Null bytes generate ValueError on python3(we want to raise
# ValueError anyway), TypeError # earlier
except (OSError, _socket.error, TypeError):
# Divergence .format() to percent formatting for Python < 2.6
raise ValueError("%s is neither an IPv4 nor an IP6 "
"address." % repr(ipname))
except AttributeError:
# AF_INET6 not available
pass
# Divergence .format() to percent formatting for Python < 2.6
raise ValueError("%s is not an IPv4 address." % repr(ipname))
def _to_unicode(obj):
if isinstance(obj, str) and sys.version_info < (3,):
obj = unicode(obj, encoding='ascii', errors='strict')
return obj
def _ipaddress_match(ipname, host_ip):
"""Exact matching of IP addresses.
@ -85,15 +137,19 @@ def _ipaddress_match(ipname, host_ip):
(section 1.7.2 - "Out of Scope").
"""
# OpenSSL may add a trailing newline to a subjectAltName's IP address
# Divergence from upstream: ipaddress can't handle byte str
ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
ip = _inet_paton(ipname.rstrip())
return ip == host_ip
def match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
rules are followed, but IP addresses are not accepted for *hostname*.
rules are followed.
The function matches IP addresses rather than dNSNames if hostname is a
valid ipaddress string. IPv4 addresses are supported on all platforms.
IPv6 addresses are supported on platforms with IPv6 support (AF_INET6
and inet_pton).
CertificateError is raised on failure. On success, the function
returns nothing.
@ -103,22 +159,16 @@ def match_hostname(cert, hostname):
"SSL socket or SSL context with either "
"CERT_OPTIONAL or CERT_REQUIRED")
try:
# Divergence from upstream: ipaddress can't handle byte str
host_ip = ipaddress.ip_address(_to_unicode(hostname))
# Divergence: Deal with hostname as bytes
host_ip = _inet_paton(_to_text(hostname))
except ValueError:
# Not an IP address (common case)
host_ip = None
except UnicodeError:
# Divergence from upstream: Have to deal with ipaddress not taking
# byte strings. addresses should be all ascii, so we consider it not
# an ipaddress in this case
# Divergence: Deal with hostname as byte strings.
# IP addresses should be all ascii, so we consider it not
# an IP address if this fails
host_ip = None
except AttributeError:
# Divergence from upstream: Make ipaddress library optional
if ipaddress is None:
host_ip = None
else:
raise
dnsnames = []
san = cert.get('subjectAltName', ())
for key, value in san:

View file

@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
__version__ = '0.12.3'
__version__ = '0.12.4'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter

View file

@ -1,5 +1,6 @@
import types
import functools
import zlib
from requests.adapters import HTTPAdapter
@ -37,7 +38,10 @@ class CacheControlAdapter(HTTPAdapter):
"""
cacheable = cacheable_methods or self.cacheable_methods
if request.method in cacheable:
cached_response = self.controller.cached_request(request)
try:
cached_response = self.controller.cached_request(request)
except zlib.error:
cached_response = None
if cached_response:
return self.build_response(request, cached_response,
from_cache=True)

View file

@ -8,13 +8,13 @@ from threading import Lock
class BaseCache(object):
def get(self, key):
raise NotImplemented()
raise NotImplementedError()
def set(self, key, value):
raise NotImplemented()
raise NotImplementedError()
def delete(self, key):
raise NotImplemented()
raise NotImplementedError()
def close(self):
pass

View file

@ -1,6 +1,7 @@
from __future__ import division
from datetime import datetime
from cachecontrol.cache import BaseCache
def total_seconds(td):
@ -13,7 +14,7 @@ def total_seconds(td):
return int((ms + secs * 10**6) / 10**6)
class RedisCache(object):
class RedisCache(BaseCache):
def __init__(self, conn):
self.conn = conn
@ -38,4 +39,5 @@ class RedisCache(object):
self.conn.delete(key)
def close(self):
self.conn.disconnect()
"""Redis uses connection pooling, no need to close the connection."""
pass

View file

@ -1,3 +1,3 @@
from .core import where, old_where
__version__ = "2017.07.27"
__version__ = "2018.01.18"

File diff suppressed because it is too large Load diff

View file

@ -26,11 +26,12 @@ def where():
def old_where():
warnings.warn(
"The weak security bundle is being deprecated.",
"The weak security bundle has been removed. certifi.old_where() is now an alias "
"of certifi.where(). Please update your code to use certifi.where() instead. "
"certifi.old_where() will be removed in 2018.",
DeprecatedBundleWarning
)
f = os.path.dirname(__file__)
return os.path.join(f, 'weak.pem')
return where()
if __name__ == '__main__':
print(where())

View file

@ -1,414 +0,0 @@
# Issuer: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Subject: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Label: "Entrust.net Secure Server CA"
# Serial: 927650371
# MD5 Fingerprint: df:f2:80:73:cc:f1:e6:61:73:fc:f5:42:e9:c5:7c:ee
# SHA1 Fingerprint: 99:a6:9b:e6:1a:fe:88:6b:4d:2b:82:00:7c:b8:54:fc:31:7e:15:39
# SHA256 Fingerprint: 62:f2:40:27:8c:56:4c:4d:d8:bf:7d:9d:4f:6f:36:6e:a8:94:d2:2f:5f:34:d9:89:a9:83:ac:ec:2f:ff:ed:50
-----BEGIN CERTIFICATE-----
MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC
VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u
ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc
KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u
ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1
MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE
ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j
b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF
bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg
U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA
A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/
I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3
wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC
AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb
oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5
BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p
dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk
MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp
b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu
dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0
MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi
E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa
MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI
hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN
95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd
2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI=
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Label: "ValiCert Class 2 VA"
# Serial: 1
# MD5 Fingerprint: a9:23:75:9b:ba:49:36:6e:31:c2:db:f2:e7:66:ba:87
# SHA1 Fingerprint: 31:7a:2a:d0:7f:2b:33:5e:f5:a1:c3:4e:4b:57:e8:b7:d8:f1:fc:a6
# SHA256 Fingerprint: 58:d0:17:27:9c:d4:dc:63:ab:dd:b1:96:a6:c9:90:6c:30:c4:e0:87:83:ea:e8:c1:60:99:54:d6:93:55:59:6b
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy
NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY
dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9
WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS
v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v
UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu
IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC
W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd
-----END CERTIFICATE-----
# Issuer: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Express (Class C) Root"
# Serial: 104
# MD5 Fingerprint: 4f:eb:f1:f0:70:c2:80:63:5d:58:9f:da:12:3c:a9:c4
# SHA1 Fingerprint: e3:92:51:2f:0a:cf:f5:05:df:f6:de:06:7f:75:37:e1:65:ea:57:4b
# SHA256 Fingerprint: 0b:5e:ed:4e:84:64:03:cf:55:e0:65:84:84:40:ed:2a:82:75:8b:f5:b9:aa:1f:25:3d:46:13:cf:a0:80:ff:3f
-----BEGIN CERTIFICATE-----
MIIFTzCCBLigAwIBAgIBaDANBgkqhkiG9w0BAQQFADCBmzELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTQwMgYDVQQD
EytOZXRMb2NrIEV4cHJlc3N6IChDbGFzcyBDKSBUYW51c2l0dmFueWtpYWRvMB4X
DTk5MDIyNTE0MDgxMVoXDTE5MDIyMDE0MDgxMVowgZsxCzAJBgNVBAYTAkhVMREw
DwYDVQQHEwhCdWRhcGVzdDEnMCUGA1UEChMeTmV0TG9jayBIYWxvemF0Yml6dG9u
c2FnaSBLZnQuMRowGAYDVQQLExFUYW51c2l0dmFueWtpYWRvazE0MDIGA1UEAxMr
TmV0TG9jayBFeHByZXNzeiAoQ2xhc3MgQykgVGFudXNpdHZhbnlraWFkbzCBnzAN
BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6+ywbGGKIyWvYCDj2Z/8kwvbXY2wobNA
OoLO/XXgeDIDhlqGlZHtU/qdQPzm6N3ZW3oDvV3zOwzDUXmbrVWg6dADEK8KuhRC
2VImESLH0iDMgqSaqf64gXadarfSNnU+sYYJ9m5tfk63euyucYT2BDMIJTLrdKwW
RMbkQJMdf60CAwEAAaOCAp8wggKbMBIGA1UdEwEB/wQIMAYBAf8CAQQwDgYDVR0P
AQH/BAQDAgAGMBEGCWCGSAGG+EIBAQQEAwIABzCCAmAGCWCGSAGG+EIBDQSCAlEW
ggJNRklHWUVMRU0hIEV6ZW4gdGFudXNpdHZhbnkgYSBOZXRMb2NrIEtmdC4gQWx0
YWxhbm9zIFN6b2xnYWx0YXRhc2kgRmVsdGV0ZWxlaWJlbiBsZWlydCBlbGphcmFz
b2sgYWxhcGphbiBrZXN6dWx0LiBBIGhpdGVsZXNpdGVzIGZvbHlhbWF0YXQgYSBO
ZXRMb2NrIEtmdC4gdGVybWVrZmVsZWxvc3NlZy1iaXp0b3NpdGFzYSB2ZWRpLiBB
IGRpZ2l0YWxpcyBhbGFpcmFzIGVsZm9nYWRhc2FuYWsgZmVsdGV0ZWxlIGF6IGVs
b2lydCBlbGxlbm9yemVzaSBlbGphcmFzIG1lZ3RldGVsZS4gQXogZWxqYXJhcyBs
ZWlyYXNhIG1lZ3RhbGFsaGF0byBhIE5ldExvY2sgS2Z0LiBJbnRlcm5ldCBob25s
YXBqYW4gYSBodHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIGNpbWVuIHZhZ3kg
a2VyaGV0byBheiBlbGxlbm9yemVzQG5ldGxvY2submV0IGUtbWFpbCBjaW1lbi4g
SU1QT1JUQU5UISBUaGUgaXNzdWFuY2UgYW5kIHRoZSB1c2Ugb2YgdGhpcyBjZXJ0
aWZpY2F0ZSBpcyBzdWJqZWN0IHRvIHRoZSBOZXRMb2NrIENQUyBhdmFpbGFibGUg
YXQgaHR0cHM6Ly93d3cubmV0bG9jay5uZXQvZG9jcyBvciBieSBlLW1haWwgYXQg
Y3BzQG5ldGxvY2submV0LjANBgkqhkiG9w0BAQQFAAOBgQAQrX/XDDKACtiG8XmY
ta3UzbM2xJZIwVzNmtkFLp++UOv0JhQQLdRmF/iewSf98e3ke0ugbLWrmldwpu2g
pO0u9f38vf5NNwgMvOOWgyL1SRt/Syu0VMGAfJlOHdCM7tCs5ZL6dVb+ZKATj7i4
Fp1hBWeAyNDYpQcCNJgEjTME1A==
-----END CERTIFICATE-----
# Issuer: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Business (Class B) Root"
# Serial: 105
# MD5 Fingerprint: 39:16:aa:b9:6a:41:e1:14:69:df:9e:6c:3b:72:dc:b6
# SHA1 Fingerprint: 87:9f:4b:ee:05:df:98:58:3b:e3:60:d6:33:e7:0d:3f:fe:98:71:af
# SHA256 Fingerprint: 39:df:7b:68:2b:7b:93:8f:84:71:54:81:cc:de:8d:60:d8:f2:2e:c5:98:87:7d:0a:aa:c1:2b:59:18:2b:03:12
-----BEGIN CERTIFICATE-----
MIIFSzCCBLSgAwIBAgIBaTANBgkqhkiG9w0BAQQFADCBmTELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTIwMAYDVQQD
EylOZXRMb2NrIFV6bGV0aSAoQ2xhc3MgQikgVGFudXNpdHZhbnlraWFkbzAeFw05
OTAyMjUxNDEwMjJaFw0xOTAyMjAxNDEwMjJaMIGZMQswCQYDVQQGEwJIVTERMA8G
A1UEBxMIQnVkYXBlc3QxJzAlBgNVBAoTHk5ldExvY2sgSGFsb3phdGJpenRvbnNh
Z2kgS2Z0LjEaMBgGA1UECxMRVGFudXNpdHZhbnlraWFkb2sxMjAwBgNVBAMTKU5l
dExvY2sgVXpsZXRpIChDbGFzcyBCKSBUYW51c2l0dmFueWtpYWRvMIGfMA0GCSqG
SIb3DQEBAQUAA4GNADCBiQKBgQCx6gTsIKAjwo84YM/HRrPVG/77uZmeBNwcf4xK
gZjupNTKihe5In+DCnVMm8Bp2GQ5o+2So/1bXHQawEfKOml2mrriRBf8TKPV/riX
iK+IA4kfpPIEPsgHC+b5sy96YhQJRhTKZPWLgLViqNhr1nGTLbO/CVRY7QbrqHvc
Q7GhaQIDAQABo4ICnzCCApswEgYDVR0TAQH/BAgwBgEB/wIBBDAOBgNVHQ8BAf8E
BAMCAAYwEQYJYIZIAYb4QgEBBAQDAgAHMIICYAYJYIZIAYb4QgENBIICURaCAk1G
SUdZRUxFTSEgRXplbiB0YW51c2l0dmFueSBhIE5ldExvY2sgS2Z0LiBBbHRhbGFu
b3MgU3pvbGdhbHRhdGFzaSBGZWx0ZXRlbGVpYmVuIGxlaXJ0IGVsamFyYXNvayBh
bGFwamFuIGtlc3p1bHQuIEEgaGl0ZWxlc2l0ZXMgZm9seWFtYXRhdCBhIE5ldExv
Y2sgS2Z0LiB0ZXJtZWtmZWxlbG9zc2VnLWJpenRvc2l0YXNhIHZlZGkuIEEgZGln
aXRhbGlzIGFsYWlyYXMgZWxmb2dhZGFzYW5hayBmZWx0ZXRlbGUgYXogZWxvaXJ0
IGVsbGVub3J6ZXNpIGVsamFyYXMgbWVndGV0ZWxlLiBBeiBlbGphcmFzIGxlaXJh
c2EgbWVndGFsYWxoYXRvIGEgTmV0TG9jayBLZnQuIEludGVybmV0IGhvbmxhcGph
biBhIGh0dHBzOi8vd3d3Lm5ldGxvY2submV0L2RvY3MgY2ltZW4gdmFneSBrZXJo
ZXRvIGF6IGVsbGVub3J6ZXNAbmV0bG9jay5uZXQgZS1tYWlsIGNpbWVuLiBJTVBP
UlRBTlQhIFRoZSBpc3N1YW5jZSBhbmQgdGhlIHVzZSBvZiB0aGlzIGNlcnRpZmlj
YXRlIGlzIHN1YmplY3QgdG8gdGhlIE5ldExvY2sgQ1BTIGF2YWlsYWJsZSBhdCBo
dHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIG9yIGJ5IGUtbWFpbCBhdCBjcHNA
bmV0bG9jay5uZXQuMA0GCSqGSIb3DQEBBAUAA4GBAATbrowXr/gOkDFOzT4JwG06
sPgzTEdM43WIEJessDgVkcYplswhwG08pXTP2IKlOcNl40JwuyKQ433bNXbhoLXa
n3BukxowOR0w2y7jfLKRstE3Kfq51hdcR0/jHTjrn9V7lagonhVK0dHQKwCXoOKS
NitjrFgBazMpUIaD8QFI
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Label: "RSA Root Certificate 1"
# Serial: 1
# MD5 Fingerprint: a2:6f:53:b7:ee:40:db:4a:68:e7:fa:18:d9:10:4b:72
# SHA1 Fingerprint: 69:bd:8c:f4:9c:d3:00:fb:59:2e:17:93:ca:55:6a:f3:ec:aa:35:fb
# SHA256 Fingerprint: bc:23:f9:8a:31:3c:b9:2d:e3:bb:fc:3a:5a:9f:44:61:ac:39:49:4c:4a:e1:5a:9e:9d:f1:31:e9:9b:73:01:9a
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMjIzM1oXDTE5MDYy
NjAwMjIzM1owgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDjmFGWHOjVsQaBalfD
cnWTq8+epvzzFlLWLU2fNUSoLgRNB0mKOCn1dzfnt6td3zZxFJmP3MKS8edgkpfs
2Ejcv8ECIMYkpChMMFp2bbFc893enhBxoYjHW5tBbcqwuI4V7q0zK89HBFx1cQqY
JJgpp0lZpd34t0NiYfPT4tBVPwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFa7AliE
Zwgs3x/be0kz9dNnnfS0ChCzycUs4pJqcXgn8nCDQtM+z6lU9PHYkhaM0QTLS6vJ
n0WuPIqpsHEzXcjFV9+vqDWzf4mH6eglkrh/hXqu1rweN1gqZ8mRzyqBPu3GOd/A
PhmcGcwTTYJBtYze4D1gCCAPRX5ron+jjBXu
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Label: "ValiCert Class 1 VA"
# Serial: 1
# MD5 Fingerprint: 65:58:ab:15:ad:57:6c:1e:a8:a7:b5:69:ac:bf:ff:eb
# SHA1 Fingerprint: e5:df:74:3c:b6:01:c4:9b:98:43:dc:ab:8c:e8:6a:81:10:9f:e4:8e
# SHA256 Fingerprint: f4:c1:49:55:1a:30:13:a3:5b:c7:bf:fe:17:a7:f3:44:9b:c1:ab:5b:5a:0a:e7:4b:06:c2:3b:90:00:4c:01:04
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNTIyMjM0OFoXDTE5MDYy
NTIyMjM0OFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDYWYJ6ibiWuqYvaG9Y
LqdUHAZu9OqNSLwxlBfw8068srg1knaw0KWlAdcAAxIiGQj4/xEjm84H9b9pGib+
TunRf50sQB1ZaG6m+FiwnRqP0z/x3BkGgagO4DrdyFNFCQbmD3DD+kCmDuJWBQ8Y
TfwggtFzVXSNdnKgHZ0dwN0/cQIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFBoPUn0
LBwGlN+VYH+Wexf+T3GtZMjdd9LvWVXoP+iOBSoh8gfStadS/pyxtuJbdxdA6nLW
I8sogTLDAHkY7FkXicnGah5xyf23dKUlRWnFSKsZ4UWKJWsZ7uW7EvV/96aNUcPw
nXS3qT6gpf+2SQMT2iLM7XGCK5nPOrf1LXLI
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure eBusiness CA 1"
# Serial: 4
# MD5 Fingerprint: 64:9c:ef:2e:44:fc:c6:8f:52:07:d0:51:73:8f:cb:3d
# SHA1 Fingerprint: da:40:18:8b:91:89:a3:ed:ee:ae:da:97:fe:2f:9d:f5:b7:d1:8a:41
# SHA256 Fingerprint: cf:56:ff:46:a4:a1:86:10:9d:d9:65:84:b5:ee:b5:8a:51:0c:42:75:b0:e5:f9:4f:40:bb:ae:86:5e:19:f6:73
-----BEGIN CERTIFICATE-----
MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT
ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw
MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j
LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ
KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo
RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu
WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw
Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK
eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM
zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+
WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN
/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ==
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure Global eBusiness CA"
# Serial: 1
# MD5 Fingerprint: 8f:5d:77:06:27:c4:98:3c:5b:93:78:e7:d7:7d:9b:cc
# SHA1 Fingerprint: 7e:78:4a:10:1c:82:65:cc:2d:e1:f1:6d:47:b4:40:ca:d9:0a:19:45
# SHA256 Fingerprint: 5f:0b:62:ea:b5:e3:53:ea:65:21:65:16:58:fb:b6:53:59:f4:43:28:0a:4a:fb:d1:04:d7:7d:10:f9:f0:4c:07
-----BEGIN CERTIFICATE-----
MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT
ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw
MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj
dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l
c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC
UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc
58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/
o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH
MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr
aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA
A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA
Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv
8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV
-----END CERTIFICATE-----
# Issuer: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Premium Server CA"
# Serial: 1
# MD5 Fingerprint: 06:9f:69:79:16:66:90:02:1b:8c:8c:a2:c3:07:6f:3a
# SHA1 Fingerprint: 62:7f:8d:78:27:65:63:99:d2:7d:7f:90:44:c9:fe:b3:f3:3e:fa:9a
# SHA256 Fingerprint: ab:70:36:36:5c:71:54:aa:29:c2:c2:9f:5d:41:91:16:3b:16:2a:22:25:01:13:57:d5:6d:07:ff:a7:bc:1f:72
-----BEGIN CERTIFICATE-----
MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy
dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t
MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB
MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG
A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp
b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl
cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv
bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE
VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ
ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR
uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG
9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI
hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM
pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg==
-----END CERTIFICATE-----
# Issuer: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Server CA"
# Serial: 1
# MD5 Fingerprint: c5:70:c4:a2:ed:53:78:0c:c8:10:53:81:64:cb:d0:1d
# SHA1 Fingerprint: 23:e5:94:94:51:95:f2:41:48:03:b4:d5:64:d2:a3:a3:f5:d8:8b:8c
# SHA256 Fingerprint: b4:41:0b:73:e2:e6:ea:ca:47:fb:c4:2f:8f:a4:01:8a:f4:38:1d:c5:4c:fa:a8:44:50:46:1e:ed:09:45:4d:e9
-----BEGIN CERTIFICATE-----
MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm
MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx
MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT
DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3
dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl
cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3
DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD
gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91
yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX
L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj
EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG
7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e
QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ
qdq5snUb9kLy78fyGPmJvKP/iiMucEc=
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 149843929435818692848040365716851702463
# MD5 Fingerprint: 10:fc:63:5d:f6:26:3e:0d:f3:25:be:5f:79:cd:67:67
# SHA1 Fingerprint: 74:2c:31:92:e6:07:e4:24:eb:45:49:54:2b:e1:bb:c5:3e:61:74:e2
# SHA256 Fingerprint: e7:68:56:34:ef:ac:f6:9a:ce:93:9a:6b:25:5b:7b:4f:ab:ef:42:93:5b:50:a2:65:ac:b5:cb:60:27:e4:4e:70
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do
lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc
AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 80507572722862485515306429940691309246
# MD5 Fingerprint: ef:5a:f1:33:ef:f1:cd:bb:51:02:ee:12:14:4b:96:c4
# SHA1 Fingerprint: a1:db:63:93:91:6f:17:e4:18:55:09:40:04:15:c7:02:40:b0:ae:6b
# SHA256 Fingerprint: a4:b6:b3:99:6f:c2:f3:06:b3:fd:86:81:bd:63:41:3d:8c:50:09:cc:4f:a3:29:c2:cc:f0:e2:fa:1b:14:03:05
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEDyRMcsf9tAbDpq40ES/Er4wDQYJKoZIhvcNAQEFBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMjIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBBQUAA4GBABByUqkFFBkyCEHwxWsKzH4PIRnN5GfcX6kb5sroc50i
2JhucwNhkcV8sEVAbkSdjbCxlnRhLQ2pRdKkkirWmnWXbj9T/UWZYB2oK0z5XqcJ
2HUw19JlYD1n1khVdWk/kfVIC0dpImmClr7JyDiGSnoscxlIaU5rfGW/D/xwzoiQ
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Label: "Verisign Class 3 Public Primary Certification Authority - G2"
# Serial: 167285380242319648451154478808036881606
# MD5 Fingerprint: a2:33:9b:4c:74:78:73:d4:6c:e7:c1:f3:8d:cb:5c:e9
# SHA1 Fingerprint: 85:37:1c:a6:e5:50:14:3d:ce:28:03:47:1b:de:3a:09:e8:f8:77:0f
# SHA256 Fingerprint: 83:ce:3c:12:29:68:8a:59:3d:48:5f:81:97:3c:0f:91:95:43:1e:da:37:cc:5e:36:43:0e:79:c7:a8:88:63:8b
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4
pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0
13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk
U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i
F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY
oJ2daZH9
-----END CERTIFICATE-----
# Issuer: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Subject: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Label: "GTE CyberTrust Global Root"
# Serial: 421
# MD5 Fingerprint: ca:3d:d3:68:f1:03:5c:d0:32:fa:b8:2b:59:e8:5a:db
# SHA1 Fingerprint: 97:81:79:50:d8:1c:96:70:cc:34:d8:09:cf:79:44:31:36:7e:f4:74
# SHA256 Fingerprint: a5:31:25:18:8d:21:10:aa:96:4b:02:c7:b7:c6:da:32:03:17:08:94:e5:fb:71:ff:fb:66:67:d5:e6:81:0a:36
-----BEGIN CERTIFICATE-----
MIICWjCCAcMCAgGlMA0GCSqGSIb3DQEBBAUAMHUxCzAJBgNVBAYTAlVTMRgwFgYD
VQQKEw9HVEUgQ29ycG9yYXRpb24xJzAlBgNVBAsTHkdURSBDeWJlclRydXN0IFNv
bHV0aW9ucywgSW5jLjEjMCEGA1UEAxMaR1RFIEN5YmVyVHJ1c3QgR2xvYmFsIFJv
b3QwHhcNOTgwODEzMDAyOTAwWhcNMTgwODEzMjM1OTAwWjB1MQswCQYDVQQGEwJV
UzEYMBYGA1UEChMPR1RFIENvcnBvcmF0aW9uMScwJQYDVQQLEx5HVEUgQ3liZXJU
cnVzdCBTb2x1dGlvbnMsIEluYy4xIzAhBgNVBAMTGkdURSBDeWJlclRydXN0IEds
b2JhbCBSb290MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCVD6C28FCc6HrH
iM3dFw4usJTQGz0O9pTAipTHBsiQl8i4ZBp6fmw8U+E3KHNgf7KXUwefU/ltWJTS
r41tiGeA5u2ylc9yMcqlHHK6XALnZELn+aks1joNrI1CqiQBOeacPwGFVw1Yh0X4
04Wqk2kmhXBIgD8SFcd5tB8FLztimQIDAQABMA0GCSqGSIb3DQEBBAUAA4GBAG3r
GwnpXtlR22ciYaQqPEh346B8pt5zohQDhT37qw4wxYMWM4ETCJ57NE7fQMh017l9
3PR2VX2bY1QY6fDq81yx2YtCHrnAlU66+tXifPVoYb+O7AWXX1uw16OFNMQkpw0P
lZPvy5TYnh+dXIVtx6quTx8itc2VrbqnzPmrC3p/
-----END CERTIFICATE-----
# Issuer: C=US, O=Equifax, OU=Equifax Secure Certificate Authority
# Subject: C=US, O=Equifax, OU=Equifax Secure Certificate Authority
# Label: "Equifax Secure Certificate Authority"
# Serial: 903804111
# MD5 Fingerprint: 67:cb:9d:c0:13:24:8a:82:9b:b2:17:1e:d1:1b:ec:d4
# SHA1 Fingerprint: d2:32:09:ad:23:d3:14:23:21:74:e4:0d:7f:9d:62:13:97:86:63:3a
# SHA256 Fingerprint: 08:29:7a:40:47:db:a2:36:80:c7:31:db:6e:31:76:53:ca:78:48:e1:be:bd:3a:0b:01:79:a7:07:f9:2c:f1:78
-----BEGIN CERTIFICATE-----
MIIDIDCCAomgAwIBAgIENd70zzANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV
UzEQMA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2Vy
dGlmaWNhdGUgQXV0aG9yaXR5MB4XDTk4MDgyMjE2NDE1MVoXDTE4MDgyMjE2NDE1
MVowTjELMAkGA1UEBhMCVVMxEDAOBgNVBAoTB0VxdWlmYXgxLTArBgNVBAsTJEVx
dWlmYXggU2VjdXJlIENlcnRpZmljYXRlIEF1dGhvcml0eTCBnzANBgkqhkiG9w0B
AQEFAAOBjQAwgYkCgYEAwV2xWGcIYu6gmi0fCG2RFGiYCh7+2gRvE4RiIcPRfM6f
BeC4AfBONOziipUEZKzxa1NfBbPLZ4C/QgKO/t0BCezhABRP/PvwDN1Dulsr4R+A
cJkVV5MW8Q+XarfCaCMczE1ZMKxRHjuvK9buY0V7xdlfUNLjUA86iOe/FP3gx7kC
AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEQ
MA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2VydGlm
aWNhdGUgQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTgw
ODIyMTY0MTUxWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUSOZo+SvSspXXR9gj
IBBPM5iQn9QwHQYDVR0OBBYEFEjmaPkr0rKV10fYIyAQTzOYkJ/UMAwGA1UdEwQF
MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA
A4GBAFjOKer89961zgK5F7WF0bnj4JXMJTENAKaSbn+2kmOeUJXRmm/kEd5jhW6Y
7qj/WsjTVbJmcVfewCHrPSqnI0kBBIZCe/zuf6IWUrVnZ9NA2zsmWLIodz2uFHdh
1voqZiegDfqnc1zqcPGUIWVEX/r87yloqaKHee9570+sB3c4
-----END CERTIFICATE-----

File diff suppressed because it is too large Load diff

View file

@ -16,11 +16,14 @@
######################### END LICENSE BLOCK #########################
from .compat import PY2, PY3
from .universaldetector import UniversalDetector
from .enums import InputState
from .version import __version__, VERSION
__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION']
def detect(byte_str):
"""
Detect the encoding of the given byte string.
@ -37,3 +40,43 @@ def detect(byte_str):
detector = UniversalDetector()
detector.feed(byte_str)
return detector.close()
def detect_all(byte_str):
"""
Detect all the possible encodings of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
raise TypeError('Expected object of type bytes or bytearray, got: '
'{0}'.format(type(byte_str)))
else:
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
detector.close()
if detector._input_state == InputState.HIGH_BYTE:
results = []
for prober in detector._charset_probers:
if prober.get_confidence() > detector.MINIMUM_THRESHOLD:
charset_name = prober.charset_name
lower_charset_name = prober.charset_name.lower()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
if lower_charset_name.startswith('iso-8859'):
if detector._has_win_bytes:
charset_name = detector.ISO_WIN_MAP.get(lower_charset_name,
charset_name)
results.append({
'encoding': charset_name,
'confidence': prober.get_confidence()
})
if len(results) > 0:
return sorted(results, key=lambda result: -result['confidence'])
return [detector.result]

View file

@ -25,10 +25,12 @@ import sys
if sys.version_info < (3, 0):
PY2 = True
PY3 = False
base_str = (str, unicode)
string_types = (str, unicode)
text_type = unicode
iteritems = dict.iteritems
else:
PY2 = False
PY3 = True
base_str = (bytes, str)
string_types = (bytes, str)
text_type = str
iteritems = dict.items

File diff suppressed because it is too large Load diff

View file

@ -1,333 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# KOI8-R language model
# Character Mapping Table:
KOI8R_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
)
win1251_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
)
latin5_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
)
macCyrillic_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
)
IBM855_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
)
IBM866_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 97.6601%
# first 1024 sequences: 2.3389%
# rest sequences: 0.1237%
# negative sequences: 0.0009%
RussianLangModel = (
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
)
Koi8rModel = {
'char_to_order_map': KOI8R_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "KOI8-R",
'language': 'Russian',
}
Win1251CyrillicModel = {
'char_to_order_map': win1251_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "windows-1251",
'language': 'Russian',
}
Latin5CyrillicModel = {
'char_to_order_map': latin5_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "ISO-8859-5",
'language': 'Russian',
}
MacCyrillicModel = {
'char_to_order_map': macCyrillic_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "MacCyrillic",
'language': 'Russian',
}
Ibm866Model = {
'char_to_order_map': IBM866_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "IBM866",
'language': 'Russian',
}
Ibm855Model = {
'char_to_order_map': IBM855_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "IBM855",
'language': 'Russian',
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,310 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Metadata about languages used by our model training code for our
SingleByteCharSetProbers. Could be used for other things in the future.
This code is based on the language metadata from the uchardet project.
"""
from __future__ import absolute_import, print_function
from string import ascii_letters
# TODO: Add Ukranian (KOI8-U)
class Language(object):
"""Metadata about a language useful for training models
:ivar name: The human name for the language, in English.
:type name: str
:ivar iso_code: 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
or use another catalog as a last resort.
:type iso_code: str
:ivar use_ascii: Whether or not ASCII letters should be included in trained
models.
:type use_ascii: bool
:ivar charsets: The charsets we want to support and create data for.
:type charsets: list of str
:ivar alphabet: The characters in the language's alphabet. If `use_ascii` is
`True`, you only need to add those not in the ASCII set.
:type alphabet: str
:ivar wiki_start_pages: The Wikipedia pages to start from if we're crawling
Wikipedia for training data.
:type wiki_start_pages: list of str
"""
def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None,
alphabet=None, wiki_start_pages=None):
super(Language, self).__init__()
self.name = name
self.iso_code = iso_code
self.use_ascii = use_ascii
self.charsets = charsets
if self.use_ascii:
if alphabet:
alphabet += ascii_letters
else:
alphabet = ascii_letters
elif not alphabet:
raise ValueError('Must supply alphabet if use_ascii is False')
self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None
self.wiki_start_pages = wiki_start_pages
def __repr__(self):
return '{}({})'.format(self.__class__.__name__,
', '.join('{}={!r}'.format(k, v)
for k, v in self.__dict__.items()
if not k.startswith('_')))
LANGUAGES = {'Arabic': Language(name='Arabic',
iso_code='ar',
use_ascii=False,
# We only support encodings that use isolated
# forms, because the current recommendation is
# that the rendering system handles presentation
# forms. This means we purposefully skip IBM864.
charsets=['ISO-8859-6', 'WINDOWS-1256',
'CP720', 'CP864'],
alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ',
wiki_start_pages=[u'الصفحة_الرئيسية']),
'Belarusian': Language(name='Belarusian',
iso_code='be',
use_ascii=False,
charsets=['ISO-8859-5', 'WINDOWS-1251',
'IBM866', 'MacCyrillic'],
alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ'
u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'),
wiki_start_pages=[u'Галоўная_старонка']),
'Bulgarian': Language(name='Bulgarian',
iso_code='bg',
use_ascii=False,
charsets=['ISO-8859-5', 'WINDOWS-1251',
'IBM855'],
alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ'
u'абвгдежзийклмнопрстуфхцчшщъьюя'),
wiki_start_pages=[u'Начална_страница']),
'Czech': Language(name='Czech',
iso_code='cz',
use_ascii=True,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ',
wiki_start_pages=[u'Hlavní_strana']),
'Danish': Language(name='Danish',
iso_code='da',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'æøåÆØÅ',
wiki_start_pages=[u'Forside']),
'German': Language(name='German',
iso_code='de',
use_ascii=True,
charsets=['ISO-8859-1', 'WINDOWS-1252'],
alphabet=u'äöüßÄÖÜ',
wiki_start_pages=[u'Wikipedia:Hauptseite']),
'Greek': Language(name='Greek',
iso_code='el',
use_ascii=False,
charsets=['ISO-8859-7', 'WINDOWS-1253'],
alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ'
u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'),
wiki_start_pages=[u'Πύλη:Κύρια']),
'English': Language(name='English',
iso_code='en',
use_ascii=True,
charsets=['ISO-8859-1', 'WINDOWS-1252'],
wiki_start_pages=[u'Main_Page']),
'Esperanto': Language(name='Esperanto',
iso_code='eo',
# Q, W, X, and Y not used at all
use_ascii=False,
charsets=['ISO-8859-3'],
alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz'
u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'),
wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']),
'Spanish': Language(name='Spanish',
iso_code='es',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ',
wiki_start_pages=[u'Wikipedia:Portada']),
'Estonian': Language(name='Estonian',
iso_code='et',
use_ascii=False,
charsets=['ISO-8859-4', 'ISO-8859-13',
'WINDOWS-1257'],
# C, F, Š, Q, W, X, Y, Z, Ž are only for
# loanwords
alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ'
u'abdeghijklmnoprstuvõäöü'),
wiki_start_pages=[u'Esileht']),
'Finnish': Language(name='Finnish',
iso_code='fi',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'ÅÄÖŠŽåäöšž',
wiki_start_pages=[u'Wikipedia:Etusivu']),
'French': Language(name='French',
iso_code='fr',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ',
wiki_start_pages=[u'Wikipédia:Accueil_principal',
u'Bœuf (animal)']),
'Hebrew': Language(name='Hebrew',
iso_code='he',
use_ascii=False,
charsets=['ISO-8859-8', 'WINDOWS-1255'],
alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ',
wiki_start_pages=[u'עמוד_ראשי']),
'Croatian': Language(name='Croatian',
iso_code='hr',
# Q, W, X, Y are only used for foreign words.
use_ascii=False,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=(u'abcčćdđefghijklmnoprsštuvzž'
u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'),
wiki_start_pages=[u'Glavna_stranica']),
'Hungarian': Language(name='Hungarian',
iso_code='hu',
# Q, W, X, Y are only used for foreign words.
use_ascii=False,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű'
u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'),
wiki_start_pages=[u'Kezdőlap']),
'Italian': Language(name='Italian',
iso_code='it',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'ÀÈÉÌÒÓÙàèéìòóù',
wiki_start_pages=[u'Pagina_principale']),
'Lithuanian': Language(name='Lithuanian',
iso_code='lt',
use_ascii=False,
charsets=['ISO-8859-13', 'WINDOWS-1257',
'ISO-8859-4'],
# Q, W, and X not used at all
alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ'
u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'),
wiki_start_pages=[u'Pagrindinis_puslapis']),
'Latvian': Language(name='Latvian',
iso_code='lv',
use_ascii=False,
charsets=['ISO-8859-13', 'WINDOWS-1257',
'ISO-8859-4'],
# Q, W, X, Y are only for loanwords
alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ'
u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'),
wiki_start_pages=[u'Sākumlapa']),
'Macedonian': Language(name='Macedonian',
iso_code='mk',
use_ascii=False,
charsets=['ISO-8859-5', 'WINDOWS-1251',
'MacCyrillic', 'IBM855'],
alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ'
u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'),
wiki_start_pages=[u'Главна_страница']),
'Dutch': Language(name='Dutch',
iso_code='nl',
use_ascii=True,
charsets=['ISO-8859-1', 'WINDOWS-1252'],
wiki_start_pages=[u'Hoofdpagina']),
'Polish': Language(name='Polish',
iso_code='pl',
# Q and X are only used for foreign words.
use_ascii=False,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ'
u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'),
wiki_start_pages=[u'Wikipedia:Strona_główna']),
'Portuguese': Language(name='Portuguese',
iso_code='pt',
use_ascii=True,
charsets=['ISO-8859-1', 'ISO-8859-15',
'WINDOWS-1252'],
alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú',
wiki_start_pages=[u'Wikipédia:Página_principal']),
'Romanian': Language(name='Romanian',
iso_code='ro',
use_ascii=True,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=u'ăâîșțĂÂÎȘȚ',
wiki_start_pages=[u'Pagina_principală']),
'Russian': Language(name='Russian',
iso_code='ru',
use_ascii=False,
charsets=['ISO-8859-5', 'WINDOWS-1251',
'KOI8-R', 'MacCyrillic', 'IBM866',
'IBM855'],
alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'),
wiki_start_pages=[u'Заглавная_страница']),
'Slovak': Language(name='Slovak',
iso_code='sk',
use_ascii=True,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ',
wiki_start_pages=[u'Hlavná_stránka']),
'Slovene': Language(name='Slovene',
iso_code='sl',
# Q, W, X, Y are only used for foreign words.
use_ascii=False,
charsets=['ISO-8859-2', 'WINDOWS-1250'],
alphabet=(u'abcčdefghijklmnoprsštuvzž'
u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'),
wiki_start_pages=[u'Glavna_stran']),
# Serbian can be written in both Latin and Cyrillic, but there's no
# simple way to get the Latin alphabet pages from Wikipedia through
# the API, so for now we just support Cyrillic.
'Serbian': Language(name='Serbian',
iso_code='sr',
alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ'
u'абвгдђежзијклљмнњопрстћуфхцчџш'),
charsets=['ISO-8859-5', 'WINDOWS-1251',
'MacCyrillic', 'IBM855'],
wiki_start_pages=[u'Главна_страна']),
'Thai': Language(name='Thai',
iso_code='th',
use_ascii=False,
charsets=['ISO-8859-11', 'TIS-620', 'CP874'],
alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛',
wiki_start_pages=[u'หน้าหลัก']),
'Turkish': Language(name='Turkish',
iso_code='tr',
# Q, W, and X are not used by Turkish
use_ascii=False,
charsets=['ISO-8859-3', 'ISO-8859-9',
'WINDOWS-1254'],
alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû'
u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'),
wiki_start_pages=[u'Ana_Sayfa']),
'Vietnamese': Language(name='Vietnamese',
iso_code='vi',
use_ascii=False,
# Windows-1258 is the only common 8-bit
# Vietnamese encoding supported by Python.
# From Wikipedia:
# For systems that lack support for Unicode,
# dozens of 8-bit Vietnamese code pages are
# available.[1] The most common are VISCII
# (TCVN 5712:1993), VPS, and Windows-1258.[3]
# Where ASCII is required, such as when
# ensuring readability in plain text e-mail,
# Vietnamese letters are often encoded
# according to Vietnamese Quoted-Readable
# (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
# though usage of either variable-width
# scheme has declined dramatically following
# the adoption of Unicode on the World Wide
# Web.
charsets=['WINDOWS-1258'],
alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy'
u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'),
wiki_start_pages=[u'Chữ_Quốc_ngữ']),
}

View file

@ -26,10 +26,22 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from collections import namedtuple
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
SingleByteCharSetModel = namedtuple('SingleByteCharSetModel',
['charset_name',
'language',
'char_to_order_map',
'language_model',
'typical_positive_ratio',
'keep_ascii_letters',
'alphabet'])
class SingleByteCharSetProber(CharSetProber):
SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
@ -65,25 +77,25 @@ class SingleByteCharSetProber(CharSetProber):
if self._name_prober:
return self._name_prober.charset_name
else:
return self._model['charset_name']
return self._model.charset_name
@property
def language(self):
if self._name_prober:
return self._name_prober.language
else:
return self._model.get('language')
return self._model.language
def feed(self, byte_str):
if not self._model['keep_english_letter']:
# TODO: Make filter_international_words keep things in self.alphabet
if not self._model.keep_ascii_letters:
byte_str = self.filter_international_words(byte_str)
if not byte_str:
return self.state
char_to_order_map = self._model['char_to_order_map']
for i, c in enumerate(byte_str):
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
# but that leads to 27 more test failures than before.
order = char_to_order_map[c]
char_to_order_map = self._model.char_to_order_map
language_model = self._model.language_model
for char in byte_str:
order = char_to_order_map.get(char, CharacterCategory.UNDEFINED)
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
# to make it closer to the original intent. The only difference
@ -91,20 +103,21 @@ class SingleByteCharSetProber(CharSetProber):
# _total_char purposes.
if order < CharacterCategory.CONTROL:
self._total_char += 1
# TODO: Follow uchardet's lead and discount confidence for frequent
# control characters.
# See https://github.com/BYVoid/uchardet/commit/55b4f23971db61
if order < self.SAMPLE_SIZE:
self._freq_char += 1
if self._last_order < self.SAMPLE_SIZE:
self._total_seqs += 1
if not self._reversed:
i = (self._last_order * self.SAMPLE_SIZE) + order
model = self._model['precedence_matrix'][i]
else: # reverse the order of the letters in the lookup
i = (order * self.SAMPLE_SIZE) + self._last_order
model = self._model['precedence_matrix'][i]
self._seq_counters[model] += 1
lm_cat = language_model[self._last_order][order]
else:
lm_cat = language_model[order][self._last_order]
self._seq_counters[lm_cat] += 1
self._last_order = order
charset_name = self._model['charset_name']
charset_name = self._model.charset_name
if self.state == ProbingState.DETECTING:
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
confidence = self.get_confidence()
@ -125,7 +138,7 @@ class SingleByteCharSetProber(CharSetProber):
r = 0.01
if self._total_seqs > 0:
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
self._total_seqs / self._model['typical_positive_ratio'])
self._total_seqs / self._model.typical_positive_ratio)
r = r * self._freq_char / self._total_char
if r >= 1.0:
r = 0.99

View file

@ -27,47 +27,57 @@
######################### END LICENSE BLOCK #########################
from .charsetgroupprober import CharSetGroupProber
from .sbcharsetprober import SingleByteCharSetProber
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
Latin5CyrillicModel, MacCyrillicModel,
Ibm866Model, Ibm855Model)
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
from .langthaimodel import TIS620ThaiModel
from .langhebrewmodel import Win1255HebrewModel
from .hebrewprober import HebrewProber
from .langturkishmodel import Latin5TurkishModel
from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL,
WINDOWS_1251_BULGARIAN_MODEL)
from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL
from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL
# from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL,
# WINDOWS_1250_HUNGARIAN_MODEL)
from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL,
ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL,
MACCYRILLIC_RUSSIAN_MODEL,
WINDOWS_1251_RUSSIAN_MODEL)
from .langthaimodel import TIS_620_THAI_MODEL
from .langturkishmodel import ISO_8859_9_TURKISH_MODEL
from .sbcharsetprober import SingleByteCharSetProber
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
super(SBCSGroupProber, self).__init__()
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
False, hebrew_prober)
# TODO: See if using ISO-8859-8 Hebrew model works better here, since
# it's actually the visual one
visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
True, hebrew_prober)
hebrew_prober.set_model_probers(logical_hebrew_prober,
visual_hebrew_prober)
# TODO: ORDER MATTERS HERE. I changed the order vs what was in master
# and several tests failed that did not before. Some thought
# should be put into the ordering, and we should consider making
# order not matter here, because that is very counter-intuitive.
self.probers = [
SingleByteCharSetProber(Win1251CyrillicModel),
SingleByteCharSetProber(Koi8rModel),
SingleByteCharSetProber(Latin5CyrillicModel),
SingleByteCharSetProber(MacCyrillicModel),
SingleByteCharSetProber(Ibm866Model),
SingleByteCharSetProber(Ibm855Model),
SingleByteCharSetProber(Latin7GreekModel),
SingleByteCharSetProber(Win1253GreekModel),
SingleByteCharSetProber(Latin5BulgarianModel),
SingleByteCharSetProber(Win1251BulgarianModel),
SingleByteCharSetProber(WINDOWS_1251_RUSSIAN_MODEL),
SingleByteCharSetProber(KOI8_R_RUSSIAN_MODEL),
SingleByteCharSetProber(ISO_8859_5_RUSSIAN_MODEL),
SingleByteCharSetProber(MACCYRILLIC_RUSSIAN_MODEL),
SingleByteCharSetProber(IBM866_RUSSIAN_MODEL),
SingleByteCharSetProber(IBM855_RUSSIAN_MODEL),
SingleByteCharSetProber(ISO_8859_7_GREEK_MODEL),
SingleByteCharSetProber(WINDOWS_1253_GREEK_MODEL),
SingleByteCharSetProber(ISO_8859_5_BULGARIAN_MODEL),
SingleByteCharSetProber(WINDOWS_1251_BULGARIAN_MODEL),
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
# after we retrain model.
# SingleByteCharSetProber(Latin2HungarianModel),
# SingleByteCharSetProber(Win1250HungarianModel),
SingleByteCharSetProber(TIS620ThaiModel),
SingleByteCharSetProber(Latin5TurkishModel),
# SingleByteCharSetProber(ISO_8859_2_HUNGARIAN_MODEL),
# SingleByteCharSetProber(WINDOWS_1250_HUNGARIAN_MODEL),
SingleByteCharSetProber(TIS_620_THAI_MODEL),
SingleByteCharSetProber(ISO_8859_9_TURKISH_MODEL),
hebrew_prober,
logical_hebrew_prober,
visual_hebrew_prober,
]
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
False, hebrew_prober)
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
hebrew_prober)
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
self.probers.extend([hebrew_prober, logical_hebrew_prober,
visual_hebrew_prober])
self.reset()

View file

@ -266,7 +266,7 @@ class UniversalDetector(object):
'language': max_prober.language}
# Log all prober confidences if none met MINIMUM_THRESHOLD
if self.logger.getEffectiveLevel() == logging.DEBUG:
if self.logger.getEffectiveLevel() <= logging.DEBUG:
if self.result['encoding'] is None:
self.logger.debug('no probers hit minimum threshold')
for group_prober in self._charset_probers:

View file

@ -5,5 +5,5 @@ from within setup.py and from chardet subpackages.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
__version__ = "3.0.4"
__version__ = "4.0.0"
VERSION = __version__.split('.')

View file

@ -1,2 +1,8 @@
# -*- coding: utf-8 -*-
from ._version import VERSION as __version__
try:
from ._version import version as __version__
except ImportError:
__version__ = 'unknown'
__all__ = ['easter', 'parser', 'relativedelta', 'rrule', 'tz',
'utils', 'zoneinfo']

View file

@ -1,10 +0,0 @@
"""
Contains information about the dateutil version.
"""
VERSION_MAJOR = 2
VERSION_MINOR = 6
VERSION_PATCH = 1
VERSION_TUPLE = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
VERSION = '.'.join(map(str, VERSION_TUPLE))

View file

@ -41,11 +41,11 @@ def easter(year, method=EASTER_WESTERN):
More about the algorithm may be found at:
http://users.chariot.net.au/~gmarts/eastalg.htm
`GM Arts: Easter Algorithms <http://www.gmarts.org/index.php?go=415>`_
and
http://www.tondering.dk/claus/calendar.html
`The Calendar FAQ: Easter <https://www.tondering.dk/claus/cal/easter.php>`_
"""

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
from ._parser import parse, parser, parserinfo
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
from ._parser import UnknownTimezoneWarning
from ._parser import __doc__
from .isoparser import isoparser, isoparse
__all__ = ['parse', 'parser', 'parserinfo',
'isoparse', 'isoparser',
'UnknownTimezoneWarning']
###
# Deprecate portions of the private interface so that downstream code that
# is improperly relying on it is given *some* notice.
def __deprecated_private_func(f):
from functools import wraps
import warnings
msg = ('{name} is a private function and may break without warning, '
'it will be moved and or renamed in future versions.')
msg = msg.format(name=f.__name__)
@wraps(f)
def deprecated_func(*args, **kwargs):
warnings.warn(msg, DeprecationWarning)
return f(*args, **kwargs)
return deprecated_func
def __deprecate_private_class(c):
import warnings
msg = ('{name} is a private class and may break without warning, '
'it will be moved and or renamed in future versions.')
msg = msg.format(name=c.__name__)
class private_class(c):
__doc__ = c.__doc__
def __init__(self, *args, **kwargs):
warnings.warn(msg, DeprecationWarning)
super(private_class, self).__init__(*args, **kwargs)
private_class.__name__ = c.__name__
return private_class
from ._parser import _timelex, _resultbase
from ._parser import _tzparser, _parsetz
_timelex = __deprecate_private_class(_timelex)
_tzparser = __deprecate_private_class(_tzparser)
_resultbase = __deprecate_private_class(_resultbase)
_parsetz = __deprecated_private_func(_parsetz)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,402 @@
# -*- coding: utf-8 -*-
"""
This module offers a parser for ISO-8601 strings
It is intended to support all valid date, time and datetime formats per the
ISO-8601 specification.
"""
from datetime import datetime, timedelta, time, date
import calendar
from dateutil import tz
from functools import wraps
import re
import six
__all__ = ["isoparse", "isoparser"]
def _takes_ascii(f):
@wraps(f)
def func(self, str_in, *args, **kwargs):
# If it's a stream, read the whole thing
str_in = getattr(str_in, 'read', lambda: str_in)()
# If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
if isinstance(str_in, six.text_type):
# ASCII is the same in UTF-8
try:
str_in = str_in.encode('ascii')
except UnicodeEncodeError as e:
msg = 'ISO-8601 strings should contain only ASCII characters'
six.raise_from(ValueError(msg), e)
return f(self, str_in, *args, **kwargs)
return func
class isoparser(object):
def __init__(self, sep=None):
"""
:param sep:
A single character that separates date and time portions. If
``None``, the parser will accept any single character.
For strict ISO-8601 adherence, pass ``'T'``.
"""
if sep is not None:
if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
raise ValueError('Separator must be a single, non-numeric ' +
'ASCII character')
sep = sep.encode('ascii')
self._sep = sep
@_takes_ascii
def isoparse(self, dt_str):
"""
Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.
An ISO-8601 datetime string consists of a date portion, followed
optionally by a time portion - the date and time portions are separated
by a single character separator, which is ``T`` in the official
standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
combined with a time portion.
Supported date formats are:
Common:
- ``YYYY``
- ``YYYY-MM`` or ``YYYYMM``
- ``YYYY-MM-DD`` or ``YYYYMMDD``
Uncommon:
- ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0)
- ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
The ISO week and day numbering follows the same logic as
:func:`datetime.date.isocalendar`.
Supported time formats are:
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation.
.. caution::
Support for fractional components other than seconds is part of the
ISO-8601 standard, but is not currently implemented in this parser.
Supported time zone offset formats are:
- `Z` (UTC)
- `±HH:MM`
- `±HHMM`
- `±HH`
Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
with the exception of UTC, which will be represented as
:class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`.
:param dt_str:
A string or stream containing only an ISO-8601 datetime string
:return:
Returns a :class:`datetime.datetime` representing the string.
Unspecified components default to their lowest value.
.. warning::
As of version 2.7.0, the strictness of the parser should not be
considered a stable part of the contract. Any valid ISO-8601 string
that parses correctly with the default settings will continue to
parse correctly in future versions, but invalid strings that
currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
guaranteed to continue failing in future versions if they encode
a valid date.
"""
components, pos = self._parse_isodate(dt_str)
if len(dt_str) > pos:
if self._sep is None or dt_str[pos:pos + 1] == self._sep:
components += self._parse_isotime(dt_str[pos + 1:])
else:
raise ValueError('String contains unknown ISO components')
return datetime(*components)
@_takes_ascii
def parse_isodate(self, datestr):
"""
Parse the date portion of an ISO string.
:param datestr:
The string portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.date` object
"""
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {}'.format(datestr))
return date(*components)
@_takes_ascii
def parse_isotime(self, timestr):
"""
Parse the time portion of an ISO string.
:param timestr:
The time portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.time` object
"""
return time(*self._parse_isotime(timestr))
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
"""
Parse a valid ISO time zone string.
See :func:`isoparser.isoparse` for details on supported formats.
:param tzstr:
A string representing an ISO time zone offset
:param zero_as_utc:
Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones
:return:
Returns :class:`dateutil.tz.tzoffset` for offsets and
:class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
specified) offsets equivalent to UTC.
"""
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
_DATE_SEP = b'-'
_TIME_SEP = b':'
_MICRO_SEP = b'.'
def _parse_isodate(self, dt_str):
try:
return self._parse_isodate_common(dt_str)
except ValueError:
return self._parse_isodate_uncommon(dt_str)
def _parse_isodate_common(self, dt_str):
len_str = len(dt_str)
components = [1, 1, 1]
if len_str < 4:
raise ValueError('ISO string too short')
# Year
components[0] = int(dt_str[0:4])
pos = 4
if pos >= len_str:
return components, pos
has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
if has_sep:
pos += 1
# Month
if len_str - pos < 2:
raise ValueError('Invalid common month')
components[1] = int(dt_str[pos:pos + 2])
pos += 2
if pos >= len_str:
if has_sep:
return components, pos
else:
raise ValueError('Invalid ISO format')
if has_sep:
if dt_str[pos:pos + 1] != self._DATE_SEP:
raise ValueError('Invalid separator in ISO string')
pos += 1
# Day
if len_str - pos < 2:
raise ValueError('Invalid common day')
components[2] = int(dt_str[pos:pos + 2])
return components, pos + 2
def _parse_isodate_uncommon(self, dt_str):
if len(dt_str) < 4:
raise ValueError('ISO string too short')
# All ISO formats start with the year
year = int(dt_str[0:4])
has_sep = dt_str[4:5] == self._DATE_SEP
pos = 4 + has_sep # Skip '-' if it's there
if dt_str[pos:pos + 1] == b'W':
# YYYY-?Www-?D?
pos += 1
weekno = int(dt_str[pos:pos + 2])
pos += 2
dayno = 1
if len(dt_str) > pos:
if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
raise ValueError('Inconsistent use of dash separator')
pos += has_sep
dayno = int(dt_str[pos:pos + 1])
pos += 1
base_date = self._calculate_weekdate(year, weekno, dayno)
else:
# YYYYDDD or YYYY-DDD
if len(dt_str) - pos < 3:
raise ValueError('Invalid ordinal day')
ordinal_day = int(dt_str[pos:pos + 3])
pos += 3
if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
raise ValueError('Invalid ordinal day' +
' {} for year {}'.format(ordinal_day, year))
base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
components = [base_date.year, base_date.month, base_date.day]
return components, pos
def _calculate_weekdate(self, year, week, day):
"""
Calculate the day of corresponding to the ISO year-week-day calendar.
This function is effectively the inverse of
:func:`datetime.date.isocalendar`.
:param year:
The year in the ISO calendar
:param week:
The week in the ISO calendar - range is [1, 53]
:param day:
The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
:return:
Returns a :class:`datetime.date`
"""
if not 0 < week < 54:
raise ValueError('Invalid week: {}'.format(week))
if not 0 < day < 8: # Range is 1-7
raise ValueError('Invalid weekday: {}'.format(day))
# Get week 1 for the specific year:
jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it
week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
# Now add the specific number of weeks and days to get what we want
week_offset = (week - 1) * 7 + (day - 1)
return week_1 + timedelta(days=week_offset)
def _parse_isotime(self, timestr):
len_str = len(timestr)
components = [0, 0, 0, 0, None]
pos = 0
comp = -1
if len(timestr) < 2:
raise ValueError('ISO time too short')
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Z':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if (has_sep and pos < len_str and
timestr[pos:pos + 1] == self._TIME_SEP):
pos += 1
if comp == 3:
# Microsecond
if timestr[pos:pos + 1] != self._MICRO_SEP:
continue
pos += 1
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
1)[0]
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(us_str)
if pos < len_str:
raise ValueError('Unused components in ISO string')
if components[0] == 24:
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
components[0] = 0
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z':
return tz.tzutc()
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
if tzstr[0:1] == b'-':
mult = -1
elif tzstr[0:1] == b'+':
mult = 1
else:
raise ValueError('Time zone offset requires sign')
hours = int(tzstr[1:3])
if len(tzstr) == 3:
minutes = 0
else:
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.tzutc()
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')
if hours > 23:
raise ValueError('Invalid hours in time zone offset')
return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
DEFAULT_ISOPARSER = isoparser()
isoparse = DEFAULT_ISOPARSER.isoparse

View file

@ -19,7 +19,7 @@ class relativedelta(object):
"""
The relativedelta type is based on the specification of the excellent
work done by M.-A. Lemburg in his
`mx.DateTime <http://www.egenix.com/files/python/mxDateTime.html>`_ extension.
`mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
However, notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
@ -34,7 +34,7 @@ class relativedelta(object):
year, month, day, hour, minute, second, microsecond:
Absolute information (argument is singular); adding or subtracting a
relativedelta with absolute information does not perform an aritmetic
relativedelta with absolute information does not perform an arithmetic
operation, but rather REPLACES the corresponding value in the
original datetime with the value(s) in relativedelta.
@ -95,11 +95,6 @@ class relativedelta(object):
yearday=None, nlyearday=None,
hour=None, minute=None, second=None, microsecond=None):
# Check for non-integer values in integer-only quantities
if any(x is not None and x != int(x) for x in (years, months)):
raise ValueError("Non-integer years and months are "
"ambiguous and not currently supported.")
if dt1 and dt2:
# datetime is a subclass of date. So both must be date
if not (isinstance(dt1, datetime.date) and
@ -159,9 +154,14 @@ class relativedelta(object):
self.seconds = delta.seconds + delta.days * 86400
self.microseconds = delta.microseconds
else:
# Check for non-integer values in integer-only quantities
if any(x is not None and x != int(x) for x in (years, months)):
raise ValueError("Non-integer years and months are "
"ambiguous and not currently supported.")
# Relative information
self.years = years
self.months = months
self.years = int(years)
self.months = int(months)
self.days = days + weeks * 7
self.leapdays = leapdays
self.hours = hours
@ -249,7 +249,7 @@ class relativedelta(object):
@property
def weeks(self):
return self.days // 7
return int(self.days / 7.0)
@weeks.setter
def weeks(self, value):
@ -422,6 +422,24 @@ class relativedelta(object):
is not None else
other.microsecond))
def __abs__(self):
return self.__class__(years=abs(self.years),
months=abs(self.months),
days=abs(self.days),
hours=abs(self.hours),
minutes=abs(self.minutes),
seconds=abs(self.seconds),
microseconds=abs(self.microseconds),
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __neg__(self):
return self.__class__(years=-self.years,
months=-self.months,

View file

@ -2,12 +2,13 @@
"""
The rrule module offers a small, complete, and very fast, implementation of
the recurrence rules documented in the
`iCalendar RFC <http://www.ietf.org/rfc/rfc2445.txt>`_,
`iCalendar RFC <https://tools.ietf.org/html/rfc5545>`_,
including support for caching of results.
"""
import itertools
import datetime
import calendar
import re
import sys
try:
@ -20,6 +21,7 @@ from six.moves import _thread, range
import heapq
from ._common import weekday as weekdaybase
from .tz import tzutc, tzlocal
# For warning about deprecation of until and count
from warnings import warn
@ -359,7 +361,7 @@ class rrule(rrulebase):
.. note::
As of version 2.5.0, the use of the ``until`` keyword together
with the ``count`` keyword is deprecated per RFC-2445 Sec. 4.3.10.
with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
:param until:
If given, this must be a datetime instance, that will specify the
limit of the recurrence. The last recurrence in the rule is the greatest
@ -368,7 +370,7 @@ class rrule(rrulebase):
.. note::
As of version 2.5.0, the use of the ``until`` keyword together
with the ``count`` keyword is deprecated per RFC-2445 Sec. 4.3.10.
with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
:param bysetpos:
If given, it must be either an integer, or a sequence of integers,
positive or negative. Each given integer will specify an occurrence
@ -446,8 +448,22 @@ class rrule(rrulebase):
until = datetime.datetime.fromordinal(until.toordinal())
self._until = until
if self._dtstart and self._until:
if (self._dtstart.tzinfo is not None) != (self._until.tzinfo is not None):
# According to RFC5545 Section 3.3.10:
# https://tools.ietf.org/html/rfc5545#section-3.3.10
#
# > If the "DTSTART" property is specified as a date with UTC
# > time or a date with local time and time zone reference,
# > then the UNTIL rule part MUST be specified as a date with
# > UTC time.
raise ValueError(
'RRULE UNTIL values must be specified in UTC when DTSTART '
'is timezone-aware'
)
if count is not None and until:
warn("Using both 'count' and 'until' is inconsistent with RFC 2445"
warn("Using both 'count' and 'until' is inconsistent with RFC 5545"
" and has been deprecated in dateutil. Future versions will "
"raise an error.", DeprecationWarning)
@ -674,7 +690,7 @@ class rrule(rrulebase):
def __str__(self):
"""
Output a string that would generate this RRULE if passed to rrulestr.
This is mostly compatible with RFC2445, except for the
This is mostly compatible with RFC5545, except for the
dateutil-specific extension BYEASTER.
"""
@ -699,7 +715,7 @@ class rrule(rrulebase):
if self._original_rule.get('byweekday') is not None:
# The str() method on weekday objects doesn't generate
# RFC2445-compliant strings, so we should modify that.
# RFC5545-compliant strings, so we should modify that.
original_rule = dict(self._original_rule)
wday_strings = []
for wday in original_rule['byweekday']:
@ -1496,11 +1512,17 @@ class _rrulestr(object):
forceset=False,
compatible=False,
ignoretz=False,
tzids=None,
tzinfos=None):
global parser
if compatible:
forceset = True
unfold = True
TZID_NAMES = dict(map(
lambda x: (x.upper(), x),
re.findall('TZID=(?P<name>[^:]+):', s)
))
s = s.upper()
if not s.strip():
raise ValueError("empty string")
@ -1563,8 +1585,29 @@ class _rrulestr(object):
# RFC 5445 3.8.2.4: The VALUE parameter is optional, but
# may be found only once.
value_found = False
TZID = None
valid_values = {"VALUE=DATE-TIME", "VALUE=DATE"}
for parm in parms:
if parm.startswith("TZID="):
try:
tzkey = TZID_NAMES[parm.split('TZID=')[-1]]
except KeyError:
continue
if tzids is None:
from . import tz
tzlookup = tz.gettz
elif callable(tzids):
tzlookup = tzids
else:
tzlookup = getattr(tzids, 'get', None)
if tzlookup is None:
msg = ('tzids must be a callable, ' +
'mapping, or None, ' +
'not %s' % tzids)
raise ValueError(msg)
TZID = tzlookup(tzkey)
continue
if parm not in valid_values:
raise ValueError("unsupported DTSTART parm: "+parm)
else:
@ -1577,6 +1620,11 @@ class _rrulestr(object):
from dateutil import parser
dtstart = parser.parse(value, ignoretz=ignoretz,
tzinfos=tzinfos)
if TZID is not None:
if dtstart.tzinfo is None:
dtstart = dtstart.replace(tzinfo=TZID)
else:
raise ValueError('DTSTART specifies multiple timezones')
else:
raise ValueError("unsupported property: "+name)
if (forceset or len(rrulevals) > 1 or rdatevals

View file

@ -1,5 +1,15 @@
from .tz import *
#: Convenience constant providing a :class:`tzutc()` instance
#:
#: .. versionadded:: 2.7.0
UTC = tzutc()
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
"enfold", "datetime_ambiguous", "datetime_exists"]
"enfold", "datetime_ambiguous", "datetime_exists",
"resolve_imaginary", "UTC", "DeprecatedTzFormatWarning"]
class DeprecatedTzFormatWarning(Warning):
"""Warning raised when time zones are parsed from deprecated formats."""

View file

@ -0,0 +1,49 @@
from datetime import timedelta
class _TzSingleton(type):
def __init__(cls, *args, **kwargs):
cls.__instance = None
super(_TzSingleton, cls).__init__(*args, **kwargs)
def __call__(cls):
if cls.__instance is None:
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance
class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
return type.__call__(cls, *args, **kwargs)
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
def __call__(cls, name, offset):
if isinstance(offset, timedelta):
key = (name, offset.total_seconds())
else:
key = (name, offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))
return instance
class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))
return instance

View file

@ -14,12 +14,15 @@ import sys
import os
import bisect
import six
from six import string_types
from six.moves import _thread
from ._common import tzname_in_python2, _tzinfo
from ._common import tzrangebase, enfold
from ._common import _validate_fromutc_inputs
from ._factories import _TzSingleton, _TzOffsetFactory
from ._factories import _TzStrFactory
try:
from .win import tzwin, tzwinlocal
except ImportError:
@ -30,9 +33,38 @@ EPOCH = datetime.datetime.utcfromtimestamp(0)
EPOCHORDINAL = EPOCH.toordinal()
@six.add_metaclass(_TzSingleton)
class tzutc(datetime.tzinfo):
"""
This is a tzinfo object that represents the UTC time zone.
**Examples:**
.. doctest::
>>> from datetime import *
>>> from dateutil.tz import *
>>> datetime.now()
datetime.datetime(2003, 9, 27, 9, 40, 1, 521290)
>>> datetime.now(tzutc())
datetime.datetime(2003, 9, 27, 12, 40, 12, 156379, tzinfo=tzutc())
>>> datetime.now(tzutc()).tzname()
'UTC'
.. versionchanged:: 2.7.0
``tzutc()`` is now a singleton, so the result of ``tzutc()`` will
always return the same object.
.. doctest::
>>> from dateutil.tz import tzutc, UTC
>>> tzutc() is tzutc()
True
>>> tzutc() is UTC
True
"""
def utcoffset(self, dt):
return ZERO
@ -86,16 +118,16 @@ class tzutc(datetime.tzinfo):
__reduce__ = object.__reduce__
@six.add_metaclass(_TzOffsetFactory)
class tzoffset(datetime.tzinfo):
"""
A simple class for representing a fixed offset from UTC.
:param name:
The timezone name, to be returned when ``tzname()`` is called.
:param offset:
The time zone offset in seconds, or (since version 2.6.0, represented
as a :py:class:`datetime.timedelta` object.
as a :py:class:`datetime.timedelta` object).
"""
def __init__(self, name, offset):
self._name = name
@ -128,8 +160,6 @@ class tzoffset(datetime.tzinfo):
:param dt:
A :py:class:`datetime.datetime`, naive or time zone aware.
:return:
Returns ``True`` if ambiguous, ``False`` otherwise.
@ -171,6 +201,7 @@ class tzlocal(_tzinfo):
self._dst_saved = self._dst_offset - self._std_offset
self._hasdst = bool(self._dst_saved)
self._tznames = tuple(time.tzname)
def utcoffset(self, dt):
if dt is None and self._hasdst:
@ -192,7 +223,7 @@ class tzlocal(_tzinfo):
@tzname_in_python2
def tzname(self, dt):
return time.tzname[self._isdst(dt)]
return self._tznames[self._isdst(dt)]
def is_ambiguous(self, dt):
"""
@ -257,12 +288,20 @@ class tzlocal(_tzinfo):
return dstval
def __eq__(self, other):
if not isinstance(other, tzlocal):
if isinstance(other, tzlocal):
return (self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset)
elif isinstance(other, tzutc):
return (not self._hasdst and
self._tznames[0] in {'UTC', 'GMT'} and
self._std_offset == ZERO)
elif isinstance(other, tzoffset):
return (not self._hasdst and
self._tznames[0] == other._name and
self._std_offset == other._offset)
else:
return NotImplemented
return (self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset)
__hash__ = None
def __ne__(self, other):
@ -348,8 +387,8 @@ class tzfile(_tzinfo):
``fileobj``'s ``name`` attribute or to ``repr(fileobj)``.
See `Sources for Time Zone and Daylight Saving Time Data
<http://www.twinsun.com/tz/tz-link.htm>`_ for more information. Time zone
files can be compiled from the `IANA Time Zone database files
<https://data.iana.org/time-zones/tz-link.html>`_ for more information. Time
zone files can be compiled from the `IANA Time Zone database files
<https://www.iana.org/time-zones>`_ with the `zic time zone compiler
<https://www.freebsd.org/cgi/man.cgi?query=zic&sektion=8>`_
"""
@ -927,6 +966,7 @@ class tzrange(tzrangebase):
return self._dst_base_offset_
@six.add_metaclass(_TzStrFactory)
class tzstr(tzrange):
"""
``tzstr`` objects are time zone objects specified by a time-zone string as
@ -953,17 +993,29 @@ class tzstr(tzrange):
``UTC+3`` as being 3 hours *behind* UTC rather than ahead, per the
POSIX standard.
.. caution::
Prior to version 2.7.0, this function also supported time zones
in the format:
* ``EST5EDT,4,0,6,7200,10,0,26,7200,3600``
* ``EST5EDT,4,1,0,7200,10,-1,0,7200,3600``
This format is non-standard and has been deprecated; this function
will raise a :class:`DeprecatedTZFormatWarning` until
support is removed in a future version.
.. _`GNU C Library: TZ Variable`:
https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html
"""
def __init__(self, s, posix_offset=False):
global parser
from dateutil import parser
from dateutil.parser import _parser as parser
self._s = s
res = parser._parsetz(s)
if res is None:
if res is None or res.any_unused_tokens:
raise ValueError("unknown string format")
# Here we break the compatibility with the TZ variable handling.
@ -1133,13 +1185,13 @@ class _tzicalvtz(_tzinfo):
class tzical(object):
"""
This object is designed to parse an iCalendar-style ``VTIMEZONE`` structure
as set out in `RFC 2445`_ Section 4.6.5 into one or more `tzinfo` objects.
as set out in `RFC 5545`_ Section 4.6.5 into one or more `tzinfo` objects.
:param `fileobj`:
A file or stream in iCalendar format, which should be UTF-8 encoded
with CRLF endings.
.. _`RFC 2445`: https://www.ietf.org/rfc/rfc2445.txt
.. _`RFC 5545`: https://tools.ietf.org/html/rfc5545
"""
def __init__(self, fileobj):
global rrule
@ -1346,84 +1398,118 @@ else:
TZFILES = []
TZPATHS = []
def __get_gettz(name, zoneinfo_priority=False):
tzlocal_classes = (tzlocal,)
if tzwinlocal is not None:
tzlocal_classes += (tzwinlocal,)
def gettz(name=None, zoneinfo_priority=False):
tz = None
if not name:
try:
name = os.environ["TZ"]
except KeyError:
pass
if name is None or name == ":":
for filepath in TZFILES:
if not os.path.isabs(filepath):
filename = filepath
for path in TZPATHS:
filepath = os.path.join(path, filename)
if os.path.isfile(filepath):
break
else:
continue
if os.path.isfile(filepath):
class GettzFunc(object):
def __init__(self, name, zoneinfo_priority=False):
self.__instances = {}
self._cache_lock = _thread.allocate_lock()
def __call__(self, name=None, zoneinfo_priority=False):
with self._cache_lock:
rv = self.__instances.get(name, None)
if rv is None:
rv = self.nocache(name=name, zoneinfo_priority=zoneinfo_priority)
if not (name is None or isinstance(rv, tzlocal_classes)):
# tzlocal is slightly more complicated than the other
# time zone providers because it depends on environment
# at construction time, so don't cache that.
self.__instances[name] = rv
return rv
def cache_clear(self):
with self._cache_lock:
self.__instances = {}
@staticmethod
def nocache(name=None, zoneinfo_priority=False):
"""A non-cached version of gettz"""
tz = None
if not name:
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
name = os.environ["TZ"]
except KeyError:
pass
else:
tz = tzlocal()
else:
if name.startswith(":"):
name = name[:-1]
if os.path.isabs(name):
if os.path.isfile(name):
tz = tzfile(name)
else:
tz = None
else:
if zoneinfo_priority:
from dateutil.zoneinfo import get_zonefile_instance
tz = get_zonefile_instance().get(name)
if not tz:
for path in TZPATHS:
filepath = os.path.join(path, name)
if not os.path.isfile(filepath):
filepath = filepath.replace(' ', '_')
if not os.path.isfile(filepath):
continue
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = None
if tzwin is not None:
try:
tz = tzwin(name)
except WindowsError:
tz = None
if not zoneinfo_priority and not tz:
from dateutil.zoneinfo import get_zonefile_instance
tz = get_zonefile_instance().get(name)
if not tz:
for c in name:
# name must have at least one offset to be a tzstr
if c in "0123456789":
try:
tz = tzstr(name)
except ValueError:
pass
if name is None or name == ":":
for filepath in TZFILES:
if not os.path.isabs(filepath):
filename = filepath
for path in TZPATHS:
filepath = os.path.join(path, filename)
if os.path.isfile(filepath):
break
else:
if name in ("GMT", "UTC"):
tz = tzutc()
elif name in time.tzname:
tz = tzlocal()
return tz
continue
if os.path.isfile(filepath):
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = tzlocal()
else:
if name.startswith(":"):
name = name[1:]
if os.path.isabs(name):
if os.path.isfile(name):
tz = tzfile(name)
else:
tz = None
else:
if zoneinfo_priority:
from dateutil.zoneinfo import get_zonefile_instance
tz = get_zonefile_instance().get(name)
if not tz:
for path in TZPATHS:
filepath = os.path.join(path, name)
if not os.path.isfile(filepath):
filepath = filepath.replace(' ', '_')
if not os.path.isfile(filepath):
continue
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = None
if tzwin is not None:
try:
tz = tzwin(name)
except WindowsError:
tz = None
if not zoneinfo_priority and not tz:
from dateutil.zoneinfo import get_zonefile_instance
tz = get_zonefile_instance().get(name)
if not tz:
for c in name:
# name must have at least one offset to be a tzstr
if c in "0123456789":
try:
tz = tzstr(name)
except ValueError:
pass
break
else:
if name in ("GMT", "UTC"):
tz = tzutc()
elif name in time.tzname:
tz = tzlocal()
return tz
return GettzFunc(name, zoneinfo_priority)
gettz = __get_gettz(name=None, zoneinfo_priority=False)
del __get_gettz
def datetime_exists(dt, tz=None):
"""
@ -1440,6 +1526,8 @@ def datetime_exists(dt, tz=None):
:return:
Returns a boolean value whether or not the "wall time" exists in ``tz``.
..versionadded:: 2.7.0
"""
if tz is None:
if dt.tzinfo is None:
@ -1502,6 +1590,51 @@ def datetime_ambiguous(dt, tz=None):
return not (same_offset and same_dst)
def resolve_imaginary(dt):
"""
Given a datetime that may be imaginary, return an existing datetime.
This function assumes that an imaginary datetime represents what the
wall time would be in a zone had the offset transition not occurred, so
it will always fall forward by the transition's change in offset.
..doctest::
>>> from dateutil import tz
>>> from datetime import datetime
>>> NYC = tz.gettz('America/New_York')
>>> print(tz.resolve_imaginary(datetime(2017, 3, 12, 2, 30, tzinfo=NYC)))
2017-03-12 03:30:00-04:00
>>> KIR = tz.gettz('Pacific/Kiritimati')
>>> print(tz.resolve_imaginary(datetime(1995, 1, 1, 12, 30, tzinfo=KIR)))
1995-01-02 12:30:00+14:00
As a note, :func:`datetime.astimezone` is guaranteed to produce a valid,
existing datetime, so a round-trip to and from UTC is sufficient to get
an extant datetime, however, this generally "falls back" to an earlier time
rather than falling forward to the STD side (though no guarantees are made
about this behavior).
:param dt:
A :class:`datetime.datetime` which may or may not exist.
:return:
Returns an existing :class:`datetime.datetime`. If ``dt`` was not
imaginary, the datetime returned is guaranteed to be the same object
passed to the function.
..versionadded:: 2.7.0
"""
if dt.tzinfo is not None and not datetime_exists(dt):
curr_offset = (dt + datetime.timedelta(hours=24)).utcoffset()
old_offset = (dt - datetime.timedelta(hours=24)).utcoffset()
dt += curr_offset - old_offset
return dt
def _datetime_to_timestamp(dt):
"""
Convert a :class:`datetime.datetime` object to an epoch timestamp in seconds

View file

@ -1,6 +1,59 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import datetime, time
def today(tzinfo=None):
"""
Returns a :py:class:`datetime` representing the current day at midnight
:param tzinfo:
The time zone to attach (also used to determine the current day).
:return:
A :py:class:`datetime.datetime` object representing the current day
at midnight.
"""
dt = datetime.now(tzinfo)
return datetime.combine(dt.date(), time(0, tzinfo=tzinfo))
def default_tzinfo(dt, tzinfo):
"""
Sets the the ``tzinfo`` parameter on naive datetimes only
This is useful for example when you are provided a datetime that may have
either an implicit or explicit time zone, such as when parsing a time zone
string.
.. doctest::
>>> from dateutil.tz import tzoffset
>>> from dateutil.parser import parse
>>> from dateutil.utils import default_tzinfo
>>> dflt_tz = tzoffset("EST", -18000)
>>> print(default_tzinfo(parse('2014-01-01 12:30 UTC'), dflt_tz))
2014-01-01 12:30:00+00:00
>>> print(default_tzinfo(parse('2014-01-01 12:30'), dflt_tz))
2014-01-01 12:30:00-05:00
:param dt:
The datetime on which to replace the time zone
:param tzinfo:
The :py:class:`datetime.tzinfo` subclass instance to assign to
``dt`` if (and only if) it is naive.
:return:
Returns an aware :py:class:`datetime.datetime`.
"""
if dt.tzinfo is not None:
return dt
else:
return dt.replace(tzinfo=tzinfo)
def within_delta(dt1, dt2, delta):
"""

View file

@ -6,20 +6,19 @@ import os
from tarfile import TarFile
from pkgutil import get_data
from io import BytesIO
from contextlib import closing
from dateutil.tz import tzfile
from dateutil.tz import tzfile as _tzfile
from sickbeard import encodingKludge as ek
import sickbeard
__all__ = ["get_zonefile_instance", "gettz", "gettz_db_metadata", "rebuild"]
__all__ = ["get_zonefile_instance", "gettz", "gettz_db_metadata"]
ZONEFILENAME = "dateutil-zoneinfo.tar.gz"
METADATA_FN = 'METADATA'
class tzfile(tzfile):
class tzfile(_tzfile):
def __reduce__(self):
return (gettz, (self._filename,))

View file

@ -12,7 +12,7 @@ from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
"""Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar*
filename is the timezone tarball from ftp.iana.org/tz.
filename is the timezone tarball from ``ftp.iana.org/tz``.
"""
tmpdir = tempfile.mkdtemp()

View file

@ -41,4 +41,10 @@ from .api import parse
from .datetimes import registerDateHandler
from .exceptions import *
api.USER_AGENT = USER_AGENT
# If you want feedparser to automatically resolve all relative URIs, set this
# to 1.
RESOLVE_RELATIVE_URIS = 1
# If you want feedparser to automatically sanitize all potentially unsafe
# HTML content, set this to 1.
SANITIZE_HTML = 1

View file

@ -75,17 +75,7 @@ except NameError:
# of pre-installed parsers until it finds one that supports everything we need.
PREFERRED_XML_PARSERS = ["drv_libxml2"]
# If you want feedparser to automatically resolve all relative URIs, set this
# to 1.
RESOLVE_RELATIVE_URIS = 1
# If you want feedparser to automatically sanitize all potentially unsafe
# HTML content, set this to 1.
SANITIZE_HTML = 1
_XML_AVAILABLE = True
mixin.RESOLVE_RELATIVE_URIS = RESOLVE_RELATIVE_URIS
mixin.SANITIZE_HTML = SANITIZE_HTML
SUPPORTED_VERSIONS = {
'': 'unknown',
@ -175,17 +165,61 @@ StrictFeedParser = type(str('StrictFeedParser'), (
_StrictFeedParser, _FeedParserMixin, xml.sax.handler.ContentHandler, object
), {})
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None):
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None, resolve_relative_uris=None, sanitize_html=None):
'''Parse a feed from a URL, file, stream, or string.
request_headers, if given, is a dict from http header name to value to add
to the request; this overrides internally generated values.
:param url_file_stream_or_string:
File-like object, URL, file path, or string. Both byte and text strings
are accepted. If necessary, encoding will be derived from the response
headers or automatically detected.
Note that strings may trigger network I/O or filesystem access
depending on the value. Wrap an untrusted string in
a :class:`io.StringIO` or :class:`io.BytesIO` to avoid this. Do not
pass untrusted strings to this function.
When a URL is not passed the feed location to use in relative URL
resolution should be passed in the ``Content-Location`` response header
(see ``response_headers`` below).
:param str etag: HTTP ``ETag`` request header.
:param modified: HTTP ``Last-Modified`` request header.
:type modified: :class:`str`, :class:`time.struct_time` 9-tuple, or
:class:`datetime.datetime`
:param str agent: HTTP ``User-Agent`` request header, which defaults to
the value of :data:`feedparser.USER_AGENT`.
:param referrer: HTTP ``Referer`` [sic] request header.
:param request_headers:
A mapping of HTTP header name to HTTP header value to add to the
request, overriding internally generated values.
:type request_headers: :class:`dict` mapping :class:`str` to :class:`str`
:param response_headers:
A mapping of HTTP header name to HTTP header value. Multiple values may
be joined with a comma. If a HTTP request was made, these headers
override any matching headers in the response. Otherwise this specifies
the entirety of the response headers.
:type response_headers: :class:`dict` mapping :class:`str` to :class:`str`
:param bool resolve_relative_uris:
Should feedparser attempt to resolve relative URIs absolute ones within
HTML content? Defaults to the value of
:data:`feedparser.RESOLVE_RELATIVE_URIS`, which is ``True``.
:param bool sanitize_html:
Should feedparser skip HTML sanitization? Only disable this if you know
what you are doing! Defaults to the value of
:data:`feedparser.SANITIZE_HTML`, which is ``True``.
:return: A :class:`FeedParserDict`.
'''
if not agent or sanitize_html is None or resolve_relative_uris is None:
import feedparser
if not agent:
agent = USER_AGENT
agent = feedparser.USER_AGENT
if sanitize_html is None:
sanitize_html = feedparser.SANITIZE_HTML
if resolve_relative_uris is None:
resolve_relative_uris = feedparser.RESOLVE_RELATIVE_URIS
result = FeedParserDict(
bozo = False,
entries = [],
@ -220,6 +254,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
if use_strict_parser:
# initialize the SAX parser
feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
try:
@ -239,6 +275,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
use_strict_parser = 0
if not use_strict_parser and _SGML_AVAILABLE:
feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
feedparser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feedparser.feeddata
result['entries'] = feedparser.entries

View file

@ -34,7 +34,10 @@ import collections
import re
try:
import chardet
try:
import cchardet as chardet
except ImportError:
import chardet
except ImportError:
chardet = None
lazy_chardet_encoding = None

View file

@ -515,12 +515,12 @@ class _FeedParserMixin(
is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types
# resolve relative URIs within embedded markup
if is_htmlish and RESOLVE_RELATIVE_URIS:
if is_htmlish and self.resolve_relative_uris:
if element in self.can_contain_relative_uris:
output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html'))
# sanitize embedded markup
if is_htmlish and SANITIZE_HTML:
if is_htmlish and self.sanitize_html:
if element in self.can_contain_dangerous_markup:
output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html'))

2
lib/hachoir/__init__.py Normal file
View file

@ -0,0 +1,2 @@
# noinspection PyPep8Naming
from hachoir.version import VERSION as __version__

View file

@ -3,11 +3,11 @@ Utilities to convert integers and binary strings to binary (number), binary
string, number, hexadecimal, etc.
"""
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir_core.compatibility import reversed
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from itertools import chain, repeat
from struct import calcsize, unpack, error as struct_error
def swap16(value):
"""
Swap byte between big and little endian of a 16 bits integer.
@ -17,6 +17,7 @@ def swap16(value):
"""
return (value & 0xFF) << 8 | (value >> 8)
def swap32(value):
"""
Swap byte between big and little endian of a 32 bits integer.
@ -24,25 +25,28 @@ def swap32(value):
>>> "%x" % swap32(0x12345678)
'78563412'
"""
value = long(value)
return ((value & 0x000000FFL) << 24) \
| ((value & 0x0000FF00L) << 8) \
| ((value & 0x00FF0000L) >> 8) \
| ((value & 0xFF000000L) >> 24)
value = int(value)
return (((value & 0x000000FF) << 24)
| ((value & 0x0000FF00) << 8)
| ((value & 0x00FF0000) >> 8)
| ((value & 0xFF000000) >> 24))
def arrswapmid(data):
r"""
Convert an array of characters from middle-endian to big-endian and vice-versa.
Convert an array of characters from middle-endian to big-endian and
vice-versa.
>>> arrswapmid("badcfehg")
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
"""
assert len(data)%2 == 0
ret = ['']*len(data)
assert len(data) % 2 == 0
ret = [''] * len(data)
ret[1::2] = data[0::2]
ret[0::2] = data[1::2]
return ret
def strswapmid(data):
r"""
Convert raw data from middle-endian to big-endian and vice-versa.
@ -52,6 +56,7 @@ def strswapmid(data):
"""
return ''.join(arrswapmid(data))
def bin2long(text, endian):
"""
Convert binary number written in a string into an integer.
@ -65,8 +70,8 @@ def bin2long(text, endian):
3
"""
assert endian in (LITTLE_ENDIAN, BIG_ENDIAN)
bits = [ (ord(character)-ord("0")) \
for character in text if character in "01" ]
bits = [(ord(character) - ord("0"))
for character in text if character in "01"]
if endian is not BIG_ENDIAN:
bits = bits[::-1]
size = len(bits)
@ -77,6 +82,7 @@ def bin2long(text, endian):
value += bit
return value
def str2hex(value, prefix="", glue=u"", format="%02X"):
r"""
Convert binary string in hexadecimal (base 16).
@ -100,6 +106,7 @@ def str2hex(value, prefix="", glue=u"", format="%02X"):
text.append(format % ord(character))
return glue.join(text)
def countBits(value):
"""
Count number of bits needed to store a (positive) integer number.
@ -117,7 +124,7 @@ def countBits(value):
count = 1
bits = 1
while (1 << bits) <= value:
count += bits
count += bits
value >>= bits
bits <<= 1
while 2 <= value:
@ -126,10 +133,11 @@ def countBits(value):
else:
bits -= 1
while (1 << bits) <= value:
count += bits
count += bits
value >>= bits
return count
def byte2bin(number, classic_mode=True):
"""
Convert a byte (integer in 0..255 range) to a binary string.
@ -143,7 +151,7 @@ def byte2bin(number, classic_mode=True):
text = ""
for i in range(0, 8):
if classic_mode:
mask = 1 << (7-i)
mask = 1 << (7 - i)
else:
mask = 1 << i
if (number & mask) == mask:
@ -152,6 +160,7 @@ def byte2bin(number, classic_mode=True):
text += "0"
return text
def long2raw(value, endian, size=None):
r"""
Convert a number (positive and not nul) to a raw string.
@ -169,7 +178,7 @@ def long2raw(value, endian, size=None):
text = []
while (value != 0 or text == ""):
byte = value % 256
text.append( chr(byte) )
text.append(chr(byte))
value >>= 8
if size:
need = max(size - len(text), 0)
@ -187,6 +196,7 @@ def long2raw(value, endian, size=None):
text = arrswapmid(text)
return "".join(text)
def long2bin(size, value, endian, classic_mode=False):
"""
Convert a number into bits (in a string):
@ -227,6 +237,7 @@ def long2bin(size, value, endian, classic_mode=False):
text = text[8:]
return result
def str2bin(value, classic_mode=True):
r"""
Convert binary string to binary numbers.
@ -245,13 +256,14 @@ def str2bin(value, classic_mode=True):
text += byte2bin(byte, classic_mode)
return text
def _createStructFormat():
"""
Create a dictionnary (endian, size_byte) => struct format used
by str2long() to convert raw data to positive integer.
"""
format = {
BIG_ENDIAN: {},
BIG_ENDIAN: {},
LITTLE_ENDIAN: {},
}
for struct_format in "BHILQ":
@ -262,8 +274,11 @@ def _createStructFormat():
except struct_error:
pass
return format
_struct_format = _createStructFormat()
def str2long(data, endian):
r"""
Convert a raw data (type 'str') into a long integer.
@ -285,7 +300,7 @@ def str2long(data, endian):
>>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True
"""
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
try:
return unpack(_struct_format[endian][len(data)], data)[0]
except KeyError:

View file

@ -8,8 +8,7 @@ import os
max_string_length = 40 # Max. length in characters of GenericString.display
max_byte_length = 14 # Max. length in bytes of RawBytes.display
max_bit_length = 256 # Max. length in bits of RawBits.display
unicode_stdout = True # Replace stdout and stderr with Unicode compatible objects
# Disable it for readline or ipython
unicode_stdout = True # Replace stdout and stderr with Unicode compatible objects, disable for readline or ipython
# Global options
debug = False # Display many informations usefull to debug
@ -24,6 +23,5 @@ else:
use_i18n = True
# Parser global options
autofix = True # Enable Autofix? see hachoir_core.field.GenericFieldSet
autofix = True # Enable Autofix? see hachoir.field.GenericFieldSet
check_padding_pattern = True # Check padding fields pattern?

View file

@ -2,32 +2,35 @@
Dictionnary classes which store values order.
"""
from hachoir_core.error import HachoirError
from hachoir_core.i18n import _
from hachoir.core.i18n import _
class UniqKeyError(HachoirError):
class UniqKeyError(Exception):
"""
Error raised when a value is set whereas the key already exist in a
dictionnary.
"""
pass
class Dict(object):
"""
This class works like classic Python dict() but has an important method:
__iter__() which allow to iterate into the dictionnary _values_ (and not
keys like Python's dict does).
"""
def __init__(self, values=None):
self._index = {} # key => index
self._key_list = [] # index => key
self._value_list = [] # index => value
self._index = {} # key => index
self._key_list = [] # index => key
self._value_list = [] # index => value
if values:
for key, value in values:
self.append(key,value)
self.append(key, value)
def _getValues(self):
return self._value_list
values = property(_getValues)
def index(self, key):
@ -139,7 +142,7 @@ class Dict(object):
index += len(self._value_list)
if not (0 <= index < len(self._value_list)):
raise IndexError(_("list assignment index out of range (%s/%s)")
% (index, len(self._value_list)))
% (index, len(self._value_list)))
del self._value_list[index]
del self._key_list[index]
@ -168,7 +171,7 @@ class Dict(object):
_index = index
if index < 0:
index += len(self._value_list)
if not(0 <= index <= len(self._value_list)):
if not (0 <= index <= len(self._value_list)):
raise IndexError(_("Insert error: index '%s' is invalid") % _index)
for item_key, item_index in self._index.iteritems():
if item_index >= index:
@ -178,6 +181,5 @@ class Dict(object):
self._value_list.insert(index, value)
def __repr__(self):
items = ( "%r: %r" % (key, value) for key, value in self.iteritems() )
items = ("%r: %r" % (key, value) for key, value in self.iteritems())
return "{%s}" % ", ".join(items)

View file

@ -2,7 +2,7 @@
Constant values about endian.
"""
from hachoir_core.i18n import _
from hachoir.core.i18n import _
BIG_ENDIAN = "ABCD"
LITTLE_ENDIAN = "DCBA"

30
lib/hachoir/core/error.py Normal file
View file

@ -0,0 +1,30 @@
"""
Functions to display an error (error, warning or information) message.
"""
from hachoir.core.log import log
import sys
import traceback
def getBacktrace(empty="Empty backtrace."):
"""
Try to get backtrace as string.
Returns "Error while trying to get backtrace" on failure.
"""
try:
info = sys.exc_info()
trace = traceback.format_exception(*info)
sys.exc_clear()
if trace[0] != "None\n":
return "".join(trace)
except Exception:
# No i18n here (imagine if i18n function calls error...)
return "Error while trying to get backtrace"
return empty
info = log.info
warning = log.warning
error = log.error

View file

@ -14,13 +14,14 @@ WARNING: Loading this module indirectly calls initLocale() which sets
settings.
"""
import hachoir_core.config as config
import hachoir_core
import hachoir.core.config as config
import hachoir.core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
def _getTerminalCharset():
"""
Function used by getTerminalCharset() to get terminal charset.
@ -50,6 +51,7 @@ def _getTerminalCharset():
# (4) Otherwise, returns "ASCII"
return "ASCII"
def getTerminalCharset():
"""
Guess terminal charset using differents tests:
@ -66,6 +68,7 @@ def getTerminalCharset():
getTerminalCharset.value = _getTerminalCharset()
return getTerminalCharset.value
class UnicodeStdout(object):
def __init__(self, old_device, charset):
self.device = old_device
@ -83,6 +86,7 @@ class UnicodeStdout(object):
for text in lines:
self.write(text)
def initLocale():
# Only initialize locale once
if initLocale.is_done:
@ -104,17 +108,22 @@ def initLocale():
sys.stdout = UnicodeStdout(sys.stdout, charset)
sys.stderr = UnicodeStdout(sys.stderr, charset)
return charset
initLocale.is_done = False
def _dummy_gettext(text):
return unicode(text)
def _dummy_ngettext(singular, plural, count):
if 1 < abs(count) or not count:
return unicode(plural)
else:
return unicode(singular)
def _initGettext():
charset = initLocale()
@ -133,7 +142,7 @@ def _initGettext():
return (_dummy_gettext, _dummy_ngettext)
# Gettext variables
package = hachoir_core.PACKAGE
package = 'hachoir'
locale_dir = path.join(path.dirname(__file__), "..", "locale")
# Initialize gettext module
@ -150,6 +159,7 @@ def _initGettext():
unicode(ngettext(singular, plural, count), charset)
return (unicode_gettext, unicode_ngettext)
UTF_BOMS = (
(BOM_UTF8, "UTF-8"),
(BOM_UTF16_LE, "UTF-16-LE"),
@ -165,6 +175,7 @@ CHARSET_CHARACTERS = (
(set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)
def guessBytesCharset(bytes, default=None):
r"""
>>> guessBytesCharset("abc")
@ -202,13 +213,13 @@ def guessBytesCharset(bytes, default=None):
pass
# Create a set of non-ASCII characters
non_ascii_set = set( byte for byte in bytes if ord(byte) >= 128 )
non_ascii_set = set(byte for byte in bytes if ord(byte) >= 128)
for characters, charset in CHARSET_CHARACTERS:
if characters.issuperset(non_ascii_set):
return charset
return default
# Initialize _(), gettext() and ngettext() functions
gettext, ngettext = _initGettext()
_ = gettext

View file

@ -555,4 +555,3 @@ for line in _ISO639:
for key in line[1].split("/"):
ISO639_2[key] = line[0]
del _ISO639

View file

@ -1,4 +1,5 @@
from hachoir_core.iso639 import ISO639_2
from hachoir.core.iso639 import ISO639_2
class Language:
def __init__(self, code):
@ -13,11 +14,10 @@ class Language:
return cmp(self.code, other.code)
def __unicode__(self):
return ISO639_2[self.code]
return ISO639_2[self.code]
def __str__(self):
return self.__unicode__()
return self.__unicode__()
def __repr__(self):
return "<Language '%s', code=%r>" % (unicode(self), self.code)

View file

@ -1,11 +1,15 @@
import os, sys, time
import hachoir_core.config as config
from hachoir_core.i18n import _
import os
import sys
import time
import hachoir.core.config as config
from hachoir.core.i18n import _
class Log:
LOG_INFO = 0
LOG_WARN = 1
LOG_ERROR = 2
LOG_INFO = 0
LOG_WARN = 1
LOG_ERROR = 2
level_name = {
LOG_WARN: "[warn]",
@ -18,7 +22,8 @@ class Log:
self.__file = None
self.use_print = True
self.use_buffer = False
self.on_new_message = None # Prototype: def func(level, prefix, text, context)
# Prototype: def func(level, prefix, text, context)
self.on_new_message = None
def shutdown(self):
if self.__file:
@ -46,12 +51,10 @@ class Log:
else:
self.__file = codecs.open(filename, "w", "utf-8")
self._writeIntoFile(_("Starting Hachoir"))
except IOError, err:
if err.errno == 2:
self.__file = None
self.info(_("[Log] setFilename(%s) fails: no such file") % filename)
else:
raise
except FileNotFoundError:
self.__file = None
self.info("[Log] setFilename(%s) fails: no such file"
% filename)
def _writeIntoFile(self, message):
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
@ -72,10 +75,10 @@ class Log:
"""
if level < self.LOG_ERROR and config.quiet or \
level <= self.LOG_INFO and not config.verbose:
level <= self.LOG_INFO and not config.verbose:
return
if config.debug:
from hachoir_core.error import getBacktrace
from hachoir.core.error import getBacktrace
backtrace = getBacktrace(None)
if backtrace:
text += "\n\n" + backtrace
@ -108,7 +111,7 @@ class Log:
# Use callback (if used)
if self.on_new_message:
self.on_new_message (level, prefix, _text, ctxt)
self.on_new_message(level, prefix, _text, ctxt)
def info(self, text):
"""
@ -131,14 +134,19 @@ class Log:
"""
self.newMessage(Log.LOG_ERROR, text)
log = Log()
class Logger(object):
def _logger(self):
return "<%s>" % self.__class__.__name__
def info(self, text):
log.newMessage(Log.LOG_INFO, text, self)
def warning(self, text):
log.newMessage(Log.LOG_WARN, text, self)
def error(self, text):
log.newMessage(Log.LOG_ERROR, text, self)

View file

View file

@ -1,15 +1,17 @@
from hachoir_core.tools import humanDurationNanosec
from hachoir_core.i18n import _
from hachoir.core.tools import humanDurationNanosec
from hachoir.core.i18n import _
from math import floor
from time import time
class BenchmarkError(Exception):
"""
Error during benchmark, use str(err) to format it as string.
"""
def __init__(self, message):
Exception.__init__(self,
"Benchmark internal error: %s" % message)
Exception.__init__(self, "Benchmark internal error: %s" % message)
class BenchmarkStat:
"""
@ -24,6 +26,7 @@ class BenchmarkStat:
- __len__(): get number of elements
- __nonzero__(): isn't empty?
"""
def __init__(self):
self._values = []
@ -53,9 +56,10 @@ class BenchmarkStat:
def getSum(self):
return self._sum
class Benchmark:
def __init__(self, max_time=5.0,
min_count=5, max_count=None, progress_time=1.0):
min_count=5, max_count=None, progress_time=1.0):
"""
Constructor:
- max_time: Maximum wanted duration of the whole benchmark
@ -97,8 +101,7 @@ class Benchmark:
average = stat.getSum() / len(stat)
values = (stat.getMin(), average, stat.getMax(), stat.getSum())
values = tuple(self.formatTime(value) for value in values)
print _("Benchmark: best=%s average=%s worst=%s total=%s") \
% values
print _("Benchmark: best=%s average=%s worst=%s total=%s") % values
def _runOnce(self, func, args, kw):
before = time()
@ -140,7 +143,7 @@ class Benchmark:
estimate = diff * count
if self.verbose:
print _("Run benchmark: %s calls (estimate: %s)") \
% (count, self.formatTime(estimate))
% (count, self.formatTime(estimate))
display_progress = self.verbose and (1.0 <= estimate)
total_count = 1
@ -148,14 +151,14 @@ class Benchmark:
# Run benchmark and display each result
if display_progress:
print _("Result %s/%s: %s (best: %s)") % \
(total_count, count,
self.formatTime(diff), self.formatTime(best))
(total_count, count,
self.formatTime(diff), self.formatTime(best))
part = count - total_count
# Will takes more than one second?
average = total_time / total_count
if self.progress_time < part * average:
part = max( int(self.progress_time / average), 1)
part = max(int(self.progress_time / average), 1)
for index in xrange(part):
diff = self._runOnce(func, args, kw)
stat.append(diff)
@ -164,8 +167,8 @@ class Benchmark:
total_count += part
if display_progress:
print _("Result %s/%s: %s (best: %s)") % \
(count, count,
self.formatTime(diff), self.formatTime(best))
(count, count,
self.formatTime(diff), self.formatTime(best))
return stat
def validateStat(self, stat):
@ -207,4 +210,3 @@ class Benchmark:
# Validate and display stats
self.validateStat(stat)
self.displayStat(stat)

View file

@ -1,37 +1,42 @@
from optparse import OptionGroup
from hachoir_core.log import log
from hachoir_core.i18n import _, getTerminalCharset
from hachoir_core.tools import makePrintable
import hachoir_core.config as config
from hachoir.core.log import log
from hachoir.core.i18n import _, getTerminalCharset
from hachoir.core.tools import makePrintable
import hachoir.core.config as config
def getHachoirOptions(parser):
"""
Create an option group (type optparse.OptionGroup) of Hachoir
library options.
"""
def setLogFilename(*args):
log.setFilename(args[2])
common = OptionGroup(parser, _("Hachoir library"), \
"Configure Hachoir library")
common = OptionGroup(parser, _("Hachoir library"),
"Configure Hachoir library")
common.add_option("--verbose", help=_("Verbose mode"),
default=False, action="store_true")
default=False, action="store_true")
common.add_option("--log", help=_("Write log in a file"),
type="string", action="callback", callback=setLogFilename)
type="string", action="callback",
callback=setLogFilename)
common.add_option("--quiet", help=_("Quiet mode (don't display warning)"),
default=False, action="store_true")
default=False, action="store_true")
common.add_option("--debug", help=_("Debug mode"),
default=False, action="store_true")
default=False, action="store_true")
return common
def configureHachoir(option):
# Configure Hachoir using "option" (value from optparse)
if option.quiet:
config.quiet = True
config.quiet = True
if option.verbose:
config.verbose = True
config.verbose = True
if option.debug:
config.debug = True
config.debug = True
def unicodeFilename(filename, charset=None):
if not charset:
@ -40,4 +45,3 @@ def unicodeFilename(filename, charset=None):
return unicode(filename, charset)
except UnicodeDecodeError:
return makePrintable(filename, charset, to_unicode=True)

View file

@ -1,8 +1,9 @@
import gc
#---- Default implementation when resource is missing ----------------------
# ---- Default implementation when resource is missing ----------------------
PAGE_SIZE = 4096
def getMemoryLimit():
"""
Get current memory limit in bytes.
@ -11,6 +12,7 @@ def getMemoryLimit():
"""
return None
def setMemoryLimit(max_mem):
"""
Set memory limit in bytes.
@ -20,6 +22,7 @@ def setMemoryLimit(max_mem):
"""
return False
def getMemorySize():
"""
Read currenet process memory size: size of available virtual memory.
@ -33,19 +36,22 @@ def getMemorySize():
return None
return int(statm[0]) * PAGE_SIZE
def clearCaches():
"""
Try to clear all caches: call gc.collect() (Python garbage collector).
"""
gc.collect()
#import re; re.purge()
# import re; re.purge()
try:
#---- 'resource' implementation ---------------------------------------------
# ---- 'resource' implementation ---------------------------------------------
from resource import getpagesize, getrlimit, setrlimit, RLIMIT_AS
PAGE_SIZE = getpagesize()
def getMemoryLimit():
try:
limit = getrlimit(RLIMIT_AS)[0]
@ -55,6 +61,7 @@ try:
except ValueError:
return None
def setMemoryLimit(max_mem):
if max_mem is None:
max_mem = -1
@ -66,6 +73,7 @@ try:
except ImportError:
pass
def limitedMemory(limit, func, *args, **kw):
"""
Limit memory grow when calling func(*args, **kw):
@ -96,4 +104,3 @@ def limitedMemory(limit, func, *args, **kw):
# After calling the function: clear all caches
clearCaches()

View file

@ -2,7 +2,9 @@ from hotshot import Profile
from hotshot.stats import load as loadStats
from os import unlink
def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25, sort_by=('cumulative', 'calls')):
def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25,
sort_by=('cumulative', 'calls')):
profile_filename = "/tmp/profiler"
prof = Profile(profile_filename)
try:
@ -28,4 +30,3 @@ def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25, sort_by=('c
return result
finally:
unlink(profile_filename)

View file

@ -6,24 +6,28 @@ from math import ceil
IMPLEMENTATION = None
class Timeout(RuntimeError):
"""
Timeout error, inherits from RuntimeError
"""
pass
def signalHandler(signum, frame):
"""
Signal handler to catch timeout signal: raise Timeout exception.
"""
raise Timeout("Timeout exceed!")
def limitedTime(second, func, *args, **kw):
"""
Call func(*args, **kw) with a timeout of second seconds.
"""
return func(*args, **kw)
def fixTimeout(second):
"""
Fix timeout value: convert to integer with a minimum of 1 second
@ -33,6 +37,7 @@ def fixTimeout(second):
assert isinstance(second, (int, long))
return max(second, 1)
if not IMPLEMENTATION:
try:
from signal import signal, alarm, SIGALRM
@ -48,6 +53,7 @@ if not IMPLEMENTATION:
alarm(0)
signal(SIGALRM, old_alarm)
IMPLEMENTATION = "signal.alarm()"
except ImportError:
pass
@ -57,6 +63,7 @@ if not IMPLEMENTATION:
from signal import signal, SIGXCPU
from resource import getrlimit, setrlimit, RLIMIT_CPU
# resource.setrlimit(RLIMIT_CPU) implementation
# "Bug": timeout is 'CPU' time so sleep() are not part of the timeout
def limitedTime(second, func, *args, **kw):
@ -70,7 +77,7 @@ if not IMPLEMENTATION:
setrlimit(RLIMIT_CPU, current)
signal(SIGXCPU, old_alarm)
IMPLEMENTATION = "resource.setrlimit(RLIMIT_CPU)"
except ImportError:
pass

View file

@ -2,12 +2,13 @@
Utilities used to convert a field to human classic reprentation of data.
"""
from hachoir_core.tools import (
from hachoir.core.tools import (
humanDuration, humanFilesize, alignValue,
durationWin64 as doDurationWin64,
deprecated)
from types import FunctionType, MethodType
from hachoir_core.field import Field
from hachoir.field import Field
def textHandler(field, handler):
assert isinstance(handler, (FunctionType, MethodType))
@ -15,12 +16,14 @@ def textHandler(field, handler):
field.createDisplay = lambda: handler(field)
return field
def displayHandler(field, handler):
assert isinstance(handler, (FunctionType, MethodType))
assert issubclass(field.__class__, Field)
field.createDisplay = lambda: handler(field.value)
return field
@deprecated("Use TimedeltaWin64 field type")
def durationWin64(field):
"""
@ -37,12 +40,14 @@ def durationWin64(field):
delta = doDurationWin64(field.value)
return humanDuration(delta)
def filesizeHandler(field):
"""
Format field value using humanFilesize()
"""
return displayHandler(field, humanFilesize)
def hexadecimal(field):
"""
Convert an integer to hexadecimal in lower case. Returns unicode string.
@ -57,4 +62,3 @@ def hexadecimal(field):
padding = alignValue(size, 4) // 4
pattern = u"0x%%0%ux" % padding
return pattern % field.value

View file

@ -4,12 +4,13 @@
Various utilities.
"""
from hachoir_core.i18n import _, ngettext
from hachoir.core.i18n import _, ngettext
import re
import stat
from datetime import datetime, timedelta, MAXYEAR
from warnings import warn
def deprecated(comment=None):
"""
This is a decorator which can be used to mark functions
@ -26,6 +27,7 @@ def deprecated(comment=None):
Code from: http://code.activestate.com/recipes/391367/
"""
def _deprecated(func):
def newFunc(*args, **kwargs):
message = "Call to deprecated function %s" % func.__name__
@ -33,12 +35,15 @@ def deprecated(comment=None):
message += ": " + comment
warn(message, category=DeprecationWarning, stacklevel=2)
return func(*args, **kwargs)
newFunc.__name__ = func.__name__
newFunc.__doc__ = func.__doc__
newFunc.__dict__.update(func.__dict__)
return newFunc
return _deprecated
def paddingSize(value, align):
"""
Compute size of a padding field.
@ -57,6 +62,7 @@ def paddingSize(value, align):
else:
return 0
def alignValue(value, align):
"""
Align a value to next 'align' multiple.
@ -76,6 +82,7 @@ def alignValue(value, align):
else:
return value
def timedelta2seconds(delta):
"""
Convert a datetime.timedelta() objet to a number of second
@ -87,7 +94,8 @@ def timedelta2seconds(delta):
60.25
"""
return delta.microseconds / 1000000.0 \
+ delta.seconds + delta.days * 60*60*24
+ delta.seconds + delta.days * 60 * 60 * 24
def humanDurationNanosec(nsec):
"""
@ -105,14 +113,15 @@ def humanDurationNanosec(nsec):
# Micro seconds
usec, nsec = divmod(nsec, 1000)
if usec < 1000:
return u"%.2f usec" % (usec+float(nsec)/1000)
return u"%.2f usec" % (usec + float(nsec) / 1000)
# Milli seconds
msec, usec = divmod(usec, 1000)
if msec < 1000:
return u"%.2f ms" % (msec + float(usec)/1000)
return u"%.2f ms" % (msec + float(usec) / 1000)
return humanDuration(msec)
def humanDuration(delta):
"""
Convert a duration in millisecond to human natural representation.
@ -128,12 +137,12 @@ def humanDuration(delta):
u'1 hour 46 min 42 sec'
"""
if not isinstance(delta, timedelta):
delta = timedelta(microseconds=delta*1000)
delta = timedelta(microseconds=delta * 1000)
# Milliseconds
text = []
if 1000 <= delta.microseconds:
text.append(u"%u ms" % (delta.microseconds//1000))
text.append(u"%u ms" % (delta.microseconds // 1000))
# Seconds
minutes, seconds = divmod(delta.seconds, 60)
@ -157,6 +166,7 @@ def humanDuration(delta):
return u"0 ms"
return u" ".join(reversed(text))
def humanFilesize(size):
"""
Convert a file size in byte to human natural representation.
@ -181,6 +191,7 @@ def humanFilesize(size):
return "%.1f %s" % (size, unit)
return "%u %s" % (size, unit)
def humanBitSize(size):
"""
Convert a size in bit to human classic representation.
@ -205,6 +216,7 @@ def humanBitSize(size):
return "%.1f %s" % (size, unit)
return u"%u %s" % (size, unit)
def humanBitRate(size):
"""
Convert a bit rate to human classic representation. It uses humanBitSize()
@ -217,6 +229,7 @@ def humanBitRate(size):
"""
return "".join((humanBitSize(size), "/sec"))
def humanFrequency(hertz):
"""
Convert a frequency in hertz to human classic representation.
@ -239,18 +252,20 @@ def humanFrequency(hertz):
return u"%.1f %s" % (hertz, unit)
return u"%s %s" % (hertz, unit)
regex_control_code = re.compile(r"([\x00-\x1f\x7f])")
controlchars = tuple({
# Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character)
# Same rease to not use octal syntax ("\1")
ord("\n"): r"\n",
ord("\r"): r"\r",
ord("\t"): r"\t",
ord("\a"): r"\a",
ord("\b"): r"\b",
}.get(code, '\\x%02x' % code)
for code in xrange(128)
)
# Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character)
# Same rease to not use octal syntax ("\1")
ord("\n"): r"\n",
ord("\r"): r"\r",
ord("\t"): r"\t",
ord("\a"): r"\a",
ord("\b"): r"\b",
}.get(code, '\\x%02x' % code)
for code in xrange(128)
)
def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
r"""
@ -309,6 +324,8 @@ def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
data = ''.join((quote, data, quote))
elif quote:
data = "(empty)"
else:
data = ""
data = data.encode(charset, "backslashreplace")
if smart:
# Replace \x00\x01 by \0\1
@ -317,6 +334,7 @@ def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
data = unicode(data, charset)
return data
def makeUnicode(text):
r"""
Convert text to printable Unicode string. For byte string (type 'str'),
@ -343,6 +361,7 @@ def makeUnicode(text):
text = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", text)
return text
def binarySearch(seq, cmp_func):
"""
Search a value in a sequence using binary search. Returns index of the
@ -376,20 +395,40 @@ def binarySearch(seq, cmp_func):
return index
return None
def lowerBound(seq, cmp_func):
f = 0
l = len(seq)
while l > 0:
h = l >> 1
seqlen = len(seq)
while seqlen > 0:
h = seqlen >> 1
m = f + h
if cmp_func(seq[m]):
f = m
f += 1
l -= h + 1
seqlen -= h + 1
else:
l = h
seqlen = h
return f
def _ftypelet(mode):
if stat.S_ISREG(mode) or not stat.S_IFMT(mode):
return '-'
if stat.S_ISBLK(mode):
return 'b'
if stat.S_ISCHR(mode):
return 'c'
if stat.S_ISDIR(mode):
return 'd'
if stat.S_ISFIFO(mode):
return 'p'
if stat.S_ISLNK(mode):
return 'l'
if stat.S_ISSOCK(mode):
return 's'
return '?'
def humanUnixAttributes(mode):
"""
Convert a Unix file attributes (or "file mode") to an unicode string.
@ -403,18 +442,7 @@ def humanUnixAttributes(mode):
u'-rwxr-sr-x (2755)'
"""
def ftypelet(mode):
if stat.S_ISREG (mode) or not stat.S_IFMT(mode):
return '-'
if stat.S_ISBLK (mode): return 'b'
if stat.S_ISCHR (mode): return 'c'
if stat.S_ISDIR (mode): return 'd'
if stat.S_ISFIFO(mode): return 'p'
if stat.S_ISLNK (mode): return 'l'
if stat.S_ISSOCK(mode): return 's'
return '?'
chars = [ ftypelet(mode), 'r', 'w', 'x', 'r', 'w', 'x', 'r', 'w', 'x' ]
chars = [_ftypelet(mode), 'r', 'w', 'x', 'r', 'w', 'x', 'r', 'w', 'x']
for i in xrange(1, 10):
if not mode & 1 << 9 - i:
chars[i] = '-'
@ -435,6 +463,7 @@ def humanUnixAttributes(mode):
chars[9] = 't'
return u"%s (%o)" % (''.join(chars), mode)
def createDict(data, index):
"""
Create a new dictionnay from dictionnary key=>values:
@ -446,11 +475,13 @@ def createDict(data, index):
>>> createDict(data, 2)
{10: 'a', 20: 'b'}
"""
return dict( (key,values[index]) for key, values in data.iteritems() )
return dict((key, values[index]) for key, values in data.iteritems())
# Start of UNIX timestamp (Epoch): 1st January 1970 at 00:00
UNIX_TIMESTAMP_T0 = datetime(1970, 1, 1)
def timestampUNIX(value):
"""
Convert an UNIX (32-bit) timestamp to datetime object. Timestamp value
@ -470,13 +501,15 @@ def timestampUNIX(value):
"""
if not isinstance(value, (float, int, long)):
raise TypeError("timestampUNIX(): an integer or float is required")
if not(0 <= value <= 2147483647):
if not (0 <= value <= 2147483647):
raise ValueError("timestampUNIX(): value have to be in 0..2147483647")
return UNIX_TIMESTAMP_T0 + timedelta(seconds=value)
# Start of Macintosh timestamp: 1st January 1904 at 00:00
MAC_TIMESTAMP_T0 = datetime(1904, 1, 1)
def timestampMac32(value):
"""
Convert an Mac (32-bit) timestamp to string. The format is the number
@ -489,10 +522,11 @@ def timestampMac32(value):
"""
if not isinstance(value, (float, int, long)):
raise TypeError("an integer or float is required")
if not(0 <= value <= 4294967295):
if not (0 <= value <= 4294967295):
return _("invalid Mac timestamp (%s)") % value
return MAC_TIMESTAMP_T0 + timedelta(seconds=value)
def durationWin64(value):
"""
Convert Windows 64-bit duration to string. The timestamp format is
@ -507,11 +541,13 @@ def durationWin64(value):
raise TypeError("an integer or float is required")
if value < 0:
raise ValueError("value have to be a positive or nul integer")
return timedelta(microseconds=value/10)
return timedelta(microseconds=value / 10)
# Start of 64-bit Windows timestamp: 1st January 1600 at 00:00
WIN64_TIMESTAMP_T0 = datetime(1601, 1, 1, 0, 0, 0)
def timestampWin64(value):
"""
Convert Windows 64-bit timestamp to string. The timestamp format is
@ -527,11 +563,14 @@ def timestampWin64(value):
try:
return WIN64_TIMESTAMP_T0 + durationWin64(value)
except OverflowError:
raise ValueError(_("date newer than year %s (value=%s)") % (MAXYEAR, value))
raise ValueError(_("date newer than year %s (value=%s)")
% (MAXYEAR, value))
# Start of 60-bit UUID timestamp: 15 October 1582 at 00:00
UUID60_TIMESTAMP_T0 = datetime(1582, 10, 15, 0, 0, 0)
def timestampUUID60(value):
"""
Convert UUID 60-bit timestamp to string. The timestamp format is
@ -548,10 +587,11 @@ def timestampUUID60(value):
if value < 0:
raise ValueError("value have to be a positive or nul integer")
try:
return UUID60_TIMESTAMP_T0 + timedelta(microseconds=value/10)
return UUID60_TIMESTAMP_T0 + timedelta(microseconds=value / 10)
except OverflowError:
raise ValueError(_("timestampUUID60() overflow (value=%s)") % value)
def humanDatetime(value, strip_microsecond=True):
"""
Convert a timestamp to Unicode string: use ISO format with space separator.
@ -569,8 +609,10 @@ def humanDatetime(value, strip_microsecond=True):
text = text.split(".")[0]
return text
NEWLINES_REGEX = re.compile("\n+")
def normalizeNewline(text):
r"""
Replace Windows and Mac newlines with Unix newlines.
@ -586,4 +628,3 @@ def normalizeNewline(text):
text = text.replace("\r\n", "\n")
text = text.replace("\r", "\n")
return NEWLINES_REGEX.sub("\n", text)

View file

@ -0,0 +1,59 @@
# Field classes
from hachoir.field.field import Field, FieldError, MissingField, joinPath
from hachoir.field.bit_field import Bit, Bits, RawBits
from hachoir.field.byte_field import Bytes, RawBytes
from hachoir.field.sub_file import SubFile, CompressedField
from hachoir.field.character import Character
from hachoir.field.integer import (
Int8, Int16, Int24, Int32, Int64,
UInt8, UInt16, UInt24, UInt32, UInt64,
GenericInteger)
from hachoir.field.enum import Enum
from hachoir.field.string_field import (GenericString,
String, CString, UnixLine,
PascalString8, PascalString16, PascalString32)
from hachoir.field.padding import (PaddingBits, PaddingBytes,
NullBits, NullBytes)
# Functions
from hachoir.field.helper import (isString, isInteger,
createPaddingField, createNullField, createRawField,
writeIntoFile, createOrphanField)
# FieldSet classes
from hachoir.field.fake_array import FakeArray
from hachoir.field.basic_field_set import (BasicFieldSet,
ParserError, MatchError)
from hachoir.field.generic_field_set import GenericFieldSet
from hachoir.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet
from hachoir.field.field_set import FieldSet
from hachoir.field.static_field_set import StaticFieldSet
from hachoir.field.parser import Parser
from hachoir.field.vector import GenericVector, UserVector
# Complex types
from hachoir.field.float import Float32, Float64, Float80
from hachoir.field.timestamp import (GenericTimestamp,
TimestampUnix32, TimestampUnix64, TimestampMac32, TimestampUUID60, TimestampWin64,
DateTimeMSDOS32, TimeDateMSDOS32, TimedeltaWin64)
# Special Field classes
from hachoir.field.link import Link, Fragment
from hachoir.field.fragment import FragmentGroup, CustomFragment
available_types = (
Bit, Bits, RawBits,
Bytes, RawBytes,
SubFile,
Character,
Int8, Int16, Int24, Int32, Int64,
UInt8, UInt16, UInt24, UInt32, UInt64,
String, CString, UnixLine,
PascalString8, PascalString16, PascalString32,
Float32, Float64,
PaddingBits, PaddingBytes,
NullBits, NullBytes,
TimestampUnix32, TimestampMac32, TimestampWin64,
DateTimeMSDOS32, TimeDateMSDOS32,
# GenericInteger, GenericString,
)

View file

@ -1,7 +1,8 @@
from hachoir_core.field import Field, FieldError
from hachoir_core.stream import InputStream
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir_core.event_handler import EventHandler
from hachoir.field import Field, FieldError
from hachoir.stream import InputStream
from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from hachoir.core.event_handler import EventHandler
class ParserError(FieldError):
"""
@ -11,6 +12,7 @@ class ParserError(FieldError):
"""
pass
class MatchError(FieldError):
"""
Error raised by a field set when the stream content doesn't
@ -20,6 +22,7 @@ class MatchError(FieldError):
"""
pass
class BasicFieldSet(Field):
_event_handler = None
is_field_set = True
@ -132,16 +135,21 @@ class BasicFieldSet(Field):
def createFields(self):
raise NotImplementedError()
def __iter__(self):
raise NotImplementedError()
def __len__(self):
raise NotImplementedError()
def getField(self, key, const=True):
raise NotImplementedError()
def nextFieldAddress(self):
raise NotImplementedError()
def getFieldIndex(self, field):
raise NotImplementedError()
def readMoreFields(self, number):
raise NotImplementedError()

View file

@ -5,9 +5,10 @@ Bit sized classes:
- RawBits: unknown content with a size in bits.
"""
from hachoir_core.field import Field
from hachoir_core.i18n import _
from hachoir_core import config
from hachoir.field import Field
from hachoir.core.i18n import _
from hachoir.core import config
class RawBits(Field):
"""
@ -33,9 +34,11 @@ class RawBits(Field):
return unicode(self.value)
else:
return _("<%s size=%u>" %
(self.__class__.__name__, self._size))
(self.__class__.__name__, self._size))
createRawDisplay = createDisplay
class Bits(RawBits):
"""
Positive integer with a size in bits
@ -45,6 +48,7 @@ class Bits(RawBits):
"""
pass
class Bit(RawBits):
"""
Single bit: value can be False or True, and size is exactly one bit.
@ -61,8 +65,7 @@ class Bit(RawBits):
def createValue(self):
return 1 == self._parent.stream.readBits(
self.absolute_address, 1, self._parent.endian)
self.absolute_address, 1, self._parent.endian)
def createRawDisplay(self):
return unicode(int(self.value))

View file

@ -3,12 +3,15 @@ Very basic field: raw content with a size in byte. Use this class for
unknown content.
"""
from hachoir_core.field import Field, FieldError
from hachoir_core.tools import makePrintable
from hachoir_core.bits import str2hex
from hachoir_core import config
import types
from hachoir.field import Field, FieldError
from hachoir.core.tools import makePrintable
from hachoir.core.bits import str2hex
from hachoir.core import config
MAX_LENGTH = (2 ** 64)
MAX_LENGTH = (2**64)
class RawBytes(Field):
"""
@ -16,25 +19,25 @@ class RawBytes(Field):
@see: L{Bytes}
"""
static_size = staticmethod(lambda *args, **kw: args[1]*8)
static_size = staticmethod(lambda *args, **kw: args[1] * 8)
def __init__(self, parent, name, length, description="Raw data"):
assert issubclass(parent.__class__, Field)
if not(0 < length <= MAX_LENGTH):
if not (0 < length <= MAX_LENGTH):
raise FieldError("Invalid RawBytes length (%s)!" % length)
Field.__init__(self, parent, name, length*8, description)
Field.__init__(self, parent, name, length * 8, description)
self._display = None
def _createDisplay(self, human):
max_bytes = config.max_byte_length
if type(self._getValue) is type(lambda: None):
display = self.value[:max_bytes]
if isinstance(self._getValue, types.FunctionType):
display = makePrintable(self.value[:max_bytes], "ASCII")
else:
if self._display is None:
address = self.absolute_address
length = min(self._size / 8, max_bytes)
length = min(self._size // 8, max_bytes)
self._display = self._parent.stream.readBytes(address, length)
display = self._display
display = makePrintable(self._display, "ASCII")
truncated = (8 * len(display) < self._size)
if human:
if truncated:
@ -61,7 +64,8 @@ class RawBytes(Field):
if self._display:
self._display = None
return self._parent.stream.readBytes(
self.absolute_address, self._size / 8)
self.absolute_address, self._size // 8)
class Bytes(RawBytes):
"""
@ -70,4 +74,3 @@ class Bytes(RawBytes):
@see: L{RawBytes}
"""
pass

Some files were not shown because too many files have changed in this diff Show more