From c3c7ee9b5799a7f9c2dad2691ddd08499512a2ad Mon Sep 17 00:00:00 2001 From: Adam Date: Sat, 25 Jul 2015 17:19:46 +0800 Subject: [PATCH 01/95] Bump release version --- CHANGES.md | 9 --------- readme.md | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 163f1d91..4644528d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -109,15 +109,6 @@ * Change accept SD titles that contain audio quality * Change readme.md -[develop changelog] -* Update Requests library 2.7.0 (ab1f493) to 2.7.0 (8b5e457) -* Update Tornado webserver from 4.2.dev1 (609dbb9) to 4.2b1 (61a16c9) -* Change reload_module call to explicit import lib/six.moves -* Change queue, httplib, cookielib and xmlrpclib to use explicit import lib/six.moves -* Change zoneinfo update/loader to be compatible with dateutil 2.4.2 -* Change use metadata for zoneinfo files and remove hack of dateutil lib -* Change param item "features" passed to Beautiful Soup to prevent false +ve warning in r353 - ### 0.9.1 (2015-05-25 03:03:00 UTC) diff --git a/readme.md b/readme.md index f2157151..39b69270 100644 --- a/readme.md +++ b/readme.md @@ -18,7 +18,7 @@ SickGear provides management of TV shows and/or Anime, can detect wanted or epis * Uses well known established index sites to gather show information * Searches for known alternatively named shows with a fallback to user edited names * Searches for known alternatively numbered episodes with a fallback to user edited numbers - * Forward search results to a downloader program (e.g. SABNZBd, NZBGet, uTorrent, and others) + * Forward search results to a downloader (e.g. SABNZBd, NZBGet, uTorrent, and others) * Save search results to a "blackhole" folder that can be periodically scanned for taking action * Post-process downloaded episodes into customisable layouts, with or without extra metadata * Advanced Failed Download Handling (FDH) From 8b2225db8b1aa37367e0a924ecce8b38aa6285da Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Mon, 10 Aug 2015 16:16:22 +0200 Subject: [PATCH 02/95] Change to only refresh scene exception data for shows that need it. Fix small cosmetic issue to correctly display "full backlog" date. 
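In outline, the guard consults the XEM id lists that scene_exceptions maintains and returns early from xem_refresh for shows with no XEM mapping. Written long-hand as a sketch (has_xem_map is a hypothetical helper, not in the patch; the module-level xem_tvdb_ids_list/xem_rage_ids_list names and the INDEXER_TVDB import are taken from the hunk below, which folds this into a single conditional expression — note that form binds the `in` test to the TVDB branch only):

    from sickbeard import scene_exceptions
    from sickbeard.indexers.indexer_config import INDEXER_TVDB

    def has_xem_map(indexer_id, indexer):
        # True when XEM carries a mapping for this show, i.e. a refresh
        # fetch could actually return something useful
        if INDEXER_TVDB == indexer:
            return int(indexer_id) in scene_exceptions.xem_tvdb_ids_list
        return int(indexer_id) in scene_exceptions.xem_rage_ids_list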
--- CHANGES.md | 3 ++- sickbeard/scene_numbering.py | 4 ++++ sickbeard/searchBacklog.py | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 5690dcb8..10fe667a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ ### 0.11.0 (2015-xx-xx xx:xx:xx UTC) -* +* Change to only refresh scene exception data for shows that need it +* Fix small cosmetic issue to correctly display "full backlog" date ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/sickbeard/scene_numbering.py b/sickbeard/scene_numbering.py index c516309c..8e0deec9 100644 --- a/sickbeard/scene_numbering.py +++ b/sickbeard/scene_numbering.py @@ -30,6 +30,7 @@ import sickbeard from sickbeard import logger from sickbeard import db from sickbeard.exceptions import ex +from sickbeard.indexers.indexer_config import INDEXER_TVDB def get_scene_numbering(indexer_id, indexer, season, episode, fallback_to_xem=True): @@ -460,6 +461,9 @@ def xem_refresh(indexer_id, indexer, force=False): indexer_id = int(indexer_id) indexer = int(indexer) + if not (indexer_id in sickbeard.scene_exceptions.xem_tvdb_ids_list if INDEXER_TVDB == indexer else sickbeard.scene_exceptions.xem_rage_ids_list): + return + # XEM API URL url = 'http://thexem.de/map/all?id=%s&origin=%s&destination=scene' % ( indexer_id, sickbeard.indexerApi(indexer).config['xem_origin']) diff --git a/sickbeard/searchBacklog.py b/sickbeard/searchBacklog.py index 515318cd..f4790040 100644 --- a/sickbeard/searchBacklog.py +++ b/sickbeard/searchBacklog.py @@ -126,6 +126,7 @@ class BacklogSearcher: # or if we only did certain shows if fromDate == datetime.date.fromordinal(1) and not which_shows: self._set_lastBacklog(curDate) + self._get_lastBacklog() self.amActive = False self._resetPI() From b124b453fe4796a4c5576143c27973379505594f Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 8 Aug 2015 00:53:30 +0100 Subject: [PATCH 03/95] Add search crawler exclusions. 
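The exclusions amount to stamping an X-Robots-Tag response header on every dynamic and static route, so that well-behaved crawlers keep the web interface out of their indexes. A condensed sketch of the Tornado pattern the hunks below apply (handler names as in the patch; the header value is hoisted to a constant here only for brevity):

    from tornado.web import RequestHandler, StaticFileHandler

    ROBOTS = 'noindex, nofollow, noarchive, nocache, noodp, noydir, noimageindex, nosnippet'

    class BaseHandler(RequestHandler):
        def set_default_headers(self):
            # stamped on every dynamic response before the handler body runs
            self.set_header('X-Robots-Tag', ROBOTS)

    class BaseStaticFileHandler(StaticFileHandler):
        def set_extra_headers(self, path):
            # StaticFileHandler manages its own default headers, so the
            # set_extra_headers hook is used for static routes instead
            self.set_header('X-Robots-Tag', ROBOTS)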
--- CHANGES.md | 1 + gui/slick/interfaces/default/inc_top.tmpl | 59 ++++++++++++----------- sickbeard/webapi.py | 1 + sickbeard/webserve.py | 8 ++- sickbeard/webserveInit.py | 31 ++++++------ 5 files changed, 54 insertions(+), 46 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 10fe667a..9d97ec53 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,7 @@ * Change to only refresh scene exception data for shows that need it * Fix small cosmetic issue to correctly display "full backlog" date +* Add search crawler exclusions ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/gui/slick/interfaces/default/inc_top.tmpl b/gui/slick/interfaces/default/inc_top.tmpl index 9d522803..9b152cbf 100644 --- a/gui/slick/interfaces/default/inc_top.tmpl +++ b/gui/slick/interfaces/default/inc_top.tmpl @@ -5,8 +5,9 @@ - + + SickGear - BRANCH:[$sickbeard.BRANCH] - $title @@ -34,35 +35,35 @@ - - - - - - - + + + + + + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + #if $sickbeard.FUZZY_DATING - - + + #end if - + #set $tab = 4 #set $body_attr = '' diff --git a/sickbeard/webapi.py b/sickbeard/webapi.py index 3733a45f..8d3f2d05 100644 --- a/sickbeard/webapi.py +++ b/sickbeard/webapi.py @@ -75,6 +75,7 @@ class Api(webserve.BaseHandler): def set_default_headers(self): self.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0') + self.set_header('X-Robots-Tag', 'noindex, nofollow, noarchive, nocache, noodp, noydir, noimageindex, nosnippet') def get(self, route, *args, **kwargs): route = route.strip('/') or 'index' diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index 5212f034..8fe99512 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -49,7 +49,7 @@ from sickbeard.browser import foldersAtPath from sickbeard.blackandwhitelist import BlackAndWhiteList, short_group_names from sickbeard.searchBacklog import FULL_BACKLOG, LIMITED_BACKLOG from tornado import gen -from tornado.web import RequestHandler, authenticated +from tornado.web import RequestHandler, StaticFileHandler, authenticated from lib import adba from lib import subliminal from lib.dateutil import tz @@ -111,9 +111,15 @@ class PageTemplate(Template): return super(PageTemplate, self).compile(*args, **kwargs) +class BaseStaticFileHandler(StaticFileHandler): + def set_extra_headers(self, path): + self.set_header('X-Robots-Tag', 'noindex, nofollow, noarchive, nocache, noodp, noydir, noimageindex, nosnippet') + + class BaseHandler(RequestHandler): def set_default_headers(self): self.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0') + self.set_header('X-Robots-Tag', 'noindex, nofollow, noarchive, nocache, noodp, noydir, noimageindex, nosnippet') def redirect(self, url, permanent=False, status=None): if not url.startswith(sickbeard.WEB_ROOT): diff --git a/sickbeard/webserveInit.py b/sickbeard/webserveInit.py index a881af61..19720e19 100644 --- a/sickbeard/webserveInit.py +++ b/sickbeard/webserveInit.py @@ -7,7 +7,7 @@ import webapi from sickbeard import logger from sickbeard.helpers import create_https_certificates -from tornado.web import Application, StaticFileHandler +from tornado.web import Application from tornado.httpserver import HTTPServer from tornado.ioloop import IOLoop @@ -41,8 +41,8 @@ class WebServer(threading.Thread): if self.enable_https: # If either the HTTPS certificate or key do not exist, make some self-signed ones. 
- if not (self.https_cert and os.path.exists(self.https_cert)) or not ( - self.https_key and os.path.exists(self.https_key)): + if not (self.https_cert and os.path.exists(self.https_cert))\ + or not (self.https_key and os.path.exists(self.https_key)): if not create_https_certificates(self.https_cert, self.https_key): logger.log(u'Unable to create CERT/KEY files, disabling HTTPS') sickbeard.ENABLE_HTTPS = False @@ -55,13 +55,13 @@ class WebServer(threading.Thread): # Load the app self.app = Application([], - debug=True, - autoreload=False, - gzip=True, - xheaders=sickbeard.HANDLE_REVERSE_PROXY, - cookie_secret=sickbeard.COOKIE_SECRET, - login_url='%s/login/' % self.options['web_root'], - ) + debug=True, + autoreload=False, + gzip=True, + xheaders=sickbeard.HANDLE_REVERSE_PROXY, + cookie_secret=sickbeard.COOKIE_SECRET, + login_url='%s/login/' % self.options['web_root'] + ) # Main Handler self.app.add_handlers('.*$', [ @@ -104,27 +104,26 @@ class WebServer(threading.Thread): # Static File Handlers self.app.add_handlers('.*$', [ # favicon - (r'%s/(favicon\.ico)' % self.options['web_root'], StaticFileHandler, + (r'%s/(favicon\.ico)' % self.options['web_root'], webserve.BaseStaticFileHandler, {'path': os.path.join(self.options['data_root'], 'images/ico/favicon.ico')}), # images - (r'%s/images/(.*)' % self.options['web_root'], StaticFileHandler, + (r'%s/images/(.*)' % self.options['web_root'], webserve.BaseStaticFileHandler, {'path': os.path.join(self.options['data_root'], 'images')}), # cached images - (r'%s/cache/images/(.*)' % self.options['web_root'], StaticFileHandler, + (r'%s/cache/images/(.*)' % self.options['web_root'], webserve.BaseStaticFileHandler, {'path': os.path.join(sickbeard.CACHE_DIR, 'images')}), # css - (r'%s/css/(.*)' % self.options['web_root'], StaticFileHandler, + (r'%s/css/(.*)' % self.options['web_root'], webserve.BaseStaticFileHandler, {'path': os.path.join(self.options['data_root'], 'css')}), # javascript - (r'%s/js/(.*)' % self.options['web_root'], StaticFileHandler, + (r'%s/js/(.*)' % self.options['web_root'], webserve.BaseStaticFileHandler, {'path': os.path.join(self.options['data_root'], 'js')}), ]) - def run(self): if self.enable_https: protocol = 'https' From 53eb406817c46245e7b3deabaab60ea5db746672 Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 9 Aug 2015 11:12:49 +0800 Subject: [PATCH 04/95] Fix saving default show list group on add new show options page --- CHANGES.md | 1 + gui/slick/js/addShowOptions.js | 5 +++-- sickbeard/__init__.py | 2 +- sickbeard/webserve.py | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9d97ec53..4b2469e5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ * Change to only refresh scene exception data for shows that need it * Fix small cosmetic issue to correctly display "full backlog" date * Add search crawler exclusions +* Fix saving default show list group on add new show options page ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/gui/slick/js/addShowOptions.js b/gui/slick/js/addShowOptions.js index a11b91e0..207e1c81 100644 --- a/gui/slick/js/addShowOptions.js +++ b/gui/slick/js/addShowOptions.js @@ -19,7 +19,8 @@ $(document).ready(function(){ default_flatten_folders: $('#flatten_folders').prop('checked'), default_scene: $('#scene').prop('checked'), default_subtitles: $('#subtitles').prop('checked'), - default_anime: $('#anime').prop('checked') + default_anime: $('#anime').prop('checked'), + default_tag: $('#tag').val() }); new PNotify({ @@ -32,7 +33,7 @@ 
$(document).ready(function(){ }); $('#statusSelect, #qualityPreset, #anyQualities, #bestQualities, #wanted_begin, #wanted_latest,' - + ' #flatten_folders, #scene, #subtitles, #anime').change(function() { + + ' #flatten_folders, #scene, #subtitles, #anime, #tag').change(function() { $('#saveDefaultsButton').attr('disabled', false); }); diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index 419bd95a..9adf0637 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -1792,7 +1792,7 @@ def save_config(): new_config['GUI']['poster_sortdir'] = POSTER_SORTDIR new_config['GUI']['show_tags'] = ','.join(SHOW_TAGS) new_config['GUI']['showlist_tagview'] = SHOWLIST_TAGVIEW - new_config['GUI']['default_tag'] = DEFAULT_SHOW_TAG + new_config['GUI']['default_show_tag'] = DEFAULT_SHOW_TAG new_config['Subtitles'] = {} new_config['Subtitles']['use_subtitles'] = int(USE_SUBTITLES) diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index 8fe99512..76fd3c35 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -3438,7 +3438,7 @@ class ConfigGeneral(Config): def saveAddShowDefaults(self, default_status, any_qualities='', best_qualities='', default_wanted_begin=None, default_wanted_latest=None, default_flatten_folders=False, default_scene=False, - default_subtitles=False, default_anime=False): + default_subtitles=False, default_anime=False, default_tag=''): any_qualities = ([], any_qualities.split(','))[any(any_qualities)] best_qualities = ([], best_qualities.split(','))[any(best_qualities)] @@ -3451,6 +3451,7 @@ class ConfigGeneral(Config): sickbeard.SCENE_DEFAULT = config.checkbox_to_value(default_scene) sickbeard.SUBTITLES_DEFAULT = config.checkbox_to_value(default_subtitles) sickbeard.ANIME_DEFAULT = config.checkbox_to_value(default_anime) + sickbeard.DEFAULT_SHOW_TAG = default_tag sickbeard.save_config() From 20ec75aea3c4660c324f6c237688ff70504178d7 Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 10 Aug 2015 06:36:46 +0800 Subject: [PATCH 05/95] Remove legacy anime split home option from anime settings tab (new option located in general/interface tab) --- CHANGES.md | 1 + gui/slick/interfaces/default/config_anime.tmpl | 10 ---------- sickbeard/__init__.py | 4 +--- sickbeard/webserve.py | 3 +-- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4b2469e5..201255d4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ * Fix small cosmetic issue to correctly display "full backlog" date * Add search crawler exclusions * Fix saving default show list group on add new show options page +* Remove legacy anime split home option from anime settings tab (new option located in general/interface tab) ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/gui/slick/interfaces/default/config_anime.tmpl b/gui/slick/interfaces/default/config_anime.tmpl index c7babfc6..109f6f41 100644 --- a/gui/slick/interfaces/default/config_anime.tmpl +++ b/gui/slick/interfaces/default/config_anime.tmpl @@ -36,16 +36,6 @@
[lost in extraction: the ten removed config_anime.tmpl lines — markup for the legacy anime "split home" checkbox — plus this patch's remaining sickbeard/__init__.py and sickbeard/webserve.py hunks and the headers of the intervening patches]
diff --git a/gui/slick/interfaces/default/home_newShow.tmpl b/gui/slick/interfaces/default/home_newShow.tmpl index fb2bc8f7..09c78adf 100644 --- a/gui/slick/interfaces/default/home_newShow.tmpl +++ b/gui/slick/interfaces/default/home_newShow.tmpl @@ -10,6 +10,10 @@ #import os.path #include $os.path.join($sickbeard.PROG_DIR, 'gui/slick/interfaces/default/inc_top.tmpl') + + diff --git a/gui/slick/interfaces/default/inc_addShowOptions.tmpl b/gui/slick/interfaces/default/inc_addShowOptions.tmpl index 14efc2e0..bd1fbb0c 100644 --- a/gui/slick/interfaces/default/inc_addShowOptions.tmpl +++ b/gui/slick/interfaces/default/inc_addShowOptions.tmpl @@ -79,7 +79,7 @@ Scene numbering -

-search for episodes that are numbered by scene groups instead of by the TV network
+search for episodes numbered by scene groups instead of by the TV network
diff --git a/gui/slick/interfaces/default/inc_top.tmpl b/gui/slick/interfaces/default/inc_top.tmpl index 9b152cbf..49f12b68 100644 --- a/gui/slick/interfaces/default/inc_top.tmpl +++ b/gui/slick/interfaces/default/inc_top.tmpl @@ -90,7 +90,7 @@ $('#SubMenu a:contains("Backlog Overview")').addClass('btn').html('Backlog Overview'); $('#SubMenu a[href$="/home/updatePLEX/"]').addClass('btn').html('Update PLEX'); $('#SubMenu a:contains("Force")').addClass('btn').html('Force Full Update'); - $('#SubMenu a:contains("Rename")').addClass('btn').html('Preview Rename'); + $('#SubMenu a:contains("Rename")').addClass('btn').html('Media Renamer'); $('#SubMenu a[href$="/config/subtitles/"]').addClass('btn').html('Search Subtitles'); $('#SubMenu a[href*="/home/subtitleShow"]').addClass('btn').html('Download Subtitles'); $('#SubMenu a:contains("Anime")').addClass('btn').html('Anime'); diff --git a/gui/slick/js/newShow.js b/gui/slick/js/newShow.js index f671fe5f..c4503961 100644 --- a/gui/slick/js/newShow.js +++ b/gui/slick/js/newShow.js @@ -189,22 +189,23 @@ $(document).ready(function () { function updateSampleText() { // if something's selected then we have some behavior to figure out - var show_name, + var show_name = '', sep_char, elRadio = $('input:radio[name="whichSeries"]:checked'), elInput = $('input:hidden[name="whichSeries"]'), + elScene = $('#scene'), elRootDirs = $('#rootDirs'), elFullShowPath = $('#fullShowPath'); // if they've picked a radio button then use that if (elRadio.length) { show_name = elRadio.val().split('|')[4]; + elScene[0].checked = 0 <= show_scene_maps.indexOf(parseInt(elRadio.val().split('|')[3], 10)); + $('#scene-maps-found').css('display', elScene.is(':checked') ? 'inline' : 'None'); } // if we provided a show in the hidden field, use that else if (elInput.length && elInput.val().length) { show_name = $('#providedName').val(); - } else { - show_name = ''; } update_bwlist(show_name); var sample_text = '

Adding show ' + cleanseText(show_name, !0) + '' diff --git a/sickbeard/name_parser/parser.py b/sickbeard/name_parser/parser.py index 07494c8f..c9c702c2 100644 --- a/sickbeard/name_parser/parser.py +++ b/sickbeard/name_parser/parser.py @@ -27,7 +27,6 @@ import regexes import sickbeard from sickbeard import logger, helpers, scene_numbering, common, scene_exceptions, encodingKludge as ek, db -from dateutil import parser from sickbeard.exceptions import ex from sickbeard.common import cpu_presets @@ -55,39 +54,40 @@ class NameParser(object): else: self._compile_regexes(self.ALL_REGEX) - def clean_series_name(self, series_name): + @staticmethod + def clean_series_name(series_name): """Cleans up series name by removing any . and _ characters, along with any trailing hyphens. Is basically equivalent to replacing all _ and . with a space, but handles decimal numbers in string, for example: - >>> cleanRegexedSeriesName("an.example.1.0.test") + >>> clean_series_name('an.example.1.0.test') 'an example 1.0 test' - >>> cleanRegexedSeriesName("an_example_1.0_test") + >>> clean_series_name('an_example_1.0_test') 'an example 1.0 test' Stolen from dbr's tvnamer """ - series_name = re.sub("(\D)\.(?!\s)(\D)", "\\1 \\2", series_name) - series_name = re.sub("(\d)\.(\d{4})", "\\1 \\2", series_name) # if it ends in a year then don't keep the dot - series_name = re.sub("(\D)\.(?!\s)", "\\1 ", series_name) - series_name = re.sub("\.(?!\s)(\D)", " \\1", series_name) - series_name = series_name.replace("_", " ") - series_name = re.sub("-$", "", series_name) - series_name = re.sub("^\[.*\]", "", series_name) + series_name = re.sub('(\D)\.(?!\s)(\D)', '\\1 \\2', series_name) + series_name = re.sub('(\d)\.(\d{4})', '\\1 \\2', series_name) # if it ends in a year then don't keep the dot + series_name = re.sub('(\D)\.(?!\s)', '\\1 ', series_name) + series_name = re.sub('\.(?!\s)(\D)', ' \\1', series_name) + series_name = series_name.replace('_', ' ') + series_name = re.sub('-$', '', series_name) + series_name = re.sub('^\[.*\]', '', series_name) return series_name.strip() def _compile_regexes(self, regexMode): - if regexMode == self.ANIME_REGEX: - logger.log(u"Using ANIME regexs", logger.DEBUG) + if self.ANIME_REGEX == regexMode: + logger.log(u'Using ANIME regexs', logger.DEBUG) uncompiled_regex = [regexes.anime_regexes] - elif regexMode == self.NORMAL_REGEX: - logger.log(u"Using NORMAL regexs", logger.DEBUG) + elif self.NORMAL_REGEX == regexMode: + logger.log(u'Using NORMAL regexs', logger.DEBUG) uncompiled_regex = [regexes.normal_regexes] else: - logger.log(u"Using ALL regexes", logger.DEBUG) + logger.log(u'Using ALL regexes', logger.DEBUG) uncompiled_regex = [regexes.normal_regexes, regexes.anime_regexes] self.compiled_regexes = {0: [], 1: []} @@ -97,7 +97,7 @@ class NameParser(object): try: cur_regex = re.compile(cur_pattern, re.VERBOSE | re.IGNORECASE) except re.error as errormsg: - logger.log(u"WARNING: Invalid episode_pattern, %s. %s" % (errormsg, cur_pattern)) + logger.log(u'WARNING: Invalid episode_pattern, %s. 
%s' % (errormsg, cur_pattern)) else: self.compiled_regexes[index].append([cur_pattern_num, cur_pattern_name, cur_regex]) index += 1 @@ -107,7 +107,7 @@ class NameParser(object): return matches = [] - bestResult = None + for regex in self.compiled_regexes: for (cur_regex_num, cur_regex_name, cur_regex) in self.compiled_regexes[regex]: match = cur_regex.match(name) @@ -132,7 +132,7 @@ class NameParser(object): if 'season_num' in named_groups: tmp_season = int(match.group('season_num')) - if cur_regex_name == 'bare' and tmp_season in (19, 20): + if 'bare' == cur_regex_name and tmp_season in (19, 20): continue result.season_number = tmp_season result.score += 1 @@ -161,7 +161,7 @@ class NameParser(object): month = int(match.group('air_month')) day = int(match.group('air_day')) # make an attempt to detect YYYY-DD-MM formats - if month > 12: + if 12 < month: tmp_month = month month = day day = tmp_month @@ -174,7 +174,7 @@ class NameParser(object): tmp_extra_info = match.group('extra_info') # Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season - if tmp_extra_info and cur_regex_name == 'season_only' and re.search( + if tmp_extra_info and 'season_only' == cur_regex_name and re.search( r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, re.I): continue result.extra_info = tmp_extra_info @@ -198,42 +198,42 @@ class NameParser(object): if len(matches): # pick best match with highest score based on placement - bestResult = max(sorted(matches, reverse=True, key=lambda x: x.which_regex), key=lambda x: x.score) + best_result = max(sorted(matches, reverse=True, key=lambda x: x.which_regex), key=lambda x: x.score) show = None if not self.naming_pattern: # try and create a show object for this result - show = helpers.get_show(bestResult.series_name, self.try_indexers, self.try_scene_exceptions) + show = helpers.get_show(best_result.series_name, self.try_indexers, self.try_scene_exceptions) # confirm passed in show object indexer id matches result show object indexer id if show and not self.testing: if self.showObj and show.indexerid != self.showObj.indexerid: show = None - bestResult.show = show elif not show and self.showObj: - bestResult.show = self.showObj + show = self.showObj + best_result.show = show - if bestResult.show and bestResult.show.is_anime and len(self.compiled_regexes[1]) > 1 and regex != 1: + if show and show.is_anime and 1 < len(self.compiled_regexes[1]) and 1 != regex: continue # if this is a naming pattern test then return best result - if not bestResult.show or self.naming_pattern: - return bestResult + if not show or self.naming_pattern: + return best_result # get quality - bestResult.quality = common.Quality.nameQuality(name, bestResult.show.is_anime) + best_result.quality = common.Quality.nameQuality(name, show.is_anime) new_episode_numbers = [] new_season_numbers = [] new_absolute_numbers = [] # if we have an air-by-date show then get the real season/episode numbers - if bestResult.is_air_by_date: - airdate = bestResult.air_date.toordinal() - myDB = db.DBConnection() - sql_result = myDB.select( - "SELECT season, episode FROM tv_episodes WHERE showid = ? and indexer = ? and airdate = ?", - [bestResult.show.indexerid, bestResult.show.indexer, airdate]) + if best_result.is_air_by_date: + airdate = best_result.air_date.toordinal() + my_db = db.DBConnection() + sql_result = my_db.select( + 'SELECT season, episode FROM tv_episodes WHERE showid = ? and indexer = ? 
and airdate = ?', + [show.indexerid, show.indexer, airdate]) season_number = None episode_numbers = [] @@ -244,64 +244,64 @@ class NameParser(object): if not season_number or not len(episode_numbers): try: - lINDEXER_API_PARMS = sickbeard.indexerApi(bestResult.show.indexer).api_params.copy() + lindexer_api_parms = sickbeard.indexerApi(show.indexer).api_params.copy() - if bestResult.show.lang: - lINDEXER_API_PARMS['language'] = bestResult.show.lang + if show.lang: + lindexer_api_parms['language'] = show.lang - t = sickbeard.indexerApi(bestResult.show.indexer).indexer(**lINDEXER_API_PARMS) + t = sickbeard.indexerApi(show.indexer).indexer(**lindexer_api_parms) - epObj = t[bestResult.show.indexerid].airedOn(bestResult.air_date)[0] + ep_obj = t[show.indexerid].airedOn(best_result.air_date)[0] - season_number = int(epObj["seasonnumber"]) - episode_numbers = [int(epObj["episodenumber"])] + season_number = int(ep_obj['seasonnumber']) + episode_numbers = [int(ep_obj['episodenumber'])] except sickbeard.indexer_episodenotfound: - logger.log(u"Unable to find episode with date " + str(bestResult.air_date) + " for show " + bestResult.show.name + ", skipping", logger.WARNING) + logger.log(u'Unable to find episode with date ' + str(best_result.air_date) + ' for show ' + show.name + ', skipping', logger.WARNING) episode_numbers = [] except sickbeard.indexer_error as e: - logger.log(u"Unable to contact " + sickbeard.indexerApi(bestResult.show.indexer).name + ": " + ex(e), logger.WARNING) + logger.log(u'Unable to contact ' + sickbeard.indexerApi(show.indexer).name + ': ' + ex(e), logger.WARNING) episode_numbers = [] for epNo in episode_numbers: s = season_number e = epNo - if self.convert: - (s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid, - bestResult.show.indexer, + if self.convert and show.is_scene: + (s, e) = scene_numbering.get_indexer_numbering(show.indexerid, + show.indexer, season_number, epNo) new_episode_numbers.append(e) new_season_numbers.append(s) - elif bestResult.show.is_anime and len(bestResult.ab_episode_numbers) and not self.testing: - scene_season = scene_exceptions.get_scene_exception_by_name(bestResult.series_name)[1] - for epAbsNo in bestResult.ab_episode_numbers: + elif show.is_anime and len(best_result.ab_episode_numbers) and not self.testing: + scene_season = scene_exceptions.get_scene_exception_by_name(best_result.series_name)[1] + for epAbsNo in best_result.ab_episode_numbers: a = epAbsNo - if self.convert: - a = scene_numbering.get_indexer_absolute_numbering(bestResult.show.indexerid, - bestResult.show.indexer, epAbsNo, + if self.convert and show.is_scene: + a = scene_numbering.get_indexer_absolute_numbering(show.indexerid, + show.indexer, epAbsNo, True, scene_season) - (s, e) = helpers.get_all_episodes_from_absolute_number(bestResult.show, [a]) + (s, e) = helpers.get_all_episodes_from_absolute_number(show, [a]) new_absolute_numbers.append(a) new_episode_numbers.extend(e) new_season_numbers.append(s) - elif bestResult.season_number and len(bestResult.episode_numbers) and not self.testing: - for epNo in bestResult.episode_numbers: - s = bestResult.season_number + elif best_result.season_number and len(best_result.episode_numbers) and not self.testing: + for epNo in best_result.episode_numbers: + s = best_result.season_number e = epNo - if self.convert: - (s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid, - bestResult.show.indexer, - bestResult.season_number, + if self.convert and show.is_scene: + (s, e) = 
scene_numbering.get_indexer_numbering(show.indexerid, + show.indexer, + best_result.season_number, epNo) - if bestResult.show.is_anime: - a = helpers.get_absolute_number_from_season_and_episode(bestResult.show, s, e) + if show.is_anime: + a = helpers.get_absolute_number_from_season_and_episode(show, s, e) if a: new_absolute_numbers.append(a) @@ -312,11 +312,11 @@ class NameParser(object): # from more than one season (by tvdb numbering), and this is just too much # for sickbeard, so we'd need to flag it. new_season_numbers = list(set(new_season_numbers)) # remove duplicates - if len(new_season_numbers) > 1: - raise InvalidNameException("Scene numbering results episodes from " - "seasons %s, (i.e. more than one) and " - "SickGear does not support this. " - "Sorry." % (str(new_season_numbers))) + if 1 < len(new_season_numbers): + raise InvalidNameException('Scene numbering results episodes from ' + 'seasons %s, (i.e. more than one) and ' + 'SickGear does not support this. ' + 'Sorry.' % (str(new_season_numbers))) # I guess it's possible that we'd have duplicate episodes too, so lets # eliminate them @@ -328,24 +328,24 @@ class NameParser(object): new_absolute_numbers.sort() if len(new_absolute_numbers): - bestResult.ab_episode_numbers = new_absolute_numbers + best_result.ab_episode_numbers = new_absolute_numbers if len(new_season_numbers) and len(new_episode_numbers): - bestResult.episode_numbers = new_episode_numbers - bestResult.season_number = new_season_numbers[0] + best_result.episode_numbers = new_episode_numbers + best_result.season_number = new_season_numbers[0] - if self.convert: - logger.log( - u"Converted parsed result " + bestResult.original_name + " into " + str(bestResult).decode('utf-8', - 'xmlcharrefreplace'), - logger.DEBUG) + if self.convert and show.is_scene: + logger.log(u'Converted parsed result %s into %s' + % (best_result.original_name, str(best_result).decode('utf-8', 'xmlcharrefreplace')), + logger.DEBUG) # CPU sleep time.sleep(cpu_presets[sickbeard.CPU_PRESET]) - return bestResult + return best_result - def _combine_results(self, first, second, attr): + @staticmethod + def _combine_results(first, second, attr): # if the first doesn't exist then return the second or nothing if not first: if not second: @@ -361,19 +361,21 @@ class NameParser(object): b = getattr(second, attr) # if a is good use it - if a != None or (type(a) == list and len(a)): + if None is not a or (list == type(a) and len(a)): return a # if not use b (if b isn't set it'll just be default) else: return b - def _unicodify(self, obj, encoding="utf-8"): + @staticmethod + def _unicodify(obj, encoding='utf-8'): if isinstance(obj, basestring): if not isinstance(obj, unicode): obj = unicode(obj, encoding, 'replace') return obj - def _convert_number(self, org_number): + @staticmethod + def _convert_number(org_number): """ Convert org_number into an integer org_number: integer or representation of a number: string or unicode @@ -392,8 +394,7 @@ class NameParser(object): # on error try converting from Roman numerals roman_to_int_map = (('M', 1000), ('CM', 900), ('D', 500), ('CD', 400), ('C', 100), ('XC', 90), ('L', 50), ('XL', 40), ('X', 10), - ('IX', 9), ('V', 5), ('IV', 4), ('I', 1) - ) + ('IX', 9), ('V', 5), ('IV', 4), ('I', 1)) roman_numeral = str(org_number).upper() number = 0 @@ -469,19 +470,18 @@ class NameParser(object): if not final_result.show: if self.testing: pass - #final_result.which_regex = [] else: - raise InvalidShowException( - "Unable to parse " + name.encode(sickbeard.SYS_ENCODING, 
'xmlcharrefreplace')) + raise InvalidShowException('Unable to parse %s' + % name.encode(sickbeard.SYS_ENCODING, 'xmlcharrefreplace')) # if there's no useful info in it then raise an exception - if final_result.season_number == None and not final_result.episode_numbers and final_result.air_date == None and not final_result.ab_episode_numbers and not final_result.series_name: - raise InvalidNameException("Unable to parse " + name.encode(sickbeard.SYS_ENCODING, 'xmlcharrefreplace')) + if None is final_result.season_number and not final_result.episode_numbers and None is final_result.air_date and not final_result.ab_episode_numbers and not final_result.series_name: + raise InvalidNameException('Unable to parse %s' % name.encode(sickbeard.SYS_ENCODING, 'xmlcharrefreplace')) if cache_result: name_parser_cache.add(name, final_result) - logger.log(u"Parsed " + name + " into " + str(final_result).decode('utf-8', 'xmlcharrefreplace'), logger.DEBUG) + logger.log(u'Parsed %s into %s' % (name, str(final_result).decode('utf-8', 'xmlcharrefreplace')), logger.DEBUG) return final_result @@ -498,8 +498,7 @@ class ParseResult(object): show=None, score=None, quality=None, - version=None - ): + version=None): self.original_name = original_name @@ -549,23 +548,15 @@ class ParseResult(object): return False if self.ab_episode_numbers != other.ab_episode_numbers: return False - #if self.show != other.show: - # return False - #if self.score != other.score: - # return False - #if self.quality != other.quality: - # return False - #if self.version != other.version: - # return False return True def __str__(self): - if self.series_name != None: + if None is not self.series_name: to_return = self.series_name + u' - ' else: to_return = u'' - if self.season_number != None: + if None is not self.season_number: to_return += 'S' + str(self.season_number) if self.episode_numbers and len(self.episode_numbers): for e in self.episode_numbers: @@ -574,17 +565,17 @@ class ParseResult(object): if self.is_air_by_date: to_return += str(self.air_date) if self.ab_episode_numbers: - to_return += ' [ABS: ' + str(self.ab_episode_numbers) + ']' + to_return += ' [ABS: %s]' % str(self.ab_episode_numbers) if self.is_anime: if self.version: - to_return += ' [ANIME VER: ' + str(self.version) + ']' + to_return += ' [ANIME VER: %s]' % str(self.version) if self.release_group: - to_return += ' [GROUP: ' + self.release_group + ']' + to_return += ' [GROUP: %s]' % self.release_group - to_return += ' [ABD: ' + str(self.is_air_by_date) + ']' - to_return += ' [ANIME: ' + str(self.is_anime) + ']' - to_return += ' [whichReg: ' + str(self.which_regex) + ']' + to_return += ' [ABD: %s]' % str(self.is_air_by_date) + to_return += ' [ANIME: %s]' % str(self.is_anime) + to_return += ' [whichReg: %s]' % str(self.which_regex) return to_return.encode('utf-8') @@ -614,7 +605,7 @@ class NameParserCache(object): def get(self, name): if name in self._previous_parsed: - logger.log("Using cached parse result for: " + name, logger.DEBUG) + logger.log('Using cached parse result for: ' + name, logger.DEBUG) return self._previous_parsed[name] @@ -622,8 +613,8 @@ name_parser_cache = NameParserCache() class InvalidNameException(Exception): - "The given release name is not valid" + """The given release name is not valid""" class InvalidShowException(Exception): - "The given show name is not valid" \ No newline at end of file + """The given show name is not valid""" diff --git a/sickbeard/providers/beyondhd.py b/sickbeard/providers/beyondhd.py index fe965f18..357dce42 100644 --- 
a/sickbeard/providers/beyondhd.py +++ b/sickbeard/providers/beyondhd.py @@ -62,7 +62,7 @@ class BeyondHDProvider(generic.TorrentProvider): for mode in search_params.keys(): if 'Cache' != mode: show_type = self.show.air_by_date and 'Air By Date' \ - or self.show.sports and 'Sports' or self.show.anime and 'Anime' or None + or self.show.is_sports and 'Sports' or self.show.is_anime and 'Anime' or None if show_type: logger.log(u'Provider does not carry shows of type: [%s], skipping' % show_type, logger.DEBUG) return results diff --git a/sickbeard/providers/btn.py b/sickbeard/providers/btn.py index a04e5b19..6fc480cb 100644 --- a/sickbeard/providers/btn.py +++ b/sickbeard/providers/btn.py @@ -171,13 +171,13 @@ class BTNProvider(generic.TorrentProvider): current_params = {'category': 'Season'} # Search for entire seasons: no need to do special things for air by date or sports shows - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: # Search for the year of the air by date show current_params['name'] = str(ep_obj.airdate).split('-')[0] elif ep_obj.show.is_anime: current_params['name'] = '%s' % ep_obj.scene_absolute_number else: - current_params['name'] = 'Season ' + str(ep_obj.scene_season) + current_params['name'] = 'Season %s' % (ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)] # search if 1 == ep_obj.show.indexer: @@ -206,17 +206,19 @@ class BTNProvider(generic.TorrentProvider): search_params = {'category': 'Episode'} # episode - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: date_str = str(ep_obj.airdate) # BTN uses dots in dates, we just search for the date since that # combined with the series identifier should result in just one episode search_params['name'] = date_str.replace('-', '.') - elif ep_obj.show.anime: + elif ep_obj.show.is_anime: search_params['name'] = '%s' % ep_obj.scene_absolute_number else: # Do a general name search for the episode, formatted like SXXEYY - search_params['name'] = 'S%02dE%02d' % (ep_obj.scene_season, ep_obj.scene_episode) + season, episode = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] + search_params['name'] = 'S%02dE%02d' % (season, episode) # search if 1 == ep_obj.show.indexer: diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index c92a93b3..d2624498 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -149,7 +149,7 @@ class GenericProvider: if GenericProvider.TORRENT == self.providerType: try: - torrent_hash = re.findall('urn:btih:([0-9a-f]{32,40})', result.url)[0].upper() + torrent_hash = re.findall('(?i)urn:btih:([0-9a-f]{32,40})', result.url)[0].upper() if 32 == len(torrent_hash): torrent_hash = b16encode(b32decode(torrent_hash)).lower() @@ -158,34 +158,40 @@ class GenericProvider: logger.log('Unable to extract torrent hash from link: ' + ex(result.url), logger.ERROR) return False - urls = ['https://%s/%s.torrent' % (u, torrent_hash) - for u in ('torcache.net/torrent', 'torrage.com/torrent', 'getstrike.net/torrents/api/download')] + urls = ['http%s://%s/%s.torrent' % (u + (torrent_hash,)) + for u in (('s', 'torcache.net/torrent'), ('s', 'getstrike.net/torrents/api/download'), + ('', 'thetorrent.org'))] except: urls = [result.url] - filename = ek.ek(os.path.join, sickbeard.TORRENT_DIR, - helpers.sanitizeFileName(result.name) + '.' 
+ self.providerType) elif GenericProvider.NZB == self.providerType: urls = [result.url] - filename = ek.ek(os.path.join, sickbeard.NZB_DIR, - helpers.sanitizeFileName(result.name) + '.' + self.providerType) else: return for url in urls: - if helpers.download_file(url, filename, session=self.session): - logger.log(u'Downloading a result from ' + self.name + ' at ' + url) + cache_dir = sickbeard.CACHE_DIR or helpers._getTempDir() + base_name = '%s.%s' % (helpers.sanitizeFileName(result.name), self.providerType) + cache_file = ek.ek(os.path.join, cache_dir, base_name) - if GenericProvider.TORRENT == self.providerType: - logger.log(u'Saved magnet link to ' + filename, logger.MESSAGE) - else: - logger.log(u'Saved result to ' + filename, logger.MESSAGE) + if helpers.download_file(url, cache_file, session=self.session): + logger.log(u'Downloaded a result from %s at %s' % (self.name, url)) - if self._verify_download(filename): - return True - elif ek.ek(os.path.isfile, filename): - ek.ek(os.remove, filename) + if self._verify_download(cache_file): + if GenericProvider.TORRENT == self.providerType: + final_dir, link_type = (sickbeard.TORRENT_DIR, 'magnet') + else: + final_dir, link_type = (sickbeard.NZB_DIR, 'nzb') + final_file = ek.ek(os.path.join, final_dir, base_name) + + helpers.moveFile(cache_file, final_file) + if not ek.ek(os.path.isfile, cache_file) and ek.ek(os.path.isfile, final_file): + logger.log(u'Saved %s link to %s' % (link_type, final_file), logger.MESSAGE) + return True + + if ek.ek(os.path.isfile, cache_file): + ek.ek(os.remove, cache_file) logger.log(u'Failed to download result', logger.ERROR) return False @@ -348,7 +354,7 @@ class GenericProvider: version = parse_result.version add_cache_entry = False - if not (show_obj.air_by_date or show_obj.sports): + if not (show_obj.air_by_date or show_obj.is_sports): if 'sponly' == search_mode: if len(parse_result.episode_numbers): logger.log(u'This is supposed to be a season pack search but the result ' + title @@ -644,14 +650,14 @@ class TorrentProvider(GenericProvider): def _get_season_search_strings(self, ep_obj, detail_only=False, scene=True): - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: ep_detail = str(ep_obj.airdate).split('-')[0] - elif ep_obj.show.anime: + elif ep_obj.show.is_anime: ep_detail = ep_obj.scene_absolute_number else: - ep_detail = 'S%02d' % int(ep_obj.scene_season) + ep_detail = 'S%02d' % int((ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)]) - detail = ({}, {'Season_only': [ep_detail]})[detail_only and not self.show.sports and not self.show.anime] + detail = ({}, {'Season_only': [ep_detail]})[detail_only and not self.show.is_sports and not self.show.is_anime] return [dict({'Season': self._build_search_strings(ep_detail, scene)}.items() + detail.items())] def _get_episode_search_strings(self, ep_obj, add_string='', detail_only=False, scene=True, sep_date=' ', use_or=True): @@ -659,18 +665,20 @@ class TorrentProvider(GenericProvider): if not ep_obj: return [] - if self.show.air_by_date or self.show.sports: + if self.show.air_by_date or self.show.is_sports: ep_detail = str(ep_obj.airdate).replace('-', sep_date) - if self.show.sports: + if self.show.is_sports: month = ep_obj.airdate.strftime('%b') ep_detail = ([ep_detail] + [month], '%s|%s' % (ep_detail, month))[use_or] - elif self.show.anime: + elif self.show.is_anime: ep_detail = ep_obj.scene_absolute_number else: - ep_detail = sickbeard.config.naming_ep_type[2] % {'seasonnumber': 
ep_obj.scene_season, - 'episodenumber': ep_obj.scene_episode} - append = (add_string, '')[self.show.anime] - detail = ({}, {'Episode_only': [ep_detail]})[detail_only and not self.show.sports and not self.show.anime] + season, episode = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] + ep_dict = {'seasonnumber': season, 'episodenumber': episode} + ep_detail = sickbeard.config.naming_ep_type[2] % ep_dict + append = (add_string, '')[self.show.is_anime] + detail = ({}, {'Episode_only': [ep_detail]})[detail_only and not self.show.is_sports and not self.show.is_anime] return [dict({'Episode': self._build_search_strings(ep_detail, scene, append)}.items() + detail.items())] def _build_search_strings(self, ep_detail, process_name=True, append=''): diff --git a/sickbeard/providers/hdbits.py b/sickbeard/providers/hdbits.py index 33f2cdbc..4b2ec1f6 100644 --- a/sickbeard/providers/hdbits.py +++ b/sickbeard/providers/hdbits.py @@ -117,18 +117,18 @@ class HDBitsProvider(generic.TorrentProvider): if episode: if show.air_by_date: param['episode'] = str(episode.airdate).replace('-', '|') - elif show.sports: + elif show.is_sports: param['episode'] = episode.airdate.strftime('%b') - elif show.anime: + elif show.is_anime: param['episode'] = '%i' % int(episode.scene_absolute_number) else: param['season'] = episode.scene_season param['episode'] = episode.scene_episode if season: - if show.air_by_date or show.sports: + if show.air_by_date or show.is_sports: param['season'] = str(season.airdate)[:7] - elif show.anime: + elif show.is_anime: param['season'] = '%d' % season.scene_absolute_number else: param['season'] = season.scene_season diff --git a/sickbeard/providers/kat.py b/sickbeard/providers/kat.py index 28f5b809..27bf6def 100644 --- a/sickbeard/providers/kat.py +++ b/sickbeard/providers/kat.py @@ -110,39 +110,39 @@ class KATProvider(generic.TorrentProvider): def _get_season_search_strings(self, ep_obj, **kwargs): - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: airdate = str(ep_obj.airdate).split('-')[0] ep_detail = [airdate, 'Season ' + airdate] - elif ep_obj.show.anime: + elif ep_obj.show.is_anime: ep_detail = '%02i' % ep_obj.scene_absolute_number else: - ep_detail = ['S%(s)02i -S%(s)02iE' % {'s': ep_obj.scene_season}, - 'Season %s -Ep*' % ep_obj.scene_season] + season = (ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)] + ep_detail = ['S%(s)02i -S%(s)02iE' % {'s': season}, 'Season %s -Ep*' % season] - return [{'Season': self._build_search_strings(ep_detail, append=(' category:tv', '')[self.show.anime])}] + return [{'Season': self._build_search_strings(ep_detail, append=(' category:tv', '')[self.show.is_anime])}] def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs): if not ep_obj: return [] - if self.show.air_by_date or self.show.sports: + if self.show.air_by_date or self.show.is_sports: ep_detail = str(ep_obj.airdate).replace('-', ' ') - if self.show.sports: + if self.show.is_sports: ep_detail += '|' + ep_obj.airdate.strftime('%b') - elif self.show.anime: + elif self.show.is_anime: ep_detail = '%02i' % ep_obj.scene_absolute_number else: - ep_detail = '%s|%s' % (config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season, - 'episodenumber': ep_obj.scene_episode}, - config.naming_ep_type[0] % {'seasonnumber': ep_obj.scene_season, - 'episodenumber': ep_obj.scene_episode}) + season, episode = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, 
ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] + ep_dict = {'seasonnumber': season, 'episodenumber': episode} + ep_detail = '%s|%s' % (config.naming_ep_type[2] % ep_dict, config.naming_ep_type[0] % ep_dict) # include provider specific appends if not isinstance(add_string, list): add_string = [add_string] add_string = [x + ' category:tv' for x in add_string] - return [{'Episode': self._build_search_strings(ep_detail, append=(add_string, '')[self.show.anime])}] + return [{'Episode': self._build_search_strings(ep_detail, append=(add_string, '')[self.show.is_anime])}] def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0): diff --git a/sickbeard/providers/newznab.py b/sickbeard/providers/newznab.py index bd272cd4..79dbef24 100755 --- a/sickbeard/providers/newznab.py +++ b/sickbeard/providers/newznab.py @@ -116,14 +116,14 @@ class NewznabProvider(generic.NZBProvider): cur_params = {} # season - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: date_str = str(ep_obj.airdate).split('-')[0] cur_params['season'] = date_str cur_params['q'] = date_str.replace('-', '.') elif ep_obj.show.is_anime: cur_params['season'] = '%d' % ep_obj.scene_absolute_number else: - cur_params['season'] = str(ep_obj.scene_season) + cur_params['season'] = str((ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)]) # search rid = helpers.mapIndexersToShow(ep_obj.show)[2] @@ -151,15 +151,16 @@ class NewznabProvider(generic.NZBProvider): if not ep_obj: return [params] - if ep_obj.show.air_by_date or ep_obj.show.sports: + if ep_obj.show.air_by_date or ep_obj.show.is_sports: date_str = str(ep_obj.airdate) params['season'] = date_str.partition('-')[0] params['ep'] = date_str.partition('-')[2].replace('-', '/') - elif ep_obj.show.anime: + elif ep_obj.show.is_anime: params['ep'] = '%i' % int( ep_obj.scene_absolute_number if int(ep_obj.scene_absolute_number) > 0 else ep_obj.scene_episode) else: - params['season'], params['ep'] = ep_obj.scene_season, ep_obj.scene_episode + params['season'], params['ep'] = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] # search rid = helpers.mapIndexersToShow(ep_obj.show)[2] @@ -177,7 +178,7 @@ class NewznabProvider(generic.NZBProvider): cur_return['q'] = cur_exception to_return.append(cur_return) - if ep_obj.show.anime: + if ep_obj.show.is_anime: # Experimental, add a searchstring without search explicitly for the episode! # Remove the ?ep=e46 paramater and use add the episode number to the query paramater. # Can be usefull for newznab indexers that do not have the episodes 100% parsed. 
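# The provider hunks in this patch repeatedly switch between indexer and scene
# numbering with the tuple-index idiom `(a, b)[bool(flag)]`: bool() yields 0 or
# 1, so the scene pair is picked only when the show is scene mapped. A
# standalone sketch of the idiom (names mirror the hunks; values are invented
# for illustration):
season, episode, scene_season, scene_episode, is_scene = 5, 3, 4, 11, True

s, e = ((season, episode), (scene_season, scene_episode))[bool(is_scene)]
assert (4, 11) == (s, e)  # the scene pair wins while is_scene is set

# ...equivalent to the plainer conditional expression:
s, e = (scene_season, scene_episode) if is_scene else (season, episode)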
diff --git a/sickbeard/providers/rarbg.py b/sickbeard/providers/rarbg.py index eef91bef..fb054070 100644 --- a/sickbeard/providers/rarbg.py +++ b/sickbeard/providers/rarbg.py @@ -171,7 +171,7 @@ class RarbgProvider(generic.TorrentProvider): def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs): search_params = generic.TorrentProvider._get_episode_search_strings(self, ep_obj, detail_only=True) - if self.show.air_by_date and self.show.sports: + if self.show.air_by_date and self.show.is_sports: for x, types in enumerate(search_params): for y, ep_type in enumerate(types): search_params[x][ep_type][y] = '{{%s}}' % search_params[x][ep_type][y] diff --git a/sickbeard/providers/thepiratebay.py b/sickbeard/providers/thepiratebay.py index 1f74765c..fac59758 100644 --- a/sickbeard/providers/thepiratebay.py +++ b/sickbeard/providers/thepiratebay.py @@ -123,26 +123,26 @@ class ThePirateBayProvider(generic.TorrentProvider): elif ep_obj.show.anime: ep_detail = '%02i' % ep_obj.scene_absolute_number else: - ep_detail = ['S%02d' % int(ep_obj.scene_season), - 'Season %s -Ep*' % ep_obj.scene_season] + season = (ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)] + ep_detail = ['S%02d' % int(season), 'Season %s -Ep*' % season] return [{'Season': self._build_search_strings(ep_detail)}] def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs): - if self.show.air_by_date or self.show.sports: + if self.show.air_by_date or self.show.is_sports: ep_detail = str(ep_obj.airdate).replace('-', ' ') - if self.show.sports: + if self.show.is_sports: ep_detail += '|' + ep_obj.airdate.strftime('%b') - elif self.show.anime: + elif self.show.is_anime: ep_detail = '%02i' % ep_obj.scene_absolute_number else: - ep_detail = '%s|%s' % (config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season, - 'episodenumber': ep_obj.scene_episode}, - config.naming_ep_type[0] % {'seasonnumber': ep_obj.scene_season, - 'episodenumber': ep_obj.scene_episode}) + season, episode = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] + ep_dict = {'seasonnumber': season, 'episodenumber': episode} + ep_detail = '%s|%s' % (config.naming_ep_type[2] % ep_dict, config.naming_ep_type[0] % ep_dict) - return [{'Episode': self._build_search_strings(ep_detail, append=(add_string, '')[self.show.anime])}] + return [{'Episode': self._build_search_strings(ep_detail, append=(add_string, '')[self.show.is_anime])}] def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0): diff --git a/sickbeard/providers/totv.py b/sickbeard/providers/totv.py index a96de4eb..029127f7 100644 --- a/sickbeard/providers/totv.py +++ b/sickbeard/providers/totv.py @@ -88,7 +88,8 @@ class ToTVProvider(generic.TorrentProvider): def _get_season_search_strings(self, ep_obj, **kwargs): - return self._build_search_str(ep_obj, {'season': 'Season %02d' % ep_obj.scene_season}) + return self._build_search_str(ep_obj, {'season': 'Season %02d' % + int((ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show.is_scene)])}) def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs): @@ -96,8 +97,9 @@ class ToTVProvider(generic.TorrentProvider): return [{}] # Do a general name search for the episode, formatted like SXXEYY - return self._build_search_str(ep_obj, {'episode': 'S%02dE%02d %s' - % (ep_obj.scene_season, ep_obj.scene_episode, add_string)}) + season, episode = ((ep_obj.season, ep_obj.episode), + (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show.is_scene)] + 
return self._build_search_str(ep_obj, {'episode': 'S%02dE%02d %s' % (season, episode, add_string)}) @staticmethod def _build_search_str(ep_obj, search_params): diff --git a/sickbeard/show_queue.py b/sickbeard/show_queue.py index 502cb57c..3aa7cf75 100644 --- a/sickbeard/show_queue.py +++ b/sickbeard/show_queue.py @@ -471,15 +471,14 @@ class QueueItemAdd(ShowQueueItem): # Load XEM data to DB for show sickbeard.scene_numbering.xem_refresh(self.show.indexerid, self.show.indexer, force=True) + # check if show has XEM mapping and if user disabled scene numbering during add show, output availability to log + if not self.scene and self.show.indexerid in sickbeard.scene_exceptions.xem_tvdb_ids_list\ + + sickbeard.scene_exceptions.xem_rage_ids_list: + logger.log(u'Alternative scene episode numbers were disabled during add show. Edit show to enable them for searching.') # update internal name cache name_cache.buildNameCache(self.show) - # check if show has XEM mapping so we can determine if searches should go by scene numbering or indexer numbering. - if not self.scene and sickbeard.scene_numbering.get_xem_numbering_for_show(self.show.indexerid, - self.show.indexer): - self.show.scene = 1 - self.finish() def _finishEarly(self): diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index bd309483..f3496d06 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -1091,7 +1091,7 @@ class Home(MainHandler): t.submenu.append({'title': 'Update show in Kodi', 'path': 'home/updateKODI?showName=%s' % urllib.quote_plus( showObj.name.encode('utf-8')), 'requires': self.haveKODI}) - t.submenu.append({'title': 'Preview Rename', 'path': 'home/testRename?show=%d' % showObj.indexerid}) + t.submenu.append({'title': 'Media Renamer', 'path': 'home/testRename?show=%d' % showObj.indexerid}) if sickbeard.USE_SUBTITLES and not sickbeard.showQueueScheduler.action.isBeingSubtitled( showObj) and showObj.subtitles: t.submenu.append( @@ -1271,6 +1271,7 @@ class Home(MainHandler): with showObj.lock: t.show = showObj t.scene_exceptions = get_scene_exceptions(showObj.indexerid) + t.show_has_scene_map = showObj.indexerid in sickbeard.scene_exceptions.xem_tvdb_ids_list + sickbeard.scene_exceptions.xem_rage_ids_list return t.respond() @@ -2172,14 +2173,6 @@ class NewHomeAddShows(Home): indexer, show_dir, indexer_id, show_name = self.split_extra_show(show_to_add) - if indexer_id and indexer and show_name: - use_provided_info = True - else: - use_provided_info = False - - # tell the template whether we're giving it show name & Indexer ID - t.use_provided_info = use_provided_info - # use the given show_dir for the indexer search if available if use_show_name: t.default_show_name = show_name @@ -2196,7 +2189,9 @@ class NewHomeAddShows(Home): elif type(other_shows) != list: other_shows = [other_shows] - if use_provided_info: + # tell the template whether we're giving it show name & Indexer ID + t.use_provided_info = bool(indexer_id and indexer and show_name) + if t.use_provided_info: t.provided_indexer_id = int(indexer_id or 0) t.provided_indexer_name = show_name @@ -2208,6 +2203,8 @@ class NewHomeAddShows(Home): t.blacklist = [] t.groups = [] + t.show_scene_maps = sickbeard.scene_exceptions.xem_tvdb_ids_list + sickbeard.scene_exceptions.xem_rage_ids_list + return t.respond() def recommendedShows(self, *args, **kwargs): From d092d418913e24cec7201aaa8cdaa6135fb001ff Mon Sep 17 00:00:00 2001 From: JackDandy Date: Wed, 12 Aug 2015 15:16:56 +0100 Subject: [PATCH 10/95] Remove "Manage Torrents". 
--- CHANGES.md | 1 + gui/slick/interfaces/default/inc_top.tmpl | 5 --- .../interfaces/default/manage_torrents.tmpl | 23 ----------- sickbeard/webserve.py | 40 ------------------- 4 files changed, 1 insertion(+), 68 deletions(-) delete mode 100644 gui/slick/interfaces/default/manage_torrents.tmpl diff --git a/CHANGES.md b/CHANGES.md index a9a6b1f7..dad0436f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,7 @@ * Add search crawler exclusions * Fix saving default show list group on add new show options page * Remove legacy anime split home option from anime settings tab (new option located in general/interface tab) +* Remove "Manage Torrents" * Update Beautiful Soup 4.3.2 to 4.4.0 (r390) diff --git a/gui/slick/interfaces/default/inc_top.tmpl b/gui/slick/interfaces/default/inc_top.tmpl index 49f12b68..7e93dc9f 100644 --- a/gui/slick/interfaces/default/inc_top.tmpl +++ b/gui/slick/interfaces/default/inc_top.tmpl @@ -179,11 +179,6 @@ #if $sickbeard.USE_KODI and $sickbeard.KODI_HOST != ''

 	• Update Kodi
 #end if
-#if $sickbeard.USE_TORRENTS and $sickbeard.TORRENT_METHOD != 'blackhole' \
-	and ($sickbeard.ENABLE_HTTPS and $sickbeard.TORRENT_HOST[:5] == 'https' \
-	or not $sickbeard.ENABLE_HTTPS and $sickbeard.TORRENT_HOST[:5] == 'http:')
-	• Manage Torrents
-#end if
 #if $sickbeard.USE_FAILED_DOWNLOADS
 	• Failed Downloads
 #end if
diff --git a/gui/slick/interfaces/default/manage_torrents.tmpl b/gui/slick/interfaces/default/manage_torrents.tmpl
deleted file mode 100644
index 455eeaca..00000000
--- a/gui/slick/interfaces/default/manage_torrents.tmpl
+++ /dev/null
@@ -1,23 +0,0 @@
-#import sickbeard
-#import datetime
-#from sickbeard.common import *
-##
-#set global $title = 'Manage Torrents'
-#set global $header = 'Manage Torrents'
-#set global $sbPath = '..'
-#set global $topmenu = 'manage'
-##
-#import os.path
-#include $os.path.join($sickbeard.PROG_DIR, 'gui/slick/interfaces/default/inc_top.tmpl')
-
-
-#if $varExists('header')

-	$header
-#else
-	$title
    -#end if - - $info_download_station - - -#include $os.path.join($sickbeard.PROG_DIR, 'gui/slick/interfaces/default/inc_bottom.tmpl') \ No newline at end of file diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index f3496d06..b75ea6de 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -518,7 +518,6 @@ class Home(MainHandler): {'title': 'Update XBMC', 'path': 'home/updateXBMC/', 'requires': self.haveXBMC}, {'title': 'Update Kodi', 'path': 'home/updateKODI/', 'requires': self.haveKODI}, {'title': 'Update Plex', 'path': 'home/updatePLEX/', 'requires': self.havePLEX}, - {'title': 'Manage Torrents', 'path': 'manage/manageTorrents', 'requires': self.haveTORRENT}, {'title': 'Restart', 'path': 'home/restart/?pid=' + str(sickbeard.PID), 'confirm': True}, {'title': 'Shutdown', 'path': 'home/shutdown/?pid=' + str(sickbeard.PID), 'confirm': True}, ] @@ -535,15 +534,6 @@ class Home(MainHandler): def havePLEX(): return sickbeard.USE_PLEX and sickbeard.PLEX_UPDATE_LIBRARY - @staticmethod - def haveTORRENT(): - if sickbeard.USE_TORRENTS and sickbeard.TORRENT_METHOD != 'blackhole' \ - and (sickbeard.ENABLE_HTTPS and sickbeard.TORRENT_HOST[:5] == 'https' - or not sickbeard.ENABLE_HTTPS and sickbeard.TORRENT_HOST[:5] == 'http:'): - return True - else: - return False - @staticmethod def _getEpisode(show, season=None, episode=None, absolute=None): if show is None: @@ -2518,11 +2508,6 @@ class Manage(MainHandler): {'title': 'Show Queue Overview', 'path': 'manage/showQueueOverview/'}, {'title': 'Episode Status Management', 'path': 'manage/episodeStatuses/'}, ] - if sickbeard.USE_TORRENTS and sickbeard.TORRENT_METHOD != 'blackhole' \ - and (sickbeard.ENABLE_HTTPS and sickbeard.TORRENT_HOST[:5] == 'https' - or not sickbeard.ENABLE_HTTPS and sickbeard.TORRENT_HOST[:5] == 'http:'): - manageMenu.append({'title': 'Manage Torrents', 'path': 'manage/manageTorrents/'}) - if sickbeard.USE_SUBTITLES: manageMenu.append({'title': 'Missed Subtitle Management', 'path': 'manage/subtitleMissed/'}) @@ -3166,31 +3151,6 @@ class Manage(MainHandler): self.redirect('/manage/') - def manageTorrents(self, *args, **kwargs): - - t = PageTemplate(headers=self.request.headers, file='manage_torrents.tmpl') - t.info_download_station = '' - t.submenu = self.ManageMenu() - - if re.search('localhost', sickbeard.TORRENT_HOST): - - if sickbeard.LOCALHOST_IP == '': - t.webui_url = re.sub('localhost', helpers.get_lan_ip(), sickbeard.TORRENT_HOST) - else: - t.webui_url = re.sub('localhost', sickbeard.LOCALHOST_IP, sickbeard.TORRENT_HOST) - else: - t.webui_url = sickbeard.TORRENT_HOST - - if sickbeard.TORRENT_METHOD == 'utorrent': - t.webui_url = '/'.join(s.strip('/') for s in (t.webui_url, 'gui/')) - if sickbeard.TORRENT_METHOD == 'download_station': - if helpers.check_url(t.webui_url + 'download/'): - t.webui_url = t.webui_url + 'download/' - else: - t.info_download_station = '

To have a better experience please set the Download Station alias as download, you can check this setting in the Synology DSM Control Panel > Application Portal. Make sure you allow DSM to be embedded with iFrames too in Control Panel > DSM Settings > Security. There is more information about this available here.
    ' - - return t.respond() - def failedDownloads(self, limit=100, toRemove=None): myDB = db.DBConnection('failed.db') From a86dccd99db6d360207af7fb3ba3b155dd923631 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 15 Aug 2015 15:22:29 +0100 Subject: [PATCH 11/95] Update Hachoir library 1.3.3 to 1.3.4 (r1383) --- lib/hachoir_core/__init__.py | 2 +- lib/hachoir_core/benchmark.py | 4 +- lib/hachoir_core/bits.py | 50 +- lib/hachoir_core/cmd_line.py | 8 +- lib/hachoir_core/dict.py | 4 +- lib/hachoir_core/endian.py | 5 +- lib/hachoir_core/error.py | 4 +- lib/hachoir_core/field/__init__.py | 42 +- lib/hachoir_core/field/basic_field_set.py | 10 +- lib/hachoir_core/field/bit_field.py | 6 +- lib/hachoir_core/field/byte_field.py | 8 +- lib/hachoir_core/field/character.py | 7 +- lib/hachoir_core/field/enum.py | 3 +- lib/hachoir_core/field/fake_array.py | 2 +- lib/hachoir_core/field/field.py | 16 +- lib/hachoir_core/field/field_set.py | 2 +- lib/hachoir_core/field/float.py | 10 +- lib/hachoir_core/field/generic_field_set.py | 16 +- lib/hachoir_core/field/helper.py | 4 +- lib/hachoir_core/field/integer.py | 6 +- lib/hachoir_core/field/link.py | 4 +- .../field/new_seekable_field_set.py | 4 +- lib/hachoir_core/field/padding.py | 6 +- lib/hachoir_core/field/parser.py | 12 +- lib/hachoir_core/field/seekable_field_set.py | 220 ++--- lib/hachoir_core/field/static_field_set.py | 4 +- lib/hachoir_core/field/string_field.py | 10 +- lib/hachoir_core/field/sub_file.py | 6 +- lib/hachoir_core/field/timestamp.py | 6 +- lib/hachoir_core/field/vector.py | 2 +- lib/hachoir_core/i18n.py | 6 +- lib/hachoir_core/iso639.py | 2 +- lib/hachoir_core/language.py | 2 +- lib/hachoir_core/log.py | 6 +- lib/hachoir_core/stream/__init__.py | 10 +- lib/hachoir_core/stream/input.py | 32 +- lib/hachoir_core/stream/input_helper.py | 4 +- lib/hachoir_core/stream/output.py | 8 +- lib/hachoir_core/stream/stream.py | 2 +- lib/hachoir_core/text_handler.py | 4 +- lib/hachoir_core/tools.py | 11 +- lib/hachoir_core/version.py | 2 +- lib/hachoir_metadata/__init__.py | 22 +- lib/hachoir_metadata/archive.py | 14 +- lib/hachoir_metadata/audio.py | 14 +- lib/hachoir_metadata/file_system.py | 6 +- lib/hachoir_metadata/filter.py | 2 +- lib/hachoir_metadata/formatter.py | 2 +- lib/hachoir_metadata/image.py | 14 +- lib/hachoir_metadata/jpeg.py | 96 +- lib/hachoir_metadata/metadata.py | 18 +- lib/hachoir_metadata/metadata_item.py | 8 +- lib/hachoir_metadata/misc.py | 63 +- lib/hachoir_metadata/program.py | 6 +- lib/hachoir_metadata/qt/__init__.py | 0 lib/hachoir_metadata/qt/dialog.ui | 64 -- lib/hachoir_metadata/qt/dialog_ui.py | 52 -- lib/hachoir_metadata/register.py | 14 +- lib/hachoir_metadata/riff.py | 14 +- lib/hachoir_metadata/safe.py | 2 +- lib/hachoir_metadata/setter.py | 6 +- lib/hachoir_metadata/video.py | 27 +- lib/hachoir_parser/__init__.py | 10 +- lib/hachoir_parser/archive/__init__.py | 25 +- lib/hachoir_parser/archive/ace.py | 10 +- lib/hachoir_parser/archive/ar.py | 6 +- lib/hachoir_parser/archive/bzip2_parser.py | 178 +++- lib/hachoir_parser/archive/cab.py | 231 ++++- lib/hachoir_parser/archive/gzip_parser.py | 10 +- lib/hachoir_parser/archive/lzx.py | 267 ++++++ lib/hachoir_parser/archive/mar.py | 8 +- lib/hachoir_parser/archive/mozilla_ar.py | 60 ++ lib/hachoir_parser/archive/rar.py | 10 +- lib/hachoir_parser/archive/rpm.py | 10 +- lib/hachoir_parser/archive/sevenzip.py | 8 +- lib/hachoir_parser/archive/tar.py | 8 +- lib/hachoir_parser/archive/zip.py | 52 +- lib/hachoir_parser/archive/zlib.py | 301 +++++++ 
lib/hachoir_parser/audio/8svx.py | 126 --- lib/hachoir_parser/audio/__init__.py | 22 +- lib/hachoir_parser/audio/aiff.py | 12 +- lib/hachoir_parser/audio/au.py | 10 +- lib/hachoir_parser/audio/flac.py | 10 +- lib/hachoir_parser/audio/id3.py | 8 +- lib/hachoir_parser/audio/itunesdb.py | 110 ++- lib/hachoir_parser/audio/midi.py | 60 +- lib/hachoir_parser/audio/mod.py | 8 +- lib/hachoir_parser/audio/modplug.py | 6 +- lib/hachoir_parser/audio/mpeg_audio.py | 16 +- lib/hachoir_parser/audio/real_audio.py | 10 +- lib/hachoir_parser/audio/s3m.py | 12 +- lib/hachoir_parser/audio/xm.py | 12 +- lib/hachoir_parser/common/deflate.py | 6 +- lib/hachoir_parser/common/msdos.py | 4 +- lib/hachoir_parser/common/win32.py | 30 +- lib/hachoir_parser/container/__init__.py | 12 +- lib/hachoir_parser/container/action_script.py | 373 +++++++- lib/hachoir_parser/container/asn1.py | 12 +- lib/hachoir_parser/container/mkv.py | 18 +- lib/hachoir_parser/container/ogg.py | 12 +- lib/hachoir_parser/container/realmedia.py | 8 +- lib/hachoir_parser/container/riff.py | 14 +- lib/hachoir_parser/container/swf.py | 61 +- lib/hachoir_parser/file_system/__init__.py | 14 +- lib/hachoir_parser/file_system/ext2.py | 10 +- lib/hachoir_parser/file_system/fat.py | 16 +- lib/hachoir_parser/file_system/iso9660.py | 6 +- lib/hachoir_parser/file_system/linux_swap.py | 10 +- lib/hachoir_parser/file_system/mbr.py | 10 +- lib/hachoir_parser/file_system/ntfs.py | 12 +- lib/hachoir_parser/file_system/reiser_fs.py | 72 +- lib/hachoir_parser/game/__init__.py | 8 +- lib/hachoir_parser/game/blp.py | 10 +- lib/hachoir_parser/game/laf.py | 6 +- lib/hachoir_parser/game/spider_man_video.py | 8 +- lib/hachoir_parser/game/zsnes.py | 6 +- lib/hachoir_parser/guess.py | 22 +- lib/hachoir_parser/image/__init__.py | 22 +- lib/hachoir_parser/image/bmp.py | 12 +- lib/hachoir_parser/image/common.py | 2 +- lib/hachoir_parser/image/exif.py | 640 +++++++------- lib/hachoir_parser/image/gif.py | 209 ++++- lib/hachoir_parser/image/ico.py | 10 +- lib/hachoir_parser/image/iptc.py | 4 +- lib/hachoir_parser/image/jpeg.py | 275 +++++- lib/hachoir_parser/image/pcx.py | 8 +- .../image/photoshop_metadata.py | 124 ++- lib/hachoir_parser/image/png.py | 16 +- lib/hachoir_parser/image/psd.py | 8 +- lib/hachoir_parser/image/tga.py | 8 +- lib/hachoir_parser/image/tiff.py | 187 +--- lib/hachoir_parser/image/wmf.py | 12 +- lib/hachoir_parser/image/xcf.py | 8 +- lib/hachoir_parser/misc/__init__.py | 32 +- lib/hachoir_parser/misc/bplist.py | 25 +- lib/hachoir_parser/misc/chm.py | 172 +++- lib/hachoir_parser/misc/common.py | 2 +- lib/hachoir_parser/misc/dsstore.py | 211 +++++ lib/hachoir_parser/misc/file_3do.py | 8 +- lib/hachoir_parser/misc/file_3ds.py | 12 +- lib/hachoir_parser/misc/gnome_keyring.py | 8 +- lib/hachoir_parser/misc/hlp.py | 8 +- lib/hachoir_parser/misc/lnk.py | 65 +- lib/hachoir_parser/misc/mapsforge_map.py | 357 ++++++++ lib/hachoir_parser/misc/msoffice.py | 746 ++++++++++++++-- lib/hachoir_parser/misc/msoffice_summary.py | 124 +-- lib/hachoir_parser/misc/mstask.py | 168 ++++ lib/hachoir_parser/misc/ole2.py | 75 +- lib/hachoir_parser/misc/ole2_util.py | 35 + lib/hachoir_parser/misc/pcf.py | 10 +- lib/hachoir_parser/misc/pdf.py | 8 +- lib/hachoir_parser/misc/pifv.py | 10 +- lib/hachoir_parser/misc/torrent.py | 10 +- lib/hachoir_parser/misc/ttf.py | 8 +- lib/hachoir_parser/misc/word_2.py | 168 ++++ lib/hachoir_parser/misc/word_doc.py | 640 ++++++++------ lib/hachoir_parser/network/__init__.py | 2 +- lib/hachoir_parser/network/common.py | 8 +- 
lib/hachoir_parser/network/tcpdump.py | 14 +- lib/hachoir_parser/parser.py | 10 +- lib/hachoir_parser/parser_list.py | 9 +- lib/hachoir_parser/program/__init__.py | 11 +- lib/hachoir_parser/program/elf.py | 282 ++++-- lib/hachoir_parser/program/exe.py | 14 +- lib/hachoir_parser/program/exe_ne.py | 4 +- lib/hachoir_parser/program/exe_pe.py | 8 +- lib/hachoir_parser/program/exe_res.py | 10 +- lib/hachoir_parser/program/java.py | 10 +- lib/hachoir_parser/program/nds.py | 359 ++++++++ lib/hachoir_parser/program/prc.py | 6 +- lib/hachoir_parser/program/python.py | 42 +- lib/hachoir_parser/template.py | 8 +- lib/hachoir_parser/version.py | 2 +- lib/hachoir_parser/video/__init__.py | 10 +- lib/hachoir_parser/video/amf.py | 4 +- lib/hachoir_parser/video/asf.py | 14 +- lib/hachoir_parser/video/flv.py | 12 +- lib/hachoir_parser/video/mov.py | 821 ++++++++++++++++-- lib/hachoir_parser/video/mpeg_ts.py | 18 +- lib/hachoir_parser/video/mpeg_video.py | 12 +- 180 files changed, 7071 insertions(+), 2458 deletions(-) delete mode 100644 lib/hachoir_metadata/qt/__init__.py delete mode 100644 lib/hachoir_metadata/qt/dialog.ui delete mode 100644 lib/hachoir_metadata/qt/dialog_ui.py create mode 100644 lib/hachoir_parser/archive/lzx.py create mode 100644 lib/hachoir_parser/archive/mozilla_ar.py create mode 100644 lib/hachoir_parser/archive/zlib.py delete mode 100644 lib/hachoir_parser/audio/8svx.py create mode 100644 lib/hachoir_parser/misc/dsstore.py create mode 100644 lib/hachoir_parser/misc/mapsforge_map.py create mode 100644 lib/hachoir_parser/misc/mstask.py create mode 100644 lib/hachoir_parser/misc/ole2_util.py create mode 100644 lib/hachoir_parser/misc/word_2.py create mode 100644 lib/hachoir_parser/program/nds.py diff --git a/lib/hachoir_core/__init__.py b/lib/hachoir_core/__init__.py index 5fcd20a4..df1988fd 100644 --- a/lib/hachoir_core/__init__.py +++ b/lib/hachoir_core/__init__.py @@ -1,2 +1,2 @@ -from lib.hachoir_core.version import VERSION as __version__, PACKAGE, WEBSITE, LICENSE +from hachoir_core.version import VERSION as __version__, PACKAGE, WEBSITE, LICENSE diff --git a/lib/hachoir_core/benchmark.py b/lib/hachoir_core/benchmark.py index a046c9a9..f823cfae 100644 --- a/lib/hachoir_core/benchmark.py +++ b/lib/hachoir_core/benchmark.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.tools import humanDurationNanosec -from lib.hachoir_core.i18n import _ +from hachoir_core.tools import humanDurationNanosec +from hachoir_core.i18n import _ from math import floor from time import time diff --git a/lib/hachoir_core/bits.py b/lib/hachoir_core/bits.py index b18547dd..97e84af8 100644 --- a/lib/hachoir_core/bits.py +++ b/lib/hachoir_core/bits.py @@ -3,8 +3,8 @@ Utilities to convert integers and binary strings to binary (number), binary string, number, hexadecimal, etc. """ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.compatibility import reversed +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.compatibility import reversed from itertools import chain, repeat from struct import calcsize, unpack, error as struct_error @@ -30,6 +30,28 @@ def swap32(value): | ((value & 0x00FF0000L) >> 8) \ | ((value & 0xFF000000L) >> 24) +def arrswapmid(data): + r""" + Convert an array of characters from middle-endian to big-endian and vice-versa. 
+ + >>> arrswapmid("badcfehg") + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + """ + assert len(data)%2 == 0 + ret = ['']*len(data) + ret[1::2] = data[0::2] + ret[0::2] = data[1::2] + return ret + +def strswapmid(data): + r""" + Convert raw data from middle-endian to big-endian and vice-versa. + + >>> strswapmid("badcfehg") + 'abcdefgh' + """ + return ''.join(arrswapmid(data)) + def bin2long(text, endian): """ Convert binary number written in a string into an integer. @@ -45,9 +67,10 @@ def bin2long(text, endian): assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) bits = [ (ord(character)-ord("0")) \ for character in text if character in "01" ] - assert len(bits) != 0 if endian is not BIG_ENDIAN: - bits = reversed(bits) + bits = bits[::-1] + size = len(bits) + assert 0 < size value = 0 for bit in bits: value *= 2 @@ -142,7 +165,7 @@ def long2raw(value, endian, size=None): '\x19\x12\x00\x00' """ assert (not size and 0 < value) or (0 <= value) - assert endian in (LITTLE_ENDIAN, BIG_ENDIAN) + assert endian in (LITTLE_ENDIAN, BIG_ENDIAN, MIDDLE_ENDIAN) text = [] while (value != 0 or text == ""): byte = value % 256 @@ -153,13 +176,15 @@ def long2raw(value, endian, size=None): else: need = 0 if need: - if endian is BIG_ENDIAN: - text = chain(repeat("\0", need), reversed(text)) - else: + if endian is LITTLE_ENDIAN: text = chain(text, repeat("\0", need)) + else: + text = chain(repeat("\0", need), reversed(text)) else: - if endian is BIG_ENDIAN: + if endian is not LITTLE_ENDIAN: text = reversed(text) + if endian is MIDDLE_ENDIAN: + text = arrswapmid(text) return "".join(text) def long2bin(size, value, endian, classic_mode=False): @@ -257,6 +282,8 @@ def str2long(data, endian): True >>> str2long("\xff\xff\xff\xff\xff\xff\xff\xff", BIG_ENDIAN) == (2**64-1) True + >>> str2long("\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d + True """ assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits try: @@ -264,14 +291,15 @@ def str2long(data, endian): except KeyError: pass - assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) shift = 0 value = 0 if endian is BIG_ENDIAN: data = reversed(data) + elif endian is MIDDLE_ENDIAN: + data = reversed(strswapmid(data)) for character in data: byte = ord(character) value += (byte << shift) shift += 8 return value - diff --git a/lib/hachoir_core/cmd_line.py b/lib/hachoir_core/cmd_line.py index 80161712..8c4178df 100644 --- a/lib/hachoir_core/cmd_line.py +++ b/lib/hachoir_core/cmd_line.py @@ -1,8 +1,8 @@ from optparse import OptionGroup -from lib.hachoir_core.log import log -from lib.hachoir_core.i18n import _, getTerminalCharset -from lib.hachoir_core.tools import makePrintable -import lib.hachoir_core.config as config +from hachoir_core.log import log +from hachoir_core.i18n import _, getTerminalCharset +from hachoir_core.tools import makePrintable +import hachoir_core.config as config def getHachoirOptions(parser): """ diff --git a/lib/hachoir_core/dict.py b/lib/hachoir_core/dict.py index f7eb65e6..f887683c 100644 --- a/lib/hachoir_core/dict.py +++ b/lib/hachoir_core/dict.py @@ -2,8 +2,8 @@ Dictionnary classes which store values order. 
""" -from lib.hachoir_core.error import HachoirError -from lib.hachoir_core.i18n import _ +from hachoir_core.error import HachoirError +from hachoir_core.i18n import _ class UniqKeyError(HachoirError): """ diff --git a/lib/hachoir_core/endian.py b/lib/hachoir_core/endian.py index 6d09e261..3568010a 100644 --- a/lib/hachoir_core/endian.py +++ b/lib/hachoir_core/endian.py @@ -2,14 +2,15 @@ Constant values about endian. """ -from lib.hachoir_core.i18n import _ +from hachoir_core.i18n import _ BIG_ENDIAN = "ABCD" LITTLE_ENDIAN = "DCBA" +MIDDLE_ENDIAN = "BADC" NETWORK_ENDIAN = BIG_ENDIAN endian_name = { BIG_ENDIAN: _("Big endian"), LITTLE_ENDIAN: _("Little endian"), + MIDDLE_ENDIAN: _("Middle endian"), } - diff --git a/lib/hachoir_core/error.py b/lib/hachoir_core/error.py index 78f614a2..9ec6b573 100644 --- a/lib/hachoir_core/error.py +++ b/lib/hachoir_core/error.py @@ -2,8 +2,8 @@ Functions to display an error (error, warning or information) message. """ -from lib.hachoir_core.log import log -from lib.hachoir_core.tools import makePrintable +from hachoir_core.log import log +from hachoir_core.tools import makePrintable import sys, traceback def getBacktrace(empty="Empty backtrace."): diff --git a/lib/hachoir_core/field/__init__.py b/lib/hachoir_core/field/__init__.py index 66f3ed6b..f313c9ba 100644 --- a/lib/hachoir_core/field/__init__.py +++ b/lib/hachoir_core/field/__init__.py @@ -1,44 +1,44 @@ # Field classes -from lib.hachoir_core.field.field import Field, FieldError, MissingField, joinPath -from lib.hachoir_core.field.bit_field import Bit, Bits, RawBits -from lib.hachoir_core.field.byte_field import Bytes, RawBytes -from lib.hachoir_core.field.sub_file import SubFile, CompressedField -from lib.hachoir_core.field.character import Character -from lib.hachoir_core.field.integer import ( +from hachoir_core.field.field import Field, FieldError, MissingField, joinPath +from hachoir_core.field.bit_field import Bit, Bits, RawBits +from hachoir_core.field.byte_field import Bytes, RawBytes +from hachoir_core.field.sub_file import SubFile, CompressedField +from hachoir_core.field.character import Character +from hachoir_core.field.integer import ( Int8, Int16, Int24, Int32, Int64, UInt8, UInt16, UInt24, UInt32, UInt64, GenericInteger) -from lib.hachoir_core.field.enum import Enum -from lib.hachoir_core.field.string_field import (GenericString, +from hachoir_core.field.enum import Enum +from hachoir_core.field.string_field import (GenericString, String, CString, UnixLine, PascalString8, PascalString16, PascalString32) -from lib.hachoir_core.field.padding import (PaddingBits, PaddingBytes, +from hachoir_core.field.padding import (PaddingBits, PaddingBytes, NullBits, NullBytes) # Functions -from lib.hachoir_core.field.helper import (isString, isInteger, +from hachoir_core.field.helper import (isString, isInteger, createPaddingField, createNullField, createRawField, writeIntoFile, createOrphanField) # FieldSet classes -from lib.hachoir_core.field.fake_array import FakeArray -from lib.hachoir_core.field.basic_field_set import (BasicFieldSet, +from hachoir_core.field.fake_array import FakeArray +from hachoir_core.field.basic_field_set import (BasicFieldSet, ParserError, MatchError) -from lib.hachoir_core.field.generic_field_set import GenericFieldSet -from lib.hachoir_core.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet -from lib.hachoir_core.field.field_set import FieldSet -from lib.hachoir_core.field.static_field_set import StaticFieldSet -from lib.hachoir_core.field.parser import 
Parser -from lib.hachoir_core.field.vector import GenericVector, UserVector +from hachoir_core.field.generic_field_set import GenericFieldSet +from hachoir_core.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet +from hachoir_core.field.field_set import FieldSet +from hachoir_core.field.static_field_set import StaticFieldSet +from hachoir_core.field.parser import Parser +from hachoir_core.field.vector import GenericVector, UserVector # Complex types -from lib.hachoir_core.field.float import Float32, Float64, Float80 -from lib.hachoir_core.field.timestamp import (GenericTimestamp, +from hachoir_core.field.float import Float32, Float64, Float80 +from hachoir_core.field.timestamp import (GenericTimestamp, TimestampUnix32, TimestampUnix64, TimestampMac32, TimestampUUID60, TimestampWin64, DateTimeMSDOS32, TimeDateMSDOS32, TimedeltaWin64) # Special Field classes -from lib.hachoir_core.field.link import Link, Fragment +from hachoir_core.field.link import Link, Fragment available_types = ( Bit, Bits, RawBits, diff --git a/lib/hachoir_core/field/basic_field_set.py b/lib/hachoir_core/field/basic_field_set.py index 5c1bf9f8..74dc0571 100644 --- a/lib/hachoir_core/field/basic_field_set.py +++ b/lib/hachoir_core/field/basic_field_set.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import Field, FieldError -from lib.hachoir_core.stream import InputStream -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.event_handler import EventHandler +from hachoir_core.field import Field, FieldError +from hachoir_core.stream import InputStream +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.event_handler import EventHandler class ParserError(FieldError): """ @@ -60,7 +60,7 @@ class BasicFieldSet(Field): self._global_event_handler = None # Sanity checks (post-conditions) - assert self.endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) if (self._size is not None) and (self._size <= 0): raise ParserError("Invalid parser '%s' size: %s" % (self.path, self._size)) diff --git a/lib/hachoir_core/field/bit_field.py b/lib/hachoir_core/field/bit_field.py index 85f5502d..8fae3c7c 100644 --- a/lib/hachoir_core/field/bit_field.py +++ b/lib/hachoir_core/field/bit_field.py @@ -5,9 +5,9 @@ Bit sized classes: - RawBits: unknown content with a size in bits. """ -from lib.hachoir_core.field import Field -from lib.hachoir_core.i18n import _ -from lib.hachoir_core import config +from hachoir_core.field import Field +from hachoir_core.i18n import _ +from hachoir_core import config class RawBits(Field): """ diff --git a/lib/hachoir_core/field/byte_field.py b/lib/hachoir_core/field/byte_field.py index 4591daa1..16db1810 100644 --- a/lib/hachoir_core/field/byte_field.py +++ b/lib/hachoir_core/field/byte_field.py @@ -3,10 +3,10 @@ Very basic field: raw content with a size in byte. Use this class for unknown content. 
""" -from lib.hachoir_core.field import Field, FieldError -from lib.hachoir_core.tools import makePrintable -from lib.hachoir_core.bits import str2hex -from lib.hachoir_core import config +from hachoir_core.field import Field, FieldError +from hachoir_core.tools import makePrintable +from hachoir_core.bits import str2hex +from hachoir_core import config MAX_LENGTH = (2**64) diff --git a/lib/hachoir_core/field/character.py b/lib/hachoir_core/field/character.py index 002a18c9..566c4332 100644 --- a/lib/hachoir_core/field/character.py +++ b/lib/hachoir_core/field/character.py @@ -2,9 +2,9 @@ Character field class: a 8-bit character """ -from lib.hachoir_core.field import Bits -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import makePrintable +from hachoir_core.field import Bits +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import makePrintable class Character(Bits): """ @@ -24,4 +24,3 @@ class Character(Bits): def createDisplay(self): return makePrintable(self.value, "ASCII", quote="'", to_unicode=True) - diff --git a/lib/hachoir_core/field/enum.py b/lib/hachoir_core/field/enum.py index cc04a29e..61873504 100644 --- a/lib/hachoir_core/field/enum.py +++ b/lib/hachoir_core/field/enum.py @@ -1,7 +1,7 @@ def Enum(field, enum, key_func=None): """ Enum is an adapter to another field: it will just change its display - attribute. It uses a dictionnary to associate a value to another. + attribute. It uses a dictionary to associate a value to another. key_func is an optional function with prototype "def func(key)->key" which is called to transform key. @@ -23,4 +23,3 @@ def Enum(field, enum, key_func=None): field.createDisplay = createDisplay field.getEnum = lambda: enum return field - diff --git a/lib/hachoir_core/field/fake_array.py b/lib/hachoir_core/field/fake_array.py index f5ae6d95..5535cafe 100644 --- a/lib/hachoir_core/field/fake_array.py +++ b/lib/hachoir_core/field/fake_array.py @@ -1,5 +1,5 @@ import itertools -from lib.hachoir_core.field import MissingField +from hachoir_core.field import MissingField class FakeArray: """ diff --git a/lib/hachoir_core/field/field.py b/lib/hachoir_core/field/field.py index ccb9e172..cc59e9ce 100644 --- a/lib/hachoir_core/field/field.py +++ b/lib/hachoir_core/field/field.py @@ -2,12 +2,12 @@ Parent of all (field) classes in Hachoir: Field. 
""" -from lib.hachoir_core.compatibility import reversed -from lib.hachoir_core.stream import InputFieldStream -from lib.hachoir_core.error import HachoirError, HACHOIR_ERRORS -from lib.hachoir_core.log import Logger -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makePrintable +from hachoir_core.compatibility import reversed +from hachoir_core.stream import InputFieldStream +from hachoir_core.error import HachoirError, HACHOIR_ERRORS +from hachoir_core.log import Logger +from hachoir_core.i18n import _ +from hachoir_core.tools import makePrintable from weakref import ref as weakref_ref class FieldError(HachoirError): @@ -70,6 +70,8 @@ class Field(Logger): assert issubclass(parent.__class__, Field) assert (size is None) or (0 <= size) self._parent = parent + if not name: + raise ValueError("empty field name") self._name = name self._address = parent.nextFieldAddress() self._size = size @@ -166,7 +168,7 @@ class Field(Logger): return '/' names = [] field = self - while field: + while field is not None: names.append(field._name) field = field._parent names[-1] = '' diff --git a/lib/hachoir_core/field/field_set.py b/lib/hachoir_core/field/field_set.py index c535081f..92b51926 100644 --- a/lib/hachoir_core/field/field_set.py +++ b/lib/hachoir_core/field/field_set.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import BasicFieldSet, GenericFieldSet +from hachoir_core.field import BasicFieldSet, GenericFieldSet class FieldSet(GenericFieldSet): def __init__(self, parent, name, *args, **kw): diff --git a/lib/hachoir_core/field/float.py b/lib/hachoir_core/field/float.py index 56d77867..025b57df 100644 --- a/lib/hachoir_core/field/float.py +++ b/lib/hachoir_core/field/float.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import Bit, Bits, FieldSet -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.field import Bit, Bits, FieldSet +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN import struct # Make sure that we use right struct types @@ -85,15 +85,15 @@ def floatFactory(name, format, mantissa_bits, exponent_bits, doc): cls.__name__ = name return cls -# 32-bit float (standart: IEEE 754/854) +# 32-bit float (standard: IEEE 754/854) Float32 = floatFactory("Float32", "f", 23, 8, "Floating point number: format IEEE 754 int 32 bit") -# 64-bit float (standart: IEEE 754/854) +# 64-bit float (standard: IEEE 754/854) Float64 = floatFactory("Float64", "d", 52, 11, "Floating point number: format IEEE 754 in 64 bit") -# 80-bit float (standart: IEEE 754/854) +# 80-bit float (standard: IEEE 754/854) Float80 = floatFactory("Float80", None, 64, 15, "Floating point number: format IEEE 754 in 80 bit") diff --git a/lib/hachoir_core/field/generic_field_set.py b/lib/hachoir_core/field/generic_field_set.py index 66634827..4817c2fc 100644 --- a/lib/hachoir_core/field/generic_field_set.py +++ b/lib/hachoir_core/field/generic_field_set.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (MissingField, BasicFieldSet, Field, ParserError, +from hachoir_core.field import (MissingField, BasicFieldSet, Field, ParserError, createRawField, createNullField, createPaddingField, FakeArray) -from lib.hachoir_core.dict import Dict, UniqKeyError -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.tools import lowerBound -import lib.hachoir_core.config as config +from hachoir_core.dict import Dict, UniqKeyError +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.tools import lowerBound, makeUnicode +import hachoir_core.config as config 
class GenericFieldSet(BasicFieldSet): """ @@ -12,8 +12,8 @@ class GenericFieldSet(BasicFieldSet): document). Class attributes: - - endian: Bytes order (L{BIG_ENDIAN} or L{LITTLE_ENDIAN}). Optional if the - field set has a parent ; + - endian: Bytes order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN}). + Optional if the field set has a parent ; - static_size: (optional) Size of FieldSet in bits. This attribute should be used in parser of constant size. @@ -310,7 +310,7 @@ class GenericFieldSet(BasicFieldSet): """ if self._size is None or not self.autofix: return False - self.warning(unicode(exception)) + self.warning(makeUnicode(exception)) return self._fixLastField() def _feedUntil(self, field_name): diff --git a/lib/hachoir_core/field/helper.py b/lib/hachoir_core/field/helper.py index 174d79e2..ba44f68e 100644 --- a/lib/hachoir_core/field/helper.py +++ b/lib/hachoir_core/field/helper.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldError, +from hachoir_core.field import (FieldError, RawBits, RawBytes, PaddingBits, PaddingBytes, NullBits, NullBytes, GenericString, GenericInteger) -from lib.hachoir_core.stream import FileOutputStream +from hachoir_core.stream import FileOutputStream def createRawField(parent, size, name="raw[]", description=None): if size <= 0: diff --git a/lib/hachoir_core/field/integer.py b/lib/hachoir_core/field/integer.py index bad64996..1f98322b 100644 --- a/lib/hachoir_core/field/integer.py +++ b/lib/hachoir_core/field/integer.py @@ -4,15 +4,15 @@ Integer field classes: - Int8, Int16, Int24, Int32, Int64: signed integer of 8, 16, 32, 64 bits. """ -from lib.hachoir_core.field import Bits, FieldError +from hachoir_core.field import Bits, FieldError class GenericInteger(Bits): """ Generic integer class used to generate other classes. """ def __init__(self, parent, name, signed, size, description=None): - if not (8 <= size <= 256): - raise FieldError("Invalid integer size (%s): have to be in 8..256" % size) + if not (8 <= size <= 16384): + raise FieldError("Invalid integer size (%s): have to be in 8..16384" % size) Bits.__init__(self, parent, name, size, description) self.signed = signed diff --git a/lib/hachoir_core/field/link.py b/lib/hachoir_core/field/link.py index cccf2e67..b331c3b8 100644 --- a/lib/hachoir_core/field/link.py +++ b/lib/hachoir_core/field/link.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import Field, FieldSet, ParserError, Bytes, MissingField -from lib.hachoir_core.stream import FragmentedStream +from hachoir_core.field import Field, FieldSet, ParserError, Bytes, MissingField +from hachoir_core.stream import FragmentedStream class Link(Field): diff --git a/lib/hachoir_core/field/new_seekable_field_set.py b/lib/hachoir_core/field/new_seekable_field_set.py index 22403b6d..d145ab96 100644 --- a/lib/hachoir_core/field/new_seekable_field_set.py +++ b/lib/hachoir_core/field/new_seekable_field_set.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField -from lib.hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField +from hachoir_core.error import HACHOIR_ERRORS # getgaps(int, int, [listof (int, int)]) -> generator of (int, int) # Gets all the gaps not covered by a block in `blocks` from `start` for `length` units. 
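The getgaps() helper described by this comment is self-contained (its full implementation and doctest arrive in the seekable_field_set.py hunk below) and is easy to sanity-check outside Hachoir. A minimal sketch in Python 2, the library's own language, reusing the doctest values carried by the patch:

    def getgaps(start, length, blocks):
        # Yield (start, length) pairs for every span inside
        # [start, start+length) not covered by any block in `blocks`.
        # Sort a copy so the caller's list is not mutated.
        blocks = sorted(blocks, key=lambda b: b[0])
        end = start + length
        for s, l in blocks:
            if s > start:
                yield (start, s - start)  # gap before this block
                start = s
            if s + l > start:
                start = s + l  # jump past the block; tolerates overlaps and duplicates
        if start < end:
            yield (start, end - start)  # trailing gap

    # Values from the doctest in the patch:
    print list(getgaps(0, 20, [(15, 3), (6, 2), (6, 2), (1, 2), (2, 3), (11, 2), (9, 5)]))
    # -> [(0, 1), (5, 1), (8, 1), (14, 1), (18, 2)]

The rewritten RootSeekableFieldSet._fixLastField() feeds these gaps to createRawField() as "unparsed[]" fields, so a seekable parser ends up covering its whole declared size even when fields were created out of order.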
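Likewise, the middle-endian (PDP-11 style) support this update threads through endian.py, bits.py and stream/input.py reduces to one idea: swap the two bytes of every 16-bit word, after which the data reads as plain big-endian. A short sketch assuming Python 2 byte strings as used by the library; str2long_middle is an illustrative name for the MIDDLE_ENDIAN branch of str2long, not an API of its own:

    def strswapmid(data):
        # Swap each byte pair: middle-endian <-> big-endian (as in bits.py above).
        assert len(data) % 2 == 0
        ret = [''] * len(data)
        ret[1::2] = data[0::2]
        ret[0::2] = data[1::2]
        return ''.join(ret)

    def str2long_middle(data):
        # Once the pairs are swapped the bytes are big-endian: fold them MSB first.
        value = 0
        for character in strswapmid(data):
            value = (value << 8) | ord(character)
        return value

    # Values from the doctests in the patch:
    assert strswapmid("badcfehg") == "abcdefgh"
    assert str2long_middle("\x0b\x0a\x0d\x0c") == 0x0a0b0c0d

This is also why InputStream.readBits() now widens MIDDLE_ENDIAN reads to an aligned run of whole 16-bit words before decoding: the pair swap is only defined on complete words.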
diff --git a/lib/hachoir_core/field/padding.py b/lib/hachoir_core/field/padding.py index 3d0fcc0f..c1c4b8c0 100644 --- a/lib/hachoir_core/field/padding.py +++ b/lib/hachoir_core/field/padding.py @@ -1,6 +1,6 @@ -from lib.hachoir_core.field import Bits, Bytes -from lib.hachoir_core.tools import makePrintable, humanFilesize -from lib.hachoir_core import config +from hachoir_core.field import Bits, Bytes +from hachoir_core.tools import makePrintable, humanFilesize +from hachoir_core import config class PaddingBits(Bits): """ diff --git a/lib/hachoir_core/field/parser.py b/lib/hachoir_core/field/parser.py index f4aebadd..8c16bf13 100644 --- a/lib/hachoir_core/field/parser.py +++ b/lib/hachoir_core/field/parser.py @@ -1,13 +1,13 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.field import GenericFieldSet -from lib.hachoir_core.log import Logger -import lib.hachoir_core.config as config +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.field import GenericFieldSet +from hachoir_core.log import Logger +import hachoir_core.config as config class Parser(GenericFieldSet): """ A parser is the root of all other fields. It create first level of fields and have special attributes and methods: - - endian: Byte order (L{BIG_ENDIAN} or L{LITTLE_ENDIAN}) of input data ; + - endian: Byte order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN}) of input data ; - stream: Data input stream (set in L{__init__()}) ; - size: Field set size will be size of input stream. """ @@ -21,7 +21,7 @@ class Parser(GenericFieldSet): """ # Check arguments assert hasattr(self, "endian") \ - and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN) + and self.endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) # Call parent constructor GenericFieldSet.__init__(self, None, "root", stream, description, stream.askSize(self)) diff --git a/lib/hachoir_core/field/seekable_field_set.py b/lib/hachoir_core/field/seekable_field_set.py index ee7b1127..c3a3b448 100644 --- a/lib/hachoir_core/field/seekable_field_set.py +++ b/lib/hachoir_core/field/seekable_field_set.py @@ -1,182 +1,82 @@ -from lib.hachoir_core.field import Field, BasicFieldSet, FakeArray, MissingField, ParserError -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.error import HACHOIR_ERRORS -from itertools import repeat -import lib.hachoir_core.config as config +from hachoir_core.field import BasicFieldSet, GenericFieldSet, ParserError, createRawField +from hachoir_core.error import HACHOIR_ERRORS -class RootSeekableFieldSet(BasicFieldSet): - def __init__(self, parent, name, stream, description, size): - BasicFieldSet.__init__(self, parent, name, stream, description, size) - self._generator = self.createFields() - self._offset = 0 - self._current_size = 0 - if size: - self._current_max_size = size - else: - self._current_max_size = 0 - self._field_dict = {} - self._field_array = [] - - def _feedOne(self): - assert self._generator - field = self._generator.next() - self._addField(field) - return field - - def array(self, key): - return FakeArray(self, key) - - def getFieldByAddress(self, address, feed=True): - for field in self._field_array: - if field.address <= address < field.address + field.size: - return field - for field in self._readFields(): - if field.address <= address < field.address + field.size: - return field - return None - - def _stopFeed(self): - self._size = self._current_max_size - self._generator = None - done = property(lambda self: not bool(self._generator)) 
- - def _getSize(self): - if self._size is None: - self._feedAll() - return self._size - size = property(_getSize) - - def _getField(self, key, const): - field = Field._getField(self, key, const) - if field is not None: - return field - if key in self._field_dict: - return self._field_dict[key] - if self._generator and not const: - try: - while True: - field = self._feedOne() - if field.name == key: - return field - except StopIteration: - self._stopFeed() - except HACHOIR_ERRORS, err: - self.error("Error: %s" % makeUnicode(err)) - self._stopFeed() - return None - - def getField(self, key, const=True): - if isinstance(key, (int, long)): - if key < 0: - raise KeyError("Key must be positive!") - if not const: - self.readFirstFields(key+1) - if len(self._field_array) <= key: - raise MissingField(self, key) - return self._field_array[key] - return Field.getField(self, key, const) - - def _addField(self, field): - if field._name.endswith("[]"): - self.setUniqueFieldName(field) - if config.debug: - self.info("[+] DBG: _addField(%s)" % field.name) - - if field._address != self._offset: - self.warning("Set field %s address to %s (was %s)" % ( - field.path, self._offset//8, field._address//8)) - field._address = self._offset - assert field.name not in self._field_dict - - self._checkFieldSize(field) - - self._field_dict[field.name] = field - self._field_array.append(field) - self._current_size += field.size - self._offset += field.size - self._current_max_size = max(self._current_max_size, field.address + field.size) - - def _checkAddress(self, address): - if self._size is not None: - max_addr = self._size - else: - # FIXME: Use parent size - max_addr = self.stream.size - return address < max_addr - - def _checkFieldSize(self, field): - size = field.size - addr = field.address - if not self._checkAddress(addr+size-1): - raise ParserError("Unable to add %s: field is too large" % field.name) +# getgaps(int, int, [listof (int, int)]) -> generator of (int, int) +# Gets all the gaps not covered by a block in `blocks` from `start` for `length` units. +def getgaps(start, length, blocks): + ''' + Example: + >>> list(getgaps(0, 20, [(15,3), (6,2), (6,2), (1,2), (2,3), (11,2), (9,5)])) + [(0, 1), (5, 1), (8, 1), (14, 1), (18, 2)] + ''' + # done this way to avoid mutating the original + blocks = sorted(blocks, key=lambda b: b[0]) + end = start+length + for s, l in blocks: + if s > start: + yield (start, s-start) + start = s + if s+l > start: + start = s+l + if start < end: + yield (start, end-start) +class RootSeekableFieldSet(GenericFieldSet): def seekBit(self, address, relative=True): if not relative: address -= self.absolute_address if address < 0: raise ParserError("Seek below field set start (%s.%s)" % divmod(address, 8)) - if not self._checkAddress(address): - raise ParserError("Seek above field set end (%s.%s)" % divmod(address, 8)) - self._offset = address + self._current_size = address return None def seekByte(self, address, relative=True): return self.seekBit(address*8, relative) - def readMoreFields(self, number): - return self._readMoreFields(xrange(number)) + def _fixLastField(self): + """ + Try to fix last field when we know current field set size. + Returns new added field if any, or None. 
+ """ + assert self._size is not None - def _feedAll(self): - return self._readMoreFields(repeat(1)) + # Stop parser + message = ["stop parser"] + self._field_generator = None - def _readFields(self): - while True: - added = self._readMoreFields(xrange(1)) - if not added: - break - yield self._field_array[-1] + # If last field is too big, delete it + while self._size < self._current_size: + field = self._deleteField(len(self._fields)-1) + message.append("delete field %s" % field.path) + assert self._current_size <= self._size - def _readMoreFields(self, index_generator): - added = 0 - if self._generator: - try: - for index in index_generator: - self._feedOne() - added += 1 - except StopIteration: - self._stopFeed() - except HACHOIR_ERRORS, err: - self.error("Error: %s" % makeUnicode(err)) - self._stopFeed() - return added + blocks = [(x.absolute_address, x.size) for x in self._fields] + fields = [] + self._size = max(self._size, max(a+b for a,b in blocks) - self.absolute_address) + for start, length in getgaps(self.absolute_address, self._size, blocks): + self.seekBit(start, relative=False) + field = createRawField(self, length, "unparsed[]") + self.setUniqueFieldName(field) + self._fields.append(field.name, field) + fields.append(field) + message.append("found unparsed segment: start %s, length %s" % (start, length)) + self.seekBit(self._size + self.absolute_address, relative=False) + message = ", ".join(message) + if fields: + self.warning("[Autofix] Fix parser error: " + message) + return fields - current_length = property(lambda self: len(self._field_array)) - current_size = property(lambda self: self._offset) + def _stopFeeding(self): + new_field = None + if self._size is None: + if self._parent: + self._size = self._current_size - def __iter__(self): - for field in self._field_array: - yield field - if self._generator: - try: - while True: - yield self._feedOne() - except StopIteration: - self._stopFeed() - raise StopIteration - - def __len__(self): - if self._generator: - self._feedAll() - return len(self._field_array) - - def nextFieldAddress(self): - return self._offset - - def getFieldIndex(self, field): - return self._field_array.index(field) + new_field = self._fixLastField() + self._field_generator = None + return new_field class SeekableFieldSet(RootSeekableFieldSet): def __init__(self, parent, name, description=None, size=None): assert issubclass(parent.__class__, BasicFieldSet) RootSeekableFieldSet.__init__(self, parent, name, parent.stream, description, size) - diff --git a/lib/hachoir_core/field/static_field_set.py b/lib/hachoir_core/field/static_field_set.py index 2e9e689e..e3897b30 100644 --- a/lib/hachoir_core/field/static_field_set.py +++ b/lib/hachoir_core/field/static_field_set.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import FieldSet, ParserError +from hachoir_core.field import FieldSet, ParserError class StaticFieldSet(FieldSet): """ @@ -20,7 +20,7 @@ class StaticFieldSet(FieldSet): if cls._class is not cls.__name__: cls._class = cls.__name__ cls.static_size = cls._computeStaticSize() - return object.__new__(cls) + return object.__new__(cls, *args, **kw) @staticmethod def _computeItemSize(item): diff --git a/lib/hachoir_core/field/string_field.py b/lib/hachoir_core/field/string_field.py index e2bb4ed0..e44e24dc 100644 --- a/lib/hachoir_core/field/string_field.py +++ b/lib/hachoir_core/field/string_field.py @@ -15,11 +15,11 @@ Note: For PascalStringXX, prefixed value is the number of bytes and not of characters! 
""" -from lib.hachoir_core.field import FieldError, Bytes -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.tools import alignValue, makePrintable -from lib.hachoir_core.i18n import guessBytesCharset, _ -from lib.hachoir_core import config +from hachoir_core.field import FieldError, Bytes +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.tools import alignValue, makePrintable +from hachoir_core.i18n import guessBytesCharset, _ +from hachoir_core import config from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF32_LE, BOM_UTF32_BE # Default charset used to convert byte string to Unicode diff --git a/lib/hachoir_core/field/sub_file.py b/lib/hachoir_core/field/sub_file.py index b5993ba4..0f2912d4 100644 --- a/lib/hachoir_core/field/sub_file.py +++ b/lib/hachoir_core/field/sub_file.py @@ -1,6 +1,6 @@ -from lib.hachoir_core.field import Bytes -from lib.hachoir_core.tools import makePrintable, humanFilesize -from lib.hachoir_core.stream import InputIOStream +from hachoir_core.field import Bytes +from hachoir_core.tools import makePrintable, humanFilesize +from hachoir_core.stream import InputIOStream class SubFile(Bytes): """ diff --git a/lib/hachoir_core/field/timestamp.py b/lib/hachoir_core/field/timestamp.py index 57906537..8a07bcdf 100644 --- a/lib/hachoir_core/field/timestamp.py +++ b/lib/hachoir_core/field/timestamp.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.tools import (humanDatetime, humanDuration, +from hachoir_core.tools import (humanDatetime, humanDuration, timestampUNIX, timestampMac32, timestampUUID60, timestampWin64, durationWin64) -from lib.hachoir_core.field import Bits, FieldSet +from hachoir_core.field import Bits, FieldSet from datetime import datetime class GenericTimestamp(Bits): @@ -32,7 +32,7 @@ def timestampFactory(cls_name, handler, size): TimestampUnix32 = timestampFactory("TimestampUnix32", timestampUNIX, 32) TimestampUnix64 = timestampFactory("TimestampUnix64", timestampUNIX, 64) -TimestampMac32 = timestampFactory("TimestampUnix32", timestampMac32, 32) +TimestampMac32 = timestampFactory("TimestampMac32", timestampMac32, 32) TimestampUUID60 = timestampFactory("TimestampUUID60", timestampUUID60, 60) TimestampWin64 = timestampFactory("TimestampWin64", timestampWin64, 64) diff --git a/lib/hachoir_core/field/vector.py b/lib/hachoir_core/field/vector.py index eba86471..953fdbc3 100644 --- a/lib/hachoir_core/field/vector.py +++ b/lib/hachoir_core/field/vector.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import Field, FieldSet, ParserError +from hachoir_core.field import Field, FieldSet, ParserError class GenericVector(FieldSet): def __init__(self, parent, name, nb_items, item_class, item_name="item", description=None): diff --git a/lib/hachoir_core/i18n.py b/lib/hachoir_core/i18n.py index 8babf8e9..b34c7480 100644 --- a/lib/hachoir_core/i18n.py +++ b/lib/hachoir_core/i18n.py @@ -14,8 +14,8 @@ WARNING: Loading this module indirectly calls initLocale() which sets settings. 
""" -import lib.hachoir_core.config as config -import lib.hachoir_core +import hachoir_core.config as config +import hachoir_core import locale from os import path import sys @@ -133,7 +133,7 @@ def _initGettext(): return (_dummy_gettext, _dummy_ngettext) # Gettext variables - package = lib.hachoir_core.PACKAGE + package = hachoir_core.PACKAGE locale_dir = path.join(path.dirname(__file__), "..", "locale") # Initialize gettext module diff --git a/lib/hachoir_core/iso639.py b/lib/hachoir_core/iso639.py index 61a0ba93..5da70e11 100644 --- a/lib/hachoir_core/iso639.py +++ b/lib/hachoir_core/iso639.py @@ -328,7 +328,6 @@ _ISO639 = ( (u"Micmac", "mic", None), (u"Minangkabau", "min", None), (u"Mirandese", "mwl", None), - (u"Miscellaneous languages", "mis", None), (u"Mohawk", "moh", None), (u"Moksha", "mdf", None), (u"Moldavian", "mol", "mo"), @@ -513,6 +512,7 @@ _ISO639 = ( (u"Uighur", "uig", "ug"), (u"Ukrainian", "ukr", "uk"), (u"Umbundu", "umb", None), + (u"Uncoded languages", "mis", None), (u"Undetermined", "und", None), (u"Upper Sorbian", "hsb", None), (u"Urdu", "urd", "ur"), diff --git a/lib/hachoir_core/language.py b/lib/hachoir_core/language.py index 2f80ddce..997f7a61 100644 --- a/lib/hachoir_core/language.py +++ b/lib/hachoir_core/language.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.iso639 import ISO639_2 +from hachoir_core.iso639 import ISO639_2 class Language: def __init__(self, code): diff --git a/lib/hachoir_core/log.py b/lib/hachoir_core/log.py index f777ab6b..32fca06d 100644 --- a/lib/hachoir_core/log.py +++ b/lib/hachoir_core/log.py @@ -1,6 +1,6 @@ import os, sys, time -import lib.hachoir_core.config as config -from lib.hachoir_core.i18n import _ +import hachoir_core.config as config +from hachoir_core.i18n import _ class Log: LOG_INFO = 0 @@ -75,7 +75,7 @@ class Log: level <= self.LOG_INFO and not config.verbose: return if config.debug: - from lib.hachoir_core.error import getBacktrace + from hachoir_core.error import getBacktrace backtrace = getBacktrace(None) if backtrace: text += "\n\n" + backtrace diff --git a/lib/hachoir_core/stream/__init__.py b/lib/hachoir_core/stream/__init__.py index c2a8f6dc..163e12a3 100644 --- a/lib/hachoir_core/stream/__init__.py +++ b/lib/hachoir_core/stream/__init__.py @@ -1,11 +1,11 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.stream.stream import StreamError -from lib.hachoir_core.stream.input import ( +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.stream.stream import StreamError +from hachoir_core.stream.input import ( InputStreamError, InputStream, InputIOStream, StringInputStream, InputSubStream, InputFieldStream, FragmentedStream, ConcatStream) -from lib.hachoir_core.stream.input_helper import FileInputStream, guessStreamCharset -from lib.hachoir_core.stream.output import (OutputStreamError, +from hachoir_core.stream.input_helper import FileInputStream, guessStreamCharset +from hachoir_core.stream.output import (OutputStreamError, FileOutputStream, StringOutputStream, OutputStream) diff --git a/lib/hachoir_core/stream/input.py b/lib/hachoir_core/stream/input.py index ec01e6e4..79ca6da0 100644 --- a/lib/hachoir_core/stream/input.py +++ b/lib/hachoir_core/stream/input.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.error import info -from lib.hachoir_core.log import Logger -from lib.hachoir_core.bits import str2long -from lib.hachoir_core.i18n import getTerminalCharset -from lib.hachoir_core.tools import 
lowerBound -from lib.hachoir_core.i18n import _ -from os import dup, fdopen +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN +from hachoir_core.error import info +from hachoir_core.log import Logger +from hachoir_core.bits import str2long +from hachoir_core.i18n import getTerminalCharset +from hachoir_core.tools import lowerBound +from hachoir_core.i18n import _ +from hachoir_core.tools import alignValue from errno import ESPIPE from weakref import ref as weakref_ref -from lib.hachoir_core.stream import StreamError +from hachoir_core.stream import StreamError class InputStreamError(StreamError): pass @@ -168,13 +168,20 @@ class InputStream(Logger): raise NotImplementedError def readBits(self, address, nbits, endian): - assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN) - shift, data, missing = self.read(address, nbits) + if endian is MIDDLE_ENDIAN: + # read an aligned chunk of words + wordaddr, remainder = divmod(address, 16) + wordnbits = alignValue(remainder+nbits, 16) + _, data, missing = self.read(wordaddr*16, wordnbits) + shift = remainder + else: + shift, data, missing = self.read(address, nbits) if missing: raise ReadStreamError(nbits, address) value = str2long(data, endian) - if endian is BIG_ENDIAN: + if endian in (BIG_ENDIAN, MIDDLE_ENDIAN): value >>= len(data) * 8 - shift - nbits else: value >>= shift @@ -404,6 +411,7 @@ class InputIOStream(InputStream): def file(self): if hasattr(self._input, "fileno"): + from os import dup, fdopen new_fd = dup(self._input.fileno()) new_file = fdopen(new_fd, "r") new_file.seek(0) diff --git a/lib/hachoir_core/stream/input_helper.py b/lib/hachoir_core/stream/input_helper.py index 9c222f3a..e7938310 100644 --- a/lib/hachoir_core/stream/input_helper.py +++ b/lib/hachoir_core/stream/input_helper.py @@ -1,5 +1,5 @@ -from lib.hachoir_core.i18n import getTerminalCharset, guessBytesCharset, _ -from lib.hachoir_core.stream import InputIOStream, InputSubStream, InputStreamError +from hachoir_core.i18n import getTerminalCharset, guessBytesCharset, _ +from hachoir_core.stream import InputIOStream, InputSubStream, InputStreamError def FileInputStream(filename, real_filename=None, **args): """ diff --git a/lib/hachoir_core/stream/output.py b/lib/hachoir_core/stream/output.py index a809f76b..4300cc66 100644 --- a/lib/hachoir_core/stream/output.py +++ b/lib/hachoir_core/stream/output.py @@ -1,7 +1,7 @@ from cStringIO import StringIO -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.stream import StreamError +from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.bits import long2raw +from hachoir_core.stream import StreamError from errno import EBADF MAX_READ_NBYTES = 2 ** 16 @@ -21,6 +21,7 @@ class OutputStream(object): filename = property(_getFilename) def writeBit(self, state, endian): + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) # middle endian not yet supported if self._bit_pos == 7: self._bit_pos = 0 if state: @@ -39,6 +40,7 @@ class OutputStream(object): self._bit_pos += 1 def writeBits(self, count, value, endian): + assert endian in (BIG_ENDIAN, LITTLE_ENDIAN) # middle endian not yet supported assert 0 <= value < 2**count # Feed bits to align to byte address diff --git a/lib/hachoir_core/stream/stream.py b/lib/hachoir_core/stream/stream.py index 101fcf68..58c9aea8 100644 --- a/lib/hachoir_core/stream/stream.py +++ b/lib/hachoir_core/stream/stream.py @@ -1,4 +1,4 @@ -from 
lib.hachoir_core.error import HachoirError +from hachoir_core.error import HachoirError class StreamError(HachoirError): pass diff --git a/lib/hachoir_core/text_handler.py b/lib/hachoir_core/text_handler.py index 1a67e9be..e2c65f0a 100644 --- a/lib/hachoir_core/text_handler.py +++ b/lib/hachoir_core/text_handler.py @@ -2,12 +2,12 @@ Utilities used to convert a field to human classic reprentation of data. """ -from lib.hachoir_core.tools import ( +from hachoir_core.tools import ( humanDuration, humanFilesize, alignValue, durationWin64 as doDurationWin64, deprecated) from types import FunctionType, MethodType -from lib.hachoir_core.field import Field +from hachoir_core.field import Field def textHandler(field, handler): assert isinstance(handler, (FunctionType, MethodType)) diff --git a/lib/hachoir_core/tools.py b/lib/hachoir_core/tools.py index 1994dbfb..a8f77334 100644 --- a/lib/hachoir_core/tools.py +++ b/lib/hachoir_core/tools.py @@ -4,7 +4,7 @@ Various utilities. """ -from lib.hachoir_core.i18n import _, ngettext +from hachoir_core.i18n import _, ngettext import re import stat from datetime import datetime, timedelta, MAXYEAR @@ -330,7 +330,14 @@ def makeUnicode(text): if isinstance(text, str): text = unicode(text, "ISO-8859-1") elif not isinstance(text, unicode): - text = unicode(text) + try: + text = unicode(text) + except UnicodeError: + try: + text = str(text) + except Exception: + text = repr(text) + return makeUnicode(text) text = regex_control_code.sub( lambda regs: controlchars[ord(regs.group(1))], text) text = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", text) diff --git a/lib/hachoir_core/version.py b/lib/hachoir_core/version.py index c5e95447..e3506e93 100644 --- a/lib/hachoir_core/version.py +++ b/lib/hachoir_core/version.py @@ -1,5 +1,5 @@ PACKAGE = "hachoir-core" -VERSION = "1.3.3" +VERSION = "1.3.4" WEBSITE = 'http://bitbucket.org/haypo/hachoir/wiki/hachoir-core' LICENSE = 'GNU GPL v2' diff --git a/lib/hachoir_metadata/__init__.py b/lib/hachoir_metadata/__init__.py index 54adbaa8..5ab4743c 100644 --- a/lib/hachoir_metadata/__init__.py +++ b/lib/hachoir_metadata/__init__.py @@ -1,15 +1,15 @@ -from lib.hachoir_metadata.version import VERSION as __version__ -from lib.hachoir_metadata.metadata import extractMetadata +from hachoir_metadata.version import VERSION as __version__ +from hachoir_metadata.metadata import extractMetadata # Just import the module, # each module use registerExtractor() method -import lib.hachoir_metadata.archive -import lib.hachoir_metadata.audio -import lib.hachoir_metadata.file_system -import lib.hachoir_metadata.image -import lib.hachoir_metadata.jpeg -import lib.hachoir_metadata.misc -import lib.hachoir_metadata.program -import lib.hachoir_metadata.riff -import lib.hachoir_metadata.video +import hachoir_metadata.archive +import hachoir_metadata.audio +import hachoir_metadata.file_system +import hachoir_metadata.image +import hachoir_metadata.jpeg +import hachoir_metadata.misc +import hachoir_metadata.program +import hachoir_metadata.riff +import hachoir_metadata.video diff --git a/lib/hachoir_metadata/archive.py b/lib/hachoir_metadata/archive.py index 64289ac9..2ca16321 100644 --- a/lib/hachoir_metadata/archive.py +++ b/lib/hachoir_metadata/archive.py @@ -1,11 +1,11 @@ -from lib.hachoir_metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST -from lib.hachoir_metadata.safe import fault_tolerant, getValue -from lib.hachoir_metadata.metadata import ( +from hachoir_metadata.metadata_item import QUALITY_BEST, QUALITY_FASTEST +from 
hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata import ( RootMetadata, Metadata, MultipleMetadata, registerExtractor) -from lib.hachoir_parser.archive import (Bzip2Parser, CabFile, GzipParser, +from hachoir_parser.archive import (Bzip2Parser, CabFile, GzipParser, TarFile, ZipFile, MarFile) -from lib.hachoir_core.tools import humanUnixAttributes -from lib.hachoir_core.i18n import _ +from hachoir_core.tools import humanUnixAttributes +from hachoir_core.i18n import _ def maxNbFile(meta): if meta.quality <= QUALITY_FASTEST: @@ -110,7 +110,7 @@ class CabMetadata(MultipleMetadata): def extract(self, cab): if "folder[0]" in cab: self.useFolder(cab["folder[0]"]) - self.format_version = "Microsoft Cabinet version %s" % cab["cab_version"].display + self.format_version = "Microsoft Cabinet version %s.%s" % (cab["major_version"].display, cab["minor_version"].display) self.comment = "%s folders, %s files" % ( cab["nb_folder"].value, cab["nb_files"].value) max_nb = maxNbFile(self) diff --git a/lib/hachoir_metadata/audio.py b/lib/hachoir_metadata/audio.py index 86284c47..566613e0 100644 --- a/lib/hachoir_metadata/audio.py +++ b/lib/hachoir_metadata/audio.py @@ -1,12 +1,12 @@ -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser -from lib.hachoir_parser.container import OggFile, RealMediaFile -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate +from hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser +from hachoir_parser.container import OggFile, RealMediaFile +from hachoir_core.i18n import _ +from hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate from datetime import timedelta -from lib.hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST -from lib.hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST +from hachoir_metadata.safe import fault_tolerant, getValue def computeComprRate(meta, size): if not meta.has("duration") \ diff --git a/lib/hachoir_metadata/file_system.py b/lib/hachoir_metadata/file_system.py index ff5ff997..b111c486 100644 --- a/lib/hachoir_metadata/file_system.py +++ b/lib/hachoir_metadata/file_system.py @@ -1,6 +1,6 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.file_system import ISO9660 +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.safe import fault_tolerant +from hachoir_parser.file_system import ISO9660 from datetime import datetime class ISO9660_Metadata(RootMetadata): diff --git a/lib/hachoir_metadata/filter.py b/lib/hachoir_metadata/filter.py index 0807c5d7..b4af8e3c 100644 --- a/lib/hachoir_metadata/filter.py +++ b/lib/hachoir_metadata/filter.py @@ -1,4 +1,4 @@ -from lib.hachoir_metadata.timezone import UTC +from hachoir_metadata.timezone import UTC from datetime import date, datetime # Year in 1850..2030 diff --git a/lib/hachoir_metadata/formatter.py b/lib/hachoir_metadata/formatter.py index d52c12b2..0d04f920 100644 --- a/lib/hachoir_metadata/formatter.py +++ b/lib/hachoir_metadata/formatter.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.i18n import 
_, ngettext +from hachoir_core.i18n import _, ngettext NB_CHANNEL_NAME = {1: _("mono"), 2: _("stereo")} diff --git a/lib/hachoir_metadata/image.py b/lib/hachoir_metadata/image.py index fbe41a3c..1416a8f9 100644 --- a/lib/hachoir_metadata/image.py +++ b/lib/hachoir_metadata/image.py @@ -1,12 +1,12 @@ -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_parser.image import ( +from hachoir_parser.image import ( BmpFile, IcoFile, PcxFile, GifFile, PngFile, TiffFile, XcfFile, TargaFile, WMF_File, PsdFile) -from lib.hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel -from lib.hachoir_parser.image.xcf import XcfProperty -from lib.hachoir_core.i18n import _ -from lib.hachoir_metadata.safe import fault_tolerant +from hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel +from hachoir_parser.image.xcf import XcfProperty +from hachoir_core.i18n import _ +from hachoir_metadata.safe import fault_tolerant def computeComprRate(meta, compr_size): """ @@ -240,7 +240,7 @@ class GifMetadata(RootMetadata): def useScreen(self, screen): self.width = screen["width"].value self.height = screen["height"].value - self.bits_per_pixel = (1 + screen["bpp"].value) + self.bits_per_pixel = (1 + screen["size_global_map"].value) class TargaMetadata(RootMetadata): def extract(self, tga): diff --git a/lib/hachoir_metadata/jpeg.py b/lib/hachoir_metadata/jpeg.py index 9a3fe1aa..a112318f 100644 --- a/lib/hachoir_metadata/jpeg.py +++ b/lib/hachoir_metadata/jpeg.py @@ -1,14 +1,14 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.image import computeComprRate -from lib.hachoir_parser.image.exif import ExifEntry -from lib.hachoir_parser.image.jpeg import ( +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.image import computeComprRate +from hachoir_parser.image.exif import IFD, BasicIFDEntry +from hachoir_parser.image.jpeg import ( JpegFile, JpegChunk, QUALITY_HASH_COLOR, QUALITY_SUM_COLOR, QUALITY_HASH_GRAY, QUALITY_SUM_GRAY) -from lib.hachoir_core.field import MissingField -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_metadata.safe import fault_tolerant +from hachoir_core.field import MissingField +from hachoir_core.i18n import _ +from hachoir_core.tools import makeUnicode +from hachoir_metadata.safe import fault_tolerant from datetime import datetime def deg2float(degree, minute, second): @@ -17,21 +17,21 @@ def deg2float(degree, minute, second): class JpegMetadata(RootMetadata): EXIF_KEY = { # Exif metadatas - ExifEntry.TAG_CAMERA_MANUFACTURER: "camera_manufacturer", - ExifEntry.TAG_CAMERA_MODEL: "camera_model", - ExifEntry.TAG_ORIENTATION: "image_orientation", - ExifEntry.TAG_EXPOSURE: "camera_exposure", - ExifEntry.TAG_FOCAL: "camera_focal", - ExifEntry.TAG_BRIGHTNESS: "camera_brightness", - ExifEntry.TAG_APERTURE: "camera_aperture", + "Make": "camera_manufacturer", + "Model": "camera_model", + "Orientation": "image_orientation", + "ExposureTime": "camera_exposure", + "FNumber": "camera_focal", + "BrightnessValue": "camera_brightness", + "MaxApertureValue": "camera_aperture", # Generic metadatas - ExifEntry.TAG_IMG_TITLE: "title", - ExifEntry.TAG_SOFTWARE: "producer", - ExifEntry.TAG_FILE_TIMESTAMP: "creation_date", - ExifEntry.TAG_WIDTH: "width", - ExifEntry.TAG_HEIGHT: "height", - ExifEntry.TAG_USER_COMMENT: 
"comment", + "ImageDescription": "title", + "Software": "producer", + "DateTime": "creation_date", + "PixelXDimension": "width", + "PixelYDimension": "height", + "UserComment": "comment", } IPTC_KEY = { @@ -63,7 +63,8 @@ class JpegMetadata(RootMetadata): self.extractAPP0(jpeg["app0/content"]) if "exif/content" in jpeg: - for ifd in jpeg.array("exif/content/ifd"): + for ifd in jpeg['exif/content']: + if not isinstance(ifd, IFD): continue for entry in ifd.array("entry"): self.processIfdEntry(ifd, entry) self.readGPS(ifd) @@ -156,7 +157,7 @@ class JpegMetadata(RootMetadata): @fault_tolerant def processIfdEntry(self, ifd, entry): # Skip unknown tags - tag = entry["tag"].value + tag = entry["tag"].display if tag not in self.EXIF_KEY: return key = self.EXIF_KEY[tag] @@ -166,20 +167,17 @@ class JpegMetadata(RootMetadata): return # Read value - if "value" in entry: - value = entry["value"].value - else: - value = ifd["value_%s" % entry.name].value + value = ifd.getEntryValues(entry)[0].value # Convert value to string - if tag == ExifEntry.TAG_ORIENTATION: + if tag == "Orientation": value = self.orientation_name.get(value, value) - elif tag == ExifEntry.TAG_EXPOSURE: + elif tag == "ExposureTime": if not value: return if isinstance(value, float): value = (value, u"1/%g" % (1/value)) - elif entry["type"].value in (ExifEntry.TYPE_RATIONAL, ExifEntry.TYPE_SIGNED_RATIONAL): + elif entry["type"].value in (BasicIFDEntry.TYPE_RATIONAL, BasicIFDEntry.TYPE_SIGNED_RATIONAL): value = (value, u"%.3g" % value) # Store information @@ -197,35 +195,33 @@ class JpegMetadata(RootMetadata): timestamp = None datestamp = None for entry in ifd.array("entry"): - tag = entry["tag"].value - if tag == ExifEntry.TAG_GPS_LATITUDE_REF: - if entry["value"].value == "N": + tag = entry["tag"].display + values = [v.value for v in ifd.getEntryValues(entry)] + if tag == "GPSLatitudeRef": + if values[0] == "N": latitude_ref = 1 else: latitude_ref = -1 - elif tag == ExifEntry.TAG_GPS_LONGITUDE_REF: - if entry["value"].value == "E": + elif tag == "GPSLongitudeRef": + if values[0] == "E": longitude_ref = 1 else: longitude_ref = -1 - elif tag == ExifEntry.TAG_GPS_ALTITUDE_REF: - if entry["value"].value == 1: + elif tag == "GPSAltitudeRef": + if values[0] == 1: altitude_ref = -1 else: altitude_ref = 1 - elif tag == ExifEntry.TAG_GPS_LATITUDE: - latitude = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - elif tag == ExifEntry.TAG_GPS_LONGITUDE: - longitude = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - elif tag == ExifEntry.TAG_GPS_ALTITUDE: - altitude = ifd["value_%s" % entry.name].value - elif tag == ExifEntry.TAG_GPS_DATESTAMP: - datestamp = ifd["value_%s" % entry.name].value - elif tag == ExifEntry.TAG_GPS_TIMESTAMP: - items = [ifd["value_%s[%u]" % (entry.name, index)].value for index in xrange(3)] - items = map(int, items) - items = map(str, items) - timestamp = ":".join(items) + elif tag == "GPSLatitude": + latitude = values + elif tag == "GPSLongitude": + longitude = values + elif tag == "GPSAltitude": + altitude = values[0] + elif tag == "GPSDateStamp": + datestamp = values[0] + elif tag == "GPSTimeStamp": + timestamp = ':'.join(str(int(x)) for x in values) if latitude_ref and latitude: value = deg2float(*latitude) if latitude_ref < 0: diff --git a/lib/hachoir_metadata/metadata.py b/lib/hachoir_metadata/metadata.py index 489a5466..37461c9d 100644 --- a/lib/hachoir_metadata/metadata.py +++ b/lib/hachoir_metadata/metadata.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- -from 
lib.hachoir_core.compatibility import any, sorted -from lib.hachoir_core.endian import endian_name -from lib.hachoir_core.tools import makePrintable, makeUnicode -from lib.hachoir_core.dict import Dict -from lib.hachoir_core.error import error, HACHOIR_ERRORS -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.log import Logger -from lib.hachoir_metadata.metadata_item import ( +from hachoir_core.compatibility import any, sorted +from hachoir_core.endian import endian_name +from hachoir_core.tools import makePrintable, makeUnicode +from hachoir_core.dict import Dict +from hachoir_core.error import error, HACHOIR_ERRORS +from hachoir_core.i18n import _ +from hachoir_core.log import Logger +from hachoir_metadata.metadata_item import ( MIN_PRIORITY, MAX_PRIORITY, QUALITY_NORMAL) -from lib.hachoir_metadata.register import registerAllItems +from hachoir_metadata.register import registerAllItems extractors = {} diff --git a/lib/hachoir_metadata/metadata_item.py b/lib/hachoir_metadata/metadata_item.py index 4b5573af..bddd3b07 100644 --- a/lib/hachoir_metadata/metadata_item.py +++ b/lib/hachoir_metadata/metadata_item.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.tools import makeUnicode, normalizeNewline -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_metadata import config -from lib.hachoir_metadata.setter import normalizeString +from hachoir_core.tools import makeUnicode, normalizeNewline +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_metadata import config +from hachoir_metadata.setter import normalizeString MIN_PRIORITY = 100 MAX_PRIORITY = 999 diff --git a/lib/hachoir_metadata/misc.py b/lib/hachoir_metadata/misc.py index 67647784..c6bbe97f 100644 --- a/lib/hachoir_metadata/misc.py +++ b/lib/hachoir_metadata/misc.py @@ -1,11 +1,11 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.container import SwfFile -from lib.hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile -from lib.hachoir_core.field import isString -from lib.hachoir_core.error import warning -from lib.hachoir_parser import guessParser -from lib.hachoir_metadata.setter import normalizeString +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_metadata.safe import fault_tolerant +from hachoir_parser.container import SwfFile +from hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile +from hachoir_core.field import isString +from hachoir_core.error import warning +from hachoir_parser import guessParser +from hachoir_metadata.setter import normalizeString class TorrentMetadata(RootMetadata): KEY_TO_ATTR = { @@ -109,45 +109,42 @@ class OLE2_Metadata(RootMetadata): def extract(self, ole2): self._extract(ole2) - def _extract(self, fieldset, main_document=True): - if main_document: - # _feedAll() is needed to make sure that we get all root[*] fragments + def _extract(self, fieldset): + try: fieldset._feedAll() - if "root[0]" in fieldset: - self.useRoot(fieldset["root[0]"]) - doc_summary = self.getField(fieldset, main_document, "doc_summary[0]") + except StopIteration: + pass + if "root[0]" in fieldset: + self._extract(self.getFragment(fieldset["root[0]"])) + doc_summary = self.getField(fieldset, "doc_summary[0]") if doc_summary: self.useSummary(doc_summary, True) - word_doc = self.getField(fieldset, main_document, "word_doc[0]") + word_doc = self.getField(fieldset, "word_doc[0]") if word_doc: 
self.useWordDocument(word_doc) - summary = self.getField(fieldset, main_document, "summary[0]") + summary = self.getField(fieldset, "summary[0]") if summary: self.useSummary(summary, False) - @fault_tolerant - def useRoot(self, root): - stream = root.getSubIStream() + def getFragment(self, frag): + stream = frag.getSubIStream() ministream = guessParser(stream) if not ministream: warning("Unable to create the OLE2 mini stream parser!") - return - self._extract(ministream, main_document=False) + return frag + return ministream - def getField(self, fieldset, main_document, name): - if name not in fieldset: - return None + def getField(self, fieldset, name): # _feedAll() is needed to make sure that we get all fragments # eg. summary[0], summary[1], ..., summary[n] - fieldset._feedAll() + try: + fieldset._feedAll() + except StopIteration: + pass + if name not in fieldset: + return None field = fieldset[name] - if main_document: - stream = field.getSubIStream() - field = guessParser(stream) - if not field: - warning("Unable to create the OLE2 parser for %s!" % name) - return None - return field + return self.getFragment(field) @fault_tolerant def useSummary(self, summary, is_doc_summary): @@ -161,7 +158,7 @@ class OLE2_Metadata(RootMetadata): @fault_tolerant def useWordDocument(self, doc): - self.comment = "Encrypted: %s" % doc["fEncrypted"].value + self.comment = "Encrypted: %s" % doc["FIB/fEncrypted"].value @fault_tolerant def useProperty(self, summary, property, is_doc_summary): diff --git a/lib/hachoir_metadata/program.py b/lib/hachoir_metadata/program.py index 14027d50..a524cee6 100644 --- a/lib/hachoir_metadata/program.py +++ b/lib/hachoir_metadata/program.py @@ -1,6 +1,6 @@ -from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor -from lib.hachoir_parser.program import ExeFile -from lib.hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_metadata.metadata import RootMetadata, registerExtractor +from hachoir_parser.program import ExeFile +from hachoir_metadata.safe import fault_tolerant, getValue class ExeMetadata(RootMetadata): KEY_TO_ATTR = { diff --git a/lib/hachoir_metadata/qt/__init__.py b/lib/hachoir_metadata/qt/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/hachoir_metadata/qt/dialog.ui b/lib/hachoir_metadata/qt/dialog.ui deleted file mode 100644 index 498a8dae..00000000 --- a/lib/hachoir_metadata/qt/dialog.ui +++ /dev/null @@ -1,64 +0,0 @@ - - Form - - - - 0 - 0 - 441 - 412 - - - - hachoir-metadata - - - - - - - - Open - - - - - - - - 0 - 0 - - - - - - - - - - true - - - false - - - 0 - - - 0 - - - - - - - Quit - - - - - - - - diff --git a/lib/hachoir_metadata/qt/dialog_ui.py b/lib/hachoir_metadata/qt/dialog_ui.py deleted file mode 100644 index 970257cf..00000000 --- a/lib/hachoir_metadata/qt/dialog_ui.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- - -# Form implementation generated from reading ui file 'hachoir_metadata/qt/dialog.ui' -# -# Created: Mon Jul 26 03:10:06 2010 -# by: PyQt4 UI code generator 4.7.3 -# -# WARNING! All changes made in this file will be lost! 
- -from PyQt4 import QtCore, QtGui - -class Ui_Form(object): - def setupUi(self, Form): - Form.setObjectName("Form") - Form.resize(441, 412) - self.verticalLayout = QtGui.QVBoxLayout(Form) - self.verticalLayout.setObjectName("verticalLayout") - self.horizontalLayout_2 = QtGui.QHBoxLayout() - self.horizontalLayout_2.setObjectName("horizontalLayout_2") - self.open_button = QtGui.QPushButton(Form) - self.open_button.setObjectName("open_button") - self.horizontalLayout_2.addWidget(self.open_button) - self.files_combo = QtGui.QComboBox(Form) - sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.files_combo.sizePolicy().hasHeightForWidth()) - self.files_combo.setSizePolicy(sizePolicy) - self.files_combo.setObjectName("files_combo") - self.horizontalLayout_2.addWidget(self.files_combo) - self.verticalLayout.addLayout(self.horizontalLayout_2) - self.metadata_table = QtGui.QTableWidget(Form) - self.metadata_table.setAlternatingRowColors(True) - self.metadata_table.setShowGrid(False) - self.metadata_table.setRowCount(0) - self.metadata_table.setColumnCount(0) - self.metadata_table.setObjectName("metadata_table") - self.metadata_table.setColumnCount(0) - self.metadata_table.setRowCount(0) - self.verticalLayout.addWidget(self.metadata_table) - self.quit_button = QtGui.QPushButton(Form) - self.quit_button.setObjectName("quit_button") - self.verticalLayout.addWidget(self.quit_button) - - self.retranslateUi(Form) - QtCore.QMetaObject.connectSlotsByName(Form) - - def retranslateUi(self, Form): - Form.setWindowTitle(QtGui.QApplication.translate("Form", "hachoir-metadata", None, QtGui.QApplication.UnicodeUTF8)) - self.open_button.setText(QtGui.QApplication.translate("Form", "Open", None, QtGui.QApplication.UnicodeUTF8)) - self.quit_button.setText(QtGui.QApplication.translate("Form", "Quit", None, QtGui.QApplication.UnicodeUTF8)) - diff --git a/lib/hachoir_metadata/register.py b/lib/hachoir_metadata/register.py index 9916d36f..3cbde86d 100644 --- a/lib/hachoir_metadata/register.py +++ b/lib/hachoir_metadata/register.py @@ -1,17 +1,17 @@ -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import ( +from hachoir_core.i18n import _ +from hachoir_core.tools import ( humanDuration, humanBitRate, humanFrequency, humanBitSize, humanFilesize, humanDatetime) -from lib.hachoir_core.language import Language -from lib.hachoir_metadata.filter import Filter, NumberFilter, DATETIME_FILTER +from hachoir_core.language import Language +from hachoir_metadata.filter import Filter, NumberFilter, DATETIME_FILTER from datetime import date, datetime, timedelta -from lib.hachoir_metadata.formatter import ( +from hachoir_metadata.formatter import ( humanAudioChannel, humanFrameRate, humanComprRate, humanAltitude, humanPixelSize, humanDPI) -from lib.hachoir_metadata.setter import ( +from hachoir_metadata.setter import ( setDatetime, setTrackNumber, setTrackTotal, setLanguage) -from lib.hachoir_metadata.metadata_item import Data +from hachoir_metadata.metadata_item import Data MIN_SAMPLE_RATE = 1000 # 1 kHz MAX_SAMPLE_RATE = 192000 # 192 kHz diff --git a/lib/hachoir_metadata/riff.py b/lib/hachoir_metadata/riff.py index e3bfa6f4..adcc0bd9 100644 --- a/lib/hachoir_metadata/riff.py +++ b/lib/hachoir_metadata/riff.py @@ -2,13 +2,13 @@ Extract metadata from RIFF file format: AVI video and WAV sound. 
""" -from lib.hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor -from lib.hachoir_metadata.safe import fault_tolerant, getValue -from lib.hachoir_parser.container.riff import RiffFile -from lib.hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO -from lib.hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds -from lib.hachoir_core.i18n import _ -from lib.hachoir_metadata.audio import computeComprRate as computeAudioComprRate +from hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor +from hachoir_metadata.safe import fault_tolerant, getValue +from hachoir_parser.container.riff import RiffFile +from hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO +from hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds +from hachoir_core.i18n import _ +from hachoir_metadata.audio import computeComprRate as computeAudioComprRate from datetime import timedelta class RiffMetadata(MultipleMetadata): diff --git a/lib/hachoir_metadata/safe.py b/lib/hachoir_metadata/safe.py index 708a3c2f..e1d91abb 100644 --- a/lib/hachoir_metadata/safe.py +++ b/lib/hachoir_metadata/safe.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.error import HACHOIR_ERRORS, warning +from hachoir_core.error import HACHOIR_ERRORS, warning def fault_tolerant(func, *args): def safe_func(*args, **kw): diff --git a/lib/hachoir_metadata/setter.py b/lib/hachoir_metadata/setter.py index 77ecf668..41da4140 100644 --- a/lib/hachoir_metadata/setter.py +++ b/lib/hachoir_metadata/setter.py @@ -1,10 +1,10 @@ from datetime import date, datetime import re -from lib.hachoir_core.language import Language +from hachoir_core.language import Language from locale import setlocale, LC_ALL from time import strptime -from lib.hachoir_metadata.timezone import createTimezone -from lib.hachoir_metadata import config +from hachoir_metadata.timezone import createTimezone +from hachoir_metadata import config NORMALIZE_REGEX = re.compile("[-/.: ]+") YEAR_REGEX1 = re.compile("^([0-9]{4})$") diff --git a/lib/hachoir_metadata/video.py b/lib/hachoir_metadata/video.py index e7cdc682..5fcb2dd1 100644 --- a/lib/hachoir_metadata/video.py +++ b/lib/hachoir_metadata/video.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.field import MissingField -from lib.hachoir_metadata.metadata import (registerExtractor, +from hachoir_core.field import MissingField +from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) -from lib.hachoir_metadata.metadata_item import QUALITY_GOOD -from lib.hachoir_metadata.safe import fault_tolerant -from lib.hachoir_parser.video import MovFile, AsfFile, FlvFile -from lib.hachoir_parser.video.asf import Descriptor as ASF_Descriptor -from lib.hachoir_parser.container import MkvFile -from lib.hachoir_parser.container.mkv import dateToDatetime -from lib.hachoir_core.i18n import _ -from lib.hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds +from hachoir_metadata.metadata_item import QUALITY_GOOD +from hachoir_metadata.safe import fault_tolerant +from hachoir_parser.video import MovFile, AsfFile, FlvFile +from hachoir_parser.video.asf import Descriptor as ASF_Descriptor +from hachoir_parser.container import MkvFile +from hachoir_parser.container.mkv import dateToDatetime +from hachoir_core.i18n import _ +from hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds from datetime import timedelta class MkvMetadata(MultipleMetadata): @@ -59,9 +59,10 @@ class MkvMetadata(MultipleMetadata): def 
trackCommon(self, track, meta): if "Name/unicode" in track: meta.title = track["Name/unicode"].value - if "Language/string" in track \ - and track["Language/string"].value not in ("mis", "und"): + if "Language/string" in track: meta.language = track["Language/string"].value + else: + meta.language = "eng" def processVideo(self, track): video = Metadata(self) @@ -222,7 +223,7 @@ class MovMetadata(RootMetadata): self.last_modification = hdr["lastmod_date"].value self.duration = timedelta(seconds=float(hdr["duration"].value) / hdr["time_scale"].value) self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value*100) - self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100//255) + self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100) @fault_tolerant def processTrackHeader(self, hdr): diff --git a/lib/hachoir_parser/__init__.py b/lib/hachoir_parser/__init__.py index 0d1e0469..1b9860ab 100644 --- a/lib/hachoir_parser/__init__.py +++ b/lib/hachoir_parser/__init__.py @@ -1,7 +1,7 @@ -from lib.hachoir_parser.version import __version__ -from lib.hachoir_parser.parser import ValidateError, HachoirParser, Parser -from lib.hachoir_parser.parser_list import ParserList, HachoirParserList -from lib.hachoir_parser.guess import (QueryParser, guessParser, createParser) -from lib.hachoir_parser import (archive, audio, container, +from hachoir_parser.version import __version__ +from hachoir_parser.parser import ValidateError, HachoirParser, Parser +from hachoir_parser.parser_list import ParserList, HachoirParserList +from hachoir_parser.guess import (QueryParser, guessParser, createParser) +from hachoir_parser import (archive, audio, container, file_system, image, game, misc, network, program, video) diff --git a/lib/hachoir_parser/archive/__init__.py b/lib/hachoir_parser/archive/__init__.py index 86fbb9eb..46103c1a 100644 --- a/lib/hachoir_parser/archive/__init__.py +++ b/lib/hachoir_parser/archive/__init__.py @@ -1,12 +1,13 @@ -from lib.hachoir_parser.archive.ace import AceFile -from lib.hachoir_parser.archive.ar import ArchiveFile -from lib.hachoir_parser.archive.bzip2_parser import Bzip2Parser -from lib.hachoir_parser.archive.cab import CabFile -from lib.hachoir_parser.archive.gzip_parser import GzipParser -from lib.hachoir_parser.archive.tar import TarFile -from lib.hachoir_parser.archive.zip import ZipFile -from lib.hachoir_parser.archive.rar import RarFile -from lib.hachoir_parser.archive.rpm import RpmFile -from lib.hachoir_parser.archive.sevenzip import SevenZipParser -from lib.hachoir_parser.archive.mar import MarFile - +from hachoir_parser.archive.ace import AceFile +from hachoir_parser.archive.ar import ArchiveFile +from hachoir_parser.archive.bzip2_parser import Bzip2Parser +from hachoir_parser.archive.cab import CabFile +from hachoir_parser.archive.gzip_parser import GzipParser +from hachoir_parser.archive.tar import TarFile +from hachoir_parser.archive.zip import ZipFile +from hachoir_parser.archive.rar import RarFile +from hachoir_parser.archive.rpm import RpmFile +from hachoir_parser.archive.sevenzip import SevenZipParser +from hachoir_parser.archive.mar import MarFile +from hachoir_parser.archive.mozilla_ar import MozillaArchive +from hachoir_parser.archive.zlib import ZlibData diff --git a/lib/hachoir_parser/archive/ace.py b/lib/hachoir_parser/archive/ace.py index ff65bbb6..03652920 100644 --- a/lib/hachoir_parser/archive/ace.py +++ b/lib/hachoir_parser/archive/ace.py @@ -11,15 +11,15 @@ Author: Christophe Gisquet Creation date: 19 january 2006 
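A minimal sketch of driving these archive parsers through the guess helpers re-exported a few hunks above in hachoir_parser/__init__.py (illustrative only, not SickGear code; sample.cab is a hypothetical input file):

    from hachoir_parser import createParser, guessParser
    parser = createParser(u"sample.cab")   # picks a registered parser by magic/extension
    # or, for an already-open Hachoir input stream:
    # parser = guessParser(input_stream)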
""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, Bits, NullBits, RawBytes, Enum, UInt8, UInt16, UInt32, PascalString8, PascalString16, String, TimeDateMSDOS32) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.msdos import MSDOSFileAttr32 +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.msdos import MSDOSFileAttr32 MAGIC = "**ACE**" diff --git a/lib/hachoir_parser/archive/ar.py b/lib/hachoir_parser/archive/ar.py index e314e9a1..421cdc53 100644 --- a/lib/hachoir_parser/archive/ar.py +++ b/lib/hachoir_parser/archive/ar.py @@ -2,10 +2,10 @@ GNU ar archive : archive file (.a) and Debian (.deb) archive. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, String, RawBytes, UnixLine) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN class ArchiveFileEntry(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/archive/bzip2_parser.py b/lib/hachoir_parser/archive/bzip2_parser.py index 50760b7d..c7df9ea7 100644 --- a/lib/hachoir_parser/archive/bzip2_parser.py +++ b/lib/hachoir_parser/archive/bzip2_parser.py @@ -1,14 +1,18 @@ """ BZIP2 archive file -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, String, - Bytes, Character, UInt8, UInt32, CompressedField) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.tools import paddingSize +from hachoir_core.field import (Field, FieldSet, GenericVector, + ParserError, String, + PaddingBits, Bit, Bits, Character, + UInt32, Enum, CompressedField) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.archive.zlib import build_tree, HuffmanCode try: from bz2 import BZ2Decompressor @@ -27,6 +31,152 @@ try: except ImportError: has_deflate = False +class ZeroTerminatedNumber(Field): + """Zero (bit) terminated number: e.g. 
11110 is 4.""" + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + while True: + bit = stream.readBits(addr, 1, endian) + addr += 1 + self._size += 1 + if not bit: + break + value += 1 + self._value = value + def createValue(self): + return self._value + +def move_to_front(l, c): + l[:] = l[c:c+1] + l[0:c] + l[c+1:] + +class Bzip2Bitmap(FieldSet): + def __init__(self, parent, name, nb_items, start_index, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.nb_items = nb_items + self.start_index = start_index + + def createFields(self): + for i in xrange(self.start_index, self.start_index+self.nb_items): + yield Bit(self, "symbol_used[%i]"%i, "Is the symbol %i (%r) used?"%(i, chr(i))) + +class Bzip2Lengths(FieldSet): + def __init__(self, parent, name, symbols, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.symbols = symbols + + def createFields(self): + yield Bits(self, "start_length", 5) + length = self["start_length"].value + lengths = [] + for i in xrange(self.symbols): + while True: + bit = Bit(self, "change_length[%i][]"%i, "Should the length be changed for symbol %i?"%i) + yield bit + if not bit.value: + break + else: + bit = Enum(Bit(self, "length_decrement[%i][]"%i, "Decrement the value?"), {True: "Decrement", False: "Increment"}) + yield bit + if bit.value: + length -= 1 + else: + length += 1 + lengths.append(length) + self.final_length = length + self.tree = build_tree(lengths) + +class Bzip2Selectors(FieldSet): + def __init__(self, parent, name, ngroups, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.groups = range(ngroups) + + def createFields(self): + for i in xrange(self["../selectors_used"].value): + field = ZeroTerminatedNumber(self, "selector_list[]") + move_to_front(self.groups, field.value) + field.realvalue = self.groups[0] + field._description = "MTF'ed selector index: raw value %i, real value %i"%(field.value, field.realvalue) + yield field + +class Bzip2Block(FieldSet): + def createFields(self): + yield textHandler(Bits(self, "blockheader", 48, "Block header"), hexadecimal) + if self["blockheader"].value != 0x314159265359: # pi + raise ParserError("Invalid block header!") + yield textHandler(UInt32(self, "crc32", "CRC32 for this block"), hexadecimal) + yield Bit(self, "randomized", "Is this block randomized?") + yield Bits(self, "orig_bwt_pointer", 24, "Starting pointer into BWT after untransform") + yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used', "Bitmap showing which blocks (representing 16 literals each) are in use") + symbols_used = [] + for index, block_used in enumerate(self["huffman_used_map"].array('block_used')): + if block_used.value: + start_index = index*16 + field = Bzip2Bitmap(self, "huffman_used_bitmap[%i]"%index, 16, start_index, "Bitmap for block %i (literals %i to %i) showing which symbols are in use"%(index, start_index, start_index + 15)) + yield field + for i, used in enumerate(field): + if used.value: + symbols_used.append(start_index + i) + yield Bits(self, "huffman_groups", 3, "Number of different Huffman tables in use") + yield Bits(self, "selectors_used", 15, "Number of times the Huffman tables are switched") + yield Bzip2Selectors(self, "selectors_list", self["huffman_groups"].value) + trees = [] + for group in xrange(self["huffman_groups"].value): + field = 
Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used)+2) + yield field + trees.append(field.tree) + counter = 0 + rle_run = 0 + selector_tree = None + while True: + if counter%50 == 0: + select_id = self["selectors_list"].array("selector_list")[counter//50].realvalue + selector_tree = trees[select_id] + field = HuffmanCode(self, "huffman_code[]", selector_tree) + if field.realvalue in [0, 1]: + # RLE codes + if rle_run == 0: + rle_power = 1 + rle_run += (field.realvalue + 1) * rle_power + rle_power <<= 1 + field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (field.realvalue, chr(symbols_used[0]), rle_run, field.value) + elif field.realvalue == len(symbols_used)+1: + field._description = "Block Terminator (%i) (Huffman Code %i)"%(field.realvalue, field.value) + yield field + break + else: + rle_run = 0 + move_to_front(symbols_used, field.realvalue-1) + field._description = "Literal %r (value %i) (Huffman Code %i)"%(chr(symbols_used[0]), field.realvalue, field.value) + yield field + if field.realvalue == len(symbols_used)+1: + break + counter += 1 + +class Bzip2Stream(FieldSet): + START_BLOCK = 0x314159265359 # pi + END_STREAM = 0x177245385090 # sqrt(pi) + def createFields(self): + end = False + while not end: + marker = self.stream.readBits(self.absolute_address + self.current_size, 48, self.endian) + if marker == self.START_BLOCK: + yield Bzip2Block(self, "block[]") + elif marker == self.END_STREAM: + yield textHandler(Bits(self, "stream_end", 48, "End-of-stream marker"), hexadecimal) + yield textHandler(UInt32(self, "crc32", "CRC32 for entire stream"), hexadecimal) + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) + end = True + else: + raise ParserError("Invalid marker 0x%02X!"%marker) + class Bzip2Parser(Parser): PARSER_TAGS = { "id": "bzip2", @@ -37,7 +187,7 @@ class Bzip2Parser(Parser): "magic": (('BZh', 0),), "description": "bzip2 archive" } - endian = LITTLE_ENDIAN + endian = BIG_ENDIAN def validate(self): if self.stream.readBytes(0, 3) != 'BZh': @@ -50,18 +200,6 @@ class Bzip2Parser(Parser): yield String(self, "id", 3, "Identifier (BZh)", charset="ASCII") yield Character(self, "blocksize", "Block size (KB of memory needed to uncompress)") - yield UInt8(self, "blockheader", "Block header") - if self["blockheader"].value == 0x17: - yield String(self, "id2", 4, "Identifier2 (re8P)", charset="ASCII") - yield UInt8(self, "id3", "Identifier3 (0x90)") - elif self["blockheader"].value == 0x31: - yield String(self, "id2", 5, "Identifier 2 (AY&SY)", charset="ASCII") - if self["id2"].value != "AY&SY": - raise ParserError("Invalid identifier 2 (AY&SY)!") - else: - raise ParserError("Invalid block header!") - yield textHandler(UInt32(self, "crc32", "CRC32"), hexadecimal) - if self._size is None: # TODO: is it possible to handle piped input? raise NotImplementedError @@ -73,7 +211,7 @@ class Bzip2Parser(Parser): break else: filename = None - data = Bytes(self, "file", size) + data = Bzip2Stream(self, "file", size=size*8) if has_deflate: CompressedField(self, Bunzip2) def createInputStream(**args): diff --git a/lib/hachoir_parser/archive/cab.py b/lib/hachoir_parser/archive/cab.py index ef6ab7c7..66c0eec1 100644 --- a/lib/hachoir_parser/archive/cab.py +++ b/lib/hachoir_parser/archive/cab.py @@ -1,18 +1,24 @@ """ Microsoft Cabinet (CAB) archive. 
-Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation date: 31 january 2007 -""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Enum, +- Microsoft Cabinet SDK + http://msdn2.microsoft.com/en-us/library/ms974336.aspx +""" +from __future__ import absolute_import +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, CString, String, - UInt16, UInt32, Bit, Bits, PaddingBits, NullBits, + UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits, DateTimeMSDOS32, RawBytes) -from lib.hachoir_parser.common.msdos import MSDOSFileAttr16 -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import paddingSize +from hachoir_core.stream import StringInputStream +from hachoir_parser.archive.lzx import LZXStream, lzx_decompress +from hachoir_parser.archive.zlib import DeflateBlock MAX_NB_FOLDER = 30 @@ -26,38 +32,54 @@ COMPRESSION_NAME = { class Folder(FieldSet): def createFields(self): - yield UInt32(self, "off_data", "Offset of data") - yield UInt16(self, "cf_data") + yield UInt32(self, "offset", "Offset to data (from file start)") + yield UInt16(self, "data_blocks", "Number of data blocks which are in this cabinet") yield Enum(Bits(self, "compr_method", 4, "Compression method"), COMPRESSION_NAME) - yield Bits(self, "compr_level", 5, "Compression level") - yield PaddingBits(self, "padding", 7) + if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level + yield PaddingBits(self, "padding[]", 4) + yield Bits(self, "compr_level", 5, "Compression level") + yield PaddingBits(self, "padding[]", 3) + else: + yield PaddingBits(self, "padding[]", 12) + if self["../flags/has_reserved"].value and self["../reserved_folder_size"].value: + yield RawBytes(self, "reserved_folder", self["../reserved_folder_size"].value, "Per-folder reserved area") def createDescription(self): text= "Folder: compression %s" % self["compr_method"].display - if self["compr_method"].value != COMPRESSION_NONE: - text += " (level %u)" % self["compr_level"].value + if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level + text += " (level %u: window size %u)" % (self["compr_level"].value, 2**self["compr_level"].value) return text +class CabFileAttributes(FieldSet): + def createFields(self): + yield Bit(self, "readonly") + yield Bit(self, "hidden") + yield Bit(self, "system") + yield Bits(self, "reserved[]", 2) + yield Bit(self, "archive", "Has the file been modified since the last backup?") + yield Bit(self, "exec", "Run file after extraction?") + yield Bit(self, "name_is_utf", "Is the filename using UTF-8?") + yield Bits(self, "reserved[]", 8) + class File(FieldSet): def createFields(self): yield filesizeHandler(UInt32(self, "filesize", "Uncompressed file size")) - yield UInt32(self, "offset", "File offset after decompression") - yield UInt16(self, "iFolder", "file control id") + yield UInt32(self, "folder_offset", "File offset in uncompressed folder") + yield Enum(UInt16(self, "folder_index", "Containing folder ID (index)"), { + 0xFFFD:"Folder continued from previous cabinet (real folder ID = 0)", + 0xFFFE:"Folder continued to next cabinet (real folder ID = %i)" % (self["../nb_folder"].value - 1), + 0xFFFF:"Folder spanning previous, current and next cabinets (real folder ID = 0)"}) 
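        # Illustrative note, not part of the patch: folder_index values below
        # 0xFFFD index folder[] directly; the sentinels above mark file data
        # that spans cabinets, so a reader resolves them along the lines of
        #   real_id = {0xFFFD: 0, 0xFFFE: nb_folder - 1, 0xFFFF: 0}.get(idx, idx)
        # (real_id, idx and nb_folder are placeholder names for illustration)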
yield DateTimeMSDOS32(self, "timestamp") - yield MSDOSFileAttr16(self, "attributes") - yield CString(self, "filename", charset="ASCII") + yield CabFileAttributes(self, "attributes") + if self["attributes/name_is_utf"].value: + yield CString(self, "filename", charset="UTF-8") + else: + yield CString(self, "filename", charset="ASCII") def createDescription(self): return "File %s (%s)" % ( self["filename"].display, self["filesize"].display) -class Reserved(FieldSet): - def createFields(self): - yield UInt32(self, "size") - size = self["size"].value - if size: - yield RawBytes(self, "data", size) - class Flags(FieldSet): static_size = 16 def createFields(self): @@ -66,6 +88,111 @@ class Flags(FieldSet): yield Bit(self, "has_reserved") yield NullBits(self, "padding", 13) +class FragmentGroup: + def __init__(self, parser): + self.items = [] + self.parser = parser + self.args = {} + + def add(self, item): + self.items.append(item) + + def createInputStream(self): + # FIXME: Use lazy stream creation + data = [] + for item in self.items: + data.append( item["rawdata"].value ) + data = "".join(data) + + # FIXME: Use smarter code to send arguments + self.args["compr_level"] = self.items[0].parent.parent.folder["compr_level"].value + tags = {"class": self.parser, "args": self.args} + tags = tags.iteritems() + return StringInputStream(data, "", tags=tags) + +class CustomFragment(FieldSet): + def __init__(self, parent, name, size, parser, description=None, group=None): + FieldSet.__init__(self, parent, name, description, size=size) + if not group: + group = FragmentGroup(parser) + self.field_size = size + self.group = group + self.group.add(self) + + def createFields(self): + yield RawBytes(self, "rawdata", self.field_size//8) + + def _createInputStream(self, **args): + return self.group.createInputStream() + +class DataBlock(FieldSet): + def __init__(self, *args, **kwargs): + FieldSet.__init__(self, *args, **kwargs) + size = (self["size"].value + 8) * 8 # +8 for header values + if self["/flags/has_reserved"].value: + size += self["/reserved_data_size"].value * 8 + self._size = size + + def createFields(self): + yield textHandler(UInt32(self, "crc32"), hexadecimal) + yield UInt16(self, "size") + yield UInt16(self, "uncompressed_size", "If this is 0, this block is continued in a subsequent cabinet") + if self["/flags/has_reserved"].value and self["/reserved_data_size"].value: + yield RawBytes(self, "reserved_data", self["/reserved_data_size"].value, "Per-datablock reserved area") + compr_method = self.parent.folder["compr_method"].value + if compr_method == 0: # Uncompressed + yield RawBytes(self, "data", self["size"].value, "Folder Data") + self.parent.uncompressed_data += self["data"].value + elif compr_method == 1: # MSZIP + yield String(self, "mszip_signature", 2, "MSZIP Signature (CK)") + yield DeflateBlock(self, "deflate_block", self.parent.uncompressed_data) + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) + self.parent.uncompressed_data = self["deflate_block"].uncomp_data + elif compr_method == 2: # Quantum + yield RawBytes(self, "compr_data", self["size"].value, "Compressed Folder Data") + elif compr_method == 3: # LZX + group = getattr(self.parent.folder, "lzx_group", None) + field = CustomFragment(self, "data", self["size"].value*8, LZXStream, "LZX data fragment", group) + self.parent.folder.lzx_group = field.group + yield field + +class FolderParser(Parser): + endian = LITTLE_ENDIAN + def createFields(self): + for file in 
sorted(self.files, key=lambda x:x["folder_offset"].value): + padding = self.seekByte(file["folder_offset"].value) + if padding: + yield padding + yield RawBytes(self, "file[]", file["filesize"].value, file.description) + +class FolderData(FieldSet): + def __init__(self, parent, name, folder, files, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + def createInputStream(cis, source=None, **args): + stream = cis(source=source) + tags = args.setdefault("tags",[]) + tags.extend(stream.tags) + tags.append(( "class", FolderParser )) + tags.append(( "args", {'files': files} )) + for unused in self: + pass + if folder["compr_method"].value == 3: # LZX + self.uncompressed_data = lzx_decompress(self["block[0]/data"].getSubIStream(), folder["compr_level"].value) + return StringInputStream(self.uncompressed_data, source=source, **args) + self.setSubIStream(createInputStream) + self.files = files + self.folder = folder # Folder fieldset + + def createFields(self): + self.uncompressed_data = "" + for index in xrange(self.folder["data_blocks"].value): + block = DataBlock(self, "block[]") + for i in block: + pass + yield block + class CabFile(Parser): endian = LITTLE_ENDIAN MAGIC = "MSCF" @@ -82,8 +209,8 @@ class CabFile(Parser): def validate(self): if self.stream.readBytes(0, 4) != self.MAGIC: return "Invalid magic" - if self["cab_version"].value != 0x0103: - return "Unknown version (%s)" % self["cab_version"].display + if self["major_version"].value != 1 or self["minor_version"].value != 3: + return "Unknown version (%i.%i)" % (self["major_version"].value, self["minor_version"].value) if not (1 <= self["nb_folder"].value <= MAX_NB_FOLDER): return "Invalid number of folder (%s)" % self["nb_folder"].value return True @@ -95,26 +222,54 @@ class CabFile(Parser): yield textHandler(UInt32(self, "fld_checksum", "Folders checksum (0 if not used)"), hexadecimal) yield UInt32(self, "off_file", "Offset of first file") yield textHandler(UInt32(self, "files_checksum", "Files checksum (0 if not used)"), hexadecimal) - yield textHandler(UInt16(self, "cab_version", "Cabinet version"), hexadecimal) + yield UInt8(self, "minor_version", "Minor version (should be 3)") + yield UInt8(self, "major_version", "Major version (should be 1)") yield UInt16(self, "nb_folder", "Number of folders") yield UInt16(self, "nb_files", "Number of files") yield Flags(self, "flags") yield UInt16(self, "setid") - yield UInt16(self, "number", "Zero-based cabinet number") + yield UInt16(self, "cabinet_serial", "Zero-based cabinet number") - # --- TODO: Support flags if self["flags/has_reserved"].value: - yield Reserved(self, "reserved") - #(3) Previous cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASPREV - #(4) Previous disk name, if CAB_HEADER.flags & CAB_FLAG_HASPREV - #(5) Next cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT - #(6) Next disk name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT - # ---- + yield UInt16(self, "reserved_header_size", "Size of per-cabinet reserved area") + yield UInt8(self, "reserved_folder_size", "Size of per-folder reserved area") + yield UInt8(self, "reserved_data_size", "Size of per-datablock reserved area") + if self["reserved_header_size"].value: + yield RawBytes(self, "reserved_header", self["reserved_header_size"].value, "Per-cabinet reserved area") + if self["flags/has_previous"].value: + yield CString(self, "previous_cabinet", "File name of previous cabinet", charset="ASCII") + yield CString(self, "previous_disk", "Description of disk/media on which previous cabinet resides", 
charset="ASCII") + if self["flags/has_next"].value: + yield CString(self, "next_cabinet", "File name of next cabinet", charset="ASCII") + yield CString(self, "next_disk", "Description of disk/media on which next cabinet resides", charset="ASCII") + folders = [] + files = [] for index in xrange(self["nb_folder"].value): - yield Folder(self, "folder[]") + folder = Folder(self, "folder[]") + yield folder + folders.append(folder) for index in xrange(self["nb_files"].value): - yield File(self, "file[]") + file = File(self, "file[]") + yield file + files.append(file) + + folders = sorted(enumerate(folders), key=lambda x:x[1]["offset"].value) + + for i in xrange(len(folders)): + index, folder = folders[i] + padding = self.seekByte(folder["offset"].value) + if padding: + yield padding + files = [] + for file in files: + if file["folder_index"].value == index: + files.append(file) + if i+1 == len(folders): + size = (self.size // 8) - folder["offset"].value + else: + size = (folders[i+1][1]["offset"].value) - folder["offset"].value + yield FolderData(self, "folder_data[%i]" % index, folder, files, size=size*8) end = self.seekBit(self.size, "endraw") if end: diff --git a/lib/hachoir_parser/archive/gzip_parser.py b/lib/hachoir_parser/archive/gzip_parser.py index 5f63cbc1..c0820332 100644 --- a/lib/hachoir_parser/archive/gzip_parser.py +++ b/lib/hachoir_parser/archive/gzip_parser.py @@ -4,14 +4,14 @@ GZIP archive parser. Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( UInt8, UInt16, UInt32, Enum, TimestampUnix32, Bit, CString, SubFile, NullBits, Bytes, RawBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.deflate import Deflate +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.deflate import Deflate class GzipParser(Parser): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/archive/lzx.py b/lib/hachoir_parser/archive/lzx.py new file mode 100644 index 00000000..39f5a6ef --- /dev/null +++ b/lib/hachoir_parser/archive/lzx.py @@ -0,0 +1,267 @@ +"""LZX data stream parser. + +Also includes a decompression function (slow!!) which can decompress +LZX data stored in a Hachoir stream. 
+ +Author: Robert Xiao +Creation date: July 18, 2007 +""" +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, + UInt32, Bit, Bits, PaddingBits, + RawBytes, ParserError) +from hachoir_core.endian import MIDDLE_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import paddingSize, alignValue +from hachoir_parser.archive.zlib import build_tree, HuffmanCode, extend_data +from hachoir_core.bits import str2long +import new # for instancemethod + +class LZXPreTreeEncodedTree(FieldSet): + def __init__(self, parent, name, num_elements, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.num_elements = num_elements + + def createFields(self): + for i in xrange(20): + yield Bits(self, "pretree_lengths[]", 4) + pre_tree = build_tree([self['pretree_lengths[%d]'%x].value for x in xrange(20)]) + if not hasattr(self.root, "lzx_tree_lengths_"+self.name): + self.lengths = [0] * self.num_elements + setattr(self.root, "lzx_tree_lengths_"+self.name, self.lengths) + else: + self.lengths = getattr(self.root, "lzx_tree_lengths_"+self.name) + i = 0 + while i < self.num_elements: + field = HuffmanCode(self, "tree_code[]", pre_tree) + if field.realvalue <= 16: + self.lengths[i] = (self.lengths[i] - field.realvalue) % 17 + field._description = "Literal tree delta length %i (new length value %i for element %i)" % ( + field.realvalue, self.lengths[i], i) + i += 1 + yield field + elif field.realvalue == 17: + field._description = "Tree Code 17: Zeros for 4-19 elements" + yield field + extra = Bits(self, "extra[]", 4) + zeros = 4 + extra.value + extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1) + yield extra + self.lengths[i:i+zeros] = [0] * zeros + i += zeros + elif field.realvalue == 18: + field._description = "Tree Code 18: Zeros for 20-51 elements" + yield field + extra = Bits(self, "extra[]", 5) + zeros = 20 + extra.value + extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1) + yield extra + self.lengths[i:i+zeros] = [0] * zeros + i += zeros + elif field.realvalue == 19: + field._description = "Tree Code 19: Same code for 4-5 elements" + yield field + extra = Bits(self, "extra[]", 1) + run = 4 + extra.value + extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (run, i, i+run-1) + yield extra + newfield = HuffmanCode(self, "tree_code[]", pre_tree) + assert newfield.realvalue <= 16 + newfield._description = "Literal tree delta length %i (new length value %i for elements %i through %i)" % ( + newfield.realvalue, self.lengths[i], i, i+run-1) + self.lengths[i:i+run] = [(self.lengths[i] - newfield.realvalue) % 17] * run + i += run + yield newfield + +class LZXBlock(FieldSet): + WINDOW_SIZE = {15:30, + 16:32, + 17:34, + 18:36, + 19:38, + 20:42, + 21:50} + POSITION_SLOTS = {0:(0,0,0), + 1:(1,1,0), + 2:(2,2,0), + 3:(3,3,0), + 4:(4,5,1), + 5:(6,7,1), + 6:(8,11,2), + 7:(12,15,2), + 8:(16,23,3), + 9:(24,31,3), + 10:(32,47,4), + 11:(48,63,4), + 12:(64,95,5), + 13:(96,127,5), + 14:(128,191,6), + 15:(192,255,6), + 16:(256,383,7), + 17:(384,511,7), + 18:(512,767,8), + 19:(768,1023,8), + 20:(1024,1535,9), + 21:(1536,2047,9), + 22:(2048,3071,10), + 23:(3072,4095,10), + 24:(4096,6143,11), + 25:(6144,8191,11), + 26:(8192,12287,12), + 27:(12288,16383,12), + 28:(16384,24575,13), + 29:(24576,32767,13), + 30:(32768,49151,14), + 31:(49152,65535,14), + 32:(65536,98303,15), + 33:(98304,131071,15), + 34:(131072,196607,16), + 35:(196608,262143,16), + 
36:(262144,393215,17), + 37:(393216,524287,17), + 38:(524288,655359,17), + 39:(655360,786431,17), + 40:(786432,917503,17), + 41:(917504,1048575,17), + 42:(1048576,1179647,17), + 43:(1179648,1310719,17), + 44:(1310720,1441791,17), + 45:(1441792,1572863,17), + 46:(1572864,1703935,17), + 47:(1703936,1835007,17), + 48:(1835008,1966079,17), + 49:(1966080,2097151,17), + } + def createFields(self): + yield Bits(self, "block_type", 3) + yield Bits(self, "block_size", 24) + self.uncompressed_size = self["block_size"].value + self.compression_level = self.root.compr_level + self.window_size = self.WINDOW_SIZE[self.compression_level] + self.block_type = self["block_type"].value + curlen = len(self.parent.uncompressed_data) + if self.block_type in (1, 2): # Verbatim or aligned offset block + if self.block_type == 2: + for i in xrange(8): + yield Bits(self, "aligned_len[]", 3) + aligned_tree = build_tree([self['aligned_len[%d]'%i].value for i in xrange(8)]) + yield LZXPreTreeEncodedTree(self, "main_tree_start", 256) + yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8) + main_tree = build_tree(self["main_tree_start"].lengths + self["main_tree_rest"].lengths) + yield LZXPreTreeEncodedTree(self, "length_tree", 249) + length_tree = build_tree(self["length_tree"].lengths) + current_decoded_size = 0 + while current_decoded_size < self.uncompressed_size: + if (curlen+current_decoded_size) % 32768 == 0 and (curlen+current_decoded_size) != 0: + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + field = HuffmanCode(self, "main_code[]", main_tree) + if field.realvalue < 256: + field._description = "Literal value %r" % chr(field.realvalue) + current_decoded_size += 1 + self.parent.uncompressed_data += chr(field.realvalue) + yield field + continue + position_header, length_header = divmod(field.realvalue - 256, 8) + info = self.POSITION_SLOTS[position_header] + if info[2] == 0: + if info[0] == 0: + position = self.parent.r0 + field._description = "Position Slot %i, Position [R0] (%i)" % (position_header, position) + elif info[0] == 1: + position = self.parent.r1 + self.parent.r1 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position [R1] (%i)" % (position_header, position) + elif info[0] == 2: + position = self.parent.r2 + self.parent.r2 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position [R2] (%i)" % (position_header, position) + else: + position = info[0] - 2 + self.parent.r2 = self.parent.r1 + self.parent.r1 = self.parent.r0 + self.parent.r0 = position + field._description = "Position Slot %i, Position %i" % (position_header, position) + else: + field._description = "Position Slot %i, Positions %i to %i" % (position_header, info[0] - 2, info[1] - 2) + if length_header == 7: + field._description += ", Length Values 9 and up" + yield field + length_field = HuffmanCode(self, "length_code[]", length_tree) + length = length_field.realvalue + 9 + length_field._description = "Length Code %i, total length %i" % (length_field.realvalue, length) + yield length_field + else: + field._description += ", Length Value %i (Huffman Code %i)"%(length_header + 2, field.value) + yield field + length = length_header + 2 + if info[2]: + if self.block_type == 1 or info[2] < 3: # verbatim + extrafield = Bits(self, "position_extra[%s" % field.name.split('[')[1], info[2]) + position = extrafield.value + info[0] - 2 + extrafield._description = "Position Extra 
Bits (%i), total position %i"%(extrafield.value, position) + yield extrafield + else: # aligned offset + position = info[0] - 2 + if info[2] > 3: + extrafield = Bits(self, "position_verbatim[%s" % field.name.split('[')[1], info[2]-3) + position += extrafield.value*8 + extrafield._description = "Position Verbatim Bits (%i), added position %i"%(extrafield.value, extrafield.value*8) + yield extrafield + if info[2] >= 3: + extrafield = HuffmanCode(self, "position_aligned[%s" % field.name.split('[')[1], aligned_tree) + position += extrafield.realvalue + extrafield._description = "Position Aligned Bits (%i), total position %i"%(extrafield.realvalue, position) + yield extrafield + self.parent.r2 = self.parent.r1 + self.parent.r1 = self.parent.r0 + self.parent.r0 = position + self.parent.uncompressed_data = extend_data(self.parent.uncompressed_data, length, position) + current_decoded_size += length + elif self.block_type == 3: # Uncompressed block + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + else: + yield PaddingBits(self, "padding[]", 16) + self.endian = LITTLE_ENDIAN + yield UInt32(self, "r[]", "New value of R0") + yield UInt32(self, "r[]", "New value of R1") + yield UInt32(self, "r[]", "New value of R2") + self.parent.r0 = self["r[0]"].value + self.parent.r1 = self["r[1]"].value + self.parent.r2 = self["r[2]"].value + yield RawBytes(self, "data", self.uncompressed_size) + self.parent.uncompressed_data+=self["data"].value + if self["block_size"].value % 2: + yield PaddingBits(self, "padding", 8) + else: + raise ParserError("Unknown block type %d!"%self.block_type) + +class LZXStream(Parser): + endian = MIDDLE_ENDIAN + def createFields(self): + self.uncompressed_data = "" + self.r0 = 1 + self.r1 = 1 + self.r2 = 1 + yield Bit(self, "filesize_indicator") + if self["filesize_indicator"].value: + yield UInt32(self, "filesize") + while self.current_size < self.size: + block = LZXBlock(self, "block[]") + yield block + if self.size - self.current_size < 16: + padding = paddingSize(self.address + self.current_size, 16) + if padding: + yield PaddingBits(self, "padding[]", padding) + break + +def lzx_decompress(stream, window_bits): + data = LZXStream(stream) + data.compr_level = window_bits + for unused in data: + pass + return data.uncompressed_data diff --git a/lib/hachoir_parser/archive/mar.py b/lib/hachoir_parser/archive/mar.py index 05be1cbd..6a7e31a7 100644 --- a/lib/hachoir_parser/archive/mar.py +++ b/lib/hachoir_parser/archive/mar.py @@ -7,10 +7,10 @@ Creation date: 2007-03-04 MAX_NB_FILE = 100000 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, String, UInt32, SubFile -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, String, UInt32, SubFile +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal class FileIndex(FieldSet): static_size = 68*8 diff --git a/lib/hachoir_parser/archive/mozilla_ar.py b/lib/hachoir_parser/archive/mozilla_ar.py new file mode 100644 index 00000000..5b18f434 --- /dev/null +++ b/lib/hachoir_parser/archive/mozilla_ar.py @@ -0,0 +1,60 @@ +"""MAR (Mozilla ARchive) parser + +Author: Robert Xiao +Creation date: July 10, 2007 + +""" + +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.field import (RootSeekableFieldSet, 
FieldSet, + String, CString, UInt32, RawBytes) +from hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_core.tools import humanUnixAttributes +from hachoir_parser import HachoirParser + +class IndexEntry(FieldSet): + def createFields(self): + yield UInt32(self, "offset", "Offset in bytes relative to start of archive") + yield filesizeHandler(UInt32(self, "length", "Length in bytes")) + yield displayHandler(UInt32(self, "flags"), humanUnixAttributes) + yield CString(self, "name", "Filename (byte array)") + + def createDescription(self): + return 'File %s, Size %s, Mode %s'%( + self["name"].display, self["length"].display, self["flags"].display) + +class MozillaArchive(HachoirParser, RootSeekableFieldSet): + MAGIC = "MAR1" + PARSER_TAGS = { + "id": "mozilla_ar", + "category": "archive", + "file_ext": ("mar",), + "min_size": (8+4+13)*8, # Header, Index Header, 1 Index Entry + "magic": ((MAGIC, 0),), + "description": "Mozilla Archive", + } + endian = BIG_ENDIAN + + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + + def validate(self): + if self.stream.readBytes(0, 4) != self.MAGIC: + return "Invalid magic" + return True + + def createFields(self): + yield String(self, "magic", 4, "File signature (MAR1)", charset="ASCII") + yield UInt32(self, "index_offset", "Offset to index relative to file start") + self.seekByte(self["index_offset"].value, False) + yield UInt32(self, "index_size", "size of index in bytes") + current_index_size = 0 # bytes + while current_index_size < self["index_size"].value: + # plus 4 compensates for index_size + self.seekByte(self["index_offset"].value + current_index_size + 4, False) + entry = IndexEntry(self, "index_entry[]") + yield entry + current_index_size += entry.size // 8 + self.seekByte(entry["offset"].value, False) + yield RawBytes(self, "file[]", entry["length"].value) diff --git a/lib/hachoir_parser/archive/rar.py b/lib/hachoir_parser/archive/rar.py index 166ae041..2be5887c 100644 --- a/lib/hachoir_parser/archive/rar.py +++ b/lib/hachoir_parser/archive/rar.py @@ -5,15 +5,15 @@ Status: can only read higher-level attructures Author: Christophe Gisquet """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, Bits, Enum, UInt8, UInt16, UInt32, UInt64, String, TimeDateMSDOS32, NullBytes, NullBits, RawBytes) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.msdos import MSDOSFileAttr32 +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.msdos import MSDOSFileAttr32 MAX_FILESIZE = 1000 * 1024 * 1024 diff --git a/lib/hachoir_parser/archive/rpm.py b/lib/hachoir_parser/archive/rpm.py index 60235755..ccb8d2e5 100644 --- a/lib/hachoir_parser/archive/rpm.py +++ b/lib/hachoir_parser/archive/rpm.py @@ -4,14 +4,14 @@ RPM archive parser. Author: Victor Stinner, 1st December 2005. 
""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, UInt64, Enum, NullBytes, Bytes, RawBytes, SubFile, Character, CString, String) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.archive.gzip_parser import GzipParser -from lib.hachoir_parser.archive.bzip2_parser import Bzip2Parser +from hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.archive.gzip_parser import GzipParser +from hachoir_parser.archive.bzip2_parser import Bzip2Parser class ItemContent(FieldSet): format_type = { diff --git a/lib/hachoir_parser/archive/sevenzip.py b/lib/hachoir_parser/archive/sevenzip.py index 91f9716c..7a0148f5 100644 --- a/lib/hachoir_parser/archive/sevenzip.py +++ b/lib/hachoir_parser/archive/sevenzip.py @@ -9,13 +9,13 @@ Author: Olivier SCHWAB Creation date: 6 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (Field, FieldSet, ParserError, GenericVector, Enum, UInt8, UInt32, UInt64, Bytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class SZUInt64(Field): """ diff --git a/lib/hachoir_parser/archive/tar.py b/lib/hachoir_parser/archive/tar.py index bd2dc1ad..08a9040b 100644 --- a/lib/hachoir_parser/archive/tar.py +++ b/lib/hachoir_parser/archive/tar.py @@ -4,11 +4,11 @@ Tar archive parser. Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Enum, UInt8, SubFile, String, NullBytes) -from lib.hachoir_core.tools import humanFilesize, paddingSize, timestampUNIX -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFilesize, paddingSize, timestampUNIX +from hachoir_core.endian import BIG_ENDIAN import re class FileEntry(FieldSet): diff --git a/lib/hachoir_parser/archive/zip.py b/lib/hachoir_parser/archive/zip.py index 98a41293..8271ac93 100644 --- a/lib/hachoir_parser/archive/zip.py +++ b/lib/hachoir_parser/archive/zip.py @@ -5,18 +5,18 @@ Status: can read most important headers Authors: Christophe Gisquet and Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Bit, Bits, Enum, TimeDateMSDOS32, SubFile, UInt8, UInt16, UInt32, UInt64, String, PascalString16, RawBytes) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.deflate import Deflate +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.tools import makeUnicode +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.deflate import Deflate MAX_FILESIZE = 1000 * 1024 * 1024 @@ -80,16 +80,7 @@ class ZipGeneralFlags(FieldSet): # Need the compression info from 
the parent, and that is the byte following method = self.stream.readBits(self.absolute_address+16, 16, LITTLE_ENDIAN) - yield Bits(self, "unused[]", 2, "Unused") - yield Bit(self, "encrypted_central_dir", "Selected data values in the Local Header are masked") - yield Bit(self, "incomplete", "Reserved by PKWARE for enhanced compression.") - yield Bit(self, "uses_unicode", "Filename and comments are in UTF-8") - yield Bits(self, "unused[]", 4, "Unused") - yield Bit(self, "strong_encrypt", "Strong encryption (version >= 50)") - yield Bit(self, "is_patched", "File is compressed with patched data?") - yield Bit(self, "enhanced_deflate", "Reserved for use with method 8") - yield Bit(self, "has_descriptor", - "Compressed data followed by descriptor?") + yield Bit(self, "is_encrypted", "File is encrypted?") if method == 6: yield Bit(self, "use_8k_sliding", "Use 8K sliding dictionary (instead of 4K)") yield Bit(self, "use_3shannon", "Use a 3 Shannon-Fano tree (instead of 2 Shannon-Fano)") @@ -106,7 +97,16 @@ class ZipGeneralFlags(FieldSet): yield Bit(self, "unused[]") else: yield Bits(self, "compression_info", 2) - yield Bit(self, "is_encrypted", "File is encrypted?") + yield Bit(self, "has_descriptor", + "Compressed data followed by descriptor?") + yield Bit(self, "enhanced_deflate", "Reserved for use with method 8") + yield Bit(self, "is_patched", "File is compressed with patched data?") + yield Bit(self, "strong_encrypt", "Strong encryption (version >= 50)") + yield Bits(self, "unused[]", 4, "Unused") + yield Bit(self, "uses_unicode", "Filename and comments are in UTF-8") + yield Bit(self, "incomplete", "Reserved by PKWARE for enhanced compression.") + yield Bit(self, "encrypted_central_dir", "Selected data values in the Local Header are masked") + yield Bits(self, "unused[]", 2, "Unused") class ExtraField(FieldSet): EXTRA_FIELD_ID = { @@ -141,7 +141,12 @@ class ExtraField(FieldSet): size = UInt16(self, "field_data_size", "Extra field data size") yield size if size.value > 0: - yield RawBytes(self, "field_data", size, "Unknown field data") + yield RawBytes(self, "field_data", size.value, "Unknown field data") + +class ExtraFields(FieldSet): + def createFields(self): + while self.current_size < self.size: + yield ExtraField(self, "extra[]") def ZipStartCommonFields(self): yield ZipVersion(self, "version_needed", "Version needed") @@ -179,8 +184,8 @@ class ZipCentralDirectory(FieldSet): yield String(self, "filename", self["filename_length"].value, "Filename", charset=charset) if 0 < self["extra_length"].value: - yield RawBytes(self, "extra", self["extra_length"].value, - "Extra fields") + yield ExtraFields(self, "extra", size=self["extra_length"].value*8, + description="Extra fields") if 0 < self["comment_length"].value: yield String(self, "comment", self["comment_length"].value, "Comment", charset=charset) @@ -278,14 +283,15 @@ class FileEntry(FieldSet): yield filename self.filename = filename.value if self["extra_length"].value: - yield RawBytes(self, "extra", self["extra_length"].value, "Extra") + yield ExtraFields(self, "extra", size=self["extra_length"].value*8, + description="Extra fields") size = self["compressed_size"].value if size > 0: yield self.data(size) elif self["flags/incomplete"].value: for field in self.resync(): yield field - if self["flags/has_descriptor"].value: + if self["flags/has_descriptor"].value and self['crc32'].value == 0: yield ZipDataDescriptor(self, "data_desc", "Data descriptor") def createDescription(self): diff --git a/lib/hachoir_parser/archive/zlib.py 
b/lib/hachoir_parser/archive/zlib.py new file mode 100644 index 00000000..bde94b1d --- /dev/null +++ b/lib/hachoir_parser/archive/zlib.py @@ -0,0 +1,301 @@ +"""Detailed ZLIB parser + +Author: Robert Xiao +Creation date: July 9 2007 + +""" + +from hachoir_parser import Parser +from hachoir_core.field import (Bit, Bits, Field, Int16, UInt32, + Enum, FieldSet, GenericFieldSet, + PaddingBits, ParserError, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import paddingSize, alignValue + +def extend_data(data, length, offset): + """Extend data using a length and an offset.""" + if length >= offset: + new_data = data[-offset:] * (alignValue(length, offset) // offset) + return data + new_data[:length] + else: + return data + data[-offset:-offset+length] + +def build_tree(lengths): + """Build a Huffman tree from a list of lengths. + The ith entry of the input list is the length of the Huffman code corresponding to + integer i, or 0 if the integer i is unused.""" + max_length = max(lengths) + 1 + bit_counts = [0]*max_length + next_code = [0]*max_length + tree = {} + for i in lengths: + if i: + bit_counts[i] += 1 + code = 0 + for i in xrange(1, len(bit_counts)): + next_code[i] = code = (code + bit_counts[i-1]) << 1 + for i, ln in enumerate(lengths): + if ln: + tree[(ln, next_code[ln])] = i + next_code[ln] += 1 + return tree + +class HuffmanCode(Field): + """Huffman code. Uses tree parameter as the Huffman tree.""" + def __init__(self, parent, name, tree, description=None): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + while (self.size, value) not in tree: + if self.size > 256: + raise ParserError("Huffman code too long!") + bit = stream.readBits(addr, 1, endian) + value <<= 1 + value += bit + self._size += 1 + addr += 1 + self.huffvalue = value + self.realvalue = tree[(self.size, value)] + def createValue(self): + return self.huffvalue + +class DeflateBlock(FieldSet): + # code: (min, max, extrabits) + LENGTH_SYMBOLS = {257:(3,3,0), + 258:(4,4,0), + 259:(5,5,0), + 260:(6,6,0), + 261:(7,7,0), + 262:(8,8,0), + 263:(9,9,0), + 264:(10,10,0), + 265:(11,12,1), + 266:(13,14,1), + 267:(15,16,1), + 268:(17,18,1), + 269:(19,22,2), + 270:(23,26,2), + 271:(27,30,2), + 272:(31,34,2), + 273:(35,42,3), + 274:(43,50,3), + 275:(51,58,3), + 276:(59,66,3), + 277:(67,82,4), + 278:(83,98,4), + 279:(99,114,4), + 280:(115,130,4), + 281:(131,162,5), + 282:(163,194,5), + 283:(195,226,5), + 284:(227,257,5), + 285:(258,258,0) + } + DISTANCE_SYMBOLS = {0:(1,1,0), + 1:(2,2,0), + 2:(3,3,0), + 3:(4,4,0), + 4:(5,6,1), + 5:(7,8,1), + 6:(9,12,2), + 7:(13,16,2), + 8:(17,24,3), + 9:(25,32,3), + 10:(33,48,4), + 11:(49,64,4), + 12:(65,96,5), + 13:(97,128,5), + 14:(129,192,6), + 15:(193,256,6), + 16:(257,384,7), + 17:(385,512,7), + 18:(513,768,8), + 19:(769,1024,8), + 20:(1025,1536,9), + 21:(1537,2048,9), + 22:(2049,3072,10), + 23:(3073,4096,10), + 24:(4097,6144,11), + 25:(6145,8192,11), + 26:(8193,12288,12), + 27:(12289,16384,12), + 28:(16385,24576,13), + 29:(24577,32768,13), + } + CODE_LENGTH_ORDER = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + def __init__(self, parent, name, uncomp_data="", *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.uncomp_data = uncomp_data + + def createFields(self): + yield Bit(self, "final", "Is this the final block?") # BFINAL + yield 
Enum(Bits(self, "compression_type", 2), # BTYPE + {0:"None", 1:"Fixed Huffman", 2:"Dynamic Huffman", 3:"Reserved"}) + if self["compression_type"].value == 0: # no compression + padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary + if padding: + yield PaddingBits(self, "padding[]", padding) + yield Int16(self, "len") + yield Int16(self, "nlen", "One's complement of len") + if self["len"].value != ~self["nlen"].value: + raise ParserError("len must be equal to the one's complement of nlen!") + if self["len"].value: # null stored blocks produced by some encoders (e.g. PIL) + yield RawBytes(self, "data", self["len"].value, "Uncompressed data") + return + elif self["compression_type"].value == 1: # Fixed Huffman + length_tree = {} # (size, huffman code): value + distance_tree = {} + for i in xrange(144): + length_tree[(8, i+48)] = i + for i in xrange(144, 256): + length_tree[(9, i+256)] = i + for i in xrange(256, 280): + length_tree[(7, i-256)] = i + for i in xrange(280, 288): + length_tree[(8, i-88)] = i + for i in xrange(32): + distance_tree[(5, i)] = i + elif self["compression_type"].value == 2: # Dynamic Huffman + yield Bits(self, "huff_num_length_codes", 5, "Number of Literal/Length Codes, minus 257") + yield Bits(self, "huff_num_distance_codes", 5, "Number of Distance Codes, minus 1") + yield Bits(self, "huff_num_code_length_codes", 4, "Number of Code Length Codes, minus 4") + code_length_code_lengths = [0]*19 # confusing variable name... + for i in self.CODE_LENGTH_ORDER[:self["huff_num_code_length_codes"].value+4]: + field = Bits(self, "huff_code_length_code[%i]" % i, 3, "Code lengths for the code length alphabet") + yield field + code_length_code_lengths[i] = field.value + code_length_tree = build_tree(code_length_code_lengths) + length_code_lengths = [] + distance_code_lengths = [] + for numcodes, name, lengths in ( + (self["huff_num_length_codes"].value + 257, "length", length_code_lengths), + (self["huff_num_distance_codes"].value + 1, "distance", distance_code_lengths)): + while len(lengths) < numcodes: + field = HuffmanCode(self, "huff_%s_code[]" % name, code_length_tree) + value = field.realvalue + if value < 16: + prev_value = value + field._description = "Literal Code Length %i (Huffman Code %i)" % (value, field.value) + yield field + lengths.append(value) + else: + info = {16: (3,6,2), + 17: (3,10,3), + 18: (11,138,7)}[value] + if value == 16: + repvalue = prev_value + else: + repvalue = 0 + field._description = "Repeat Code %i, Repeating value (%i) %i to %i times (Huffman Code %i)" % (value, repvalue, info[0], info[1], field.value) + yield field + extrafield = Bits(self, "huff_%s_code_extra[%s" % (name, field.name.split('[')[1]), info[2]) + num_repeats = extrafield.value+info[0] + extrafield._description = "Repeat Extra Bits (%i), total repeats %i"%(extrafield.value, num_repeats) + yield extrafield + lengths += [repvalue]*num_repeats + length_tree = build_tree(length_code_lengths) + distance_tree = build_tree(distance_code_lengths) + else: + raise ParserError("Unsupported compression type 3!") + while True: + field = HuffmanCode(self, "length_code[]", length_tree) + value = field.realvalue + if value < 256: + field._description = "Literal Code %r (Huffman Code %i)" % (chr(value), field.value) + yield field + self.uncomp_data += chr(value) + if value == 256: + field._description = "Block Terminator Code (256) (Huffman Code %i)" % field.value + yield field + break + elif value > 256: + info = self.LENGTH_SYMBOLS[value] + if info[2] == 0: 
+ field._description = "Length Code %i, Value %i (Huffman Code %i)" % (value, info[0], field.value) + length = info[0] + yield field + else: + field._description = "Length Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value) + yield field + extrafield = Bits(self, "length_extra[%s" % field.name.split('[')[1], info[2]) + length = extrafield.value + info[0] + extrafield._description = "Length Extra Bits (%i), total length %i"%(extrafield.value, length) + yield extrafield + field = HuffmanCode(self, "distance_code[]", distance_tree) + value = field.realvalue + info = self.DISTANCE_SYMBOLS[value] + if info[2] == 0: + field._description = "Distance Code %i, Value %i (Huffman Code %i)" % (value, info[0], field.value) + distance = info[0] + yield field + else: + field._description = "Distance Code %i, Values %i to %i (Huffman Code %i)" % (value, info[0], info[1], field.value) + yield field + extrafield = Bits(self, "distance_extra[%s" % field.name.split('[')[1], info[2]) + distance = extrafield.value + info[0] + extrafield._description = "Distance Extra Bits (%i), total length %i"%(extrafield.value, distance) + yield extrafield + self.uncomp_data = extend_data(self.uncomp_data, length, distance) + +class DeflateData(GenericFieldSet): + endian = LITTLE_ENDIAN + def createFields(self): + uncomp_data = "" + blk=DeflateBlock(self, "compressed_block[]", uncomp_data) + yield blk + uncomp_data = blk.uncomp_data + while not blk["final"].value: + blk=DeflateBlock(self, "compressed_block[]", uncomp_data) + yield blk + uncomp_data = blk.uncomp_data + padding = paddingSize(self.current_size + self.absolute_address, 8) # align on byte boundary + if padding: + yield PaddingBits(self, "padding[]", padding) + self.uncompressed_data = uncomp_data + +class ZlibData(Parser): + PARSER_TAGS = { + "id": "zlib", + "category": "archive", + "file_ext": ("zlib",), + "min_size": 8*8, + "description": "ZLIB Data", + } + endian = LITTLE_ENDIAN + + def validate(self): + if self["compression_method"].value != 8: + return "Incorrect compression method" + if ((self["compression_info"].value << 12) + + (self["compression_method"].value << 8) + + (self["flag_compression_level"].value << 6) + + (self["flag_dictionary_present"].value << 5) + + (self["flag_check_bits"].value)) % 31 != 0: + return "Invalid flag check value" + return True + + def createFields(self): + yield Enum(Bits(self, "compression_method", 4), {8:"deflate", 15:"reserved"}) # CM + yield Bits(self, "compression_info", 4, "base-2 log of the window size") # CINFO + yield Bits(self, "flag_check_bits", 5) # FCHECK + yield Bit(self, "flag_dictionary_present") # FDICT + yield Enum(Bits(self, "flag_compression_level", 2), # FLEVEL + {0:"Fastest", 1:"Fast", 2:"Default", 3:"Maximum, Slowest"}) + if self["flag_dictionary_present"].value: + yield textHandler(UInt32(self, "dict_checksum", "ADLER32 checksum of dictionary information"), hexadecimal) + yield DeflateData(self, "data", self.stream, description = "Compressed Data") + yield textHandler(UInt32(self, "data_checksum", "ADLER32 checksum of compressed data"), hexadecimal) + +def zlib_inflate(stream, wbits=None, prevdata=""): + if wbits is None or wbits >= 0: + return ZlibData(stream)["data"].uncompressed_data + else: + data = DeflateData(None, "root", stream, "", stream.askSize(None)) + for unused in data: + pass + return data.uncompressed_data diff --git a/lib/hachoir_parser/audio/8svx.py b/lib/hachoir_parser/audio/8svx.py deleted file mode 100644 index 16d0f703..00000000 --- 
a/lib/hachoir_parser/audio/8svx.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Audio Interchange File Format (AIFF) parser. - -Author: Victor Stinner -Creation: 27 december 2006 -""" - -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, - UInt16, UInt32, Float80, TimestampMac32, - RawBytes, NullBytes, - String, Enum, PascalString32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import alignValue -from lib.hachoir_parser.audio.id3 import ID3v2 - -CODEC_NAME = { - 'ACE2': u"ACE 2-to-1", - 'ACE8': u"ACE 8-to-3", - 'MAC3': u"MAC 3-to-1", - 'MAC6': u"MAC 6-to-1", - 'NONE': u"None", - 'sowt': u"Little-endian, no compression", -} - -class Comment(FieldSet): - def createFields(self): - yield TimestampMac32(self, "timestamp") - yield PascalString32(self, "text") - -def parseText(self): - yield String(self, "text", self["size"].value) - -def parseID3(self): - yield ID3v2(self, "id3v2", size=self["size"].value*8) - -def parseComment(self): - yield UInt16(self, "nb_comment") - for index in xrange(self["nb_comment"].value): - yield Comment(self, "comment[]") - -def parseCommon(self): - yield UInt16(self, "nb_channel") - yield UInt32(self, "nb_sample") - yield UInt16(self, "sample_size") - yield Float80(self, "sample_rate") - yield Enum(String(self, "codec", 4, strip="\0", charset="ASCII"), CODEC_NAME) - -def parseVersion(self): - yield TimestampMac32(self, "timestamp") - -def parseSound(self): - yield UInt32(self, "offset") - yield UInt32(self, "block_size") - size = (self.size - self.current_size) // 8 - if size: - yield RawBytes(self, "data", size) - -class Chunk(FieldSet): - TAG_INFO = { - 'COMM': ('common', "Common chunk", parseCommon), - 'COMT': ('comment', "Comment", parseComment), - 'NAME': ('name', "Name", parseText), - 'AUTH': ('author', "Author", parseText), - 'FVER': ('version', "Version", parseVersion), - 'SSND': ('sound', "Sound data", parseSound), - 'ID3 ': ('id3', "ID3", parseID3), - } - - def __init__(self, *args): - FieldSet.__init__(self, *args) - self._size = (8 + alignValue(self["size"].value, 2)) * 8 - tag = self["type"].value - if tag in self.TAG_INFO: - self._name, self._description, self._parser = self.TAG_INFO[tag] - else: - self._parser = None - - def createFields(self): - yield String(self, "type", 4, "Signature (FORM)", charset="ASCII") - yield filesizeHandler(UInt32(self, "size")) - size = self["size"].value - if size: - if self._parser: - for field in self._parser(self): - yield field - if size % 2: - yield NullBytes(self, "padding", 1) - else: - yield RawBytes(self, "data", size) - -class HeightSVX(Parser): - PARSER_TAGS = { - "id": "8svx", - "category": "audio", - "file_ext": ("8svx",), - "mime": (u"audio/x-aiff",), - "min_size": 12*8, - "description": "8SVX (audio) format" - } - endian = BIG_ENDIAN - - def validate(self): - if self.stream.readBytes(0, 4) != "FORM": - return "Invalid signature" - if self.stream.readBytes(8*8, 4) != "8SVX": - return "Invalid type" - return True - - def createFields(self): - yield String(self, "signature", 4, "Signature (FORM)", charset="ASCII") - yield filesizeHandler(UInt32(self, "filesize")) - yield String(self, "type", 4, "Form type (AIFF or AIFC)", charset="ASCII") - while not self.eof: - yield Chunk(self, "chunk[]") - - def createDescription(self): - if self["type"].value == "AIFC": - return "Audio Interchange File Format Compressed (AIFC)" - else: - return "Audio Interchange File Format (AIFF)" - - def 
createContentSize(self): - return self["filesize"].value * 8 - diff --git a/lib/hachoir_parser/audio/__init__.py b/lib/hachoir_parser/audio/__init__.py index 0c6ac749..1cc33a23 100644 --- a/lib/hachoir_parser/audio/__init__.py +++ b/lib/hachoir_parser/audio/__init__.py @@ -1,12 +1,12 @@ -from lib.hachoir_parser.audio.aiff import AiffFile -from lib.hachoir_parser.audio.au import AuFile -from lib.hachoir_parser.audio.itunesdb import ITunesDBFile -from lib.hachoir_parser.audio.midi import MidiFile -from lib.hachoir_parser.audio.mpeg_audio import MpegAudioFile -from lib.hachoir_parser.audio.real_audio import RealAudioFile -from lib.hachoir_parser.audio.xm import XMModule -from lib.hachoir_parser.audio.s3m import S3MModule -from lib.hachoir_parser.audio.s3m import PTMModule -from lib.hachoir_parser.audio.mod import AmigaModule -from lib.hachoir_parser.audio.flac import FlacParser +from hachoir_parser.audio.aiff import AiffFile +from hachoir_parser.audio.au import AuFile +from hachoir_parser.audio.itunesdb import ITunesDBFile +from hachoir_parser.audio.midi import MidiFile +from hachoir_parser.audio.mpeg_audio import MpegAudioFile +from hachoir_parser.audio.real_audio import RealAudioFile +from hachoir_parser.audio.xm import XMModule +from hachoir_parser.audio.s3m import S3MModule +from hachoir_parser.audio.s3m import PTMModule +from hachoir_parser.audio.mod import AmigaModule +from hachoir_parser.audio.flac import FlacParser diff --git a/lib/hachoir_parser/audio/aiff.py b/lib/hachoir_parser/audio/aiff.py index 89c7e61b..d8f41696 100644 --- a/lib/hachoir_parser/audio/aiff.py +++ b/lib/hachoir_parser/audio/aiff.py @@ -5,15 +5,15 @@ Author: Victor Stinner Creation: 27 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, Float80, TimestampMac32, RawBytes, NullBytes, String, Enum, PascalString32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import alignValue -from lib.hachoir_parser.audio.id3 import ID3v2 +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import filesizeHandler +from hachoir_core.tools import alignValue +from hachoir_parser.audio.id3 import ID3v2 CODEC_NAME = { 'ACE2': u"ACE 2-to-1", diff --git a/lib/hachoir_parser/audio/au.py b/lib/hachoir_parser/audio/au.py index 59b2f8e2..ab9d9c11 100644 --- a/lib/hachoir_parser/audio/au.py +++ b/lib/hachoir_parser/audio/au.py @@ -5,11 +5,11 @@ Author: Victor Stinner Creation: 12 july 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import UInt32, Enum, String, RawBytes -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import displayHandler, filesizeHandler -from lib.hachoir_core.tools import createDict, humanFrequency +from hachoir_parser import Parser +from hachoir_core.field import UInt32, Enum, String, RawBytes +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_core.tools import createDict, humanFrequency class AuFile(Parser): PARSER_TAGS = { diff --git a/lib/hachoir_parser/audio/flac.py b/lib/hachoir_parser/audio/flac.py index a30c6b00..f739ff70 100644 --- a/lib/hachoir_parser/audio/flac.py +++ b/lib/hachoir_parser/audio/flac.py @@ -9,11 +9,11 @@ Author: Esteban Loiseau Creation date: 2008-04-09 """ -from lib.hachoir_parser import Parser -from 
lib.hachoir_core.field import FieldSet, String, Bit, Bits, UInt16, UInt24, RawBytes, Enum, NullBytes -from lib.hachoir_core.stream import BIG_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.container.ogg import parseVorbisComment +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, String, Bit, Bits, UInt16, UInt24, RawBytes, Enum, NullBytes +from hachoir_core.stream import BIG_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import createDict +from hachoir_parser.container.ogg import parseVorbisComment class VorbisComment(FieldSet): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/audio/id3.py b/lib/hachoir_parser/audio/id3.py index 9616baa2..3cfda25f 100644 --- a/lib/hachoir_parser/audio/id3.py +++ b/lib/hachoir_parser/audio/id3.py @@ -6,13 +6,13 @@ Informations: http://www.id3.org/ Author: Victor Stinner """ -from lib.hachoir_core.field import (FieldSet, MatchError, ParserError, +from hachoir_core.field import (FieldSet, MatchError, ParserError, Enum, UInt8, UInt24, UInt32, CString, String, RawBytes, Bit, Bits, NullBytes, NullBits) -from lib.hachoir_core.text_handler import textHandler -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.endian import NETWORK_ENDIAN +from hachoir_core.text_handler import textHandler +from hachoir_core.tools import humanDuration +from hachoir_core.endian import NETWORK_ENDIAN class ID3v1(FieldSet): static_size = 128 * 8 diff --git a/lib/hachoir_parser/audio/itunesdb.py b/lib/hachoir_parser/audio/itunesdb.py index 76b4f936..a70d9cb0 100644 --- a/lib/hachoir_parser/audio/itunesdb.py +++ b/lib/hachoir_parser/audio/itunesdb.py @@ -8,13 +8,13 @@ Author: Romain HERAULT Creation date: 19 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, - UInt8, UInt16, UInt32, UInt64, TimestampMac32, - String, Float32, NullBytes, Enum) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import displayHandler, filesizeHandler +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, + UInt8, UInt16, UInt32, Int32, UInt64, TimestampMac32, + String, Float32, NullBytes, Enum, RawBytes) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanDuration +from hachoir_core.text_handler import displayHandler, filesizeHandler list_order={ 1 : "playlist order (manual sort order)", @@ -75,6 +75,9 @@ class DataObject(FieldSet): 51:"Smart Playlist Rules", 52:"Library Playlist Index", 100:"Column info", + 200:"Album name (for album descriptions)", + 201:"Album artist (for album descriptions)", + 202:"Album sort artist (for album descriptions)" } mhod52_sort_index_type_name={ @@ -94,7 +97,7 @@ class DataObject(FieldSet): yield UInt32(self, "header_length", "Header Length") yield UInt32(self, "entry_length", "Entry Length") yield Enum(UInt32(self, "type", "type"),self.type_name) - if(self["type"].value<15): + if(self["type"].value<15) or (self["type"].value >= 200): yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") yield UInt32(self, "position", "Position") @@ -162,7 +165,7 @@ class TrackItem(FieldSet): yield Enum(UInt8(self, "x2_type", "Extended type 2"),self.x2_type_name) yield UInt8(self, "compilation_flag", "Compilation Flag") yield UInt8(self, "rating", "Rating") - yield TimestampMac32(self, "added_date", "Date when the item was added") + yield TimestampMac32(self, "last_modified", "Time of the last 
modification of the track") yield filesizeHandler(UInt32(self, "size", "Track size in bytes")) yield displayHandler(UInt32(self, "length", "Track length in milliseconds"), humanDuration) yield UInt32(self, "track_number", "Number of this track") @@ -180,23 +183,24 @@ class TrackItem(FieldSet): yield UInt32(self, "disc_number", "disc number in multi disc sets") yield UInt32(self, "total_discs", "Total number of discs in the disc set") yield UInt32(self, "userid", "User ID in the DRM scheme") - yield TimestampMac32(self, "last_modified", "Time of the last modification of the track") + yield TimestampMac32(self, "added_date", "Date when the item was added") yield UInt32(self, "bookmark_time", "Bookmark time for AudioBook") yield UInt64(self, "dbid", "Unique DataBase ID for the song (identical in mhit and in mhii)") yield UInt8(self, "checked", "song is checked") yield UInt8(self, "application_rating", "Last Rating before change") yield UInt16(self, "BPM", "BPM of the track") - yield UInt16(self, "artwork_count", "number of artworks fo this item") + yield UInt16(self, "artwork_count", "number of artworks for this item") yield UInt16(self, "unknown[]") yield UInt32(self, "artwork_size", "Total size of artworks in bytes") yield UInt32(self, "unknown[]") yield Float32(self, "sample_rate_2", "Sample Rate express in float") yield UInt32(self, "released_date", "Date of release in Music Store or in Podcast") + yield UInt16(self, "unknown[]") + yield UInt16(self, "explicit_flag[]", "Explicit flag") yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") + yield UInt32(self, "skip_count[]", "Skip Count") + yield TimestampMac32(self, "last_skipped", "Date when the item was last skipped") yield UInt8(self, "has_artwork", "0x01 for track with artwork, 0x02 otherwise") yield UInt8(self, "skip_wen_shuffling", "Skip that track when shuffling") yield UInt8(self, "remember_playback_position", "Remember playback position") @@ -207,11 +211,10 @@ class TrackItem(FieldSet): yield UInt8(self, "played_mark", "Track has been played") yield UInt8(self, "unknown[]") yield UInt32(self, "unknown[]") + yield UInt32(self, "pregap[]", "Number of samples of silence before the song starts") + yield UInt64(self, "sample_count", "Number of samples in the song (only for WAV and AAC files)") yield UInt32(self, "unknown[]") - yield UInt32(self, "sample_count", "Number of samples in the song (only for WAV and AAC files)") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") - yield UInt32(self, "unknown[]") + yield UInt32(self, "postgap[]", "Number of samples of silence at the end of the song") yield UInt32(self, "unknown[]") yield Enum(UInt32(self, "media_type", "Media Type for video iPod"),self.media_type_name) yield UInt32(self, "season_number", "Season Number") @@ -222,6 +225,20 @@ class TrackItem(FieldSet): yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "gapless_data[]","The size in bytes from first Sync Frame until the 8th before the last frame." 
) + yield UInt32(self, "unknown[]") + yield UInt16(self, "gaplessTrackFlag[]", "1 if track has gapless data") + yield UInt16(self, "gaplessAlbumFlag[]", "1 if track uses crossfading in iTunes") + yield RawBytes(self, "unknown[]", 20) + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt16(self, "album_id[]", "Album ID (used to link tracks with MHIAs)") + yield RawBytes(self, "unknown[]", 52) + yield UInt32(self, "mhii_link[]", "Artwork ID (used to link tracks with MHIIs)") padding = self.seekByte(self["header_length"].value, "header padding") if padding: yield padding @@ -319,7 +336,7 @@ class Playlist(FieldSet): self._size = self["entry_length"].value *8 def createFields(self): - yield String(self, "header_id", 4, "Playlist List Header Markup (\"mhyp\")", charset="ISO-8859-1") + yield String(self, "header_id", 4, "Playlist Header Markup (\"mhyp\")", charset="ISO-8859-1") yield UInt32(self, "header_length", "Header Length") yield UInt32(self, "entry_length", "Entry Length") yield UInt32(self, "data_object_child_count", "Number of Child Data Objects") @@ -360,11 +377,48 @@ class PlaylistList(FieldSet): for i in xrange(self["playlist_number"].value): yield Playlist(self, "playlist[]") +class Album(FieldSet): + def __init__(self, *args, **kw): + FieldSet.__init__(self, *args, **kw) + self._size = self["entry_length"].value *8 + + def createFields(self): + yield String(self, "header_id", 4, "Album Item Header Markup (\"mhia\")", charset="ISO-8859-1") + yield UInt32(self, "header_length", "Header Length") + yield UInt32(self, "entry_length", "Entry Length") + yield UInt32(self, "data_object_child_count", "Number of Child Data Objects") + yield UInt16(self, "unknow[]") + yield UInt16(self, "album_id[]", "Album ID") + yield UInt32(self, "unknow[]") + yield UInt32(self, "unknow[]") + yield UInt32(self, "unknow[]") + + padding = self.seekByte(self["header_length"].value, "entry padding") + if padding: + yield padding + + for i in xrange(self["data_object_child_count"].value): + yield DataObject(self, "mhod[]") + +class AlbumList(FieldSet): + def createFields(self): + yield String(self, "header_id", 4, "Album List Header Markup (\"mhla\")", charset="ISO-8859-1") + yield UInt32(self, "header_length", "Header Length") + yield UInt32(self, "album_number", "Number of Albums") + + padding = self.seekByte(self["header_length"].value, "header padding") + if padding: + yield padding + + for i in xrange(self["album_number"].value): + yield Album(self, "album[]") + class DataSet(FieldSet): type_name={ 1:"Track List", 2:"Play List", - 3:"Podcast List" + 3:"Podcast List", + 4:"Album List" } def __init__(self, *args, **kw): FieldSet.__init__(self, *args, **kw) @@ -384,6 +438,8 @@ class DataSet(FieldSet): yield PlaylistList(self, "playlist_list[]"); if self["type"].value == 3: yield PlaylistList(self, "podcast_list[]"); + if self["type"].value == 4: + yield AlbumList(self, "album_list[]"); padding = self.seekBit(self._size, "entry padding") if padding: yield padding @@ -417,8 +473,20 @@ class ITunesDBFile(Parser): yield UInt32(self, "version_number", "Version Number") yield UInt32(self, "child_number", "Number of Children") yield UInt64(self, "id", "ID for this database") + yield UInt16(self, "unknown[]") yield UInt32(self, "unknown[]") - yield UInt64(self, "initial_dbid", "Initial DBID") + yield UInt64(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt16(self, 
"hashing_scheme[]", "Algorithm used to calculate the database hash") + yield NullBytes(self, "unknown[]", 20) + yield String(self, "language_id", 2, "Language ID") + yield UInt64(self, "persistent_id", "Library Persistent ID") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield RawBytes(self, "hash[]", 20) + yield Int32(self, "timezone_offset[]", "Timezone offset in seconds") + yield UInt16(self, "unknown[]") + yield RawBytes(self, "iphone_hash[]", 45) size = self["header_length"].value-self.current_size/ 8 if size>0: yield NullBytes(self, "padding", size) diff --git a/lib/hachoir_parser/audio/midi.py b/lib/hachoir_parser/audio/midi.py index 5382f2dd..211e7b78 100644 --- a/lib/hachoir_parser/audio/midi.py +++ b/lib/hachoir_parser/audio/midi.py @@ -8,13 +8,13 @@ Author: Victor Stinner Creation: 27 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Bits, ParserError, - String, UInt32, UInt24, UInt16, UInt8, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict, humanDurationNanosec -from lib.hachoir_parser.common.tracker import NOTE_NAME +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, ParserError, + String, UInt32, UInt24, UInt16, UInt8, Enum, RawBits, RawBytes) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict, humanDurationNanosec +from hachoir_parser.common.tracker import NOTE_NAME MAX_FILESIZE = 10 * 1024 * 1024 @@ -46,7 +46,7 @@ def parseControl(parser): def parsePatch(parser): yield UInt8(parser, "program", "New program number") -def parseChannel(parser): +def parseChannel(parser, size=1): yield UInt8(parser, "channel", "Channel number") def parsePitch(parser): @@ -56,6 +56,16 @@ def parsePitch(parser): def parseText(parser, size): yield String(parser, "text", size) +def parseSMPTEOffset(parser, size): + yield RawBits(parser, "padding", 1) + yield Enum(Bits(parser, "frame_rate", 2), + {0:"24 fps", 1:"25 fps", 2:"30 fps (drop frame)", 3:"30 fps"}) + yield Bits(parser, "hour", 5) + yield UInt8(parser, "minute") + yield UInt8(parser, "second") + yield UInt8(parser, "frame") + yield UInt8(parser, "subframe", "100 subframes per frame") + def formatTempo(field): return humanDurationNanosec(field.value*1000) @@ -92,8 +102,10 @@ class Command(FieldSet): 0x05: ("Lyric", parseText), 0x06: ("Marker", parseText), 0x07: ("Cue point", parseText), + 0x20: ("MIDI Channel Prefix", parseChannel), 0x2F: ("End of the track", None), 0x51: ("Set tempo", parseTempo), + 0x54: ("SMPTE offset", parseSMPTEOffset), 0x58: ("Time Signature", parseTimeSignature), 0x59: ("Key signature", None), 0x7F: ("Sequencer specific information", None), @@ -101,11 +113,27 @@ class Command(FieldSet): META_COMMAND_DESC = createDict(META_COMMAND, 0) META_COMMAND_PARSER = createDict(META_COMMAND, 1) + def __init__(self, *args, **kwargs): + if 'prev_command' in kwargs: + self.prev_command = kwargs['prev_command'] + del kwargs['prev_command'] + else: + self.prev_command = None + self.command = None + FieldSet.__init__(self, *args, **kwargs) + def createFields(self): yield Integer(self, "time", "Delta time in ticks") - yield Enum(textHandler(UInt8(self, "command"), hexadecimal), self.COMMAND_DESC) - command = self["command"].value - if command == 0xFF: + next = 
self.stream.readBits(self.absolute_address+self.current_size, 8, self.root.endian) + if next & 0x80 == 0: + # "Running Status" command + if self.prev_command is None: + raise ParserError("Running Status command not preceded by another command.") + self.command = self.prev_command.command + else: + yield Enum(textHandler(UInt8(self, "command"), hexadecimal), self.COMMAND_DESC) + self.command = self["command"].value + if self.command == 0xFF: yield Enum(textHandler(UInt8(self, "meta_command"), hexadecimal), self.META_COMMAND_DESC) yield UInt8(self, "data_len") size = self["data_len"].value @@ -121,9 +149,9 @@ class Command(FieldSet): else: yield RawBytes(self, "data", size) else: - if command not in self.COMMAND_PARSER: + if self.command not in self.COMMAND_PARSER: raise ParserError("Unknown command: %s" % self["command"].display) - parser = self.COMMAND_PARSER[command] + parser = self.COMMAND_PARSER[self.command] for field in parser(self): yield field @@ -131,7 +159,7 @@ class Command(FieldSet): if "meta_command" in self: return self["meta_command"].display else: - return self["command"].display + return self.COMMAND_DESC[self.command] class Track(FieldSet): def __init__(self, *args): @@ -141,9 +169,11 @@ class Track(FieldSet): def createFields(self): yield String(self, "marker", 4, "Track marker (MTrk)", charset="ASCII") yield UInt32(self, "size") + cur = None if True: while not self.eof: - yield Command(self, "command[]") + cur = Command(self, "command[]", prev_command=cur) + yield cur else: size = self["size"].value if size: diff --git a/lib/hachoir_parser/audio/mod.py b/lib/hachoir_parser/audio/mod.py index 5ed300f0..75025e0c 100644 --- a/lib/hachoir_parser/audio/mod.py +++ b/lib/hachoir_parser/audio/mod.py @@ -18,12 +18,12 @@ Creation: 18th February 2007 """ from math import log10 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, UInt16, UInt8, RawBytes, String, GenericVector) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler # Old NoiseTracker 15-samples modules can have anything here. 
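# The Command.createFields change above implements MIDI "running status":
# a data byte (high bit clear) reuses the status byte of the previous
# command instead of carrying its own. A minimal standalone sketch of the
# same rule, independent of hachoir; the helper name and byte values are
# illustrative assumptions, not part of the patch:
def decode_events(data):
    """Yield (status, data_bytes) pairs from a raw MIDI event stream."""
    # data bytes expected per status nibble 0x8..0xE
    sizes = {0x8: 2, 0x9: 2, 0xA: 2, 0xB: 2, 0xC: 1, 0xD: 1, 0xE: 2}
    status = None
    i = 0
    while i < len(data):
        if data[i] & 0x80:        # high bit set: a new status byte
            status = data[i]
            i += 1
        elif status is None:      # mirrors the ParserError raised above
            raise ValueError("running status with no previous command")
        n = sizes[status >> 4]
        yield status, tuple(data[i:i + n])
        i += n

# 0x90 (note-on) carries two data bytes; the second note omits the status
# byte and relies on running status:
assert list(decode_events([0x90, 60, 100, 62, 100])) == \
    [(0x90, (60, 100)), (0x90, (62, 100))]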
MODULE_TYPE = { diff --git a/lib/hachoir_parser/audio/modplug.py b/lib/hachoir_parser/audio/modplug.py index 6790bc03..d0ea0ff4 100644 --- a/lib/hachoir_parser/audio/modplug.py +++ b/lib/hachoir_parser/audio/modplug.py @@ -8,11 +8,11 @@ Author: Christophe GISQUET Creation: 10th February 2007 """ -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, UInt32, UInt16, UInt8, Int8, Float32, RawBytes, String, GenericVector, ParserError) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal MAX_ENVPOINTS = 32 diff --git a/lib/hachoir_parser/audio/mpeg_audio.py b/lib/hachoir_parser/audio/mpeg_audio.py index b6f2ba91..04e7d327 100644 --- a/lib/hachoir_parser/audio/mpeg_audio.py +++ b/lib/hachoir_parser/audio/mpeg_audio.py @@ -5,18 +5,18 @@ Creation: 12 decembre 2005 Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, MissingField, ParserError, createOrphanField, Bit, Bits, Enum, PaddingBits, PaddingBytes, RawBytes) -from lib.hachoir_parser.audio.id3 import ID3v1, ID3v2 -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import humanFrequency, humanBitSize -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_core.stream import InputStreamError +from hachoir_parser.audio.id3 import ID3v1, ID3v2 +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFrequency, humanBitSize +from hachoir_core.bits import long2raw +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.stream import InputStreamError # Max MP3 filesize: 200 MB MAX_FILESIZE = 200*1024*1024*8 diff --git a/lib/hachoir_parser/audio/real_audio.py b/lib/hachoir_parser/audio/real_audio.py index 5a2100e6..289ed6e1 100644 --- a/lib/hachoir_parser/audio/real_audio.py +++ b/lib/hachoir_parser/audio/real_audio.py @@ -8,14 +8,14 @@ Samples: http://samples.mplayerhq.hu/real/RA/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, Bytes, RawBytes, String, PascalString8) -from lib.hachoir_core.tools import humanFrequency -from lib.hachoir_core.text_handler import displayHandler -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import humanFrequency +from hachoir_core.text_handler import displayHandler +from hachoir_core.endian import BIG_ENDIAN class Metadata(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/audio/s3m.py b/lib/hachoir_parser/audio/s3m.py index c03d72a0..a3e28579 100644 --- a/lib/hachoir_parser/audio/s3m.py +++ b/lib/hachoir_parser/audio/s3m.py @@ -9,15 +9,15 @@ Author: Christophe GISQUET Creation: 11th February 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, Field, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Field, Bit, Bits, UInt32, UInt16, UInt8, Enum, PaddingBytes, RawBytes, NullBytes, String, GenericVector, ParserError) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import alignValue 
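# Every import hunk in this patch drops the "lib." prefix, so the vendored
# parsers import each other as plain "hachoir_core"/"hachoir_parser". A
# sketch of the path bootstrap such a layout relies on; the exact bootstrap
# this project uses is an assumption, not shown in the patch:
import os
import sys

# Make packages under ./lib importable by their bare names, so that
# "import hachoir_core" resolves to lib/hachoir_core.
sys.path.insert(1, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lib'))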
+from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import alignValue class Chunk: def __init__(self, cls, name, offset, size, *args): @@ -326,7 +326,7 @@ class PTMHeader(Header): # static_size should prime over _size, right? static_size = 8*608 - def getTrackerVersion(val): + def getTrackerVersion(self, val): val = val.value return "ProTracker x%04X" % val diff --git a/lib/hachoir_parser/audio/xm.py b/lib/hachoir_parser/audio/xm.py index 17fd28e0..0b13b41f 100644 --- a/lib/hachoir_parser/audio/xm.py +++ b/lib/hachoir_parser/audio/xm.py @@ -13,15 +13,15 @@ Author: Christophe GISQUET Creation: 8th February 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, Bit, RawBits, Bits, UInt32, UInt16, UInt8, Int8, Enum, RawBytes, String, GenericVector) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_parser.audio.modplug import ParseModplugMetadata -from lib.hachoir_parser.common.tracker import NOTE_NAME +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_parser.audio.modplug import ParseModplugMetadata +from hachoir_parser.common.tracker import NOTE_NAME def parseSigned(val): return "%i" % (val.value-128) diff --git a/lib/hachoir_parser/common/deflate.py b/lib/hachoir_parser/common/deflate.py index ee369a29..8aa8e51a 100644 --- a/lib/hachoir_parser/common/deflate.py +++ b/lib/hachoir_parser/common/deflate.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import CompressedField +from hachoir_core.field import CompressedField try: from zlib import decompressobj, MAX_WBITS @@ -12,8 +12,8 @@ try: def __call__(self, size, data=None): if data is None: - data = self.gzip.unconsumed_tail - return self.gzip.decompress(data, size) + data = '' + return self.gzip.decompress(self.gzip.unconsumed_tail+data, size) class DeflateStreamWbits(DeflateStream): def __init__(self, stream): diff --git a/lib/hachoir_parser/common/msdos.py b/lib/hachoir_parser/common/msdos.py index e16920b7..addd1495 100644 --- a/lib/hachoir_parser/common/msdos.py +++ b/lib/hachoir_parser/common/msdos.py @@ -6,8 +6,8 @@ Documentation: http://www.cs.colorado.edu/~main/cs1300/include/ddk/winddk.h """ -from lib.hachoir_core.field import StaticFieldSet -from lib.hachoir_core.field import Bit, NullBits +from hachoir_core.field import StaticFieldSet +from hachoir_core.field import Bit, NullBits _FIELDS = ( (Bit, "read_only"), diff --git a/lib/hachoir_parser/common/win32.py b/lib/hachoir_parser/common/win32.py index 07a02f56..f5adf4fd 100644 --- a/lib/hachoir_parser/common/win32.py +++ b/lib/hachoir_parser/common/win32.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, UInt16, UInt32, Enum, String, Bytes, Bits, TimestampUUID60) -from lib.hachoir_parser.video.fourcc import video_fourcc_name -from lib.hachoir_core.bits import str2hex -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.network.common import MAC48_Address +from hachoir_parser.video.fourcc import video_fourcc_name +from hachoir_core.bits import str2hex +from hachoir_core.text_handler import textHandler, hexadecimal +from 
hachoir_parser.network.common import MAC48_Address # Dictionary: Windows codepage => Python charset name CODEPAGE_CHARSET = { @@ -24,6 +24,26 @@ CODEPAGE_CHARSET = { 65001: "UTF-8", } +class PascalStringWin16(FieldSet): + def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"): + FieldSet.__init__(self, parent, name, description) + length = self["length"].value + self._size = 16 + length * 16 + self.strip = strip + self.charset = charset + + def createFields(self): + yield UInt16(self, "length", "Length in widechar characters") + size = self["length"].value + if size: + yield String(self, "text", size*2, charset=self.charset, strip=self.strip) + + def createValue(self): + if "text" in self: + return self["text"].value + else: + return None + class PascalStringWin32(FieldSet): def __init__(self, parent, name, description=None, strip=None, charset="UTF-16-LE"): FieldSet.__init__(self, parent, name, description) diff --git a/lib/hachoir_parser/container/__init__.py b/lib/hachoir_parser/container/__init__.py index ff22d57a..6fd7d3ed 100644 --- a/lib/hachoir_parser/container/__init__.py +++ b/lib/hachoir_parser/container/__init__.py @@ -1,7 +1,7 @@ -from lib.hachoir_parser.container.asn1 import ASN1File -from lib.hachoir_parser.container.mkv import MkvFile -from lib.hachoir_parser.container.ogg import OggFile, OggStream -from lib.hachoir_parser.container.riff import RiffFile -from lib.hachoir_parser.container.swf import SwfFile -from lib.hachoir_parser.container.realmedia import RealMediaFile +from hachoir_parser.container.asn1 import ASN1File +from hachoir_parser.container.mkv import MkvFile +from hachoir_parser.container.ogg import OggFile, OggStream +from hachoir_parser.container.riff import RiffFile +from hachoir_parser.container.swf import SwfFile +from hachoir_parser.container.realmedia import RealMediaFile diff --git a/lib/hachoir_parser/container/action_script.py b/lib/hachoir_parser/container/action_script.py index 747f772f..4e22cef9 100644 --- a/lib/hachoir_parser/container/action_script.py +++ b/lib/hachoir_parser/container/action_script.py @@ -5,29 +5,64 @@ Documentation: - Alexis' SWF Reference: http://www.m2osw.com/swf_alexref.html + - Tamarin ABC format: + http://www.m2osw.com/abc_format.html -Author: Sebastien Ponce +Authors: Sebastien Ponce, Robert Xiao Creation date: 26 April 2008 """ -from lib.hachoir_core.field import (FieldSet, ParserError, - Bit, Bits, UInt8, UInt32, Int16, UInt16, Float32, CString, - RawBytes) -#from lib.hachoir_core.field import Field -from lib.hachoir_core.field.float import FloatExponent +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, + Bit, Bits, UInt8, UInt32, Int16, UInt16, Float32, Float64, CString, Enum, + Bytes, RawBytes, NullBits, String, SubFile, Field) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.field.float import FloatExponent from struct import unpack +class FlashPackedInteger(Bits): + def __init__(self, parent, name, signed=False, nbits=30, description=None): + Bits.__init__(self, parent, name, 8, description) + stream = self._parent.stream + addr = self.absolute_address + size = 0 + value = 0 + mult = 1 + while True: + byte = stream.readBits(addr+size, 8, LITTLE_ENDIAN) + value += mult * (byte & 0x7f) + size += 8 + mult <<= 7 + if byte < 128: + break + self._size = size + if signed and (1 << (nbits-1)) <= value: + value -= (1 << nbits) + self.createValue = lambda: value + +class FlashU30(FlashPackedInteger): + def __init__(self, parent, 
name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=False, nbits=30, description=description) + +class FlashS32(FlashPackedInteger): + def __init__(self, parent, name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=True, nbits=32, description=description) + +class FlashU32(FlashPackedInteger): + def __init__(self, parent, name, description=None): + FlashPackedInteger.__init__(self, parent, name, signed=False, nbits=32, description=description) + class FlashFloat64(FieldSet): def createFields(self): - yield Bits(self, "mantisa_high", 20) + yield Bits(self, "mantissa_high", 20) yield FloatExponent(self, "exponent", 11) yield Bit(self, "negative") - yield Bits(self, "mantisa_low", 32) + yield Bits(self, "mantissa_low", 32) def createValue(self): # Manual computation: - # mantisa = mantisa_high * 2^32 + mantisa_low - # float = 2^exponent + (1 + mantisa / 2^52) + # mantissa = mantissa_high * 2^32 + mantissa_low + # float = 2^exponent + (1 + mantissa / 2^52) # (and float is negative if negative=True) bytes = self.parent.stream.readBytes( self.absolute_address, self.size//8) @@ -44,8 +79,8 @@ TYPE_INFO = { 0x05: (UInt8, "Boolean[]"), 0x06: (FlashFloat64, "Double[]"), 0x07: (UInt32, "Integer[]"), - 0x08: (UInt8, "Dictionnary_Lookup_Index[]"), - 0x09: (UInt16, "Large_Dictionnary_Lookup_Index[]"), + 0x08: (UInt8, "Dictionary_Lookup_Index[]"), + 0x09: (UInt16, "Large_Dictionary_Lookup_Index[]"), } def parseBranch(parent, size): @@ -135,7 +170,7 @@ def parseWaitForFrame(parent, size): def parseWaitForFrameDyn(parent, size): yield UInt8(parent, "skip") -def parseDeclareDictionnary(parent, size): +def parseDeclareDictionary(parent, size): count = UInt16(parent, "count") yield count for i in range(count.value): @@ -231,7 +266,7 @@ class Instruction(FieldSet): # Objects 0x2B: ("Cast_Object[]", "Cast Object", None), 0x42: ("Declare_Array[]", "Declare Array", None), - 0x88: ("Declare_Dictionary[]", "Declare Dictionary", parseDeclareDictionnary), + 0x88: ("Declare_Dictionary[]", "Declare Dictionary", parseDeclareDictionary), 0x43: ("Declare_Object[]", "Declare Object", None), 0x3A: ("Delete[]", "Delete", None), 0x3B: ("Delete_All[]", "Delete All", None), @@ -314,3 +349,313 @@ class ActionScript(FieldSet): def parseActionScript(parent, size): yield ActionScript(parent, "action", size=size*8) +def FindABC(field): + while not getattr(field, "isABC", False): + field = field.parent + if field is None: + return None + return field + +def GetConstant(field, pool, index): + if index == 0: + return None + return FindABC(field)["constant_%s_pool/constant[%i]"%(pool, index)] + +def GetMultiname(field, index): + fld = GetConstant(field, "multiname", index) + if fld is None: + return "*" + if "name_index" not in fld: + return "?" 
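# FlashPackedInteger above decodes the ABC variable-length integer format:
# 7-bit groups, least significant first, with the high bit of each byte set
# while more bytes follow. The same decoding as a standalone sketch, working
# on a plain list of byte values instead of a hachoir stream; the helper
# name is illustrative, not part of the patch:
def read_packed(data, pos, signed=False, nbits=30):
    value = 0
    shift = 0
    while True:
        byte = data[pos]
        pos += 1
        value |= (byte & 0x7f) << shift
        shift += 7
        if byte < 0x80:           # continuation bit clear: last byte
            break
    if signed and value >= 1 << (nbits - 1):
        value -= 1 << nbits       # sign-extend, as FlashS32 does above
    return value, pos

# 0xAC 0x02 -> 0b0101100 | (0b0000010 << 7) == 300, consuming two bytes:
assert read_packed([0xAC, 0x02], 0) == (300, 2)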
+    fld2 = GetConstant(fld, "string", fld["name_index"].value)
+    if fld2 is None:
+        return "*"
+    return fld2.value
+
+class ABCStringIndex(FlashU30):
+    def createDisplay(self):
+        fld = GetConstant(self, "string", self.value)
+        if fld is None:
+            return "*"
+        return fld.value
+
+class ABCNSIndex(FlashU30):
+    def createDisplay(self):
+        fld = GetConstant(self, "namespace", self.value)
+        if fld is None:
+            return "*"
+        return fld.display
+
+class ABCMethodIndex(FlashU30):
+    def createDisplay(self):
+        fld = FindABC(self)["method_array/method[%i]"%self.value]
+        if fld is None:
+            return "*"
+        return fld.description
+
+class ABCMultinameIndex(FlashU30):
+    def createDisplay(self):
+        return GetMultiname(self, self.value)
+
+class ABCConstantPool(FieldSet):
+    def __init__(self, parent, name, klass):
+        FieldSet.__init__(self, parent, 'constant_%s_pool'%name)
+        self.klass = klass
+    def createFields(self):
+        ctr = FlashU30(self, "count")
+        yield ctr
+        for i in xrange(ctr.value-1):
+            yield self.klass(self, "constant[%i]"%(i+1))
+
+class ABCObjectArray(FieldSet):
+    def __init__(self, parent, name, klass):
+        self.arrname = name
+        FieldSet.__init__(self, parent, name+'_array')
+        self.klass = klass
+    def createFields(self):
+        ctr = FlashU30(self, "count")
+        yield ctr
+        for i in xrange(ctr.value):
+            yield self.klass(self, self.arrname+"[]")
+
+class ABCClassArray(FieldSet):
+    def __init__(self, parent, name):
+        FieldSet.__init__(self, parent, name+'_array')
+    def createFields(self):
+        ctr = FlashU30(self, "count")
+        yield ctr
+        for i in xrange(ctr.value):
+            yield ABCInstanceInfo(self, "instance[]")
+        for i in xrange(ctr.value):
+            yield ABCClassInfo(self, "class[]")
+
+class ABCConstantString(FieldSet):
+    def createFields(self):
+        yield FlashU30(self, "length")
+        size = self["length"].value
+        if size:
+            yield String(self, "data", size, charset="UTF-8")
+
+    def createDisplay(self):
+        if "data" in self:
+            return self["data"].display
+        else:
+            return ""
+
+    def createValue(self):
+        if "data" in self:
+            return self["data"].value
+        else:
+            return ""
+
+class ABCConstantNamespace(FieldSet):
+    NAMESPACE_KIND = {8: "Namespace",
+                      5: "PrivateNamespace",
+                      22: "PackageNamespace",
+                      23: "PackageInternalNamespace",
+                      24: "ProtectedNamespace",
+                      25: "ExplicitNamespace",
+                      26: "StaticProtectedNamespace"}
+    def createFields(self):
+        yield Enum(UInt8(self, "kind"), self.NAMESPACE_KIND)
+        yield ABCStringIndex(self, "name_index")
+
+    def createDisplay(self):
+        return "%s %s"%(self["kind"].display, self["name_index"].display)
+
+    def createValue(self):
+        return self["name_index"].value
+
+class ABCConstantNamespaceSet(FieldSet):
+    def createFields(self):
+        ctr = FlashU30(self, "namespace_count")
+        yield ctr
+        for i in xrange(ctr.value):
+            yield ABCNSIndex(self, "namespace_index[]")
+
+    def createDescription(self):
+        ret = [fld.display for fld in self.array("namespace_index")]
+        return ', '.join(ret)
+
+class ABCConstantMultiname(FieldSet):
+    MULTINAME_KIND = {7: "Qname",
+                      13: "QnameA",
+                      9: "Multiname",
+                      14: "MultinameA",
+                      15: "RTQname",
+                      16: "RTQnameA",
+                      27: "MultinameL",
+                      17: "RTQnameL",
+                      18: "RTQnameLA"}
+    def createFields(self):
+        yield Enum(UInt8(self, "kind"), self.MULTINAME_KIND)
+        kind = self["kind"].value
+        if kind in (7,13): # Qname
+            yield FlashU30(self, "namespace_index")
+            yield ABCStringIndex(self, "name_index")
+        elif kind in (9,14): # Multiname
+            yield ABCStringIndex(self, "name_index")
+            yield FlashU30(self, "namespace_set_index")
+        elif kind in (15,16): # RTQname
+            yield ABCStringIndex(self, "name_index")
+        elif kind ==
27: # MultinameL + yield FlashU30(self, "namespace_set_index") + elif kind in (17,18): # RTQnameL + pass + + def createDisplay(self): + kind = self["kind"].display + if "name_index" in self: + return kind + " " + self["name_index"].display + return kind + + def createValue(self): + return self["kind"].value + +class ABCTrait(FieldSet): + TRAIT_KIND = {0: "slot", + 1: "method", + 2: "getter", + 3: "setter", + 4: "class", + 5: "function", + 6: "const",} + def createFields(self): + yield ABCMultinameIndex(self, "name_index") + yield Enum(Bits(self, "kind", 4), self.TRAIT_KIND) + yield Enum(Bit(self, "is_final"), {True:'final',False:'virtual'}) + yield Enum(Bit(self, "is_override"), {True:'override',False:'new'}) + yield Bit(self, "has_metadata") + yield Bits(self, "unused", 1) + kind = self["kind"].value + if kind in (0,6): # slot, const + yield FlashU30(self, "slot_id") + yield ABCMultinameIndex(self, "type_index") + ### TODO reference appropriate constant pool using value_kind + yield FlashU30(self, "value_index") + if self['value_index'].value != 0: + yield UInt8(self, "value_kind") + elif kind in (1,2,3): # method, getter, setter + yield FlashU30(self, "disp_id") + yield ABCMethodIndex(self, "method_info") + elif kind == 4: # class + yield FlashU30(self, "disp_id") + yield FlashU30(self, "class_info") + elif kind == 5: # function + yield FlashU30(self, "disp_id") + yield ABCMethodIndex(self, "method_info") + if self['has_metadata'].value: + yield ABCObjectArray(self, "metadata", FlashU30) + +class ABCValueKind(FieldSet): + def createFields(self): + yield FlashU30(self, "value_index") + yield UInt8(self, "value_kind") + +class ABCMethodInfo(FieldSet): + def createFields(self): + yield FlashU30(self, "param_count") + yield ABCMultinameIndex(self, "ret_type") + for i in xrange(self["param_count"].value): + yield ABCMultinameIndex(self, "param_type[]") + yield ABCStringIndex(self, "name_index") + yield Bit(self, "need_arguments") + yield Bit(self, "need_activation") + yield Bit(self, "need_rest") + yield Bit(self, "has_optional") + yield Bit(self, "ignore_rest") + yield Bit(self, "explicit") + yield Bit(self, "setsdxns") + yield Bit(self, "has_paramnames") + if self["has_optional"].value: + yield ABCObjectArray(self, "optional", ABCValueKind) + if self["has_paramnames"].value: + for i in xrange(self["param_count"].value): + yield FlashU30(self, "param_name[]") + + def createDescription(self): + ret = GetMultiname(self, self["ret_type"].value) + ret += " " + self["name_index"].display + ret += "(" + ", ".join(GetMultiname(self, fld.value) for fld in self.array("param_type")) + ")" + return ret + +class ABCMetadataInfo(FieldSet): + def createFields(self): + yield ABCStringIndex(self, "name_index") + yield FlashU30(self, "values_count") + count = self["values_count"].value + for i in xrange(count): + yield FlashU30(self, "key[]") + for i in xrange(count): + yield FlashU30(self, "value[]") + +class ABCInstanceInfo(FieldSet): + def createFields(self): + yield ABCMultinameIndex(self, "name_index") + yield ABCMultinameIndex(self, "super_index") + yield Bit(self, "is_sealed") + yield Bit(self, "is_final") + yield Bit(self, "is_interface") + yield Bit(self, "is_protected") + yield Bits(self, "unused", 4) + if self['is_protected'].value: + yield ABCNSIndex(self, "protectedNS") + yield FlashU30(self, "interfaces_count") + for i in xrange(self["interfaces_count"].value): + yield ABCMultinameIndex(self, "interface[]") + yield ABCMethodIndex(self, "iinit_index") + yield ABCObjectArray(self, "trait", 
ABCTrait) + +class ABCClassInfo(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "cinit_index") + yield ABCObjectArray(self, "trait", ABCTrait) + +class ABCScriptInfo(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "init_index") + yield ABCObjectArray(self, "trait", ABCTrait) + +class ABCException(FieldSet): + def createFields(self): + yield FlashU30(self, "start") + yield FlashU30(self, "end") + yield FlashU30(self, "target") + yield FlashU30(self, "type_index") + yield FlashU30(self, "name_index") + +class ABCMethodBody(FieldSet): + def createFields(self): + yield ABCMethodIndex(self, "method_info") + yield FlashU30(self, "max_stack") + yield FlashU30(self, "max_regs") + yield FlashU30(self, "scope_depth") + yield FlashU30(self, "max_scope") + yield FlashU30(self, "code_length") + yield RawBytes(self, "code", self['code_length'].value) + yield ABCObjectArray(self, "exception", ABCException) + yield ABCObjectArray(self, "trait", ABCTrait) + +def parseABC(parent, size): + code = parent["code"].value + if code == parent.TAG_DO_ABC_DEFINE: + yield UInt32(parent, "action_flags") + yield CString(parent, "action_name") + yield UInt16(parent, "minor_version") + yield UInt16(parent, "major_version") + parent.isABC = True + + yield ABCConstantPool(parent, "int", FlashS32) + yield ABCConstantPool(parent, "uint", FlashU32) + yield ABCConstantPool(parent, "double", Float64) + yield ABCConstantPool(parent, "string", ABCConstantString) + yield ABCConstantPool(parent, "namespace", ABCConstantNamespace) + yield ABCConstantPool(parent, "namespace_set", ABCConstantNamespaceSet) + yield ABCConstantPool(parent, "multiname", ABCConstantMultiname) + + yield ABCObjectArray(parent, "method", ABCMethodInfo) + yield ABCObjectArray(parent, "metadata", ABCMetadataInfo) + yield ABCClassArray(parent, "class") + yield ABCObjectArray(parent, "script", ABCScriptInfo) + yield ABCObjectArray(parent, "body", ABCMethodBody) + diff --git a/lib/hachoir_parser/container/asn1.py b/lib/hachoir_parser/container/asn1.py index d1c3d113..dfac847b 100644 --- a/lib/hachoir_parser/container/asn1.py +++ b/lib/hachoir_parser/container/asn1.py @@ -39,15 +39,15 @@ Author: Victor Stinner Creation date: 24 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, FieldError, ParserError, Bit, Bits, Bytes, UInt8, GenericInteger, String, Field, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.tools import createDict, humanDatetime -from lib.hachoir_core.stream import InputStreamError -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.tools import createDict, humanDatetime +from hachoir_core.stream import InputStreamError +from hachoir_core.text_handler import textHandler # --- Field parser --- diff --git a/lib/hachoir_parser/container/mkv.py b/lib/hachoir_parser/container/mkv.py index 65230d61..0d3974c0 100644 --- a/lib/hachoir_parser/container/mkv.py +++ b/lib/hachoir_parser/container/mkv.py @@ -4,18 +4,18 @@ # Created: 8 june 2006 # -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Link, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Link, MissingField, ParserError, Enum as _Enum, String as _String, Float32, Float64, NullBits, Bits, Bit, RawBytes, Bytes, Int16, GenericInteger) -from lib.hachoir_core.endian import 
BIG_ENDIAN -from lib.hachoir_core.iso639 import ISO639_2 -from lib.hachoir_core.tools import humanDatetime -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.container.ogg import XiphInt +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.iso639 import ISO639_2 +from hachoir_core.tools import humanDatetime +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.container.ogg import XiphInt from datetime import datetime, timedelta class RawInt(GenericInteger): @@ -66,7 +66,7 @@ def SInt(parent): return GenericInteger(parent, 'signed', True, parent['size'].value*8) def String(parent): - return _String(parent, 'string', parent['size'].value, charset="ASCII") + return _String(parent, 'string', parent['size'].value, charset="ASCII", strip="\0") def EnumString(parent, enum): return _Enum(String(parent), enum) @@ -206,7 +206,7 @@ class Block(FieldSet): yield Bit(self, 'invisible') yield self.lacing() yield NullBits(self, 'reserved[]', 1) - elif self.parent._name == 'SimpleBlock[]': + elif self.parent._name.startswith('SimpleBlock'): yield Bit(self, 'keyframe') yield NullBits(self, 'reserved', 3) yield Bit(self, 'invisible') diff --git a/lib/hachoir_parser/container/ogg.py b/lib/hachoir_parser/container/ogg.py index 82b961d0..fa2d26cb 100644 --- a/lib/hachoir_parser/container/ogg.py +++ b/lib/hachoir_parser/container/ogg.py @@ -4,15 +4,15 @@ # Created: 10 june 2006 # -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, createOrphanField, +from hachoir_parser import Parser +from hachoir_core.field import (Field, FieldSet, createOrphanField, NullBits, Bit, Bits, Enum, Fragment, MissingField, ParserError, UInt8, UInt16, UInt24, UInt32, UInt64, RawBytes, String, PascalString32, NullBytes) -from lib.hachoir_core.stream import FragmentedStream, InputStreamError -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.tools import humanDurationNanosec -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.stream import FragmentedStream, InputStreamError +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.tools import humanDurationNanosec +from hachoir_core.text_handler import textHandler, hexadecimal MAX_FILESIZE = 1000 * 1024 * 1024 diff --git a/lib/hachoir_parser/container/realmedia.py b/lib/hachoir_parser/container/realmedia.py index 337be9b0..45c8173b 100644 --- a/lib/hachoir_parser/container/realmedia.py +++ b/lib/hachoir_parser/container/realmedia.py @@ -13,12 +13,12 @@ Samples: - http://samples.mplayerhq.hu/real/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, Bit, RawBits, RawBytes, String, PascalString8, PascalString16) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import BIG_ENDIAN def parseHeader(self): yield UInt32(self, "filever", "File version") diff --git a/lib/hachoir_parser/container/riff.py b/lib/hachoir_parser/container/riff.py index d5e96b93..a5e4fc0a 100644 --- a/lib/hachoir_parser/container/riff.py +++ b/lib/hachoir_parser/container/riff.py @@ -29,17 +29,17 @@ Thanks to: format information """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, 
ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, Enum, Bit, NullBits, NullBytes, RawBytes, String, PaddingBytes, SubFile) -from lib.hachoir_core.tools import alignValue, humanDuration -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import filesizeHandler, textHandler -from lib.hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name -from lib.hachoir_parser.image.ico import IcoFile +from hachoir_core.tools import alignValue, humanDuration +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import filesizeHandler, textHandler +from hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name +from hachoir_parser.image.ico import IcoFile from datetime import timedelta def parseText(self): diff --git a/lib/hachoir_parser/container/swf.py b/lib/hachoir_parser/container/swf.py index 4c0e5d69..942e3d9e 100644 --- a/lib/hachoir_parser/container/swf.py +++ b/lib/hachoir_parser/container/swf.py @@ -13,18 +13,18 @@ Author: Victor Stinner Creation date: 29 october 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - Bit, Bits, UInt8, UInt32, UInt16, CString, Enum, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, + Bit, Bits, UInt8, UInt16, Int32, UInt32, Int64, CString, Enum, Bytes, RawBytes, NullBits, String, SubFile) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, filesizeHandler -from lib.hachoir_core.tools import paddingSize, humanFrequency -from lib.hachoir_parser.image.common import RGB -from lib.hachoir_parser.image.jpeg import JpegChunk, JpegFile -from lib.hachoir_core.stream import StringInputStream, ConcatStream -from lib.hachoir_parser.common.deflate import Deflate, has_deflate -from lib.hachoir_parser.container.action_script import parseActionScript +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.text_handler import textHandler, filesizeHandler +from hachoir_core.tools import paddingSize, humanFrequency +from hachoir_parser.image.common import RGB +from hachoir_parser.image.jpeg import JpegChunk, JpegFile +from hachoir_core.stream import StringInputStream, ConcatStream +from hachoir_parser.common.deflate import Deflate, has_deflate +from hachoir_parser.container.action_script import parseActionScript, parseABC import math # Maximum file size (50 MB) @@ -206,10 +206,35 @@ def parseExport(parent, size): for index in xrange(parent["count"].value): yield Export(parent, "export[]") +def parseProductInfo(parent, size): + yield Int32(parent, "product_id") + yield Int32(parent, "edition") + yield UInt8(parent, "major_version") + yield UInt8(parent, "minor_version") + yield Int64(parent, "build_number") + yield Int64(parent, "compilation_date") + +def parseScriptLimits(parent, size): + yield UInt16(parent, "max_recursion_limit") + yield UInt16(parent, "timeout_seconds", "Seconds of processing until the SWF is considered 'stuck'") + +def parseSymbolClass(parent, size): + yield UInt16(parent, "count") + for index in xrange(parent["count"].value): + yield UInt16(parent, "symbol_id[]") + yield CString(parent, "symbol_name[]") + +def parseBinaryData(parent, size): + yield UInt16(parent, "data_id") + yield UInt32(parent, "reserved") + if size > 6: + yield RawBytes(parent, "data", size-6) + class Tag(FieldSet): TAG_BITS = 6 TAG_BITS_JPEG2 = 32 
TAG_BITS_JPEG3 = 35 + TAG_DO_ABC_DEFINE = 82 TAG_INFO = { # SWF version 1.0 0: ("end[]", "End", None), @@ -253,7 +278,7 @@ class Tag(FieldSet): 36: ("def_bits_lossless2[]", "Define bits lossless 2", None), 39: ("def_sprite[]", "Define sprite", None), 40: ("name_character[]", "Name character", None), - 41: ("serial_number", "Serial number", None), + 41: ("product_info", "Generator product info", parseProductInfo), 42: ("generator_text[]", "Generator text", None), 43: ("frame_label[]", "Frame label", None), 45: ("sound_hdr2[]", "Sound stream header2", parseSoundHeader), @@ -283,7 +308,7 @@ class Tag(FieldSet): 64: ("enable_debug2", "Enable debugger 2", None), # SWF version 7.0 - 65: ("script_limits[]", "Script limits", None), + 65: ("script_limits[]", "Script limits", parseScriptLimits), 66: ("tab_index[]", "Set tab index", None), # SWF version 8.0 @@ -297,6 +322,14 @@ class Tag(FieldSet): 78: ("def_scale_grid[]", "Define scaling factors", None), 83: ("def_shape4[]", "Define shape 4", None), 84: ("def_morph2[]", "Define a morphing shape 2", None), + + # SWF version 9.0 + 72: ("do_abc[]", "SWF 9 ActionScript container; actions only", parseABC), + 76: ("symbol_class[]", "Instantiate objects from a set of classes", parseSymbolClass), + 82: ("do_abc_define[]", "SWF 9 ActionScript container; identifier, name, actions", parseABC), + 86: ("def_scene_frame[]", "Define raw data for scenes and frames", None), + 87: ("def_binary_data[]", "Defines a buffer of any size with any binary user data", parseBinaryData), + 88: ("def_font_name[]", "Define the legal font name and copyright", None), } def __init__(self, *args): @@ -332,7 +365,7 @@ class Tag(FieldSet): return "Tag: %s (%s)" % (self["code"].display, self["length"].display) class SwfFile(Parser): - VALID_VERSIONS = set(xrange(1, 9+1)) + VALID_VERSIONS = set(xrange(1, 10+1)) PARSER_TAGS = { "id": "swf", "category": "container", diff --git a/lib/hachoir_parser/file_system/__init__.py b/lib/hachoir_parser/file_system/__init__.py index 92c82d26..863aae3a 100644 --- a/lib/hachoir_parser/file_system/__init__.py +++ b/lib/hachoir_parser/file_system/__init__.py @@ -1,8 +1,8 @@ -from lib.hachoir_parser.file_system.ext2 import EXT2_FS -from lib.hachoir_parser.file_system.fat import FAT12, FAT16, FAT32 -from lib.hachoir_parser.file_system.mbr import MSDos_HardDrive -from lib.hachoir_parser.file_system.ntfs import NTFS -from lib.hachoir_parser.file_system.iso9660 import ISO9660 -from lib.hachoir_parser.file_system.reiser_fs import REISER_FS -from lib.hachoir_parser.file_system.linux_swap import LinuxSwapFile +from hachoir_parser.file_system.ext2 import EXT2_FS +from hachoir_parser.file_system.fat import FAT12, FAT16, FAT32 +from hachoir_parser.file_system.mbr import MSDos_HardDrive +from hachoir_parser.file_system.ntfs import NTFS +from hachoir_parser.file_system.iso9660 import ISO9660 +from hachoir_parser.file_system.reiser_fs import REISER_FS +from hachoir_parser.file_system.linux_swap import LinuxSwapFile diff --git a/lib/hachoir_parser/file_system/ext2.py b/lib/hachoir_parser/file_system/ext2.py index 8b19b46b..634fe063 100644 --- a/lib/hachoir_parser/file_system/ext2.py +++ b/lib/hachoir_parser/file_system/ext2.py @@ -10,14 +10,14 @@ Sources: http://www.nondot.org/sabre/os/files/FileSystems/ext2fs/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Bit, Bits, UInt8, UInt16, UInt32, Enum, String, TimestampUnix32, 
RawBytes, NullBytes) -from lib.hachoir_core.tools import (alignValue, +from hachoir_core.tools import (alignValue, humanDuration, humanFilesize) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler from itertools import izip class DirectoryEntry(FieldSet): diff --git a/lib/hachoir_parser/file_system/fat.py b/lib/hachoir_parser/file_system/fat.py index fc0ae6cb..2aebe175 100644 --- a/lib/hachoir_parser/file_system/fat.py +++ b/lib/hachoir_parser/file_system/fat.py @@ -1,14 +1,14 @@ -from lib.hachoir_core.compatibility import sorted -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, +from hachoir_core.compatibility import sorted +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, RawBytes, PaddingBytes, createPaddingField, Link, Fragment, Bit, Bits, UInt8, UInt16, UInt32, String, Bytes, NullBytes) -from lib.hachoir_core.field.integer import GenericInteger -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.error import error -from lib.hachoir_core.tools import humanFilesize, makePrintable +from hachoir_core.field.integer import GenericInteger +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.error import error +from hachoir_core.tools import humanFilesize, makePrintable import datetime import re diff --git a/lib/hachoir_parser/file_system/iso9660.py b/lib/hachoir_parser/file_system/iso9660.py index 9fc0dc9f..3d93593a 100644 --- a/lib/hachoir_parser/file_system/iso9660.py +++ b/lib/hachoir_parser/file_system/iso9660.py @@ -9,11 +9,11 @@ Author: Victor Stinner Creation: 11 july 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt32, UInt64, Enum, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class PrimaryVolumeDescriptor(FieldSet): static_size = 2041*8 diff --git a/lib/hachoir_parser/file_system/linux_swap.py b/lib/hachoir_parser/file_system/linux_swap.py index 72fdf909..ea2e0dba 100644 --- a/lib/hachoir_parser/file_system/linux_swap.py +++ b/lib/hachoir_parser/file_system/linux_swap.py @@ -9,13 +9,13 @@ Author: Victor Stinner Creation date: 25 december 2006 (christmas ;-)) """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, GenericVector, +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, GenericVector, UInt32, String, Bytes, NullBytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanFilesize -from lib.hachoir_core.bits import str2hex +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import humanFilesize +from hachoir_core.bits import str2hex PAGE_SIZE = 4096 diff --git a/lib/hachoir_parser/file_system/mbr.py b/lib/hachoir_parser/file_system/mbr.py index 4174fa31..d5c366f8 100644 --- a/lib/hachoir_parser/file_system/mbr.py +++ b/lib/hachoir_parser/file_system/mbr.py @@ -12,13 +12,13 @@ Master Boot Record. # 2. Ask the system (ioctl/HDIO_GETGEO). # 3. 255 heads and 63 sectors/cylinder. 
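+# For reference, a CHS tuple converts to a linear sector number (LBA) as:
+#   lba = (cylinder * heads_per_cylinder + head) * sectors_per_track + (sector - 1)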
-from lib.hachoir_parser import Parser
-from lib.hachoir_core.field import (FieldSet,
-    Enum, Bits, UInt8, UInt16, UInt32, RawBytes)
-from lib.hachoir_core.endian import LITTLE_ENDIAN
-from lib.hachoir_core.tools import humanFilesize
-from lib.hachoir_core.text_handler import textHandler, hexadecimal
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet,
+    Enum, Bits, UInt8, UInt16, UInt32, RawBytes)
+from hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_core.tools import humanFilesize
+from hachoir_core.text_handler import textHandler, hexadecimal

 BLOCK_SIZE = 512 # bytes
diff --git a/lib/hachoir_parser/file_system/ntfs.py b/lib/hachoir_parser/file_system/ntfs.py
index 0710f990..efea7e71 100644
--- a/lib/hachoir_parser/file_system/ntfs.py
+++ b/lib/hachoir_parser/file_system/ntfs.py
@@ -13,15 +13,15 @@ Author: Victor Stinner

 SECTOR_SIZE = 512

-from lib.hachoir_parser import Parser
-from lib.hachoir_core.field import (FieldSet, Enum,
-    UInt8, UInt16, UInt32, UInt64, TimestampWin64,
-    String, Bytes, Bit, NullBits, NullBytes, PaddingBytes, RawBytes)
-from lib.hachoir_core.endian import LITTLE_ENDIAN
-from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
-from lib.hachoir_core.tools import humanFilesize, createDict
-from lib.hachoir_parser.common.msdos import MSDOSFileAttr32
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet, Enum,
+    UInt8, UInt16, UInt32, UInt64, TimestampWin64,
+    String, Bytes, Bit, NullBits, NullBytes, PaddingBytes, RawBytes)
+from hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler
+from hachoir_core.tools import humanFilesize, createDict
+from hachoir_parser.common.msdos import MSDOSFileAttr32

 class BiosParameterBlock(FieldSet):
     """
diff --git a/lib/hachoir_parser/file_system/reiser_fs.py b/lib/hachoir_parser/file_system/reiser_fs.py
index 73933c27..52a0dbf8 100644
--- a/lib/hachoir_parser/file_system/reiser_fs.py
+++ b/lib/hachoir_parser/file_system/reiser_fs.py
@@ -1,5 +1,5 @@
 """
-ReiserFS file system version 3 parser (version 1, 2 and 4 are not supported).
+ReiserFS file system version 3 parser (other versions have not been tested).

 Author: Frederic Weisbecker
 Creation date: 8 december 2006
@@ -20,10 +20,63 @@ Kurz.

 """

-from lib.hachoir_parser import Parser
-from lib.hachoir_core.field import (FieldSet, Enum,
-    UInt16, UInt32, String, RawBytes, NullBytes)
-from lib.hachoir_core.endian import LITTLE_ENDIAN
+from hachoir_parser import Parser
+from hachoir_core.field import (FieldSet, Enum,
+    UInt16, UInt32, String, RawBytes, NullBytes, SeekableFieldSet, Bit)
+from hachoir_core.endian import LITTLE_ENDIAN
+
+
+class BlockState(Bit):
+    """The state (used/free) of a ReiserFs Block"""
+
+    STATE={
+        True : "used",
+        False : "free"
+    }
+
+    block_nb = 0
+
+    def __init__(self, parent, name, nb_block):
+        """@param nb_block: Number of the block concerned"""
+        Bit.__init__(self, parent, name)
+        self.block_nb = self.__class__.block_nb
+        self.__class__.block_nb += 1
+
+    def createDescription(self):
+        return "State of the block %d" % self.block_nb
+
+    def createDisplay(self):
+        return self.STATE[Bit.createValue(self)]
+
+
+class BitmapBlock(SeekableFieldSet):
+    """ The bitmap blocks are ReiserFs blocks where each byte contains
+    the state of 8 blocks in the filesystem. Each bit thus describes the
+    state of one block, telling whether it is used or not.
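+    For example, with 4096-byte blocks a single bitmap block covers
+    4096 * 8 = 32768 filesystem blocks, i.e. 128 MiB of disk.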
+ """ + def createFields(self): + block_size=self["/superblock/blocksize"].value + + for i in xrange(0, block_size * 8): + yield BlockState(self, "block[]", i) + + +class BitmapBlockGroup(SeekableFieldSet): + """The group that manages the Bitmap Blocks""" + + def createFields(self): + block_size=self["/superblock/blocksize"].value + nb_bitmap_block = self["/superblock/bmap_nr"].value + # Position of the first bitmap block + self.seekByte(REISER_FS.SUPERBLOCK_OFFSET + block_size, relative=False) + + yield BitmapBlock(self, "BitmapBlock[]", "Bitmap blocks tells for each block if it is used") + # The other bitmap blocks + for i in xrange(1, nb_bitmap_block): + self.seekByte( (block_size**2) * 8 * i, relative=False) + yield BitmapBlock(self, "BitmapBlock[]", "Bitmap blocks tells for each block if it is used") + + class Journal_params(FieldSet): static_size = 32*8 @@ -44,7 +97,7 @@ class Journal_params(FieldSet): return "Parameters of the journal" class SuperBlock(FieldSet): - static_size = 204*8 + #static_size = 204*8 UMOUNT_STATE = { 1: "unmounted", 2: "not unmounted" } HASH_FUNCTIONS = { @@ -84,6 +137,7 @@ class SuperBlock(FieldSet): yield RawBytes(self, "uuid", 16, "Filesystem unique identifier") yield String(self, "label", 16, "Filesystem volume label", strip="\0") yield NullBytes(self, "unused", 88) + yield NullBytes(self, "Bytes before end of the block", self["blocksize"].value-204) def createDescription(self): return "Superblock: ReiserFs Filesystem" @@ -108,13 +162,11 @@ class REISER_FS(Parser): def validate(self): # Let's look at the magic field in the superblock magic = self.stream.readBytes(self.MAGIC_OFFSET*8, 9).rstrip("\0") - if magic == "ReIsEr3Fs": + if magic in ("ReIsEr3Fs", "ReIsErFs", "ReIsEr2Fs"): return True - if magic in ("ReIsEr2Fs", "ReIsErFs"): - return "Unsupported version of ReiserFs" return "Invalid magic string" def createFields(self): yield NullBytes(self, "padding[]", self.SUPERBLOCK_OFFSET) yield SuperBlock(self, "superblock") - + yield BitmapBlockGroup(self, "Group of bitmap blocks") diff --git a/lib/hachoir_parser/game/__init__.py b/lib/hachoir_parser/game/__init__.py index 17f7cd0a..1b6447b9 100644 --- a/lib/hachoir_parser/game/__init__.py +++ b/lib/hachoir_parser/game/__init__.py @@ -1,4 +1,4 @@ -from lib.hachoir_parser.game.zsnes import ZSNESFile -from lib.hachoir_parser.game.spider_man_video import SpiderManVideoFile -from lib.hachoir_parser.game.laf import LafFile -from lib.hachoir_parser.game.blp import BLP1File, BLP2File \ No newline at end of file +from hachoir_parser.game.zsnes import ZSNESFile +from hachoir_parser.game.spider_man_video import SpiderManVideoFile +from hachoir_parser.game.laf import LafFile +from hachoir_parser.game.blp import BLP1File, BLP2File \ No newline at end of file diff --git a/lib/hachoir_parser/game/blp.py b/lib/hachoir_parser/game/blp.py index 2c81770e..218e8640 100644 --- a/lib/hachoir_parser/game/blp.py +++ b/lib/hachoir_parser/game/blp.py @@ -12,11 +12,11 @@ Creation date: July 10 2007 http://en.wikipedia.org/wiki/S3_Texture_Compression """ -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.field import String, UInt32, UInt8, Enum, FieldSet, RawBytes, GenericVector, Bit, Bits -from lib.hachoir_parser.parser import Parser -from lib.hachoir_parser.image.common import PaletteRGBA -from lib.hachoir_core.tools import alignValue +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.field import String, UInt32, UInt8, Enum, FieldSet, RawBytes, GenericVector, Bit, Bits +from 
hachoir_parser.parser import Parser +from hachoir_parser.image.common import PaletteRGBA +from hachoir_core.tools import alignValue class PaletteIndex(UInt8): def createDescription(self): diff --git a/lib/hachoir_parser/game/laf.py b/lib/hachoir_parser/game/laf.py index 88154629..4a8e15cf 100644 --- a/lib/hachoir_parser/game/laf.py +++ b/lib/hachoir_parser/game/laf.py @@ -7,10 +7,10 @@ Author: Cyril Zorin Creation date: 1 January 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, GenericVector) -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN class CharData(FieldSet): def __init__(self, chars, *args): diff --git a/lib/hachoir_parser/game/spider_man_video.py b/lib/hachoir_parser/game/spider_man_video.py index 23842617..b9092f33 100644 --- a/lib/hachoir_parser/game/spider_man_video.py +++ b/lib/hachoir_parser/game/spider_man_video.py @@ -7,10 +7,10 @@ Creation date: 2006-09-30 File samples: http://samples.mplayerhq.hu/game-formats/spiderman-segacd-bin/ """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, UInt32, String, RawBytes -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, UInt32, String, RawBytes +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal class Chunk(FieldSet): tag_info = { diff --git a/lib/hachoir_parser/game/zsnes.py b/lib/hachoir_parser/game/zsnes.py index e11b3528..a8f75506 100644 --- a/lib/hachoir_parser/game/zsnes.py +++ b/lib/hachoir_parser/game/zsnes.py @@ -5,11 +5,11 @@ Author: Jason Gorski Creation date: 2006-09-15 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, UInt8, UInt16, UInt32, String, PaddingBytes, Bytes, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN class ZSTHeader(StaticFieldSet): format = ( diff --git a/lib/hachoir_parser/guess.py b/lib/hachoir_parser/guess.py index 0a7178af..758cec65 100644 --- a/lib/hachoir_parser/guess.py +++ b/lib/hachoir_parser/guess.py @@ -4,10 +4,11 @@ Parser list managment: """ import os -from lib.hachoir_core.error import warning, info, HACHOIR_ERRORS -from lib.hachoir_parser import ValidateError, HachoirParserList -from lib.hachoir_core.stream import FileInputStream -from lib.hachoir_core.i18n import _ +from hachoir_core.error import warning, info, HACHOIR_ERRORS +from hachoir_parser import ValidateError, HachoirParserList +from hachoir_core.stream import FileInputStream +from hachoir_core.i18n import _ +import weakref class QueryParser(object): @@ -80,6 +81,19 @@ class QueryParser(object): return parsers def parse(self, stream, fallback=True): + if hasattr(stream, "_cached_parser"): + parser = stream._cached_parser() + else: + parser = None + if parser is not None: + if parser.__class__ in self.parsers: + return parser + parser = self.doparse(stream, fallback) + if parser is not None: + stream._cached_parser = weakref.ref(parser) + return parser + + def doparse(self, stream, fallback=True): fb = None warn = warning for parser in self.parsers: diff --git a/lib/hachoir_parser/image/__init__.py 
b/lib/hachoir_parser/image/__init__.py index f66ffdd7..78c9c20b 100644 --- a/lib/hachoir_parser/image/__init__.py +++ b/lib/hachoir_parser/image/__init__.py @@ -1,12 +1,12 @@ -from lib.hachoir_parser.image.bmp import BmpFile -from lib.hachoir_parser.image.gif import GifFile -from lib.hachoir_parser.image.ico import IcoFile -from lib.hachoir_parser.image.jpeg import JpegFile -from lib.hachoir_parser.image.pcx import PcxFile -from lib.hachoir_parser.image.psd import PsdFile -from lib.hachoir_parser.image.png import PngFile -from lib.hachoir_parser.image.tga import TargaFile -from lib.hachoir_parser.image.tiff import TiffFile -from lib.hachoir_parser.image.wmf import WMF_File -from lib.hachoir_parser.image.xcf import XcfFile +from hachoir_parser.image.bmp import BmpFile +from hachoir_parser.image.gif import GifFile +from hachoir_parser.image.ico import IcoFile +from hachoir_parser.image.jpeg import JpegFile +from hachoir_parser.image.pcx import PcxFile +from hachoir_parser.image.psd import PsdFile +from hachoir_parser.image.png import PngFile +from hachoir_parser.image.tga import TargaFile +from hachoir_parser.image.tiff import TiffFile +from hachoir_parser.image.wmf import WMF_File +from hachoir_parser.image.xcf import XcfFile diff --git a/lib/hachoir_parser/image/bmp.py b/lib/hachoir_parser/image/bmp.py index 51c94400..c4865d3b 100644 --- a/lib/hachoir_parser/image/bmp.py +++ b/lib/hachoir_parser/image/bmp.py @@ -6,15 +6,15 @@ Author: Victor Stinner Creation: 16 december 2005 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt32, Bits, String, RawBytes, Enum, PaddingBytes, NullBytes, createPaddingField) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.image.common import RGB, PaletteRGBA -from lib.hachoir_core.tools import alignValue +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.image.common import RGB, PaletteRGBA +from hachoir_core.tools import alignValue class Pixel4bit(Bits): static_size = 4 diff --git a/lib/hachoir_parser/image/common.py b/lib/hachoir_parser/image/common.py index ca7152a3..5046058a 100644 --- a/lib/hachoir_parser/image/common.py +++ b/lib/hachoir_parser/image/common.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import FieldSet, UserVector, UInt8 +from hachoir_core.field import FieldSet, UserVector, UInt8 class RGB(FieldSet): color_name = { diff --git a/lib/hachoir_parser/image/exif.py b/lib/hachoir_parser/image/exif.py index dcaaf77e..449c7ba0 100644 --- a/lib/hachoir_parser/image/exif.py +++ b/lib/hachoir_parser/image/exif.py @@ -1,19 +1,26 @@ """ -EXIF metadata parser (can be found in a JPEG picture for example) +EXIF metadata parser; also parses TIFF file headers. 
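+
+A TIFF structure begins with a two-byte endian marker ('II' or 'MM'),
+followed by a version word and the offset of the first IFD; each IFD is
+an entry count, a run of 12-byte entries, and the offset of the next IFD
+(zero when there is none).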
-Author: Victor Stinner +Author: Victor Stinner, Robert Xiao + +References: +- Exif 2.2 Specification (JEITA CP-3451) + http://www.exif.org/Exif2-2.PDF +- TIFF 6.0 Specification + http://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError, UInt8, UInt16, UInt32, - Int32, Enum, String, - Bytes, SubFile, - NullBytes, createPaddingField) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN, NETWORK_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict + Int8, Int16, Int32, + Float32, Float64, + Enum, String, Bytes, SubFile, + NullBits, NullBytes, createPaddingField) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN, NETWORK_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict -MAX_COUNT = 1000 +MAX_COUNT = 1000 # maximum number of array entries in an IFD entry (excluding string types) def rationalFactory(class_name, size, field_class): class Rational(FieldSet): @@ -32,6 +39,16 @@ def rationalFactory(class_name, size, field_class): RationalInt32 = rationalFactory("RationalInt32", 64, Int32) RationalUInt32 = rationalFactory("RationalUInt32", 64, UInt32) +class ASCIIString(String): + def __init__(self, parent, name, nbytes, description=None, strip=' \0', charset='ISO-8859-1', *args, **kwargs): + String.__init__(self, parent, name, nbytes, description, strip, charset, *args, **kwargs) + +class IFDTag(UInt16): + def getTag(self): + return self.parent.TAG_INFO.get(self.value, (hex(self.value), "")) + def createDisplay(self): + return self.getTag()[0] + class BasicIFDEntry(FieldSet): TYPE_BYTE = 0 TYPE_UNDEFINED = 7 @@ -39,323 +56,364 @@ class BasicIFDEntry(FieldSet): TYPE_SIGNED_RATIONAL = 10 TYPE_INFO = { 1: (UInt8, "BYTE (8 bits)"), - 2: (String, "ASCII (8 bits)"), + 2: (ASCIIString, "ASCII (8 bits)"), 3: (UInt16, "SHORT (16 bits)"), 4: (UInt32, "LONG (32 bits)"), 5: (RationalUInt32, "RATIONAL (2x LONG, 64 bits)"), + 6: (Int8, "SBYTE (8 bits)"), 7: (Bytes, "UNDEFINED (8 bits)"), - 9: (Int32, "SIGNED LONG (32 bits)"), - 10: (RationalInt32, "SRATIONAL (2x SIGNED LONGs, 64 bits)"), + 8: (Int16, "SSHORT (16 bits)"), + 9: (Int32, "SLONG (32 bits)"), + 10: (RationalInt32, "SRATIONAL (2x SLONG, 64 bits)"), + 11: (Float32, "FLOAT (32 bits)"), + 12: (Float64, "DOUBLE (64 bits)"), } ENTRY_FORMAT = createDict(TYPE_INFO, 0) TYPE_NAME = createDict(TYPE_INFO, 1) + TAG_INFO = {} def createFields(self): - yield Enum(textHandler(UInt16(self, "tag", "Tag"), hexadecimal), self.TAG_NAME) - yield Enum(textHandler(UInt16(self, "type", "Type"), hexadecimal), self.TYPE_NAME) + yield IFDTag(self, "tag", "Tag") + yield Enum(UInt16(self, "type", "Type"), self.TYPE_NAME) + self.value_cls = self.ENTRY_FORMAT.get(self['type'].value, Bytes) + if issubclass(self.value_cls, Bytes): + self.value_size = 8 + else: + self.value_size = self.value_cls.static_size yield UInt32(self, "count", "Count") - if self["type"].value not in (self.TYPE_BYTE, self.TYPE_UNDEFINED) \ - and MAX_COUNT < self["count"].value: + + if not issubclass(self.value_cls, Bytes) \ + and self["count"].value > MAX_COUNT: raise ParserError("EXIF: Invalid count value (%s)" % self["count"].value) - value_size, array_size = self.getSizes() - # Get offset/value - if not value_size: + count = self['count'].value + totalsize = self.value_size * count + if count == 0: yield NullBytes(self, 
"padding", 4) - elif value_size <= 32: - if 1 < array_size: - name = "value[]" + elif totalsize <= 32: + name = "value" + if issubclass(self.value_cls, Bytes): + yield self.value_cls(self, name, count) else: - name = "value" - kw = {} - cls = self.value_cls - if cls is String: - args = (self, name, value_size/8, "Value") - kw["strip"] = " \0" - kw["charset"] = "ISO-8859-1" - elif cls is Bytes: - args = (self, name, value_size/8, "Value") - else: - args = (self, name, "Value") - for index in xrange(array_size): - yield cls(*args, **kw) - - size = array_size * value_size - if size < 32: - yield NullBytes(self, "padding", (32-size)//8) + if count > 1: + name += "[]" + for i in xrange(count): + yield self.value_cls(self, name) + if totalsize < 32: + yield NullBits(self, "padding", 32-totalsize) else: yield UInt32(self, "offset", "Value offset") - def getSizes(self): - """ - Returns (value_size, array_size): value_size in bits and - array_size in number of items. - """ - # Create format - self.value_cls = self.ENTRY_FORMAT.get(self["type"].value, Bytes) + def createValue(self): + if "value" in self: + return self['value'].value + return None - # Set size - count = self["count"].value - if self.value_cls in (String, Bytes): - return 8 * count, 1 - else: - return self.value_cls.static_size * count, count + def createDescription(self): + return "Entry: "+self["tag"].getTag()[1] -class ExifEntry(BasicIFDEntry): - OFFSET_JPEG_SOI = 0x0201 +class IFDEntry(BasicIFDEntry): EXIF_IFD_POINTER = 0x8769 + GPS_IFD_POINTER = 0x8825 + INTEROP_IFD_POINTER = 0xA005 - TAG_WIDTH = 0xA002 - TAG_HEIGHT = 0xA003 - - TAG_GPS_LATITUDE_REF = 0x0001 - TAG_GPS_LATITUDE = 0x0002 - TAG_GPS_LONGITUDE_REF = 0x0003 - TAG_GPS_LONGITUDE = 0x0004 - TAG_GPS_ALTITUDE_REF = 0x0005 - TAG_GPS_ALTITUDE = 0x0006 - TAG_GPS_TIMESTAMP = 0x0007 - TAG_GPS_DATESTAMP = 0x001d - - TAG_IMG_TITLE = 0x010e - TAG_FILE_TIMESTAMP = 0x0132 - TAG_SOFTWARE = 0x0131 - TAG_CAMERA_MODEL = 0x0110 - TAG_CAMERA_MANUFACTURER = 0x010f - TAG_ORIENTATION = 0x0112 - TAG_EXPOSURE = 0x829A - TAG_FOCAL = 0x829D - TAG_BRIGHTNESS = 0x9203 - TAG_APERTURE = 0x9205 - TAG_USER_COMMENT = 0x9286 - - TAG_NAME = { - # GPS - 0x0000: "GPS version ID", - 0x0001: "GPS latitude ref", - 0x0002: "GPS latitude", - 0x0003: "GPS longitude ref", - 0x0004: "GPS longitude", - 0x0005: "GPS altitude ref", - 0x0006: "GPS altitude", - 0x0007: "GPS timestamp", - 0x0008: "GPS satellites", - 0x0009: "GPS status", - 0x000a: "GPS measure mode", - 0x000b: "GPS DOP", - 0x000c: "GPS speed ref", - 0x000d: "GPS speed", - 0x000e: "GPS track ref", - 0x000f: "GPS track", - 0x0010: "GPS img direction ref", - 0x0011: "GPS img direction", - 0x0012: "GPS map datum", - 0x0013: "GPS dest latitude ref", - 0x0014: "GPS dest latitude", - 0x0015: "GPS dest longitude ref", - 0x0016: "GPS dest longitude", - 0x0017: "GPS dest bearing ref", - 0x0018: "GPS dest bearing", - 0x0019: "GPS dest distance ref", - 0x001a: "GPS dest distance", - 0x001b: "GPS processing method", - 0x001c: "GPS area information", - 0x001d: "GPS datestamp", - 0x001e: "GPS differential", - - 0x0100: "Image width", - 0x0101: "Image height", - 0x0102: "Number of bits per component", - 0x0103: "Compression scheme", - 0x0106: "Pixel composition", - TAG_ORIENTATION: "Orientation of image", - 0x0115: "Number of components", - 0x011C: "Image data arrangement", - 0x0212: "Subsampling ratio Y to C", - 0x0213: "Y and C positioning", - 0x011A: "Image resolution width direction", - 0x011B: "Image resolution in height direction", - 0x0128: "Unit of X and Y 
resolution", - - 0x0111: "Image data location", - 0x0116: "Number of rows per strip", - 0x0117: "Bytes per compressed strip", - 0x0201: "Offset to JPEG SOI", - 0x0202: "Bytes of JPEG data", - - 0x012D: "Transfer function", - 0x013E: "White point chromaticity", - 0x013F: "Chromaticities of primaries", - 0x0211: "Color space transformation matrix coefficients", - 0x0214: "Pair of blank and white reference values", - - TAG_FILE_TIMESTAMP: "File change date and time", - TAG_IMG_TITLE: "Image title", - TAG_CAMERA_MANUFACTURER: "Camera (Image input equipment) manufacturer", - TAG_CAMERA_MODEL: "Camera (Input input equipment) model", - TAG_SOFTWARE: "Software", - 0x013B: "File change date and time", - 0x8298: "Copyright holder", - 0x8769: "Exif IFD Pointer", - - TAG_EXPOSURE: "Exposure time", - TAG_FOCAL: "F number", - 0x8822: "Exposure program", - 0x8824: "Spectral sensitivity", - 0x8827: "ISO speed rating", - 0x8828: "Optoelectric conversion factor OECF", - 0x9201: "Shutter speed", - 0x9202: "Aperture", - TAG_BRIGHTNESS: "Brightness", - 0x9204: "Exposure bias", - TAG_APERTURE: "Maximum lens aperture", - 0x9206: "Subject distance", - 0x9207: "Metering mode", - 0x9208: "Light source", - 0x9209: "Flash", - 0x920A: "Lens focal length", - 0x9214: "Subject area", - 0xA20B: "Flash energy", - 0xA20C: "Spatial frequency response", - 0xA20E: "Focal plane X resolution", - 0xA20F: "Focal plane Y resolution", - 0xA210: "Focal plane resolution unit", - 0xA214: "Subject location", - 0xA215: "Exposure index", - 0xA217: "Sensing method", - 0xA300: "File source", - 0xA301: "Scene type", - 0xA302: "CFA pattern", - 0xA401: "Custom image processing", - 0xA402: "Exposure mode", - 0xA403: "White balance", - 0xA404: "Digital zoom ratio", - 0xA405: "Focal length in 35 mm film", - 0xA406: "Scene capture type", - 0xA407: "Gain control", - 0xA408: "Contrast", - - 0x9000: "Exif version", - 0xA000: "Supported Flashpix version", - 0xA001: "Color space information", - 0x9101: "Meaning of each component", - 0x9102: "Image compression mode", - TAG_WIDTH: "Valid image width", - TAG_HEIGHT: "Valid image height", - 0x927C: "Manufacturer notes", - TAG_USER_COMMENT: "User comments", - 0xA004: "Related audio file", - 0x9003: "Date and time of original data generation", - 0x9004: "Date and time of digital data generation", - 0x9290: "DateTime subseconds", - 0x9291: "DateTimeOriginal subseconds", - 0x9292: "DateTimeDigitized subseconds", - 0xA420: "Unique image ID", - 0xA005: "Interoperability IFD Pointer" + TAG_INFO = { + # image data structure + 0x0100: ("ImageWidth", "Image width"), + 0x0101: ("ImageLength", "Image height"), + 0x0102: ("BitsPerSample", "Number of bits per component"), + 0x0103: ("Compression", "Compression scheme"), + 0x0106: ("PhotometricInterpretation", "Pixel composition"), + 0x0112: ("Orientation", "Orientation of image"), + 0x0115: ("SamplesPerPixel", "Number of components"), + 0x011C: ("PlanarConfiguration", "Image data arrangement"), + 0x0212: ("YCbCrSubSampling", "Subsampling ratio of Y to C"), + 0x0213: ("YCbCrPositioning", "Y and C positioning"), + 0x011A: ("XResolution", "Image resolution in width direction"), + 0x011B: ("YResolution", "Image resolution in height direction"), + 0x0128: ("ResolutionUnit", "Unit of X and Y resolution"), + # recording offset + 0x0111: ("StripOffsets", "Image data location"), + 0x0116: ("RowsPerStrip", "Number of rows per strip"), + 0x0117: ("StripByteCounts", "Bytes per compressed strip"), + 0x0201: ("JPEGInterchangeFormat", "Offset to JPEG SOI"), + 0x0202: 
("JPEGInterchangeFormatLength", "Bytes of JPEG data"), + # image data characteristics + 0x012D: ("TransferFunction", "Transfer function"), + 0x013E: ("WhitePoint", "White point chromaticity"), + 0x013F: ("PrimaryChromaticities", "Chromaticities of primaries"), + 0x0211: ("YCbCrCoefficients", "Color space transformation matrix coefficients"), + 0x0214: ("ReferenceBlackWhite", "Pair of black and white reference values"), + # other tags + 0x0132: ("DateTime", "File change date and time"), + 0x010E: ("ImageDescription", "Image title"), + 0x010F: ("Make", "Image input equipment manufacturer"), + 0x0110: ("Model", "Image input equipment model"), + 0x0131: ("Software", "Software used"), + 0x013B: ("Artist", "Person who created the image"), + 0x8298: ("Copyright", "Copyright holder"), + # TIFF-specific tags + 0x00FE: ("NewSubfileType", "NewSubfileType"), + 0x00FF: ("SubfileType", "SubfileType"), + 0x0107: ("Threshholding", "Threshholding"), + 0x0108: ("CellWidth", "CellWidth"), + 0x0109: ("CellLength", "CellLength"), + 0x010A: ("FillOrder", "FillOrder"), + 0x010D: ("DocumentName", "DocumentName"), + 0x0118: ("MinSampleValue", "MinSampleValue"), + 0x0119: ("MaxSampleValue", "MaxSampleValue"), + 0x011D: ("PageName", "PageName"), + 0x011E: ("XPosition", "XPosition"), + 0x011F: ("YPosition", "YPosition"), + 0x0120: ("FreeOffsets", "FreeOffsets"), + 0x0121: ("FreeByteCounts", "FreeByteCounts"), + 0x0122: ("GrayResponseUnit", "GrayResponseUnit"), + 0x0123: ("GrayResponseCurve", "GrayResponseCurve"), + 0x0124: ("T4Options", "T4Options"), + 0x0125: ("T6Options", "T6Options"), + 0x0129: ("PageNumber", "PageNumber"), + 0x013C: ("HostComputer", "HostComputer"), + 0x013D: ("Predictor", "Predictor"), + 0x0140: ("ColorMap", "ColorMap"), + 0x0141: ("HalftoneHints", "HalftoneHints"), + 0x0142: ("TileWidth", "TileWidth"), + 0x0143: ("TileLength", "TileLength"), + 0x0144: ("TileOffsets", "TileOffsets"), + 0x0145: ("TileByteCounts", "TileByteCounts"), + 0x014C: ("InkSet", "InkSet"), + 0x014D: ("InkNames", "InkNames"), + 0x014E: ("NumberOfInks", "NumberOfInks"), + 0x0150: ("DotRange", "DotRange"), + 0x0151: ("TargetPrinter", "TargetPrinter"), + 0x0152: ("ExtraSamples", "ExtraSamples"), + 0x0153: ("SampleFormat", "SampleFormat"), + 0x0154: ("SMinSampleValue", "SMinSampleValue"), + 0x0155: ("SMaxSampleValue", "SMaxSampleValue"), + 0x0156: ("TransferRange", "TransferRange"), + 0x0200: ("JPEGProc", "JPEGProc"), + 0x0203: ("JPEGRestartInterval", "JPEGRestartInterval"), + 0x0205: ("JPEGLosslessPredictors", "JPEGLosslessPredictors"), + 0x0206: ("JPEGPointTransforms", "JPEGPointTransforms"), + 0x0207: ("JPEGQTables", "JPEGQTables"), + 0x0208: ("JPEGDCTables", "JPEGDCTables"), + 0x0209: ("JPEGACTables", "JPEGACTables"), + # IFD pointers + EXIF_IFD_POINTER: ("IFDExif", "Exif IFD Pointer"), + GPS_IFD_POINTER: ("IFDGPS", "GPS IFD Pointer"), + INTEROP_IFD_POINTER: ("IFDInterop", "Interoperability IFD Pointer"), } - def createDescription(self): - return "Entry: %s" % self["tag"].display +class ExifIFDEntry(BasicIFDEntry): + TAG_INFO = { + # version + 0x9000: ("ExifVersion", "Exif version"), + 0xA000: ("FlashpixVersion", "Supported Flashpix version"), + # image data characteristics + 0xA001: ("ColorSpace", "Color space information"), + # image configuration + 0x9101: ("ComponentsConfiguration", "Meaning of each component"), + 0x9102: ("CompressedBitsPerPixel", "Image compression mode"), + 0xA002: ("PixelXDimension", "Valid image width"), + 0xA003: ("PixelYDimension", "Valid image height"), + # user information + 0x927C: 
("MakerNote", "Manufacturer notes"), + 0x9286: ("UserComment", "User comments"), + # related file information + 0xA004: ("RelatedSoundFile", "Related audio file"), + # date and time + 0x9003: ("DateTimeOriginal", "Date and time of original data generation"), + 0x9004: ("DateTimeDigitized", "Date and time of digital data generation"), + 0x9290: ("SubSecTime", "DateTime subseconds"), + 0x9291: ("SubSecTimeOriginal", "DateTimeOriginal subseconds"), + 0x9292: ("SubSecTimeDigitized", "DateTimeDigitized subseconds"), + # picture-taking conditions + 0x829A: ("ExposureTime", "Exposure time"), + 0x829D: ("FNumber", "F number"), + 0x8822: ("ExposureProgram", "Exposure program"), + 0x8824: ("SpectralSensitivity", "Spectral sensitivity"), + 0x8827: ("ISOSpeedRatings", "ISO speed rating"), + 0x8828: ("OECF", "Optoelectric conversion factor"), + 0x9201: ("ShutterSpeedValue", "Shutter speed"), + 0x9202: ("ApertureValue", "Aperture"), + 0x9203: ("BrightnessValue", "Brightness"), + 0x9204: ("ExposureBiasValue", "Exposure bias"), + 0x9205: ("MaxApertureValue", "Maximum lens aperture"), + 0x9206: ("SubjectDistance", "Subject distance"), + 0x9207: ("MeteringMode", "Metering mode"), + 0x9208: ("LightSource", "Light source"), + 0x9209: ("Flash", "Flash"), + 0x920A: ("FocalLength", "Lens focal length"), + 0x9214: ("SubjectArea", "Subject area"), + 0xA20B: ("FlashEnergy", "Flash energy"), + 0xA20C: ("SpatialFrequencyResponse", "Spatial frequency response"), + 0xA20E: ("FocalPlaneXResolution", "Focal plane X resolution"), + 0xA20F: ("FocalPlaneYResolution", "Focal plane Y resolution"), + 0xA210: ("FocalPlaneResolutionUnit", "Focal plane resolution unit"), + 0xA214: ("SubjectLocation", "Subject location"), + 0xA215: ("ExposureIndex", "Exposure index"), + 0xA217: ("SensingMethod", "Sensing method"), + 0xA300: ("FileSource", "File source"), + 0xA301: ("SceneType", "Scene type"), + 0xA302: ("CFAPattern", "CFA pattern"), + 0xA401: ("CustomRendered", "Custom image processing"), + 0xA402: ("ExposureMode", "Exposure mode"), + 0xA403: ("WhiteBalance", "White balance"), + 0xA404: ("DigitalZoomRatio", "Digital zoom ratio"), + 0xA405: ("FocalLengthIn35mmFilm", "Focal length in 35 mm film"), + 0xA406: ("SceneCaptureType", "Scene capture type"), + 0xA407: ("GainControl", "Gain control"), + 0xA408: ("Contrast", "Contrast"), + 0xA409: ("Saturation", "Saturation"), + 0xA40A: ("Sharpness", "Sharpness"), + 0xA40B: ("DeviceSettingDescription", "Device settings description"), + 0xA40C: ("SubjectDistanceRange", "Subject distance range"), + # other tags + 0xA420: ("ImageUniqueID", "Unique image ID"), + } -def sortExifEntry(a,b): - return int( a["offset"].value - b["offset"].value ) +class GPSIFDEntry(BasicIFDEntry): + TAG_INFO = { + 0x0000: ("GPSVersionID", "GPS tag version"), + 0x0001: ("GPSLatitudeRef", "North or South Latitude"), + 0x0002: ("GPSLatitude", "Latitude"), + 0x0003: ("GPSLongitudeRef", "East or West Longitude"), + 0x0004: ("GPSLongitude", "Longitude"), + 0x0005: ("GPSAltitudeRef", "Altitude reference"), + 0x0006: ("GPSAltitude", "Altitude"), + 0x0007: ("GPSTimeStamp", "GPS time (atomic clock)"), + 0x0008: ("GPSSatellites", "GPS satellites used for measurement"), + 0x0009: ("GPSStatus", "GPS receiver status"), + 0x000A: ("GPSMeasureMode", "GPS measurement mode"), + 0x000B: ("GPSDOP", "Measurement precision"), + 0x000C: ("GPSSpeedRef", "Speed unit"), + 0x000D: ("GPSSpeed", "Speed of GPS receiver"), + 0x000E: ("GPSTrackRef", "Reference for direction of movement"), + 0x000F: ("GPSTrack", "Direction of movement"), + 0x0010: 
("GPSImgDirectionRef", "Reference for direction of image"), + 0x0011: ("GPSImgDirection", "Direction of image"), + 0x0012: ("GPSMapDatum", "Geodetic survey data used"), + 0x0013: ("GPSDestLatitudeRef", "Reference for latitude of destination"), + 0x0014: ("GPSDestLatitude", "Latitude of destination"), + 0x0015: ("GPSDestLongitudeRef", "Reference for longitude of destination"), + 0x0016: ("GPSDestLongitude", "Longitude of destination"), + 0x0017: ("GPSDestBearingRef", "Reference for bearing of destination"), + 0x0018: ("GPSDestBearing", "Bearing of destination"), + 0x0019: ("GPSDestDistanceRef", "Reference for distance to destination"), + 0x001A: ("GPSDestDistance", "Distance to destination"), + 0x001B: ("GPSProcessingMethod", "Name of GPS processing method"), + 0x001C: ("GPSAreaInformation", "Name of GPS area"), + 0x001D: ("GPSDateStamp", "GPS date"), + 0x001E: ("GPSDifferential", "GPS differential correction"), + } -class ExifIFD(FieldSet): - def seek(self, offset): - """ - Seek to byte address relative to parent address. - """ - padding = offset - (self.address + self.current_size)/8 - if 0 < padding: - return createPaddingField(self, padding*8) - else: - return None +class InteropIFDEntry(BasicIFDEntry): + TAG_INFO = { + 0x0001: ("InteroperabilityIndex", "Interoperability Identification"), + } + +class IFD(SeekableFieldSet): + EntryClass = IFDEntry + def __init__(self, parent, name, base_addr): + self.base_addr = base_addr + SeekableFieldSet.__init__(self, parent, name) def createFields(self): - offset_diff = 6 yield UInt16(self, "count", "Number of entries") - entries = [] - next_chunk_offset = None count = self["count"].value - if not count: - return - while count: - addr = self.absolute_address + self.current_size - next = self.stream.readBits(addr, 32, NETWORK_ENDIAN) - if next in (0, 0xF0000000): - break - entry = ExifEntry(self, "entry[]") - yield entry - if entry["tag"].value in (ExifEntry.EXIF_IFD_POINTER, ExifEntry.OFFSET_JPEG_SOI): - next_chunk_offset = entry["value"].value + offset_diff - if 32 < entry.getSizes()[0]: - entries.append(entry) - count -= 1 - yield UInt32(self, "next", "Next IFD offset") - try: - entries.sort( sortExifEntry ) - except TypeError: - raise ParserError("Unable to sort entries!") - value_index = 0 - for entry in entries: - padding = self.seek(entry["offset"].value + offset_diff) - if padding is not None: - yield padding - - value_size, array_size = entry.getSizes() - if not array_size: + if count == 0: + raise ParserError("IFDs cannot be empty.") + for i in xrange(count): + yield self.EntryClass(self, "entry[]") + yield UInt32(self, "next", "Offset to next IFD") + for i in xrange(count): + entry = self['entry[%d]'%i] + if 'offset' not in entry: continue - cls = entry.value_cls - if 1 < array_size: - name = "value_%s[]" % entry.name + self.seekByte(entry['offset'].value+self.base_addr//8, relative=False) + count = entry['count'].value + name = "value[%s]"%i + if issubclass(entry.value_cls, Bytes): + yield entry.value_cls(self, name, count) else: - name = "value_%s" % entry.name - desc = "Value of \"%s\"" % entry["tag"].display - if cls is String: - for index in xrange(array_size): - yield cls(self, name, value_size/8, desc, strip=" \0", charset="ISO-8859-1") - elif cls is Bytes: - for index in xrange(array_size): - yield cls(self, name, value_size/8, desc) - else: - for index in xrange(array_size): - yield cls(self, name, desc) - value_index += 1 - if next_chunk_offset is not None: - padding = self.seek(next_chunk_offset) - if padding is not None: - 
yield padding + if count > 1: + name += "[]" + for i in xrange(count): + yield entry.value_cls(self, name) - def createDescription(self): - return "Exif IFD (id %s)" % self["id"].value + def getEntryValues(self, entry): + n = int(entry.name.rsplit('[',1)[1].strip(']')) + if 'offset' in entry: + field = 'value[%d]'%n + base = self + else: + field = 'value' + base = entry + if field in base: + return [base[field]] + else: + return base.array(field) -class Exif(FieldSet): +class ExifIFD(IFD): + EntryClass = ExifIFDEntry + +class GPSIFD(IFD): + EntryClass = GPSIFDEntry + +class InteropIFD(IFD): + EntryClass = InteropIFDEntry + +IFD_TAGS = { + IFDEntry.EXIF_IFD_POINTER: ('exif', ExifIFD), + IFDEntry.GPS_IFD_POINTER: ('exif_gps', GPSIFD), + IFDEntry.INTEROP_IFD_POINTER: ('exif_interop', InteropIFD), +} + +def TIFF(self): + iff_start = self.absolute_address + self.current_size + yield String(self, "endian", 2, "Endian ('II' or 'MM')", charset="ASCII") + if self["endian"].value not in ("II", "MM"): + raise ParserError("Invalid endian!") + if self["endian"].value == "II": + self.endian = LITTLE_ENDIAN + else: + self.endian = BIG_ENDIAN + + yield UInt16(self, "version", "TIFF version number") + yield UInt32(self, "img_dir_ofs", "Next image directory offset") + offsets = [(self['img_dir_ofs'].value, 'ifd[]', IFD)] + while offsets: + offset, name, klass = offsets.pop(0) + self.seekByte(offset+iff_start//8, relative=False) + ifd = klass(self, name, iff_start) + yield ifd + for entry in ifd.array('entry'): + tag = entry['tag'].value + if tag in IFD_TAGS: + name, klass = IFD_TAGS[tag] + offsets.append((ifd.getEntryValues(entry)[0].value, name+'[]', klass)) + if ifd['next'].value != 0: + offsets.append((ifd['next'].value, 'ifd[]', IFD)) + +class Exif(SeekableFieldSet): def createFields(self): # Headers yield String(self, "header", 6, "Header (Exif\\0\\0)", charset="ASCII") if self["header"].value != "Exif\0\0": raise ParserError("Invalid EXIF signature!") - yield String(self, "byte_order", 2, "Byte order", charset="ASCII") - if self["byte_order"].value not in ("II", "MM"): - raise ParserError("Invalid endian!") - if self["byte_order"].value == "II": - self.endian = LITTLE_ENDIAN - else: - self.endian = BIG_ENDIAN - yield UInt16(self, "version", "TIFF version number") - yield UInt32(self, "img_dir_ofs", "Next image directory offset") - while not self.eof: - addr = self.absolute_address + self.current_size - tag = self.stream.readBits(addr, 16, NETWORK_ENDIAN) - if tag == 0xFFD8: - size = (self._size - self.current_size) // 8 - yield SubFile(self, "thumbnail", size, "Thumbnail (JPEG file)", mime_type="image/jpeg") - break - elif tag == 0xFFFF: - break - yield ExifIFD(self, "ifd[]", "IFD") - padding = self.seekBit(self._size) - if padding is not None: - yield padding - + iff_start = self.absolute_address + self.current_size + ifds = [] + for field in TIFF(self): + yield field + if isinstance(field, IFD): + ifds.append(field) + for ifd in ifds: + data = {} + for i, entry in enumerate(ifd.array('entry')): + data[entry['tag'].display] = entry + if 'JPEGInterchangeFormat' in data and 'JPEGInterchangeFormatLength' in data: + offs = ifd.getEntryValues(data['JPEGInterchangeFormat'])[0].value + size = ifd.getEntryValues(data['JPEGInterchangeFormatLength'])[0].value + if size == 0: continue + self.seekByte(offs + iff_start//8, relative=False) + yield SubFile(self, "thumbnail[]", size, "Thumbnail (JPEG file)", mime_type="image/jpeg") diff --git a/lib/hachoir_parser/image/gif.py b/lib/hachoir_parser/image/gif.py index 
777ba658..b870b673 100644 --- a/lib/hachoir_parser/image/gif.py +++ b/lib/hachoir_parser/image/gif.py @@ -1,25 +1,162 @@ """ GIF picture parser. -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao + +- GIF format + http://local.wasp.uwa.edu.au/~pbourke/dataformats/gif/ +- LZW compression + http://en.wikipedia.org/wiki/LZW """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, Enum, UInt8, UInt16, Bit, Bits, NullBytes, String, PascalString8, Character, NullBits, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import textHandler, displayHandler, hexadecimal +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.stream import StringInputStream +from hachoir_core.tools import humanDuration, paddingSize +from hachoir_core.text_handler import textHandler, displayHandler, hexadecimal # Maximum image dimension (in pixel) MAX_WIDTH = 6000 MAX_HEIGHT = MAX_WIDTH MAX_FILE_SIZE = 100 * 1024 * 1024 +class FragmentGroup: + def __init__(self, parser): + self.items = [] + self.parser = parser + self.args = {} + + def add(self, item): + self.items.append(item) + + def createInputStream(self): + # FIXME: Use lazy stream creation + data = [] + for item in self.items: + data.append( item["rawdata"].value ) + data = "".join(data) + + # FIXME: Use smarter code to send arguments + self.args["startbits"] = self.items[0].parent["lzw_min_code_size"].value + tags = {"class": self.parser, "args": self.args} + tags = tags.iteritems() + return StringInputStream(data, "", tags=tags) + +class CustomFragment(FieldSet): + def __init__(self, parent, name, size, parser, description=None, group=None): + FieldSet.__init__(self, parent, name, description, size=size) + if not group: + group = FragmentGroup(parser) + self.group = group + self.group.add(self) + + def createFields(self): + yield UInt8(self, "size") + yield RawBytes(self, "rawdata", self["size"].value) + + def _createInputStream(self, **args): + return self.group.createInputStream() + +def rle_repr(l): + """Run-length encode a list into an "eval"-able form + + Example: + >>> rle_repr([20, 16, 16, 16, 16, 16, 18, 18, 65]) + '[20] + [16]*5 + [18]*2 + [65]' + + Adapted from http://twistedmatrix.com/trac/browser/trunk/twisted/python/dxprofile.py + """ + def add_rle(previous, runlen, result): + if isinstance(previous, (list, tuple)): + previous = rle_repr(previous) + if runlen>1: + result.append('[%s]*%i'%(previous, runlen)) + else: + if result and '*' not in result[-1]: + result[-1] = '[%s, %s]'%(result[-1][1:-1], previous) + else: + result.append('[%s]'%previous) + iterable = iter(l) + runlen = 1 + result = [] + try: + previous = iterable.next() + except StopIteration: + return "[]" + for element in iterable: + if element == previous: + runlen = runlen + 1 + continue + else: + add_rle(previous, runlen, result) + previous = element + runlen = 1 + add_rle(previous, runlen, result) + return ' + '.join(result) + +class GifImageBlock(Parser): + endian = LITTLE_ENDIAN + def createFields(self): + dictionary = {} + self.nbits = self.startbits + CLEAR_CODE = 2**self.nbits + END_CODE = CLEAR_CODE + 1 + compress_code = CLEAR_CODE + 2 + obuf = [] + output = [] + while True: + if compress_code >= 2**self.nbits: + 
self.nbits += 1 + code = Bits(self, "code[]", self.nbits) + if code.value == CLEAR_CODE: + if compress_code == 2**(self.nbits-1): + # this fixes a bizarre edge case where the reset code could + # appear just after the bits incremented. Apparently, the + # correct behaviour is to express the reset code with the + # old number of bits, not the new... + code = Bits(self, "code[]", self.nbits-1) + self.nbits = self.startbits + 1 + dictionary = {} + compress_code = CLEAR_CODE + 2 + obuf = [] + code._description = "Reset Code (LZW code %i)" % code.value + yield code + continue + elif code.value == END_CODE: + code._description = "End of Information Code (LZW code %i)" % code.value + yield code + break + if code.value < CLEAR_CODE: # literal + if obuf: + chain = obuf + [code.value] + dictionary[compress_code] = chain + compress_code += 1 + obuf = [code.value] + output.append(code.value) + code._description = "Literal Code %i" % code.value + elif code.value >= CLEAR_CODE + 2: + if code.value in dictionary: + chain = dictionary[code.value] + code._description = "Compression Code %i (found in dictionary as %s)" % (code.value, rle_repr(chain)) + else: + chain = obuf + [obuf[0]] + code._description = "Compression Code %i (not found in dictionary; guessed to be %s)" % (code.value, rle_repr(chain)) + dictionary[compress_code] = obuf + [chain[0]] + compress_code += 1 + obuf = chain + output += chain + code._description += "; Current Decoded Length %i"%len(output) + yield code + padding = paddingSize(self.current_size, 8) + if padding: + yield NullBits(self, "padding[]", padding) + class Image(FieldSet): def createFields(self): yield UInt16(self, "left", "Left") @@ -27,24 +164,26 @@ class Image(FieldSet): yield UInt16(self, "width", "Width") yield UInt16(self, "height", "Height") - yield Bits(self, "bpp", 3, "Bits / pixel minus one") - yield NullBits(self, "nul", 2) - yield Bit(self, "sorted", "Sorted??") + yield Bits(self, "size_local_map", 3, "log2(size of local map) minus one") + yield NullBits(self, "reserved", 2) + yield Bit(self, "sort_flag", "Is the local map sorted by decreasing importance?") yield Bit(self, "interlaced", "Interlaced?") yield Bit(self, "has_local_map", "Use local color map?") if self["has_local_map"].value: - nb_color = 1 << (1 + self["bpp"].value) + nb_color = 1 << (1 + self["size_local_map"].value) yield PaletteRGB(self, "local_map", nb_color, "Local color map") - yield UInt8(self, "code_size", "LZW Minimum Code Size") + yield UInt8(self, "lzw_min_code_size", "LZW Minimum Code Size") + group = None while True: - blen = UInt8(self, "block_len[]", "Block Length") - yield blen - if blen.value != 0: - yield RawBytes(self, "data[]", blen.value, "Image Data") - else: + size = UInt8(self, "block_size") + if size.value == 0: break + block = CustomFragment(self, "image_block[]", None, GifImageBlock, "GIF Image Block", group) + group = block.group + yield block + yield NullBytes(self, "terminator", 1, "Terminator (0)") def createDescription(self): return "Image: %ux%u pixels at (%u,%u)" % ( @@ -64,16 +203,19 @@ NETSCAPE_CODE = { def parseApplicationExtension(parent): yield PascalString8(parent, "app_name", "Application name") - yield UInt8(parent, "size") - size = parent["size"].value - if parent["app_name"].value == "NETSCAPE2.0" and size == 3: - yield Enum(UInt8(parent, "netscape_code"), NETSCAPE_CODE) - if parent["netscape_code"].value == 1: - yield UInt16(parent, "loop_count") + while True: + size = UInt8(parent, "size[]") + if size.value == 0: + break + yield size + if 
parent["app_name"].value == "NETSCAPE2.0" and size.value == 3: + yield Enum(UInt8(parent, "netscape_code"), NETSCAPE_CODE) + if parent["netscape_code"].value == 1: + yield UInt16(parent, "loop_count") + else: + yield RawBytes(parent, "raw[]", 2) else: - yield RawBytes(parent, "raw", 2) - else: - yield RawBytes(parent, "raw", size) + yield RawBytes(parent, "raw[]", size.value) yield NullBytes(parent, "terminator", 1, "Terminator (0)") def parseGraphicControl(parent): @@ -149,15 +291,20 @@ class ScreenDescriptor(FieldSet): def createFields(self): yield UInt16(self, "width", "Width") yield UInt16(self, "height", "Height") - yield Bits(self, "bpp", 3, "Bits per pixel minus one") - yield Bit(self, "reserved", "(reserved)") + yield Bits(self, "size_global_map", 3, "log2(size of global map) minus one") + yield Bit(self, "sort_flag", "Is the global map sorted by decreasing importance?") yield Bits(self, "color_res", 3, "Color resolution minus one") yield Bit(self, "global_map", "Has global map?") yield UInt8(self, "background", "Background color") - yield UInt8(self, "pixel_aspect_ratio", "Pixel Aspect Ratio") + field = UInt8(self, "pixel_aspect_ratio") + if field.value: + field._description = "Pixel aspect ratio: %f (stored as %i)"%((field.value + 15)/64., field.value) + else: + field._description = "Pixel aspect ratio: not specified" + yield field def createDescription(self): - colors = 1 << (self["bpp"].value+1) + colors = 1 << (self["size_global_map"].value+1) return "Screen descriptor: %ux%u pixels %u colors" \ % (self["width"].value, self["height"].value, colors) @@ -196,7 +343,7 @@ class GifFile(Parser): yield ScreenDescriptor(self, "screen") if self["screen/global_map"].value: - bpp = (self["screen/bpp"].value+1) + bpp = (self["screen/size_global_map"].value+1) yield PaletteRGB(self, "color_map", 1 << bpp, "Color map") self.color_map = self["color_map"] else: diff --git a/lib/hachoir_parser/image/ico.py b/lib/hachoir_parser/image/ico.py index fc4282d7..193a81c6 100644 --- a/lib/hachoir_parser/image/ico.py +++ b/lib/hachoir_parser/image/ico.py @@ -4,12 +4,12 @@ Microsoft Windows icon and cursor file format parser. Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, Enum, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGBA -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.win32 import BitmapInfoHeader +from hachoir_parser.image.common import PaletteRGBA +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.common.win32 import BitmapInfoHeader class IconHeader(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/image/iptc.py b/lib/hachoir_parser/image/iptc.py index c814ed7d..6727de7f 100644 --- a/lib/hachoir_parser/image/iptc.py +++ b/lib/hachoir_parser/image/iptc.py @@ -11,9 +11,9 @@ Sources: Author: Victor Stinner """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, String, RawBytes, NullBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.text_handler import textHandler, hexadecimal def IPTC_String(parent, name, desc=None): # Charset may be utf-8, ISO-8859-1, or ... 
diff --git a/lib/hachoir_parser/image/jpeg.py b/lib/hachoir_parser/image/jpeg.py index 89965788..4a361962 100644 --- a/lib/hachoir_parser/image/jpeg.py +++ b/lib/hachoir_parser/image/jpeg.py @@ -8,21 +8,25 @@ Information: http://java.sun.com/j2se/1.5.0/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html#color - APP12: http://search.cpan.org/~exiftool/Image-ExifTool/lib/Image/ExifTool/TagNames.pod +- JPEG Data Format + http://www.w3.org/Graphics/JPEG/itu-t81.pdf -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao """ -from lib.hachoir_core.error import HachoirError -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, Enum, - Bit, Bits, NullBits, NullBytes, +from hachoir_core.error import HachoirError +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, FieldError, + UInt8, UInt16, Enum, Field, + Bit, Bits, NullBits, NullBytes, PaddingBits, String, RawBytes) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.image.exif import Exif -from lib.hachoir_parser.image.photoshop_metadata import PhotoshopMetadata +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.image.exif import Exif +from hachoir_parser.image.photoshop_metadata import PhotoshopMetadata +from hachoir_parser.archive.zlib import build_tree +from hachoir_core.tools import paddingSize, alignValue MAX_FILESIZE = 100 * 1024 * 1024 @@ -144,6 +148,13 @@ class APP12(FieldSet): while not self.eof: yield Ducky(self, "item[]") +class SOFComponent(FieldSet): + def createFields(self): + yield UInt8(self, "component_id") + yield Bits(self, "horiz_sample", 4, "Horizontal sampling factor") + yield Bits(self, "vert_sample", 4, "Vertical sampling factor") + yield UInt8(self, "quant_table", "Quantization table destination selector") + class StartOfFrame(FieldSet): def createFields(self): yield UInt8(self, "precision") @@ -153,9 +164,7 @@ class StartOfFrame(FieldSet): yield UInt8(self, "nr_components") for index in range(self["nr_components"].value): - yield UInt8(self, "component_id[]") - yield UInt8(self, "high[]") - yield UInt8(self, "low[]") + yield SOFComponent(self, "component[]") class Comment(FieldSet): def createFields(self): @@ -178,17 +187,25 @@ class AdobeChunk(FieldSet): yield NullBytes(self, "flags1", 2) yield Enum(UInt8(self, "color_transform", "Colorspace transformation code"), self.COLORSPACE_TRANSFORMATION) +class SOSComponent(FieldSet): + def createFields(self): + comp_id = UInt8(self, "component_id") + yield comp_id + if not(1 <= comp_id.value <= self["../nr_components"].value): + raise ParserError("JPEG error: Invalid component-id") + yield Bits(self, "dc_coding_table", 4, "DC entropy coding table destination selector") + yield Bits(self, "ac_coding_table", 4, "AC entropy coding table destination selector") + class StartOfScan(FieldSet): def createFields(self): yield UInt8(self, "nr_components") for index in range(self["nr_components"].value): - comp_id = UInt8(self, "component_id[]") - yield comp_id - if not(1 <= comp_id.value <= self["nr_components"].value): - raise ParserError("JPEG error: Invalid component-id") - yield UInt8(self, "value[]") - yield RawBytes(self, "raw", 3) # TODO: What's this??? 
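+ # Scan-header layout per ITU-T T.81 section B.2.3: each component entry
+ # is a component selector byte followed by a packed byte whose high
+ # nibble picks the DC entropy-coding table and whose low nibble picks
+ # the AC table - exactly the three fields SOSComponent yields.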
+ yield SOSComponent(self, "component[]") + yield UInt8(self, "spectral_start", "Start of spectral or predictor selection") + yield UInt8(self, "spectral_end", "End of spectral selection") + yield Bits(self, "bit_pos_high", 4, "Successive approximation bit position high") + yield Bits(self, "bit_pos_low", 4, "Successive approximation bit position low or point transform") class RestartInterval(FieldSet): def createFields(self): @@ -217,6 +234,182 @@ class DefineQuantizationTable(FieldSet): while self.current_size < self.size: yield QuantizationTable(self, "qt[]") +class HuffmanTable(FieldSet): + def createFields(self): + # http://www.w3.org/Graphics/JPEG/itu-t81.pdf, page 40-41 + yield Enum(Bits(self, "table_class", 4, "Table class"), { + 0:"DC or Lossless Table", + 1:"AC Table"}) + yield Bits(self, "index", 4, "Huffman table destination identifier") + for i in xrange(1, 17): + yield UInt8(self, "count[%i]" % i, "Number of codes of length %i" % i) + lengths = [] + remap = {} + for i in xrange(1, 17): + for j in xrange(self["count[%i]" % i].value): + field = UInt8(self, "value[%i][%i]" % (i, j), "Value of code #%i of length %i" % (j, i)) + yield field + remap[len(lengths)] = field.value + lengths.append(i) + self.tree = {} + for i,j in build_tree(lengths).iteritems(): + self.tree[i] = remap[j] + +class DefineHuffmanTable(FieldSet): + def createFields(self): + while self.current_size < self.size: + yield HuffmanTable(self, "huffman_table[]") + +class HuffmanCode(Field): + """Huffman code. Uses tree parameter as the Huffman tree.""" + def __init__(self, parent, name, tree, description=""): + Field.__init__(self, parent, name, 0, description) + + endian = self.parent.endian + stream = self.parent.stream + addr = self.absolute_address + + value = 0 + met_ff = False + while (self.size, value) not in tree: + if addr % 8 == 0: + last_byte = stream.readBytes(addr - 8, 1) + if last_byte == '\xFF': + next_byte = stream.readBytes(addr, 1) + if next_byte != '\x00': + raise FieldError("Unexpected byte sequence %r!"%(last_byte + next_byte)) + addr += 8 # hack hack hack + met_ff = True + self._description = "[skipped 8 bits after 0xFF] " + bit = stream.readBits(addr, 1, endian) + value <<= 1 + value += bit + self._size += 1 + addr += 1 + self.createValue = lambda: value + self.realvalue = tree[(self.size, value)] + if met_ff: + self._size += 8 + +class JpegHuffmanImageUnit(FieldSet): + """8x8 block of sample/coefficient values""" + def __init__(self, parent, name, dc_tree, ac_tree, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.dc_tree = dc_tree + self.ac_tree = ac_tree + + def createFields(self): + field = HuffmanCode(self, "dc_data", self.dc_tree) + field._description = "DC Code %i (Huffman Code %i)" % (field.realvalue, field.value) + field._description + yield field + if field.realvalue != 0: + extra = Bits(self, "dc_data_extra", field.realvalue) + if extra.value < 2**(field.realvalue - 1): + corrected_value = extra.value + (-1 << field.realvalue) + 1 + else: + corrected_value = extra.value + extra._description = "Extra Bits: Corrected DC Value %i" % corrected_value + yield extra + data = [] + while len(data) < 63: + field = HuffmanCode(self, "ac_data[]", self.ac_tree) + value_r = field.realvalue >> 4 + if value_r: + data += [0] * value_r + value_s = field.realvalue & 0x0F + if value_r == value_s == 0: + field._description = "AC Code Block Terminator (0, 0) (Huffman Code %i)" % field.value + field._description + yield field + return + field._description = "AC Code %i, 
%i (Huffman Code %i)" % (value_r, value_s, field.value) + field._description + yield field + if value_s != 0: + extra = Bits(self, "ac_data_extra[%s" % field.name.split('[')[1], value_s) + if extra.value < 2**(value_s - 1): + corrected_value = extra.value + (-1 << value_s) + 1 + else: + corrected_value = extra.value + extra._description = "Extra Bits: Corrected AC Value %i" % corrected_value + data.append(corrected_value) + yield extra + else: + data.append(0) + +class JpegImageData(FieldSet): + def __init__(self, parent, name, frame, scan, restart_interval, restart_offset=0, *args, **kwargs): + FieldSet.__init__(self, parent, name, *args, **kwargs) + self.frame = frame + self.scan = scan + self.restart_interval = restart_interval + self.restart_offset = restart_offset + # try to figure out where this field ends + start = self.absolute_address + while True: + end = self.stream.searchBytes("\xff", start, MAX_FILESIZE*8) + if end is None: + # this is a bad sign, since it means there is no terminator + # we ignore this; it likely means a truncated image + break + if self.stream.readBytes(end, 2) == '\xff\x00': + # padding: false alarm + start=end+16 + continue + else: + self._size = end-self.absolute_address + break + + def createFields(self): + if self.frame["../type"].value in [0xC0, 0xC1]: + # yay, huffman coding! + if not hasattr(self, "huffman_tables"): + self.huffman_tables = {} + for huffman in self.parent.array("huffman"): + for table in huffman["content"].array("huffman_table"): + for _dummy_ in table: + # exhaust table, so the huffman tree is built + pass + self.huffman_tables[table["table_class"].value, table["index"].value] = table.tree + components = [] # sos_comp, samples + max_vert = 0 + max_horiz = 0 + for component in self.scan.array("component"): + for sof_comp in self.frame.array("component"): + if sof_comp["component_id"].value == component["component_id"].value: + vert = sof_comp["vert_sample"].value + horiz = sof_comp["horiz_sample"].value + components.append((component, vert * horiz)) + max_vert = max(max_vert, vert) + max_horiz = max(max_horiz, horiz) + mcu_height = alignValue(self.frame["height"].value, 8 * max_vert) // (8 * max_vert) + mcu_width = alignValue(self.frame["width"].value, 8 * max_horiz) // (8 * max_horiz) + if self.restart_interval and self.restart_offset > 0: + mcu_number = self.restart_interval * self.restart_offset + else: + mcu_number = 0 + initial_mcu = mcu_number + while True: + if (self.restart_interval and mcu_number != initial_mcu and mcu_number % self.restart_interval == 0) or\ + mcu_number == mcu_height * mcu_width: + padding = paddingSize(self.current_size, 8) + if padding: + yield PaddingBits(self, "padding[]", padding) # all 1s + last_byte = self.stream.readBytes(self.absolute_address + self.current_size - 8, 1) + if last_byte == '\xFF': + next_byte = self.stream.readBytes(self.absolute_address + self.current_size, 1) + if next_byte != '\x00': + raise FieldError("Unexpected byte sequence %r!"%(last_byte + next_byte)) + yield NullBytes(self, "stuffed_byte[]", 1) + break + for sos_comp, num_units in components: + for interleave_count in range(num_units): + yield JpegHuffmanImageUnit(self, "block[%i]component[%i][]" % (mcu_number, sos_comp["component_id"].value), + self.huffman_tables[0, sos_comp["dc_coding_table"].value], + self.huffman_tables[1, sos_comp["ac_coding_table"].value]) + mcu_number += 1 + else: + self.warning("Sorry, only supporting Baseline & Extended Sequential JPEG images so far!") + return + class JpegChunk(FieldSet): TAG_SOI 
= 0xD8 TAG_EOI = 0xD9 @@ -224,10 +417,18 @@ class JpegChunk(FieldSet): TAG_DQT = 0xDB TAG_DRI = 0xDD TAG_INFO = { - 0xC4: ("huffman[]", "Define Huffman Table (DHT)", None), + 0xC4: ("huffman[]", "Define Huffman Table (DHT)", DefineHuffmanTable), 0xD8: ("start_image", "Start of image (SOI)", None), 0xD9: ("end_image", "End of image (EOI)", None), - 0xDA: ("start_scan", "Start Of Scan (SOS)", StartOfScan), + 0xD0: ("restart_marker_0[]", "Restart Marker (RST0)", None), + 0xD1: ("restart_marker_1[]", "Restart Marker (RST1)", None), + 0xD2: ("restart_marker_2[]", "Restart Marker (RST2)", None), + 0xD3: ("restart_marker_3[]", "Restart Marker (RST3)", None), + 0xD4: ("restart_marker_4[]", "Restart Marker (RST4)", None), + 0xD5: ("restart_marker_5[]", "Restart Marker (RST5)", None), + 0xD6: ("restart_marker_6[]", "Restart Marker (RST6)", None), + 0xD7: ("restart_marker_7[]", "Restart Marker (RST7)", None), + 0xDA: ("start_scan[]", "Start Of Scan (SOS)", StartOfScan), 0xDB: ("quantization[]", "Define Quantization Table (DQT)", DefineQuantizationTable), 0xDC: ("nb_line", "Define number of Lines (DNL)", None), 0xDD: ("restart_interval", "Define Restart Interval (DRI)", RestartInterval), @@ -280,7 +481,7 @@ class JpegChunk(FieldSet): raise ParserError("JPEG: Invalid chunk header!") yield textHandler(UInt8(self, "type", "Type"), hexadecimal) tag = self["type"].value - if tag in (self.TAG_SOI, self.TAG_EOI): + if tag in [self.TAG_SOI, self.TAG_EOI] + range(0xD0, 0xD8): # D0 - D7 inclusive are the restart markers return yield UInt16(self, "size", "Size") size = (self["size"].value - 2) @@ -326,12 +527,31 @@ class JpegFile(Parser): return True def createFields(self): + frame = None + scan = None + restart_interval = None + restart_offset = 0 while not self.eof: chunk = JpegChunk(self, "chunk[]") yield chunk + if chunk["type"].value in JpegChunk.START_OF_FRAME: + if chunk["type"].value not in [0xC0, 0xC1]: # SOF0 [Baseline], SOF1 [Extended Sequential] + self.warning("Only supporting Baseline & Extended Sequential JPEG images so far!") + frame = chunk["content"] if chunk["type"].value == JpegChunk.TAG_SOS: - # TODO: Read JPEG image data... - break + if not frame: + self.warning("Missing or invalid SOF marker before SOS!") + continue + scan = chunk["content"] + # hack: scan only the fields seen so far (in _fields): don't use the generator + if "restart_interval" in self._fields: + restart_interval = self["restart_interval/content/interval"].value + else: + restart_interval = None + yield JpegImageData(self, "image_data[]", frame, scan, restart_interval) + elif chunk["type"].value in range(0xD0, 0xD8): + restart_offset += 1 + yield JpegImageData(self, "image_data[]", frame, scan, restart_interval, restart_offset) # TODO: is it possible to handle piped input? if self._size is None: @@ -350,8 +570,8 @@ class JpegFile(Parser): def createDescription(self): desc = "JPEG picture" - if "sof/content" in self: - header = self["sof/content"] + if "start_frame/content" in self: + header = self["start_frame/content"] desc += ": %ux%u pixels" % (header["width"].value, header["height"].value) return desc @@ -365,4 +585,3 @@ class JpegFile(Parser): if end is not None: return end + 16 return None - diff --git a/lib/hachoir_parser/image/pcx.py b/lib/hachoir_parser/image/pcx.py index cf23a7cb..cb2a63bf 100644 --- a/lib/hachoir_parser/image/pcx.py +++ b/lib/hachoir_parser/image/pcx.py @@ -2,13 +2,13 @@ PCX picture filter. 
""" -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( UInt8, UInt16, PaddingBytes, RawBytes, Enum) -from lib.hachoir_parser.image.common import PaletteRGB -from lib.hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.image.common import PaletteRGB +from hachoir_core.endian import LITTLE_ENDIAN class PcxFile(Parser): endian = LITTLE_ENDIAN diff --git a/lib/hachoir_parser/image/photoshop_metadata.py b/lib/hachoir_parser/image/photoshop_metadata.py index 89670bcb..15fed726 100644 --- a/lib/hachoir_parser/image/photoshop_metadata.py +++ b/lib/hachoir_parser/image/photoshop_metadata.py @@ -1,11 +1,19 @@ -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, UInt32, - String, CString, PascalString8, +""" Photoshop metadata parser. + +References: +- http://www.scribd.com/doc/32900475/Photoshop-File-Formats +""" + +from hachoir_core.field import (FieldSet, ParserError, + UInt8, UInt16, UInt32, Float32, Enum, + SubFile, String, CString, PascalString8, NullBytes, RawBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import alignValue, createDict -from lib.hachoir_parser.image.iptc import IPTC -from lib.hachoir_parser.common.win32 import PascalStringWin32 +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import alignValue, createDict +from hachoir_parser.image.iptc import IPTC +from hachoir_parser.common.win32 import PascalStringWin32 + +BOOL = {0: False, 1: True} class Version(FieldSet): def createFields(self): @@ -18,25 +26,102 @@ class Version(FieldSet): if size: yield NullBytes(self, "padding", size) +class FixedFloat32(FieldSet): + def createFields(self): + yield UInt16(self, "int_part") + yield UInt16(self, "float_part") + + def createValue(self): + return self["int_part"].value + float(self["float_part"].value) / (1<<16) + +class ResolutionInfo(FieldSet): + def createFields(self): + yield FixedFloat32(self, "horiz_res") + yield Enum(UInt16(self, "horiz_res_unit"), {1:'px/in', 2:'px/cm'}) + yield Enum(UInt16(self, "width_unit"), {1:'inches', 2:'cm', 3:'points', 4:'picas', 5:'columns'}) + yield FixedFloat32(self, "vert_res") + yield Enum(UInt16(self, "vert_res_unit"), {1:'px/in', 2:'px/cm'}) + yield Enum(UInt16(self, "height_unit"), {1:'inches', 2:'cm', 3:'points', 4:'picas', 5:'columns'}) + +class PrintScale(FieldSet): + def createFields(self): + yield Enum(UInt16(self, "style"), {0:'centered', 1:'size to fit', 2:'user defined'}) + yield Float32(self, "x_location") + yield Float32(self, "y_location") + yield Float32(self, "scale") + +class PrintFlags(FieldSet): + def createFields(self): + yield Enum(UInt8(self, "labels"), BOOL) + yield Enum(UInt8(self, "crop_marks"), BOOL) + yield Enum(UInt8(self, "color_bars"), BOOL) + yield Enum(UInt8(self, "reg_marks"), BOOL) + yield Enum(UInt8(self, "negative"), BOOL) + yield Enum(UInt8(self, "flip"), BOOL) + yield Enum(UInt8(self, "interpolate"), BOOL) + yield Enum(UInt8(self, "caption"), BOOL) + yield Enum(UInt8(self, "print_flags"), BOOL) + yield Enum(UInt8(self, "unknown"), BOOL) + + def createValue(self): + return [field.name for field in self if field.value] + + def createDisplay(self): + return ', '.join(self.value) + +class PrintFlags2(FieldSet): + def createFields(self): + yield UInt16(self, "version") + yield UInt8(self, "center_crop_marks") + yield UInt8(self, "reserved") + yield UInt32(self, "bleed_width") + yield UInt16(self, 
"bleed_width_scale") + +class GridGuides(FieldSet): + def createFields(self): + yield UInt32(self, "version") + yield UInt32(self, "horiz_cycle", "Horizontal grid spacing, in quarter inches") + yield UInt32(self, "vert_cycle", "Vertical grid spacing, in quarter inches") + yield UInt32(self, "guide_count", "Number of guide resource blocks (can be 0)") + +class Thumbnail(FieldSet): + def createFields(self): + yield Enum(UInt32(self, "format"), {0:'Raw RGB', 1:'JPEG RGB'}) + yield UInt32(self, "width", "Width of thumbnail in pixels") + yield UInt32(self, "height", "Height of thumbnail in pixels") + yield UInt32(self, "widthbytes", "Padded row bytes = (width * bits per pixel + 31) / 32 * 4") + yield UInt32(self, "uncompressed_size", "Total size = widthbytes * height * planes") + yield UInt32(self, "compressed_size", "Size after compression. Used for consistency check") + yield UInt16(self, "bits_per_pixel") + yield UInt16(self, "num_planes") + yield SubFile(self, "thumbnail", self['compressed_size'].value, "Thumbnail (JPEG file)", mime_type="image/jpeg") + class Photoshop8BIM(FieldSet): TAG_INFO = { - 0x03ed: ("res_info", None, "Resolution information"), - 0x03f3: ("print_flag", None, "Print flags: labels, crop marks, colour bars, etc."), + 0x03ed: ("res_info", ResolutionInfo, "Resolution information"), + 0x03f3: ("print_flag", PrintFlags, "Print flags: labels, crop marks, colour bars, etc."), 0x03f5: ("col_half_info", None, "Colour half-toning information"), 0x03f8: ("color_trans_func", None, "Colour transfer function"), 0x0404: ("iptc", IPTC, "IPTC/NAA"), 0x0406: ("jpeg_qual", None, "JPEG quality"), - 0x0408: ("grid_guide", None, "Grid guides informations"), - 0x040a: ("copyright_flag", None, "Copyright flag"), - 0x040c: ("thumb_res2", None, "Thumbnail resource (2)"), - 0x040d: ("glob_angle", None, "Global lighting angle for effects"), + 0x0408: ("grid_guide", GridGuides, "Grid guides informations"), + 0x0409: ("thumb_res", Thumbnail, "Thumbnail resource (PS 4.0)"), + 0x0410: ("watermark", UInt8, "Watermark"), + 0x040a: ("copyright_flag", UInt8, "Copyright flag"), + 0x040b: ("url", None, "URL"), + 0x040c: ("thumb_res2", Thumbnail, "Thumbnail resource (PS 5.0)"), + 0x040d: ("glob_angle", UInt32, "Global lighting angle for effects"), 0x0411: ("icc_tagged", None, "ICC untagged (1 means intentionally untagged)"), - 0x0414: ("base_layer_id", None, "Base value for new layers ID's"), - 0x0419: ("glob_altitude", None, "Global altitude"), + 0x0414: ("base_layer_id", UInt32, "Base value for new layers ID's"), + 0x0416: ("indexed_colors", UInt16, "Number of colors in table that are actually defined"), + 0x0417: ("transparency_index", UInt16, "Index of transparent color"), + 0x0419: ("glob_altitude", UInt32, "Global altitude"), 0x041a: ("slices", None, "Slices"), - 0x041e: ("url_list", None, "Unicode URL's"), + 0x041e: ("url_list", None, "Unicode URLs"), 0x0421: ("version", Version, "Version information"), - 0x2710: ("print_flag2", None, "Print flags (2)"), + 0x0425: ("caption_digest", None, "16-byte MD5 caption digest"), + 0x0426: ("printscale", PrintScale, "Printer scaling"), + 0x2710: ("print_flag2", PrintFlags2, "Print flags (2)"), } TAG_NAME = createDict(TAG_INFO, 0) CONTENT_HANDLER = createDict(TAG_INFO, 1) @@ -67,7 +152,10 @@ class Photoshop8BIM(FieldSet): if not size: return if self.handler: - yield self.handler(self, "content", size=size*8) + if issubclass(self.handler, FieldSet): + yield self.handler(self, "content", size=size*8) + else: + yield self.handler(self, "content") else: yield 
RawBytes(self, "content", size) diff --git a/lib/hachoir_parser/image/png.py b/lib/hachoir_parser/image/png.py index 6ef8fd8e..acbfc850 100644 --- a/lib/hachoir_parser/image/png.py +++ b/lib/hachoir_parser/image/png.py @@ -8,21 +8,21 @@ Documents: Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, Fragment, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Fragment, ParserError, MissingField, UInt8, UInt16, UInt32, String, CString, Bytes, RawBytes, Bit, NullBits, Enum, CompressedField) -from lib.hachoir_parser.image.common import RGB -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import NETWORK_ENDIAN -from lib.hachoir_core.tools import humanFilesize +from hachoir_parser.image.common import RGB +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import NETWORK_ENDIAN +from hachoir_core.tools import humanFilesize from datetime import datetime -MAX_FILESIZE = 500 * 1024 * 1024 +MAX_FILESIZE = 500 * 1024 * 1024 # 500 MB try: from zlib import decompressobj @@ -44,7 +44,7 @@ UNIT_NAME = {1: "Meter"} COMPRESSION_NAME = { 0: u"deflate" # with 32K sliding window } -MAX_CHUNK_SIZE = 500 * 1024 # Maximum chunk size (500 KB) +MAX_CHUNK_SIZE = 5 * 1024 * 1024 # Maximum chunk size (5 MB) def headerParse(parent): yield UInt32(parent, "width", "Width (pixels)") diff --git a/lib/hachoir_parser/image/psd.py b/lib/hachoir_parser/image/psd.py index 5eb8f76e..6ea09fb1 100644 --- a/lib/hachoir_parser/image/psd.py +++ b/lib/hachoir_parser/image/psd.py @@ -5,11 +5,11 @@ Creation date: 8 january 2006 Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, String, NullBytes, Enum, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.image.photoshop_metadata import Photoshop8BIM +from hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.image.photoshop_metadata import Photoshop8BIM class Config(FieldSet): def __init__(self, *args): diff --git a/lib/hachoir_parser/image/tga.py b/lib/hachoir_parser/image/tga.py index b1d699f8..716ab28a 100644 --- a/lib/hachoir_parser/image/tga.py +++ b/lib/hachoir_parser/image/tga.py @@ -5,10 +5,10 @@ Author: Victor Stinner Creation: 18 december 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import FieldSet, UInt8, UInt16, Enum, RawBytes -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.image.common import PaletteRGB +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, UInt8, UInt16, Enum, RawBytes +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.image.common import PaletteRGB class Line(FieldSet): def __init__(self, *args): diff --git a/lib/hachoir_parser/image/tiff.py b/lib/hachoir_parser/image/tiff.py index ceeba537..30dedd8b 100644 --- a/lib/hachoir_parser/image/tiff.py +++ b/lib/hachoir_parser/image/tiff.py @@ -1,165 +1,35 @@ """ TIFF image parser. 
-Authors: Victor Stinner and Sebastien Ponce +Authors: Victor Stinner, Sebastien Ponce, Robert Xiao Creation date: 30 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, SeekableFieldSet, ParserError, RootSeekableFieldSet, - UInt16, UInt32, Bytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.image.exif import BasicIFDEntry -from lib.hachoir_core.tools import createDict +from hachoir_parser import Parser +from hachoir_core.field import FieldSet, SeekableFieldSet, RootSeekableFieldSet, Bytes +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser.image.exif import TIFF -MAX_COUNT = 250 - -class IFDEntry(BasicIFDEntry): - static_size = 12*8 - - TAG_INFO = { - 254: ("new_subfile_type", "New subfile type"), - 255: ("subfile_type", "Subfile type"), - 256: ("img_width", "Image width in pixels"), - 257: ("img_height", "Image height in pixels"), - 258: ("bits_per_sample", "Bits per sample"), - 259: ("compression", "Compression method"), - 262: ("photo_interpret", "Photometric interpretation"), - 263: ("thres", "Thresholding"), - 264: ("cell_width", "Cellule width"), - 265: ("cell_height", "Cellule height"), - 266: ("fill_order", "Fill order"), - 269: ("doc_name", "Document name"), - 270: ("description", "Image description"), - 271: ("make", "Make"), - 272: ("model", "Model"), - 273: ("strip_ofs", "Strip offsets"), - 274: ("orientation", "Orientation"), - 277: ("sample_pixel", "Samples per pixel"), - 278: ("row_per_strip", "Rows per strip"), - 279: ("strip_byte", "Strip byte counts"), - 280: ("min_sample_value", "Min sample value"), - 281: ("max_sample_value", "Max sample value"), - 282: ("xres", "X resolution"), - 283: ("yres", "Y resolution"), - 284: ("planar_conf", "Planar configuration"), - 285: ("page_name", "Page name"), - 286: ("xpos", "X position"), - 287: ("ypos", "Y position"), - 288: ("free_ofs", "Free offsets"), - 289: ("free_byte", "Free byte counts"), - 290: ("gray_resp_unit", "Gray response unit"), - 291: ("gray_resp_curve", "Gray response curve"), - 292: ("group3_opt", "Group 3 options"), - 293: ("group4_opt", "Group 4 options"), - 296: ("res_unit", "Resolution unit"), - 297: ("page_nb", "Page number"), - 301: ("color_respt_curve", "Color response curves"), - 305: ("software", "Software"), - 306: ("date_time", "Date time"), - 315: ("artist", "Artist"), - 316: ("host_computer", "Host computer"), - 317: ("predicator", "Predicator"), - 318: ("white_pt", "White point"), - 319: ("prim_chomat", "Primary chromaticities"), - 320: ("color_map", "Color map"), - 321: ("half_tone_hints", "Halftone Hints"), - 322: ("tile_width", "TileWidth"), - 323: ("tile_length", "TileLength"), - 324: ("tile_offsets", "TileOffsets"), - 325: ("tile_byte_counts", "TileByteCounts"), - 332: ("ink_set", "InkSet"), - 333: ("ink_names", "InkNames"), - 334: ("number_of_inks", "NumberOfInks"), - 336: ("dot_range", "DotRange"), - 337: ("target_printer", "TargetPrinter"), - 338: ("extra_samples", "ExtraSamples"), - 339: ("sample_format", "SampleFormat"), - 340: ("smin_sample_value", "SMinSampleValue"), - 341: ("smax_sample_value", "SMaxSampleValue"), - 342: ("transfer_range", "TransferRange"), - 512: ("jpeg_proc", "JPEGProc"), - 513: ("jpeg_interchange_format", "JPEGInterchangeFormat"), - 514: ("jpeg_interchange_format_length", "JPEGInterchangeFormatLength"), - 515: ("jpeg_restart_interval", "JPEGRestartInterval"), - 517: ("jpeg_lossless_predictors", "JPEGLosslessPredictors"), - 518: 
("jpeg_point_transforms", "JPEGPointTransforms"), - 519: ("jpeg_qtables", "JPEGQTables"), - 520: ("jpeg_dctables", "JPEGDCTables"), - 521: ("jpeg_actables", "JPEGACTables"), - 529: ("ycbcr_coefficients", "YCbCrCoefficients"), - 530: ("ycbcr_subsampling", "YCbCrSubSampling"), - 531: ("ycbcr_positioning", "YCbCrPositioning"), - 532: ("reference_blackwhite", "ReferenceBlackWhite"), - 33432: ("copyright", "Copyright"), - 0x8769: ("ifd_pointer", "Pointer to next IFD entry"), - } - TAG_NAME = createDict(TAG_INFO, 0) - - def __init__(self, *args): - FieldSet.__init__(self, *args) - tag = self["tag"].value - if tag in self.TAG_INFO: - self._name, self._description = self.TAG_INFO[tag] - else: - self._parser = None - -class IFD(FieldSet): - def __init__(self, *args): - FieldSet.__init__(self, *args) - self._size = 16 + self["count"].value * IFDEntry.static_size - self._has_offset = False - - def createFields(self): - yield UInt16(self, "count") - if MAX_COUNT < self["count"].value: - raise ParserError("TIFF IFD: Invalid count (%s)" - % self["count"].value) - for index in xrange(self["count"].value): - yield IFDEntry(self, "entry[]") +def getStrips(ifd): + data = {} + for i, entry in enumerate(ifd.array('entry')): + data[entry['tag'].display] = entry + # image data + if "StripOffsets" in data and "StripByteCounts" in data: + offs = ifd.getEntryValues(data["StripOffsets"]) + bytes = ifd.getEntryValues(data["StripByteCounts"]) + for off, byte in zip(offs, bytes): + yield off.value, byte.value class ImageFile(SeekableFieldSet): def __init__(self, parent, name, description, ifd): SeekableFieldSet.__init__(self, parent, name, description, None) - self._has_offset = False self._ifd = ifd def createFields(self): - datas = {} - for entry in self._ifd: - if type(entry) != IFDEntry: - continue - for c in entry: - if c.name != "offset": - continue - self.seekByte(c.value, False) - desc = "data of ifd entry " + entry.name, - entryType = BasicIFDEntry.ENTRY_FORMAT[entry["type"].value] - count = entry["count"].value - if entryType == String: - yield String(self, entry.name, count, desc, "\0", "ISO-8859-1") - else: - d = Data(self, entry.name, desc, entryType, count) - datas[d.name] = d - yield d - break - # image data - if "strip_ofs" in datas and "strip_byte" in datas: - for i in xrange(datas["strip_byte"]._count): - self.seekByte(datas["strip_ofs"]["value["+str(i)+"]"].value, False) - yield Bytes(self, "strip[]", datas["strip_byte"]["value["+str(i)+"]"].value) - -class Data(FieldSet): - - def __init__(self, parent, name, desc, type, count): - size = type.static_size * count - FieldSet.__init__(self, parent, name, desc, size) - self._count = count - self._type = type - - def createFields(self): - for i in xrange(self._count): - yield self._type(self, "value[]") + for off, byte in getStrips(self._ifd): + self.seekByte(off, relative=False) + yield Bytes(self, "strip[]", byte) class TiffFile(RootSeekableFieldSet, Parser): PARSER_TAGS = { @@ -168,7 +38,6 @@ class TiffFile(RootSeekableFieldSet, Parser): "file_ext": ("tif", "tiff"), "mime": (u"image/tiff",), "min_size": 8*8, -# TODO: Re-enable magic "magic": (("II\x2A\0", 0), ("MM\0\x2A", 0)), "description": "TIFF picture" } @@ -191,21 +60,11 @@ class TiffFile(RootSeekableFieldSet, Parser): return True def createFields(self): - yield String(self, "endian", 2, 'Endian ("II" or "MM")', charset="ASCII") - yield UInt16(self, "version", "TIFF version number") - offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)") - yield 
offset - ifds = [] - while True: - if offset.value == 0: - break + for field in TIFF(self): + yield field - self.seekByte(offset.value, relative=False) - ifd = IFD(self, "ifd[]", "Image File Directory", None) - ifds.append(ifd) - yield ifd - offset = UInt32(self, "img_dir_ofs[]", "Next image directory offset (in bytes from the beginning)") - yield offset - for ifd in ifds: + for ifd in self.array('ifd'): + offs = (off for off, byte in getStrips(ifd)) + self.seekByte(min(offs), relative=False) image = ImageFile(self, "image[]", "Image File", ifd) yield image diff --git a/lib/hachoir_parser/image/wmf.py b/lib/hachoir_parser/image/wmf.py index c45b530a..86f9840b 100644 --- a/lib/hachoir_parser/image/wmf.py +++ b/lib/hachoir_parser/image/wmf.py @@ -16,14 +16,14 @@ Creation date: 26 december 2006 MAX_FILESIZE = 50 * 1024 * 1024 -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, StaticFieldSet, Enum, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, StaticFieldSet, Enum, MissingField, ParserError, UInt32, Int32, UInt16, Int16, UInt8, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.image.common import RGBA +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict +from hachoir_parser.image.common import RGBA POLYFILL_MODE = {1: "Alternate", 2: "Winding"} diff --git a/lib/hachoir_parser/image/xcf.py b/lib/hachoir_parser/image/xcf.py index e365001f..f0bfa30c 100644 --- a/lib/hachoir_parser/image/xcf.py +++ b/lib/hachoir_parser/image/xcf.py @@ -9,11 +9,11 @@ CVS online: Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (StaticFieldSet, FieldSet, ParserError, UInt8, UInt32, Enum, Float32, String, PascalString32, RawBytes) -from lib.hachoir_parser.image.common import RGBA -from lib.hachoir_core.endian import NETWORK_ENDIAN +from hachoir_parser.image.common import RGBA +from hachoir_core.endian import NETWORK_ENDIAN class XcfCompression(FieldSet): static_size = 8 diff --git a/lib/hachoir_parser/misc/__init__.py b/lib/hachoir_parser/misc/__init__.py index 938d4aa5..f1392015 100644 --- a/lib/hachoir_parser/misc/__init__.py +++ b/lib/hachoir_parser/misc/__init__.py @@ -1,14 +1,18 @@ -from lib.hachoir_parser.misc.file_3do import File3do -from lib.hachoir_parser.misc.file_3ds import File3ds -from lib.hachoir_parser.misc.torrent import TorrentFile -from lib.hachoir_parser.misc.ttf import TrueTypeFontFile -from lib.hachoir_parser.misc.chm import ChmFile -from lib.hachoir_parser.misc.lnk import LnkFile -from lib.hachoir_parser.misc.pcf import PcfFile -from lib.hachoir_parser.misc.ole2 import OLE2_File -from lib.hachoir_parser.misc.pdf import PDFDocument -from lib.hachoir_parser.misc.pifv import PIFVFile -from lib.hachoir_parser.misc.hlp import HlpFile -from lib.hachoir_parser.misc.gnome_keyring import GnomeKeyring -from lib.hachoir_parser.misc.bplist import BPList - +from hachoir_parser.misc.file_3do import File3do +from hachoir_parser.misc.file_3ds import File3ds +from hachoir_parser.misc.torrent import TorrentFile +from hachoir_parser.misc.ttf import TrueTypeFontFile +from hachoir_parser.misc.chm import ChmFile +from 
hachoir_parser.misc.lnk import LnkFile +from hachoir_parser.misc.pcf import PcfFile +from hachoir_parser.misc.ole2 import OLE2_File +from hachoir_parser.misc.pdf import PDFDocument +from hachoir_parser.misc.pifv import PIFVFile +from hachoir_parser.misc.hlp import HlpFile +from hachoir_parser.misc.gnome_keyring import GnomeKeyring +from hachoir_parser.misc.bplist import BPList +from hachoir_parser.misc.dsstore import DSStore +from hachoir_parser.misc.word_doc import WordDocumentParser +from hachoir_parser.misc.word_2 import Word2DocumentParser +from hachoir_parser.misc.mstask import MSTaskFile +from hachoir_parser.misc.mapsforge_map import MapsforgeMapFile diff --git a/lib/hachoir_parser/misc/bplist.py b/lib/hachoir_parser/misc/bplist.py index 22282790..5411b488 100644 --- a/lib/hachoir_parser/misc/bplist.py +++ b/lib/hachoir_parser/misc/bplist.py @@ -15,12 +15,12 @@ Author: Robert Xiao Created: 2008-09-21 """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum, +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, Enum, Bits, GenericInteger, Float32, Float64, UInt8, UInt64, Bytes, NullBytes, RawBytes, String) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import displayHandler -from lib.hachoir_core.tools import humanDatetime +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler +from hachoir_core.tools import humanDatetime from datetime import datetime, timedelta class BPListTrailer(FieldSet): @@ -157,9 +157,16 @@ class BPListObject(FieldSet): elif markertype == 3: # Date yield Bits(self, "extra", 4, "Extra value, should be 3") - cvt_time=lambda v:datetime(2001,1,1) + timedelta(seconds=v) + # Use a heuristic to determine which epoch to use + def cvt_time(v): + v=timedelta(seconds=v) + epoch2001 = datetime(2001,1,1) + epoch1970 = datetime(1970,1,1) + if (epoch2001 + v - datetime.today()).days > 5*365: + return epoch1970 + v + return epoch2001 + v yield displayHandler(Float64(self, "value"),lambda x:humanDatetime(cvt_time(x))) - self.xml=lambda prefix:prefix + "%s"%(cvt_time(self['value'].value).isoformat()) + self.xml=lambda prefix:prefix + "%sZ"%(cvt_time(self['value'].value).isoformat()) elif markertype == 4: # Data @@ -175,7 +182,7 @@ class BPListObject(FieldSet): yield BPListSize(self, "size") if self['size'].value: yield String(self, "value", self['size'].value, charset="ASCII") - self.xml=lambda prefix:prefix + "%s"%(self['value'].value.encode('iso-8859-1')) + self.xml=lambda prefix:prefix + "%s"%(self['value'].value.replace('&','&').encode('iso-8859-1')) else: self.xml=lambda prefix:prefix + '' @@ -184,7 +191,7 @@ class BPListObject(FieldSet): yield BPListSize(self, "size") if self['size'].value: yield String(self, "value", self['size'].value*2, charset="UTF-16-BE") - self.xml=lambda prefix:prefix + "%s"%(self['value'].value.encode('utf-8')) + self.xml=lambda prefix:prefix + "%s"%(self['value'].value.replace('&','&').encode('utf-8')) else: self.xml=lambda prefix:prefix + '' diff --git a/lib/hachoir_parser/misc/chm.py b/lib/hachoir_parser/misc/chm.py index 3a158e54..37c5cae0 100644 --- a/lib/hachoir_parser/misc/chm.py +++ b/lib/hachoir_parser/misc/chm.py @@ -6,20 +6,24 @@ Document: http://www.wotsit.org (search "chm") - chmlib library http://www.jedrea.com/chmlib/ +- Unofficial CHM Spec + http://savannah.nongnu.org/projects/chmspec +- Microsoft's HTML Help (.chm) format + 
http://www.speakeasy.org/~russotto/chm/chmformat.html Author: Victor Stinner Creation date: 2007-03-04 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (Field, FieldSet, ParserError, - Int32, UInt32, UInt64, +from hachoir_core.field import (Field, FieldSet, ParserError, RootSeekableFieldSet, + Int32, UInt16, UInt32, UInt64, RawBytes, PaddingBytes, Enum, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_parser.common.win32 import GUID -from lib.hachoir_parser.common.win32_lang_id import LANGUAGE_ID -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_parser.common.win32 import GUID +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class CWord(Field): """ @@ -42,6 +46,7 @@ class CWord(Field): raise ParserError("CHM: CWord is limited to 64 bits") addr += 8 byte = stream.readBits(addr, 8, endian) + value <<= 7 value += byte self.createValue = lambda: value @@ -84,7 +89,7 @@ class ITSF(FieldSet): yield UInt32(self, "version") yield UInt32(self, "header_size", "Total header length (in bytes)") yield UInt32(self, "one") - yield UInt32(self, "last_modified") + yield UInt32(self, "last_modified", "Lower 32 bits of the time expressed in units of 0.1 us") yield Enum(UInt32(self, "lang_id", "Windows Language ID"), LANGUAGE_ID) yield GUID(self, "dir_uuid", "{7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}") yield GUID(self, "stream_uuid", "{7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}") @@ -99,9 +104,9 @@ class PMGL_Entry(FieldSet): def createFields(self): yield CWord(self, "name_len") yield String(self, "name", self["name_len"].value, charset="UTF-8") - yield CWord(self, "space") - yield CWord(self, "start") - yield filesizeHandler(CWord(self, "length")) + yield CWord(self, "section", "Section number that the entry data is in.") + yield CWord(self, "start", "Start offset of the data") + yield filesizeHandler(CWord(self, "length", "Length of the data")) def createDescription(self): return "%s (%s)" % (self["name"].value, self["length"].display) @@ -118,13 +123,23 @@ class PMGL(FieldSet): # Entries stop = self.size - self["free_space"].value * 8 + entry_count = 0 while self.current_size < stop: yield PMGL_Entry(self, "entry[]") + entry_count+=1 # Padding - padding = (self.size - self.current_size) // 8 + quickref_frequency = 1 + (1 << self["/dir/itsp/density"].value) + num_quickref = (entry_count // quickref_frequency) + if entry_count % quickref_frequency == 0: + num_quickref -= 1 + print self.current_size//8, quickref_frequency, num_quickref + padding = (self["free_space"].value - (num_quickref*2+2)) if padding: yield PaddingBytes(self, "padding", padding) + for i in range(num_quickref*quickref_frequency, 0, -quickref_frequency): + yield UInt16(self, "quickref[%i]"%i) + yield UInt16(self, "entry_count") class PMGI_Entry(FieldSet): def createFields(self): @@ -164,36 +179,145 @@ class Directory(FieldSet): if self.current_size < self.size: yield PMGI(self, "pmgi", size=block_size) -class ChmFile(Parser): +class NameList(FieldSet): + def createFields(self): + yield UInt16(self, "length", "Length of name list in 2-byte blocks") + yield UInt16(self, "count", "Number of entries in name list") + for index in range(self["count"].value): + length=UInt16(self, "name_len[]", "Length of name in 2-byte blocks, excluding terminating null") + yield 
length + yield String(self, "name[]", length.value*2+2, charset="UTF-16-LE") + +class ControlData(FieldSet): + def createFields(self): + yield UInt32(self, "count", "Number of DWORDS in this struct") + yield String(self, "type", 4, "Type of compression") + if self["type"].value!='LZXC': return + yield UInt32(self, "version", "Compression version") + version=self["version"].value + if version==1: block='bytes' + else: block='32KB blocks' + yield UInt32(self, "reset_interval", "LZX: Reset interval in %s"%block) + yield UInt32(self, "window_size", "LZX: Window size in %s"%block) + yield UInt32(self, "cache_size", "LZX: Cache size in %s"%block) + yield UInt32(self, "unknown[]") + +class ResetTable(FieldSet): + def createFields(self): + yield UInt32(self, "unknown[]", "Version number?") + yield UInt32(self, "count", "Number of entries") + yield UInt32(self, "entry_size", "Size of each entry") + yield UInt32(self, "header_size", "Size of this header") + yield UInt64(self, "uncompressed_size") + yield UInt64(self, "compressed_size") + yield UInt64(self, "block_size", "Block size in bytes") + for i in xrange(self["count"].value): + yield UInt64(self, "block_location[]", "location in compressed data of 1st block boundary in uncompressed data") + +class SystemEntry(FieldSet): + ENTRY_TYPE={0:"HHP: [OPTIONS]: Contents File", + 1:"HHP: [OPTIONS]: Index File", + 2:"HHP: [OPTIONS]: Default Topic", + 3:"HHP: [OPTIONS]: Title", + 4:"File Metadata", + 5:"HHP: [OPTIONS]: Default Window", + 6:"HHP: [OPTIONS]: Compiled file", + # 7 present only in files with Binary Index; unknown function + # 8 unknown function + 9: "Version", + 10: "Timestamp", + # 11 only in Binary TOC files + 12: "Number of Info Types", + 13: "#IDXHDR file", + # 14 unknown function + # 15 checksum?? 
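+ # per the unofficial CHM specs cited above, the [OPTIONS]-derived
+ # types (0-3, 5, 6, 16) hold NUL-terminated strings, while type 4
+ # is a binary struct (LCID, DBCS flag, timestamp, ...)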
+ 16:"HHP: [OPTIONS]: Default Font", + } + def createFields(self): + yield Enum(UInt16(self, "type", "Type of entry"),self.ENTRY_TYPE) + yield UInt16(self, "length", "Length of entry") + yield RawBytes(self, "data", self["length"].value) + def createDescription(self): + return '#SYSTEM Entry, Type %s'%self["type"].display + +class SystemFile(FieldSet): + def createFields(self): + yield UInt32(self, "version", "Either 2 or 3") + while self.current_size < self.size: + yield SystemEntry(self, "entry[]") + +class ChmFile(HachoirParser, RootSeekableFieldSet): + MAGIC = "ITSF\3\0\0\0" PARSER_TAGS = { "id": "chm", "category": "misc", "file_ext": ("chm",), "min_size": 4*8, - "magic": (("ITSF\3\0\0\0", 0),), + "magic": ((MAGIC, 0),), "description": "Microsoft's HTML Help (.chm)", } endian = LITTLE_ENDIAN + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + def validate(self): - if self.stream.readBytes(0, 4) != "ITSF": + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: return "Invalid magic" - if self["itsf/version"].value != 3: - return "Invalid version" return True def createFields(self): yield ITSF(self, "itsf") yield Filesize_Header(self, "file_size", size=self["itsf/filesize_len"].value*8) - padding = self.seekByte(self["itsf/dir_offset"].value) - if padding: - yield padding - yield Directory(self, "dir", size=self["itsf/dir_len"].value*8) + self.seekByte(self["itsf/dir_offset"].value) + directory=Directory(self, "dir", size=self["itsf/dir_len"].value*8) + yield directory - size = (self.size - self.current_size) // 8 - if size: - yield RawBytes(self, "raw_end", size) + otherentries = {} + for pmgl in directory.array("pmgl"): + for entry in pmgl.array("entry"): + if entry["section"].value != 0: + otherentries.setdefault(entry["section"].value,[]).append(entry) + continue + if entry["length"].value == 0: + continue + self.seekByte(self["itsf/data_offset"].value+entry["start"].value) + name = entry["name"].value + if name == "::DataSpace/NameList": + yield NameList(self, "name_list") + elif name.startswith('::DataSpace/Storage/'): + sectname = str(name.split('/')[2]) + if name.endswith('/SpanInfo'): + yield UInt64(self, "%s_spaninfo"%sectname, "Size of uncompressed data in the %s section"%sectname) + elif name.endswith('/ControlData'): + yield ControlData(self, "%s_controldata"%sectname, "Data about the compression scheme", size=entry["length"].value*8) + elif name.endswith('/Transform/List'): + yield String(self, "%s_transform_list"%sectname, 38, description="Transform/List element", charset="UTF-16-LE") + elif name.endswith('/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable'): + yield ResetTable(self, "%s_reset_table"%sectname, "LZX Reset Table", size=entry["length"].value*8) + elif name.endswith('/Content'): + # eventually, a LZX wrapper will appear here, we hope! 
+ yield RawBytes(self, "%s_content"%sectname, entry["length"].value, "Content for the %s section"%sectname) + else: + yield RawBytes(self, "entry_data[]", entry["length"].value, name) + elif name=="/#SYSTEM": + yield SystemFile(self, "system_file", size=entry["length"].value*8) + else: + yield RawBytes(self, "entry_data[]", entry["length"].value, name) + + def getFile(self, filename): + page=0 + if 'pmgi' in self['/dir']: + for entry in self['/dir/pmgi'].array('entry'): + if entry['name'].value <= filename: + page=entry['page'].value + pmgl=self['/dir/pmgl[%i]'%page] + for entry in pmgl.array('entry'): + if entry['name'].value == filename: + return entry + raise ParserError("File '%s' not found!"%filename) def createContentSize(self): return self["file_size/file_size"].value * 8 diff --git a/lib/hachoir_parser/misc/common.py b/lib/hachoir_parser/misc/common.py index b73c8212..38d9f823 100644 --- a/lib/hachoir_parser/misc/common.py +++ b/lib/hachoir_parser/misc/common.py @@ -1,4 +1,4 @@ -from lib.hachoir_core.field import StaticFieldSet, Float32 +from hachoir_core.field import StaticFieldSet, Float32 class Vertex(StaticFieldSet): format = ((Float32, "x"), (Float32, "y"), (Float32, "z")) diff --git a/lib/hachoir_parser/misc/dsstore.py b/lib/hachoir_parser/misc/dsstore.py new file mode 100644 index 00000000..02792ad5 --- /dev/null +++ b/lib/hachoir_parser/misc/dsstore.py @@ -0,0 +1,211 @@ +""" +Mac OS X .DS_Store parser. + +Documents: +- http://search.cpan.org/~wiml/Mac-Finder-DSStore-0.95/DSStoreFormat.pod +Author: Robert Xiao +Created: 2010-09-01 +""" + +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, + NullBytes, RawBytes, PaddingBytes, Bytes, SubFile, String, PascalString8, + Bits, UInt8, UInt16, UInt32, + Link, + ParserError) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import displayHandler, hexadecimal +from hachoir_core.tools import paddingSize + +class BlockAddress(FieldSet): + static_size = 32 + + def createFields(self): + yield displayHandler(Bits(self, "offset", 27, description="Offset into file divided by 32"), lambda x: hex(x*32).strip('L')) + yield displayHandler(Bits(self, "size", 5, description="Power-of-2 size of the block"), lambda x: hex(1< 0: + self.seekByte(offs.value+4) + yield RawBytes(self, "free[]", size) diff --git a/lib/hachoir_parser/misc/file_3do.py b/lib/hachoir_parser/misc/file_3do.py index 3d909d56..3108d0ae 100644 --- a/lib/hachoir_parser/misc/file_3do.py +++ b/lib/hachoir_parser/misc/file_3do.py @@ -7,12 +7,12 @@ Author: Cyril Zorin Creation date: 28 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt32, Int32, String, Float32, RawBytes, PaddingBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.misc.common import Vertex, MapUV +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser.misc.common import Vertex, MapUV class Vector(FieldSet): def __init__(self, parent, name, diff --git a/lib/hachoir_parser/misc/file_3ds.py b/lib/hachoir_parser/misc/file_3ds.py index de05fa94..aaf4fbf4 100644 --- a/lib/hachoir_parser/misc/file_3ds.py +++ b/lib/hachoir_parser/misc/file_3ds.py @@ -3,13 +3,13 @@ Author: Victor Stinner """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (StaticFieldSet, FieldSet, +from hachoir_parser import Parser +from 
hachoir_core.field import (StaticFieldSet, FieldSet, UInt16, UInt32, RawBytes, Enum, CString) -from lib.hachoir_parser.image.common import RGB -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.misc.common import Vertex, MapUV +from hachoir_parser.image.common import RGB +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.misc.common import Vertex, MapUV def readObject(parent): yield CString(parent, "name", "Object name") diff --git a/lib/hachoir_parser/misc/gnome_keyring.py b/lib/hachoir_parser/misc/gnome_keyring.py index 56b26a67..0bade36f 100644 --- a/lib/hachoir_parser/misc/gnome_keyring.py +++ b/lib/hachoir_parser/misc/gnome_keyring.py @@ -9,14 +9,14 @@ Author: Victor Stinner Creation date: 2008-04-09 """ -from lib.hachoir_core.tools import paddingSize -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.tools import paddingSize +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bit, NullBits, NullBytes, UInt8, UInt32, String, RawBytes, Enum, TimestampUnix64, CompressedField, SubFile) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN try: import hashlib diff --git a/lib/hachoir_parser/misc/hlp.py b/lib/hachoir_parser/misc/hlp.py index 24da2077..167dc7a6 100644 --- a/lib/hachoir_parser/misc/hlp.py +++ b/lib/hachoir_parser/misc/hlp.py @@ -10,12 +10,12 @@ Author: Victor Stinner Creation date: 2007-09-03 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, Bits, Int32, UInt16, UInt32, NullBytes, RawBytes, PaddingBytes, String) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import (textHandler, hexadecimal, +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import (textHandler, hexadecimal, displayHandler, humanFilesize) class FileEntry(FieldSet): diff --git a/lib/hachoir_parser/misc/lnk.py b/lib/hachoir_parser/misc/lnk.py index b0512baa..3844d37f 100644 --- a/lib/hachoir_parser/misc/lnk.py +++ b/lib/hachoir_parser/misc/lnk.py @@ -22,20 +22,20 @@ Changes: * Creation of the parser """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, CString, String, UInt32, UInt16, UInt8, Bit, Bits, PaddingBits, TimestampWin64, DateTimeMSDOS32, NullBytes, PaddingBytes, RawBytes, Enum) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.common.win32 import GUID -from lib.hachoir_parser.common.msdos import MSDOSFileAttr16, MSDOSFileAttr32 -from lib.hachoir_core.text_handler import filesizeHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.common.win32 import GUID +from hachoir_parser.common.msdos import MSDOSFileAttr16, MSDOSFileAttr32 +from hachoir_core.text_handler import filesizeHandler -from lib.hachoir_core.tools import paddingSize +from hachoir_core.tools import paddingSize class ItemIdList(FieldSet): def __init__(self, *args, **kw): @@ -56,7 +56,7 @@ class ItemId(FieldSet): 0x23: "Drive", 0x25: "Drive", 0x29: "Drive", - 0x2E: "GUID", + 0x2E: 
"Shell Extension", 0x2F: "Drive", 0x30: "Dir/File", 0x31: "Directory", @@ -66,6 +66,7 @@ class ItemId(FieldSet): 0x42: "Computer", 0x46: "Net Provider", 0x47: "Whole Network", + 0x4C: "Web Folder", 0x61: "MSITStore", 0x70: "Printer/RAS Connection", 0xB1: "History/Favorite", @@ -86,16 +87,26 @@ class ItemId(FieldSet): yield Enum(UInt8(self, "type"),self.ITEM_TYPE) entrytype=self["type"].value - if entrytype in (0x1F, 0x2E, 0x70): + if entrytype in (0x1F, 0x70): # GUID yield RawBytes(self, "dummy", 1, "should be 0x50") yield GUID(self, "guid") + elif entrytype == 0x2E: + # Shell extension + yield RawBytes(self, "dummy", 1, "should be 0x50") + if self["dummy"].value == '\0': + yield UInt16(self, "length_data", "Length of shell extension-specific data") + if self["length_data"].value: + yield RawBytes(self, "data", self["length_data"].value, "Shell extension-specific data") + yield GUID(self, "handler_guid") + yield GUID(self, "guid") + elif entrytype in (0x23, 0x25, 0x29, 0x2F): # Drive yield String(self, "drive", self["length"].value-3, strip="\0") - elif entrytype in (0x30, 0x31, 0x32): + elif entrytype in (0x30, 0x31, 0x32, 0x61, 0xb1): yield RawBytes(self, "dummy", 1, "should be 0x00") yield UInt32(self, "size", "size of file; 0 for folders") yield DateTimeMSDOS32(self, "date_time", "File/folder date and time") @@ -111,8 +122,11 @@ class ItemId(FieldSet): yield RawBytes(self, "unknown[]", 6) yield DateTimeMSDOS32(self, "creation_date_time", "File/folder creation date and time") yield DateTimeMSDOS32(self, "access_date_time", "File/folder last access date and time") - yield RawBytes(self, "unknown[]", 4) + yield RawBytes(self, "unknown[]", 2) + yield UInt16(self, "length_next", "Length of next two strings (if zero, ignore this field)") yield CString(self, "unicode_name", "File/folder name", charset="UTF-16-LE") + if self["length_next"].value: + yield CString(self, "localized_name", "Localized name") yield RawBytes(self, "unknown[]", 2) else: yield CString(self, "name_short", "File/folder short name") @@ -136,6 +150,19 @@ class ItemId(FieldSet): yield CString(self, "description") yield RawBytes(self, "unknown[]", 2) + elif entrytype == 0x4C: + # Web Folder + yield RawBytes(self, "unknown[]", 5) + yield TimestampWin64(self, "modification_time") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + yield LnkString(self, "name") + yield RawBytes(self, "padding[]", 2) + yield LnkString(self, "address") + if self["address/length"].value: + yield RawBytes(self, "padding[]", 2) + else: yield RawBytes(self, "raw", self["length"].value-3) @@ -249,13 +276,17 @@ class FileLocationInfo(FieldSet): class LnkString(FieldSet): def createFields(self): yield UInt16(self, "length", "Length of this string") - if self.root.hasUnicodeNames(): - yield String(self, "data", self["length"].value*2, charset="UTF-16-LE") - else: - yield String(self, "data", self["length"].value, charset="ASCII") + if self["length"].value: + if self.root.hasUnicodeNames(): + yield String(self, "data", self["length"].value*2, charset="UTF-16-LE") + else: + yield String(self, "data", self["length"].value, charset="ASCII") def createValue(self): - return self["data"].value + if self["length"].value: + return self["data"].value + else: + return "" class ColorRef(FieldSet): ''' COLORREF struct, 0x00bbggrr ''' diff --git a/lib/hachoir_parser/misc/mapsforge_map.py b/lib/hachoir_parser/misc/mapsforge_map.py new file mode 100644 index 00000000..4b99653a --- /dev/null +++ 
b/lib/hachoir_parser/misc/mapsforge_map.py @@ -0,0 +1,357 @@ +""" +Mapsforge map file parser (for version 3 files). + +Author: Oliver Gerlich + +References: +- http://code.google.com/p/mapsforge/wiki/SpecificationBinaryMapFile +- http://mapsforge.org/ +""" + +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, + Bit, Bits, UInt8, UInt16, UInt32, UInt64, String, RawBytes, + PaddingBits, PaddingBytes, + Enum, Field, FieldSet, SeekableFieldSet, RootSeekableFieldSet) +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN + + +# micro-degrees factor: +UDEG = float(1000*1000) + + +CoordinateEncoding = { + 0: "single delta encoding", + 1: "double delta encoding", +} + + +class UIntVbe(Field): + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, description=description) + + value = 0 + size = 0 + while True: + byteValue = ord( self._parent.stream.readBytes(self.absolute_address + (size*8), 1) ) + + haveMoreData = (byteValue & 0x80) + value = value | ((byteValue & 0x7f) << (size*7)) + size += 1 + assert size < 100, "UIntVBE is too large" + + if not(haveMoreData): + break + + self._size = size*8 + self.createValue = lambda: value + + +class IntVbe(Field): + def __init__(self, parent, name, description=None): + Field.__init__(self, parent, name, description=description) + + value = 0 + size = 0 + shift = 0 + while True: + byteValue = ord( self._parent.stream.readBytes(self.absolute_address + (size*8), 1) ) + + haveMoreData = (byteValue & 0x80) + if size == 0: + isNegative = (byteValue & 0x40) + value = (byteValue & 0x3f) + shift += 6 + else: + value = value | ((byteValue & 0x7f) << shift) + shift += 7 + size += 1 + assert size < 100, "IntVBE is too large" + + if not(haveMoreData): + break + + if isNegative: + value *= -1 + + self._size = size*8 + self.createValue = lambda: value + + +class VbeString(FieldSet): + def createFields(self): + yield UIntVbe(self, "length") + yield String(self, "chars", self["length"].value, charset="UTF-8") + + def createDescription (self): + return '(%d B) "%s"' % (self["length"].value, self["chars"].value) + + +class TagStringList(FieldSet): + def createFields(self): + yield UInt16(self, "num_tags") + for i in range(self["num_tags"].value): + yield VbeString(self, "tag[]") + + def createDescription (self): + return "%d tag strings" % self["num_tags"].value + + +class ZoomIntervalCfg(FieldSet): + def createFields(self): + yield UInt8(self, "base_zoom_level") + yield UInt8(self, "min_zoom_level") + yield UInt8(self, "max_zoom_level") + yield UInt64(self, "subfile_start") + yield UInt64(self, "subfile_size") + + def createDescription (self): + return "zoom level around %d (%d - %d)" % (self["base_zoom_level"].value, + self["min_zoom_level"].value, self["max_zoom_level"].value) + + +class TileIndexEntry(FieldSet): + def createFields(self): + yield Bit(self, "is_water_tile") + yield Bits(self, "offset", 39) + + +class TileZoomTable(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_pois") + yield UIntVbe(self, "num_ways") + + def createDescription (self): + return "%d POIs, %d ways" % (self["num_pois"].value, self["num_ways"].value) + + +class TileHeader(FieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + FieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1 + assert(numLevels < 50) + for i in 
range(numLevels): + yield TileZoomTable(self, "zoom_table_entry[]") + yield UIntVbe(self, "first_way_offset") + + +class POIData(FieldSet): + def createFields(self): + yield IntVbe(self, "lat_diff") + yield IntVbe(self, "lon_diff") + yield Bits(self, "layer", 4) + yield Bits(self, "num_tags", 4) + + for i in range(self["num_tags"].value): + yield UIntVbe(self, "tag_id[]") + + yield Bit(self, "have_name") + yield Bit(self, "have_house_number") + yield Bit(self, "have_ele") + yield PaddingBits(self, "pad[]", 5) + + if self["have_name"].value: + yield VbeString(self, "name") + if self["have_house_number"].value: + yield VbeString(self, "house_number") + if self["have_ele"].value: + yield IntVbe(self, "ele") + + def createDescription (self): + s = "POI" + if self["have_name"].value: + s += ' "%s"' % self["name"]["chars"].value + s += " @ %f/%f" % (self["lat_diff"].value / UDEG, self["lon_diff"].value / UDEG) + return s + + + +class SubTileBitmap(FieldSet): + static_size = 2*8 + def createFields(self): + for y in range(4): + for x in range(4): + yield Bit(self, "is_used[%d,%d]" % (x,y)) + + +class WayProperties(FieldSet): + def createFields(self): + yield UIntVbe(self, "way_data_size") + + # WayProperties is split into an outer and an inner field, to allow specifying data size for inner part: + yield WayPropertiesInner(self, "inner", size=self["way_data_size"].value * 8) + + +class WayPropertiesInner(FieldSet): + def createFields(self): + yield SubTileBitmap(self, "sub_tile_bitmap") + #yield Bits(self, "sub_tile_bitmap", 16) + + yield Bits(self, "layer", 4) + yield Bits(self, "num_tags", 4) + + for i in range(self["num_tags"].value): + yield UIntVbe(self, "tag_id[]") + + yield Bit(self, "have_name") + yield Bit(self, "have_house_number") + yield Bit(self, "have_ref") + yield Bit(self, "have_label_position") + yield Bit(self, "have_num_way_blocks") + yield Enum(Bit(self, "coord_encoding"), CoordinateEncoding) + yield PaddingBits(self, "pad[]", 2) + + if self["have_name"].value: + yield VbeString(self, "name") + if self["have_house_number"].value: + yield VbeString(self, "house_number") + if self["have_ref"].value: + yield VbeString(self, "ref") + if self["have_label_position"].value: + yield IntVbe(self, "label_lat_diff") + yield IntVbe(self, "label_lon_diff") + numWayDataBlocks = 1 + if self["have_num_way_blocks"].value: + yield UIntVbe(self, "num_way_blocks") + numWayDataBlocks = self["num_way_blocks"].value + + for i in range(numWayDataBlocks): + yield WayData(self, "way_data[]") + + def createDescription (self): + s = "way" + if self["have_name"].value: + s += ' "%s"' % self["name"]["chars"].value + return s + + +class WayData(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_coord_blocks") + for i in range(self["num_coord_blocks"].value): + yield WayCoordBlock(self, "way_coord_block[]") + +class WayCoordBlock(FieldSet): + def createFields(self): + yield UIntVbe(self, "num_way_nodes") + yield IntVbe(self, "first_lat_diff") + yield IntVbe(self, "first_lon_diff") + + for i in range(self["num_way_nodes"].value-1): + yield IntVbe(self, "lat_diff[]") + yield IntVbe(self, "lon_diff[]") + + +class TileData(FieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + FieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + yield TileHeader(self, "tile_header", self.zoomIntervalCfg) + + numLevels = int(self.zoomIntervalCfg["max_zoom_level"].value - self.zoomIntervalCfg["min_zoom_level"].value) +1 + for zoomLevel in 
range(numLevels): + zoomTableEntry = self["tile_header"]["zoom_table_entry[%d]" % zoomLevel] + for poiIndex in range(zoomTableEntry["num_pois"].value): + yield POIData(self, "poi_data[%d,%d]" % (zoomLevel, poiIndex)) + + for zoomLevel in range(numLevels): + zoomTableEntry = self["tile_header"]["zoom_table_entry[%d]" % zoomLevel] + for wayIndex in range(zoomTableEntry["num_ways"].value): + yield WayProperties(self, "way_props[%d,%d]" % (zoomLevel, wayIndex)) + + + +class ZoomSubFile(SeekableFieldSet): + def __init__ (self, parent, name, zoomIntervalCfg, **kw): + SeekableFieldSet.__init__(self, parent, name, **kw) + self.zoomIntervalCfg = zoomIntervalCfg + + def createFields(self): + indexEntries = [] + numTiles = None + i = 0 + while True: + entry = TileIndexEntry(self, "tile_index_entry[]") + indexEntries.append(entry) + yield entry + + i+=1 + if numTiles is None: + # calculate number of tiles (TODO: better calc this from map bounding box) + firstOffset = self["tile_index_entry[0]"]["offset"].value + numTiles = firstOffset / 5 + if i >= numTiles: + break + + for indexEntry in indexEntries: + self.seekByte(indexEntry["offset"].value, relative=True) + yield TileData(self, "tile_data[]", zoomIntervalCfg=self.zoomIntervalCfg) + + + +class MapsforgeMapFile(Parser, RootSeekableFieldSet): + PARSER_TAGS = { + "id": "mapsforge_map", + "category": "misc", + "file_ext": ("map",), + "min_size": 62*8, + "description": "Mapsforge map file", + } + + endian = BIG_ENDIAN + + def validate(self): + return self["file_magic"].value == "mapsforge binary OSM" and self["file_version"].value == 3 + + def createFields(self): + yield String(self, "file_magic", 20) + yield UInt32(self, "header_size") + yield UInt32(self, "file_version") + yield UInt64(self, "file_size") + yield UInt64(self, "creation_date") + yield UInt32(self, "min_lat") + yield UInt32(self, "min_lon") + yield UInt32(self, "max_lat") + yield UInt32(self, "max_lon") + yield UInt16(self, "tile_size") + yield VbeString(self, "projection") + + # flags + yield Bit(self, "have_debug") + yield Bit(self, "have_map_start") + yield Bit(self, "have_start_zoom") + yield Bit(self, "have_language_preference") + yield Bit(self, "have_comment") + yield Bit(self, "have_created_by") + yield Bits(self, "reserved[]", 2) + + if self["have_map_start"].value: + yield UInt32(self, "start_lat") + yield UInt32(self, "start_lon") + if self["have_start_zoom"].value: + yield UInt8(self, "start_zoom") + if self["have_language_preference"].value: + yield VbeString(self, "language_preference") + if self["have_comment"].value: + yield VbeString(self, "comment") + if self["have_created_by"].value: + yield VbeString(self, "created_by") + + yield TagStringList(self, "poi_tags") + yield TagStringList(self, "way_tags") + + + yield UInt8(self, "num_zoom_intervals") + for i in range(self["num_zoom_intervals"].value): + yield ZoomIntervalCfg(self, "zoom_interval_cfg[]") + + for i in range(self["num_zoom_intervals"].value): + zoomIntervalCfg = self["zoom_interval_cfg[%d]" % i] + self.seekByte(zoomIntervalCfg["subfile_start"].value, relative=False) + yield ZoomSubFile(self, "subfile[]", size=zoomIntervalCfg["subfile_size"].value * 8, zoomIntervalCfg=zoomIntervalCfg) + diff --git a/lib/hachoir_parser/misc/msoffice.py b/lib/hachoir_parser/misc/msoffice.py index 960ec825..825c5637 100644 --- a/lib/hachoir_parser/misc/msoffice.py +++ b/lib/hachoir_parser/misc/msoffice.py @@ -3,49 +3,40 @@ Parsers for the different streams and fragments found in an OLE2 file. 
Documents: - goffice source code + - Microsoft Office PowerPoint 97-2007 Binary File Format (.ppt) Specification + http://download.microsoft.com/download/0/B/E/0BE8BDD7-E5E8-422A-ABFD-4342ED7AD886/PowerPoint97-2007BinaryFileFormat(ppt)Specification.pdf Author: Robert Xiao, Victor Stinner -Creation: 2006-04-23 +Creation: 8 january 2005 """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.field import FieldSet, RootSeekableFieldSet, RawBytes -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.stream import StringInputStream -from lib.hachoir_parser.misc.msoffice_summary import SummaryFieldSet, CompObj -from lib.hachoir_parser.misc.word_doc import WordDocumentFieldSet +from hachoir_core.field import (SubFile, FieldSet, + UInt8, UInt16, Int32, UInt32, Enum, String, CString, + Bits, RawBytes) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.misc.ole2_util import OLE2FragmentParser, RawParser +from hachoir_core.stream import StringInputStream +from hachoir_parser.misc.msoffice_summary import Summary, CompObj +from hachoir_parser.misc.word_doc import WordDocumentParser, WordTableParser -PROPERTY_NAME = { - u"\5DocumentSummaryInformation": "doc_summary", - u"\5SummaryInformation": "summary", - u"WordDocument": "word_doc", -} - -class OfficeRootEntry(HachoirParser, RootSeekableFieldSet): - PARSER_TAGS = { - "description": "Microsoft Office document subfragments", - } - endian = LITTLE_ENDIAN - - def __init__(self, stream, **args): - RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) - HachoirParser.__init__(self, stream, **args) - - def validate(self): - return True +class RootEntry(OLE2FragmentParser): + ENDIAN_CHECK=False def createFields(self): for index, property in enumerate(self.ole2.properties): if index == 0: continue try: - name = PROPERTY_NAME[property["name"].value] + name,parser = PROPERTY_NAME[property["name"].value] except LookupError: name = property.name+"content" - for field in self.parseProperty(index, property, name): + parser = RawParser + for field in self.parseProperty(property, name, parser): yield field + def seekSBlock(self, block): + self.seekBit(block * self.ole2.ss_size) - def parseProperty(self, property_index, property, name_prefix): + def parseProperty(self, property, name_prefix, parser=RawParser): ole2 = self.ole2 if not property["size"].value: return @@ -55,49 +46,45 @@ class OfficeRootEntry(HachoirParser, RootSeekableFieldSet): first = None previous = None size = 0 - start = property["start"].value - chain = ole2.getChain(start, True) - blocksize = ole2.ss_size - desc_format = "Small blocks %s..%s (%s)" + fragment_group = None + chain = ole2.getChain(property["start"].value, ole2.ss_fat) while True: try: block = chain.next() contiguous = False - if not first: + if first is None: first = block contiguous = True - if previous and block == (previous+1): + if previous is not None and block == (previous+1): contiguous = True if contiguous: previous = block - size += blocksize + size += ole2.ss_size continue except StopIteration: block = None + if first is None: + break self.seekSBlock(first) - desc = desc_format % (first, previous, previous-first+1) - size = min(size, property["size"].value*8) - if name_prefix in ("summary", "doc_summary"): - yield SummaryFieldSet(self, name, desc, size=size) - elif name_prefix == "word_doc": - yield WordDocumentFieldSet(self, name, desc, size=size) - elif property_index == 1: - yield CompObj(self, "comp_obj", desc, 
size=size) - else: - yield RawBytes(self, name, size//8, desc) + desc = "Small blocks %s..%s (%s)" % (first, previous, previous-first+1) + desc += " of %s bytes" % (ole2.ss_size//8) + field = CustomFragment(self, name, size, parser, desc, fragment_group) + yield field + if not fragment_group: + fragment_group = field.group + fragment_group.args["datasize"] = property["size"].value + fragment_group.args["ole2name"] = property["name"].value if block is None: break first = block previous = block - size = ole2.sector_size - - def seekSBlock(self, block): - self.seekBit(block * self.ole2.ss_size) + size = ole2.ss_size class FragmentGroup: def __init__(self, parser): self.items = [] self.parser = parser + self.args = {} def add(self, item): self.items.append(item) @@ -110,8 +97,8 @@ class FragmentGroup: data = "".join(data) # FIXME: Use smarter code to send arguments - args = {"ole2": self.items[0].root} - tags = {"class": self.parser, "args": args} + self.args["ole2"] = self.items[0].root + tags = {"class": self.parser, "args": self.args} tags = tags.iteritems() return StringInputStream(data, "", tags=tags) @@ -129,3 +116,660 @@ class CustomFragment(FieldSet): def _createInputStream(self, **args): return self.group.createInputStream() +class Pictures(OLE2FragmentParser): + class Picture(FieldSet): + def createFields(self): + yield RawBytes(self, "identifier", 4, "some kind of marker (A0461DF0)") + yield UInt32(self, "size") + yield RawBytes(self, "unknown[]", 16) + yield RawBytes(self, "unknown[]", 1) + yield SubFile(self, "image", self["size"].value-17, "Image Data") + ENDIAN_CHECK=False + + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newpic=Pictures.Picture(self, "picture[]") + yield newpic + pos+=newpic.size + +class PowerPointDocument(OLE2FragmentParser): + OBJ_TYPES={ 0:"Unknown", + 1000:"Document", + 1001:"DocumentAtom", + 1002:"EndDocument", + 1003:"SlidePersist", + 1004:"SlideBase", + 1005:"SlideBaseAtom", + 1006:"Slide", + 1007:"SlideAtom", + 1008:"Notes", + 1009:"NotesAtom", + 1010:"Environment", + 1011:"SlidePersistAtom", + 1012:"Scheme", + 1013:"SchemeAtom", + 1014:"DocViewInfo", + 1015:"SSlideLayoutAtom", + 1016:"MainMaster", + 1017:"SSSlideInfoAtom", + 1018:"SlideViewInfo", + 1019:"GuideAtom", + 1020:"ViewInfo", + 1021:"ViewInfoAtom", + 1022:"SlideViewInfoAtom", + 1023:"VBAInfo", + 1024:"VBAInfoAtom", + 1025:"SSDocInfoAtom", + 1026:"Summary", + 1027:"Texture", + 1028:"VBASlideInfo", + 1029:"VBASlideInfoAtom", + 1030:"DocRoutingSlip", + 1031:"OutlineViewInfo", + 1032:"SorterViewInfo", + 1033:"ExObjList", + 1034:"ExObjListAtom", + 1035:"PPDrawingGroup", #FIXME: Office Art File Format Docu + 1036:"PPDrawing", #FIXME: Office Art File Format Docu + 1038:"Theme", + 1039:"ColorMapping", + 1040:"NamedShows", # don't know if container + 1041:"NamedShow", + 1042:"NamedShowSlides", # don't know if container + 1052:"OriginalMainMasterId", + 1053:"CompositeMasterId", + 1054:"RoundTripContentMasterInfo12", + 1055:"RoundTripShapeId12", + 1056:"RoundTripHFPlaceholder12", + 1058:"RoundTripContentMasterId12", + 1059:"RoundTripOArtTextStyles12", + 1060:"HeaderFooterDefaults12", + 1061:"DocFlags12", + 1062:"RoundTripShapeCheckSumForCustomLayouts12", + 1063:"RoundTripNotesMasterTextStyles12", + 1064:"RoundTripCustomTableStyles12", + 2000:"List", + 2005:"FontCollection", + 2017:"ListPlaceholder", + 2019:"BookmarkCollection", + 2020:"SoundCollection", + 2021:"SoundCollAtom", + 2022:"Sound", + 2023:"SoundData", + 2025:"BookmarkSeedAtom", + 2026:"GuideList", + 2028:"RunArray", + 
2029:"RunArrayAtom", + 2030:"ArrayElementAtom", + 2031:"Int4ArrayAtom", + 2032:"ColorSchemeAtom", + 3008:"OEShape", + 3009:"ExObjRefAtom", + 3011:"OEPlaceholderAtom", + 3020:"GrColor", + 3024:"GPointAtom", + 3025:"GrectAtom", + 3031:"GRatioAtom", + 3032:"Gscaling", + 3034:"GpointAtom", + 3035:"OEShapeAtom", + 3037:"OEPlaceholderNewPlaceholderId12", + 3998:"OutlineTextRefAtom", + 3999:"TextHeaderAtom", + 4000:"TextCharsAtom", + 4001:"StyleTextPropAtom", + 4002:"BaseTextPropAtom", + 4003:"TxMasterStyleAtom", + 4004:"TxCFStyleAtom", + 4005:"TxPFStyleAtom", + 4006:"TextRulerAtom", + 4007:"TextBookmarkAtom", + 4008:"TextBytesAtom", + 4009:"TxSIStyleAtom", + 4010:"TextSpecInfoAtom", + 4011:"DefaultRulerAtom", + 4023:"FontEntityAtom", + 4024:"FontEmbeddedData", + 4025:"TypeFace", + 4026:"CString", + 4027:"ExternalObject", + 4033:"MetaFile", + 4034:"ExOleObj", + 4035:"ExOleObjAtom", + 4036:"ExPlainLinkAtom", + 4037:"CorePict", + 4038:"CorePictAtom", + 4039:"ExPlainAtom", + 4040:"SrKinsoku", + 4041:"HandOut", + 4044:"ExEmbed", + 4045:"ExEmbedAtom", + 4046:"ExLink", + 4047:"ExLinkAtom_old", + 4048:"BookmarkEntityAtom", + 4049:"ExLinkAtom", + 4050:"SrKinsokuAtom", + 4051:"ExHyperlinkAtom", + 4053:"ExPlain", + 4054:"ExPlainLink", + 4055:"ExHyperlink", + 4056:"SlideNumberMCAtom", + 4057:"HeadersFooters", + 4058:"HeadersFootersAtom", + 4062:"RecolorEntryAtom", + 4063:"TxInteractiveInfoAtom", + 4065:"EmFormatAtom", + 4066:"CharFormatAtom", + 4067:"ParaFormatAtom", + 4068:"MasterText", + 4071:"RecolorInfoAtom", + 4073:"ExQuickTime", + 4074:"ExQuickTimeMovie", + 4075:"ExQuickTimeMovieData", + 4076:"ExSubscription", + 4077:"ExSubscriptionSection", + 4078:"ExControl", + 4080:"SlideListWithText", + 4081:"AnimationInfoAtom", + 4082:"InteractiveInfo", + 4083:"InteractiveInfoAtom", + 4084:"SlideList", + 4085:"UserEditAtom", + 4086:"CurrentUserAtom", + 4087:"DateTimeMCAtom", + 4088:"GenericDateMCAtom", + 4090:"FooterMCAtom", + 4091:"ExControlAtom", + 4100:"ExMediaAtom", + 4101:"ExVideo", + 4102:"ExAviMovie", + 4103:"ExMCIMovie", + 4109:"ExMIDIAudio", + 4110:"ExCDAudio", + 4111:"ExWAVAudioEmbedded", + 4112:"ExWAVAudioLink", + 4113:"ExOleObjStg", + 4114:"ExCDAudioAtom", + 4115:"ExWAVAudioEmbeddedAtom", + 4116:"AnimationInfoAtom", + 4117:"RTFDateTimeMCAtom", + 5000:"ProgTags", # don't know if container + 5001:"ProgStringTag", + 5002:"ProgBinaryTag", + 5003:"BinaryTagData", + 6000:"PrintOptions", + 6001:"PersistPtrFullBlock", # don't know if container + 6002:"PersistPtrIncrementalBlock", # don't know if container + 10000:"RulerIndentAtom", + 10001:"GScalingAtom", + 10002:"GRColorAtom", + 10003:"GLPointAtom", + 10004:"GlineAtom", + 11019:"AnimationAtom12", + 11021:"AnimationHashAtom12", + 14100:"SlideSyncInfo12", + 14101:"SlideSyncInfoAtom12", + 0xf000:"EscherDggContainer", # Drawing Group Container + 0xf006:"EscherDgg", + 0xf016:"EscherCLSID", + 0xf00b:"EscherOPT", + 0xf001:"EscherBStoreContainer", + 0xf007:"EscherBSE", + 0xf018:"EscherBlip_START", # Blip types are between + 0xf117:"EscherBlip_END", # these two values + 0xf002:"EscherDgContainer", # Drawing Container + 0xf008:"EscherDg", + 0xf118:"EscherRegroupItems", + 0xf120:"EscherColorScheme", # bug in docs + 0xf003:"EscherSpgrContainer", + 0xf004:"EscherSpContainer", + 0xf009:"EscherSpgr", + 0xf00a:"EscherSp", + 0xf00c:"EscherTextbox", + 0xf00d:"EscherClientTextbox", + 0xf00e:"EscherAnchor", + 0xf00f:"EscherChildAnchor", + 0xf010:"EscherClientAnchor", + 0xf011:"EscherClientData", + 0xf005:"EscherSolverContainer", + 0xf012:"EscherConnectorRule", # bug in docs + 
0xf013:"EscherAlignRule", + 0xf014:"EscherArcRule", + 0xf015:"EscherClientRule", + 0xf017:"EscherCalloutRule", + 0xf119:"EscherSelection", + 0xf11a:"EscherColorMRU", + 0xf11d:"EscherDeletedPspl", # bug in docs + 0xf11e:"EscherSplitMenuColors", + 0xf11f:"EscherOleObject", + 0xf122:"EscherUserDefined"} + class CurrentUserAtom(FieldSet): + def createFields(self): + yield UInt32(self, "size") + yield textHandler(UInt32(self, "magic", "0xe391c05f for normal PPT, 0xf3d1c4df for encrypted PPT"), hexadecimal) + yield UInt32(self, "offsetToCurrentEdit", "Offset in main stream to current edit field") + yield UInt16(self, "lenUserName", "Length of user name") + yield UInt16(self, "docFileVersion", "1012 for PP97+") + yield UInt8(self, "majorVersion", "3 for PP97+") + yield UInt8(self, "minorVersion", "0 for PP97+") + yield UInt16(self, "unknown") + yield String(self, "userName", self["lenUserName"].value, "ANSI version of the username") + yield UInt32(self, "relVersion", "Release version: 8 for regular PPT file, 9 for multiple-master PPT file") + + class PowerPointObject(FieldSet): + def createFields(self): + yield Bits(self, "version", 4) + yield Bits(self, "instance", 12) + yield Enum(UInt16(self, "type"),PowerPointDocument.OBJ_TYPES) + yield UInt32(self, "length") + self._size = self["length"].value * 8 + 64 + obj_type = self["type"].display + obj_len = self["length"].value + # type 1064 (RoundTripCustomTableStyles12) may appear to be a container, but it is not. + if self["version"].value==0xF and self["type"].value != 1064: + while (self.current_size)//8 < obj_len+8: + yield PowerPointDocument.PowerPointObject(self, "object[]") + elif obj_len: + if obj_type=="FontEntityAtom": + yield String(self, "data", obj_len, charset="UTF-16-LE", truncate="\0", strip="\0") + elif obj_type=="TextCharsAtom": + yield String(self, "data", obj_len, charset="UTF-16-LE") + elif obj_type=="TextBytesAtom": + yield String(self, "data", obj_len, charset="ASCII") + elif hasattr(PowerPointDocument, obj_type): + field = getattr(PowerPointDocument, obj_type)(self, "data") + field._size = obj_len * 8 + yield field + else: + yield RawBytes(self, "data", obj_len) + def createDescription(self): + if self["version"].value==0xF: + return "PowerPoint Object Container; instance %s, type %s"%(self["instance"].value,self["type"].display) + return "PowerPoint Object; version %s, instance %s, type %s"%(self["version"].value,self["instance"].value,self["type"].display) + ENDIAN_CHECK=False + OS_CHECK=False + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newobj=PowerPointDocument.PowerPointObject(self, "object[]") + yield newobj + pos+=newobj.size + +class CurrentUser(OLE2FragmentParser): + def createFields(self): + yield PowerPointDocument.PowerPointObject(self, "current_user") + if self.current_size < self.size: + yield String(self, "unicode_name", self["current_user/data/lenUserName"].value * 2, charset="UTF-16-LE") + + +class ExcelWorkbook(OLE2FragmentParser): + BIFF_TYPES={0x000:"DIMENSIONS_v0", + 0x200:"DIMENSIONS_v2", + 0x001:"BLANK_v0", + 0x201:"BLANK_v2", + 0x002:"INTEGER", + 0x003:"NUMBER_v0", + 0x203:"NUMBER_v2", + 0x004:"LABEL_v0", + 0x204:"LABEL_v2", + 0x005:"BOOLERR_v0", + 0x205:"BOOLERR_v2", + 0x006:"FORMULA_v0", + 0x206:"FORMULA_v2", + 0x406:"FORMULA_v4", + 0x007:"STRING_v0", + 0x207:"STRING_v2", + 0x008:"ROW_v0", + 0x208:"ROW_v2", + 0x009:"BOF_v0", + 0x209:"BOF_v2", + 0x409:"BOF_v4", + 0x809:"BOF_v8", + 0x00a:"EOF", + 0x00b:"INDEX_v0", + 0x20b:"INDEX_v2", + 0x00c:"CALCCOUNT", + 0x00d:"CALCMODE", + 
0x00e:"PRECISION", + 0x00f:"REFMODE", + 0x010:"DELTA", + 0x011:"ITERATION", + 0x012:"PROTECT", + 0x013:"PASSWORD", + 0x014:"HEADER", + 0x015:"FOOTER", + 0x016:"EXTERNCOUNT", + 0x017:"EXTERNSHEET", + 0x018:"NAME_v0", + 0x218:"NAME_v2", + 0x019:"WINDOWPROTECT", + 0x01a:"VERTICALPAGEBREAKS", + 0x01b:"HORIZONTALPAGEBREAKS", + 0x01c:"NOTE", + 0x01d:"SELECTION", + 0x01e:"FORMAT_v0", + 0x41e:"FORMAT_v4", + 0x01f:"FORMATCOUNT", # Undocumented + 0x020:"COLUMNDEFAULT", # Undocumented + 0x021:"ARRAY_v0", + 0x221:"ARRAY_v2", + 0x022:"1904", + 0x023:"EXTERNNAME_v0", + 0x223:"EXTERNNAME_v2", + 0x024:"COLWIDTH", # Undocumented + 0x025:"DEFAULTROWHEIGHT_v0", + 0x225:"DEFAULTROWHEIGHT_v2", + 0x026:"LEFT_MARGIN", + 0x027:"RIGHT_MARGIN", + 0x028:"TOP_MARGIN", + 0x029:"BOTTOM_MARGIN", + 0x02a:"PRINTHEADERS", + 0x02b:"PRINTGRIDLINES", + 0x02f:"FILEPASS", + 0x031:"FONT_v0", + 0x231:"FONT_v2", + 0x032:"FONTCOUNT", # Undocumented + 0x033:"PRINTSIZE", # Undocumented + 0x036:"TABLE_v0", + 0x236:"TABLE_v2", + 0x037:"TABLE2", # OOo has docs + 0x038:"WNDESK", # Undocumented + 0x039:"ZOOM", # Undocumented + 0x03a:"BEGINPREF", # Undocumented + 0x03b:"ENDPREF", # Undocumented + 0x03c:"CONTINUE", + 0x03d:"WINDOW1", + 0x03e:"WINDOW2_v0", + 0x23e:"WINDOW2_v2", + 0x03f:"PANE_V2", # Undocumented + 0x040:"BACKUP", + 0x041:"PANE", + 0x042:"CODEPAGE", + 0x043:"XF_OLD_v0", + 0x243:"XF_OLD_v2", + 0x443:"XF_OLD_v4", + 0x044:"XF_INDEX", + 0x045:"FONT_COLOR", + 0x04d:"PLS", + 0x050:"DCON", + 0x051:"DCONREF", + 0x052:"DCONNAME", + 0x055:"DEFCOLWIDTH", + 0x059:"XCT", + 0x05a:"CRN", + 0x05b:"FILESHARING", + 0x05c:"WRITEACCESS", + 0x05d:"OBJ", + 0x05e:"UNCALCED", + 0x05f:"SAVERECALC", + 0x060:"TEMPLATE", + 0x061:"INTL", # Undocumented + 0x862:"TAB_COLOR", # Undocumented, OO calls it SHEETLAYOUT + 0x063:"OBJPROTECT", + 0x07d:"COLINFO", + 0x27e:"RK", # Odd that there is no 0x7e + 0x07f:"IMDATA", + 0x080:"GUTS", + 0x081:"WSBOOL", + 0x082:"GRIDSET", + 0x083:"HCENTER", + 0x084:"VCENTER", + 0x085:"BOUNDSHEET", + 0x086:"WRITEPROT", + 0x087:"ADDIN", + 0x088:"EDG", + 0x089:"PUB", + 0x08c:"COUNTRY", + 0x08d:"HIDEOBJ", + 0x08e:"BUNDLESOFFSET", # Undocumented + 0x08f:"BUNDLEHEADER", # Undocumented + 0x090:"SORT", + 0x091:"SUB", + 0x092:"PALETTE", + 0x293:"STYLE", # Odd that there is no 0x93 + 0x094:"LHRECORD", + 0x095:"LHNGRAPH", + 0x096:"SOUND", + 0x097:"SYNC", # Undocumented + 0x098:"LPR", + 0x099:"STANDARDWIDTH", + 0x09a:"FNGROUPNAME", + 0x09b:"FILTERMODE", + 0x09c:"FNGROUPCOUNT", + 0x09d:"AUTOFILTERINFO", + 0x09e:"AUTOFILTER", + 0x0a0:"SCL", + 0x0a1:"SETUP", + 0x0a4:"TOOLBARVER", # Undocumented + 0x0a9:"COORDLIST", + 0x0ab:"GCW", + 0x0ae:"SCENMAN", + 0x0af:"SCENARIO", + 0x0b0:"SXVIEW", + 0x0b1:"SXVD", + 0x0b2:"SXVI", + 0x0b3:"SXSI", # Undocumented + 0x0b4:"SXIVD", + 0x0b5:"SXLI", + 0x0b6:"SXPI", + 0x0b7:"FACENUM", # Undocumented + 0x0b8:"DOCROUTE", + 0x0b9:"RECIPNAME", + 0x0ba:"SSLIST", # Undocumented + 0x0bb:"MASKIMDATA", # Undocumented + 0x4bc:"SHRFMLA", + 0x0bd:"MULRK", + 0x0be:"MULBLANK", + 0x0bf:"TOOLBARHDR", # Undocumented + 0x0c0:"TOOLBAREND", # Undocumented + 0x0c1:"MMS", + 0x0c2:"ADDMENU", + 0x0c3:"DELMENU", + 0x0c4:"TIPHISTORY", # Undocumented + 0x0c5:"SXDI", + 0x0c6:"SXDB", + 0x0c7:"SXFDB", # guessed + 0x0c8:"SXDDB", # guessed + 0x0c9:"SXNUM", # guessed + 0x0ca:"SXBOOL", # guessed + 0x0cb:"SXERR", # guessed + 0x0cc:"SXINT", # guessed + 0x0cd:"SXSTRING", + 0x0ce:"SXDTR", # guessed + 0x0cf:"SXNIL", # guessed + 0x0d0:"SXTBL", + 0x0d1:"SXTBRGIITM", + 0x0d2:"SXTBPG", + 0x0d3:"OBPROJ", + 0x0d5:"SXIDSTM", + 0x0d6:"RSTRING", + 0x0d7:"DBCELL", 
+ 0x0d8:"SXNUMGROUP", # from OO : numerical grouping in pivot cache field + 0x0da:"BOOKBOOL", + 0x0dc:"PARAMQRY", # DUPLICATE dc + 0x0dc:"SXEXT", # DUPLICATE dc + 0x0dd:"SCENPROTECT", + 0x0de:"OLESIZE", + 0x0df:"UDDESC", + 0x0e0:"XF", + 0x0e1:"INTERFACEHDR", + 0x0e2:"INTERFACEEND", + 0x0e3:"SXVS", + 0x0e5:"MERGECELLS", # guessed + 0x0e9:"BG_PIC", # Undocumented + 0x0ea:"TABIDCONF", + 0x0eb:"MS_O_DRAWING_GROUP", + 0x0ec:"MS_O_DRAWING", + 0x0ed:"MS_O_DRAWING_SELECTION", + 0x0ef:"PHONETIC", # semi-Undocumented + 0x0f0:"SXRULE", + 0x0f1:"SXEX", + 0x0f2:"SXFILT", + 0x0f6:"SXNAME", + 0x0f7:"SXSELECT", + 0x0f8:"SXPAIR", + 0x0f9:"SXFMLA", + 0x0fb:"SXFORMAT", + 0x0fc:"SST", + 0x0fd:"LABELSST", + 0x0ff:"EXTSST", + 0x100:"SXVDEX", + 0x103:"SXFORMULA", + 0x122:"SXDBEX", + 0x137:"CHTRINSERT", + 0x138:"CHTRINFO", + 0x13B:"CHTRCELLCONTENT", + 0x13d:"TABID", + 0x140:"CHTRMOVERANGE", + 0x14D:"CHTRINSERTTAB", + 0x15F:"LABELRANGES", + 0x160:"USESELFS", + 0x161:"DSF", + 0x162:"XL5MODIFY", + 0x196:"CHTRHEADER", + 0x1a5:"FILESHARING2", + 0x1a9:"USERDBVIEW", + 0x1aa:"USERSVIEWBEGIN", + 0x1ab:"USERSVIEWEND", + 0x1ad:"QSI", + 0x1ae:"SUPBOOK", + 0x1af:"PROT4REV", + 0x1b0:"CONDFMT", + 0x1b1:"CF", + 0x1b2:"DVAL", + 0x1b5:"DCONBIN", + 0x1b6:"TXO", + 0x1b7:"REFRESHALL", + 0x1b8:"HLINK", + 0x1ba:"CODENAME", # TYPO in MS Docs + 0x1bb:"SXFDBTYPE", + 0x1bc:"PROT4REVPASS", + 0x1be:"DV", + 0x1c0:"XL9FILE", + 0x1c1:"RECALCID", + 0x800:"LINK_TIP", # follows an hlink + 0x802:"UNKNOWN_802", # OO exports it but has not name or docs + 0x803:"WQSETT", # OO named it and can export it, but does not include it in the docs + 0x804:"WQTABLES", # OO named it and can export it, but does not include it in the docs + 0x805:"UNKNOWN_805", # No name or docs, seems related to web query see #153260 for sample + 0x810:"PIVOT_AUTOFORMAT", # Seems to contain pivot table autoformat indicies, plus ?? 
+ 0x864:"UNKNOWN_864", # seems related to pivot tables + 0x867:"SHEETPROTECTION", # OO named it, and has docs + 0x868:"RANGEPROTECTION", # OO named it, no docs yet + + 0x1001:"CHART_units", + 0x1002:"CHART_chart", + 0x1003:"CHART_series", + 0x1006:"CHART_dataformat", + 0x1007:"CHART_lineformat", + 0x1009:"CHART_markerformat", + 0x100a:"CHART_areaformat", + 0x100b:"CHART_pieformat", + 0x100c:"CHART_attachedlabel", + 0x100d:"CHART_seriestext", + 0x1014:"CHART_chartformat", + 0x1015:"CHART_legend", + 0x1016:"CHART_serieslist", + 0x1017:"CHART_bar", + 0x1018:"CHART_line", + 0x1019:"CHART_pie", + 0x101a:"CHART_area", + 0x101b:"CHART_scatter", + 0x101c:"CHART_chartline", + 0x101d:"CHART_axis", + 0x101e:"CHART_tick", + 0x101f:"CHART_valuerange", + 0x1020:"CHART_catserrange", + 0x1021:"CHART_axislineformat", + 0x1022:"CHART_chartformatlink", + 0x1024:"CHART_defaulttext", + 0x1025:"CHART_text", + 0x1026:"CHART_fontx", + 0x1027:"CHART_objectlink", + 0x1032:"CHART_frame", + 0x1033:"CHART_begin", + 0x1034:"CHART_end", + 0x1035:"CHART_plotarea", + 0x103a:"CHART_3d", + 0x103c:"CHART_picf", + 0x103d:"CHART_dropbar", + 0x103e:"CHART_radar", + 0x103f:"CHART_surf", + 0x1040:"CHART_radararea", + 0x1041:"CHART_axisparent", + 0x1043:"CHART_legendxn", + 0x1044:"CHART_shtprops", + 0x1045:"CHART_sertocrt", + 0x1046:"CHART_axesused", + 0x1048:"CHART_sbaseref", + 0x104a:"CHART_serparent", + 0x104b:"CHART_serauxtrend", + 0x104e:"CHART_ifmt", + 0x104f:"CHART_pos", + 0x1050:"CHART_alruns", + 0x1051:"CHART_ai", + 0x105b:"CHART_serauxerrbar", + 0x105c:"CHART_clrtclient", # Undocumented + 0x105d:"CHART_serfmt", + 0x105f:"CHART_3dbarshape", # Undocumented + 0x1060:"CHART_fbi", + 0x1061:"CHART_boppop", + 0x1062:"CHART_axcext", + 0x1063:"CHART_dat", + 0x1064:"CHART_plotgrowth", + 0x1065:"CHART_siindex", + 0x1066:"CHART_gelframe", + 0x1067:"CHART_boppopcustom",} + class BIFF(FieldSet): + def createFields(self): + yield Enum(UInt16(self, "type"),ExcelWorkbook.BIFF_TYPES) + yield UInt16(self, "length") + if self["length"].value: + yield RawBytes(self, "data", self["length"].value) + def createDescription(self): + return "Excel BIFF; type %s"%self["type"].display + def createFields(self): + pos=0 + while pos//8 < self.datasize: + newobj=ExcelWorkbook.BIFF(self, "BIFF[]") + yield newobj + pos+=newobj.size + +class ThumbsCatalog(OLE2FragmentParser): + class ThumbsEntry(FieldSet): + def createFields(self): + yield UInt32(self, "size") + yield UInt32(self, "index") + yield Bits(self, "flags", 8) + yield RawBytes(self, "unknown[]", 5) + yield UInt16(self, "unknown[]") + yield CString(self, "name", charset="UTF-16-LE") + if self.current_size // 8 != self['size'].value: + yield RawBytes(self, "padding", self['size'].value - self.current_size // 8) + def createDescription(self): + return "Thumbnail entry for %s"%self["name"].display + + def createFields(self): + yield UInt16(self, "unknown[]") + yield UInt16(self, "unknown[]") + yield UInt32(self, "count") + yield UInt32(self, "unknown[]") + yield UInt32(self, "unknown[]") + for i in xrange(self['count'].value): + yield ThumbsCatalog.ThumbsEntry(self, "entry[]") + +PROPERTY_NAME = { + u"Root Entry": ("root",RootEntry), + u"\5DocumentSummaryInformation": ("doc_summary",Summary), + u"\5SummaryInformation": ("summary",Summary), + u"\1CompObj": ("compobj",CompObj), + u"Pictures": ("pictures",Pictures), + u"PowerPoint Document": ("powerpointdoc",PowerPointDocument), + u"Current User": ("current_user",CurrentUser), + u"Workbook": ("workbook",ExcelWorkbook), + u"Catalog": 
("catalog",ThumbsCatalog), + u"WordDocument": ("word_doc",WordDocumentParser), + u"0Table": ("table0",WordTableParser), + u"1Table": ("table1",WordTableParser), +} diff --git a/lib/hachoir_parser/misc/msoffice_summary.py b/lib/hachoir_parser/misc/msoffice_summary.py index 4199934f..e3ded4cf 100644 --- a/lib/hachoir_parser/misc/msoffice_summary.py +++ b/lib/hachoir_parser/misc/msoffice_summary.py @@ -7,18 +7,19 @@ Documents - Apache POI (HPSF Internals): http://poi.apache.org/hpsf/internals.html """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.field import (FieldSet, ParserError, - RootSeekableFieldSet, SeekableFieldSet, +from hachoir_core.endian import BIG_ENDIAN,LITTLE_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_core.field import (FieldSet, ParserError, + SeekableFieldSet, Bit, Bits, NullBits, UInt8, UInt16, UInt32, TimestampWin64, TimedeltaWin64, Enum, - Bytes, RawBytes, NullBytes, String, + Bytes, RawBytes, NullBytes, PaddingBits, String, Int8, Int32, Float32, Float64, PascalString32) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.tools import createDict -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN -from lib.hachoir_parser.common.win32 import GUID, PascalStringWin32, CODEPAGE_CHARSET -from lib.hachoir_parser.image.bmp import BmpHeader, parseImageData +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.tools import createDict, paddingSize +from hachoir_parser.common.win32 import GUID, PascalStringWin32, CODEPAGE_CHARSET +from hachoir_parser.image.bmp import BmpHeader, parseImageData +from hachoir_parser.misc.ole2_util import OLE2FragmentParser MAX_SECTION_COUNT = 100 @@ -165,10 +166,37 @@ class Thumbnail(FieldSet): yield RawBytes(self, "data", size) class PropertyContent(FieldSet): + class NullHandler(FieldSet): + def createFields(self): + yield UInt32(self, "unknown[]") + yield PascalString32(self, "data") + def createValue(self): + return self["data"].value + class BlobHandler(FieldSet): + def createFields(self): + self.osconfig = self.parent.osconfig + yield UInt32(self, "size") + yield UInt32(self, "count") + for i in range(self["count"].value): + yield PropertyContent(self, "item[]") + n=paddingSize(self.current_size,32) + if n: yield PaddingBits(self, "padding[]", n) + class WidePascalString32(FieldSet): + ''' uses number of characters instead of number of bytes ''' + def __init__(self,parent,name,charset='ASCII'): + FieldSet.__init__(self,parent,name) + self.charset=charset + def createFields(self): + yield UInt32(self, "length", "Length of this string") + yield String(self, "data", self["length"].value*2, charset=self.charset) + def createValue(self): + return self["data"].value + def createDisplay(self): + return 'u'+self["data"].display TYPE_LPSTR = 30 TYPE_INFO = { 0: ("EMPTY", None), - 1: ("NULL", None), + 1: ("NULL", NullHandler), 2: ("UInt16", UInt16), 3: ("UInt32", UInt32), 4: ("Float32", Float32), @@ -197,9 +225,9 @@ class PropertyContent(FieldSet): 28: ("CARRAY", None), 29: ("USERDEFINED", None), 30: ("LPSTR", PascalString32), - 31: ("LPWSTR", PascalString32), + 31: ("LPWSTR", WidePascalString32), 64: ("FILETIME", TimestampWin64), - 65: ("BLOB", None), + 65: ("BLOB", BlobHandler), 66: ("STREAM", None), 67: ("STORAGE", None), 68: ("STREAMED_OBJECT", None), @@ -223,8 +251,13 @@ class PropertyContent(FieldSet): kw = {} try: handler = self.TYPE_INFO[tag][1] - if handler == PascalString32: - osconfig = 
self.osconfig + if handler in (self.WidePascalString32,PascalString32): + cur = self + while not hasattr(cur,'osconfig'): + cur=cur.parent + if cur is None: + raise LookupError('Cannot find osconfig') + osconfig = cur.osconfig if tag == self.TYPE_LPSTR: kw["charset"] = osconfig.charset else: @@ -235,9 +268,10 @@ class PropertyContent(FieldSet): except LookupError: handler = None if not handler: - raise ParserError("OLE2: Unable to parse property of type %s" \ + self.warning("OLE2: Unable to parse property of type %s" \ % self["type"].display) - if self["is_vector"].value: + # raise ParserError( + elif self["is_vector"].value: yield UInt32(self, "count") for index in xrange(self["count"].value): yield handler(self, "item[]", **kw) @@ -276,20 +310,16 @@ class SummaryIndex(FieldSet): yield String(self, "name", 16) yield UInt32(self, "offset") -class BaseSummary: - endian = LITTLE_ENDIAN +class Summary(OLE2FragmentParser): + ENDIAN_CHECK=True - def __init__(self): - if self["endian"].value == "\xFF\xFE": - self.endian = BIG_ENDIAN - elif self["endian"].value == "\xFE\xFF": - self.endian = LITTLE_ENDIAN - else: - raise ParserError("OLE2: Invalid endian value") - self.osconfig = OSConfig(self["os_type"].value == OS_MAC) + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) + #self.osconfig = OSConfig(self["os_type"].value == OS_MAC) + self.osconfig = OSConfig(self.endian == BIG_ENDIAN) def createFields(self): - yield Bytes(self, "endian", 2, "Endian (0xFF 0xFE for Intel)") + yield Bytes(self, "endian", 2, "Endian (\\xfe\\xff for little endian)") yield UInt16(self, "format", "Format (0)") yield UInt8(self, "os_version") yield UInt8(self, "os_revision") @@ -313,35 +343,20 @@ class BaseSummary: if 0 < size: yield NullBytes(self, "end_padding", size) -class SummaryParser(BaseSummary, HachoirParser, RootSeekableFieldSet): - PARSER_TAGS = { - "description": "Microsoft Office summary", - } +class CompObj(OLE2FragmentParser): + ENDIAN_CHECK=True - def __init__(self, stream, **kw): - RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) - HachoirParser.__init__(self, stream, **kw) - BaseSummary.__init__(self) - - def validate(self): - return True - -class SummaryFieldSet(BaseSummary, FieldSet): - def __init__(self, parent, name, description=None, size=None): - FieldSet.__init__(self, parent, name, description=description, size=size) - BaseSummary.__init__(self) - -class CompObj(FieldSet): - OS_VERSION = { - 0x0a03: "Windows 3.1", - } + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) + self.osconfig = OSConfig(self["os"].value == OS_MAC) + def createFields(self): # Header yield UInt16(self, "version", "Version (=1)") - yield textHandler(UInt16(self, "endian", "Endian (0xFF 0xFE for Intel)"), hexadecimal) + yield Bytes(self, "endian", 2, "Endian (\\xfe\\xff for little endian)") yield UInt8(self, "os_version") yield UInt8(self, "os_revision") - yield Enum(UInt16(self, "os_type"), OS_NAME) + yield Enum(UInt16(self, "os"), OS_NAME) yield Int32(self, "unused", "(=-1)") yield GUID(self, "clsid") @@ -349,12 +364,12 @@ class CompObj(FieldSet): yield PascalString32(self, "user_type", strip="\0") # Clipboard format - if self["os_type"].value == OS_MAC: + if self["os"].value == OS_MAC: yield Int32(self, "unused[]", "(=-2)") yield String(self, "clipboard_format", 4) else: yield PascalString32(self, "clipboard_format", strip="\0") - if self.current_size == self.size: + if self._current_size // 8 == 
self.datasize: return #-- OLE 2.01 --- @@ -362,7 +377,7 @@ class CompObj(FieldSet): # Program ID yield PascalString32(self, "prog_id", strip="\0") - if self["os_type"].value != OS_MAC: + if self["os"].value != OS_MAC: # Magic number yield textHandler(UInt32(self, "magic", "Magic number (0x71B239F4)"), hexadecimal) @@ -371,7 +386,8 @@ class CompObj(FieldSet): yield PascalStringWin32(self, "clipboard_format_unicode", strip="\0") yield PascalStringWin32(self, "prog_id_unicode", strip="\0") - size = (self.size - self.current_size) // 8 + size = self.datasize - (self._current_size // 8) # _current_size because current_size returns _current_max_size if size: yield NullBytes(self, "end_padding", size) + if self.datasize """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( Field, FieldSet, ParserError, GenericVector, UInt8, UInt16, UInt32, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal MAGIC = "%PDF-" ENDMAGIC = "%%EOF" diff --git a/lib/hachoir_parser/misc/pifv.py b/lib/hachoir_parser/misc/pifv.py index e60e651e..d9474733 100644 --- a/lib/hachoir_parser/misc/pifv.py +++ b/lib/hachoir_parser/misc/pifv.py @@ -5,13 +5,13 @@ Author: Alexandre Boeglin Creation date: 08 jul 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, UInt24, UInt32, UInt64, Enum, CString, String, PaddingBytes, RawBytes, NullBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import paddingSize, humanFilesize -from lib.hachoir_parser.common.win32 import GUID +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import paddingSize, humanFilesize +from hachoir_parser.common.win32 import GUID EFI_SECTION_COMPRESSION = 0x1 EFI_SECTION_GUID_DEFINED = 0x2 diff --git a/lib/hachoir_parser/misc/torrent.py b/lib/hachoir_parser/misc/torrent.py index 62653271..0c32a785 100644 --- a/lib/hachoir_parser/misc/torrent.py +++ b/lib/hachoir_parser/misc/torrent.py @@ -7,11 +7,11 @@ Status: To statufy Author: Christophe Gisquet """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.tools import makePrintable, timestampUNIX, humanFilesize +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.tools import makePrintable, timestampUNIX, humanFilesize # Maximum number of bytes for string length MAX_STRING_LENGTH = 6 # length in 0..999999 @@ -128,7 +128,7 @@ class DictionaryItem(FieldSet): # Map first chunk byte => type TAGS = {'d': Dictionary, 'i': Integer, 'l': List} -for index in xrange(1, 9+1): +for index in xrange(0, 9+1): TAGS[str(index)] = TorrentString # Create an entry diff --git a/lib/hachoir_parser/misc/ttf.py b/lib/hachoir_parser/misc/ttf.py index 3a698ad1..f1024aab 100644 --- a/lib/hachoir_parser/misc/ttf.py +++ b/lib/hachoir_parser/misc/ttf.py @@ -10,14 +10,14 @@ Author: Victor Stinner Creation date: 2007-02-08 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from 
hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt16, UInt32, Bit, Bits, PaddingBits, NullBytes, String, RawBytes, Bytes, Enum, TimestampMac32) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler MAX_NAME_COUNT = 300 MIN_NB_TABLE = 3 diff --git a/lib/hachoir_parser/misc/word_2.py b/lib/hachoir_parser/misc/word_2.py new file mode 100644 index 00000000..aec727b1 --- /dev/null +++ b/lib/hachoir_parser/misc/word_2.py @@ -0,0 +1,168 @@ +""" +Documents: + +* "Microsoft Word for Windows 2.0 Binary Format" + http://www.wotsit.org/download.asp?f=word2&sc=275927573 +""" + +from hachoir_core.field import (FieldSet, Enum, + Bit, Bits, + UInt8, Int16, UInt16, UInt32, Int32, + NullBytes, Bytes, RawBytes, PascalString16, + DateTimeMSDOS32, TimeDateMSDOS32) +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser.misc.ole2_util import OLE2FragmentParser +from hachoir_core.tools import paddingSize +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID +TIMESTAMP = DateTimeMSDOS32 + +class FC_CB(FieldSet): + def createFields(self): + yield Int32(self, "fc", "File Offset") + yield UInt16(self, "cb", "Byte Count") + def createValue(self): + return (self['fc'].value,self['cb'].value) + +class FIB(FieldSet): + def createFields(self): + yield UInt16(self, "wIdent", "Magic Number") + yield UInt16(self, "nFib", "File Information Block (FIB) Version") + yield UInt16(self, "nProduct", "Product Version") + yield Enum(UInt16(self, "lid", "Language ID"), LANGUAGE_ID) + yield Int16(self, "pnNext") + + yield Bit(self, "fDot", "Is the document a document template?") + yield Bit(self, "fGlsy", "Is the document a glossary?") + yield Bit(self, "fComplex", "Is the document in Complex format?") + yield Bit(self, "fHasPic", "Does the document have embedded images?") + yield Bits(self, "cQuickSaves", 4, "Number of times the document was quick-saved") + yield Bit(self, "fEncrypted", "Is the document encrypted?") + yield Bits(self, "reserved[]", 7) + + yield UInt16(self, "nFibBack") + yield UInt32(self, "reserved[]") + yield NullBytes(self, "rgwSpare", 6) + + yield UInt32(self, "fcMin", "File offset of first text character") + yield UInt32(self, "fcMax", "File offset of last text character + 1") + yield Int32(self, "cbMax", "File offset of last byte + 1") + yield NullBytes(self, "fcSpare", 16) + + yield UInt32(self, "ccpText", "Length of main document text stream") + yield Int32(self, "ccpFtn", "Length of footnote subdocument text stream") + yield Int32(self, "ccpHdr", "Length of header subdocument text stream") + yield Int32(self, "ccpMcr", "Length of macro subdocument text stream") + yield Int32(self, "ccpAtn", "Length of annotation subdocument text stream") + yield NullBytes(self, "ccpSpare", 16) + + yield FC_CB(self, "StshfOrig", "Original STSH allocation") + yield FC_CB(self, "Stshf", "Current STSH allocation") + yield FC_CB(self, "PlcffndRef", "Footnote reference PLC") + yield FC_CB(self, "PlcffndTxt", "Footnote text PLC") + yield FC_CB(self, "PlcfandRef", "Annotation reference PLC") + yield FC_CB(self, "PlcfandTxt", "Annotation text PLC") + yield FC_CB(self, "Plcfsed", "Section descriptor PLC") + yield FC_CB(self, "Plcfpgd", "Page descriptor PLC") + yield FC_CB(self, "Plcfphe", "Paragraph heights PLC") + yield FC_CB(self, "Sttbfglsy", "Glossary string table") + yield 
FC_CB(self, "Plcfglsy", "Glossary PLC") + yield FC_CB(self, "Plcfhdd", "Header PLC") + yield FC_CB(self, "PlcfbteChpx", "Character property bin table PLC") + yield FC_CB(self, "PlcfbtePapx", "Paragraph property bin table PLC") + yield FC_CB(self, "Plcfsea", "Private Use PLC") + yield FC_CB(self, "Sttbfffn") + yield FC_CB(self, "PlcffldMom") + yield FC_CB(self, "PlcffldHdr") + yield FC_CB(self, "PlcffldFtn") + yield FC_CB(self, "PlcffldAtn") + yield FC_CB(self, "PlcffldMcr") + yield FC_CB(self, "Sttbfbkmk") + yield FC_CB(self, "Plcfbkf") + yield FC_CB(self, "Plcfbkl") + yield FC_CB(self, "Cmds") + yield FC_CB(self, "Plcmcr") + yield FC_CB(self, "Sttbfmcr") + yield FC_CB(self, "PrDrvr", "Printer Driver information") + yield FC_CB(self, "PrEnvPort", "Printer environment for Portrait mode") + yield FC_CB(self, "PrEnvLand", "Printer environment for Landscape mode") + yield FC_CB(self, "Wss", "Window Save State") + yield FC_CB(self, "Dop", "Document Property data") + yield FC_CB(self, "SttbfAssoc") + yield FC_CB(self, "Clx", "'Complex' file format data") + yield FC_CB(self, "PlcfpgdFtn", "Footnote page descriptor PLC") + yield FC_CB(self, "AutosaveSource", "Original filename for Autosave purposes") + yield FC_CB(self, "Spare5") + yield FC_CB(self, "Spare6") + + yield Int16(self, "wSpare4") + yield UInt16(self, "pnChpFirst") + yield UInt16(self, "pnPapFirst") + yield UInt16(self, "cpnBteChp", "Count of CHPX FKPs recorded in file") + yield UInt16(self, "cpnBtePap", "Count of PAPX FKPs recorded in file") + +class SEPX(FieldSet): + def createFields(self): + yield UInt8(self, "size") + self._size=(self['size'].value+1)*8 + yield RawBytes(self, "raw[]", self['size'].value) + +class SEPXGroup(FieldSet): + def __init__(self, parent, name, size, description=None): + FieldSet.__init__(self, parent, name, description=description) + self._size=size*8 + def createFields(self): + while self.current_size < self.size: + next=self.stream.readBytes(self.absolute_address+self.current_size,1) + if next=='\x00': + padding = paddingSize((self.absolute_address + self.current_size)//8, 512) + if padding: + yield NullBytes(self, "padding[]", padding) + if self.current_size >= self.size: break + yield SEPX(self, "sepx[]") + +class Word2DocumentParser(OLE2FragmentParser): + MAGIC='\xdb\xa5' # 42459 + PARSER_TAGS = { + "id": "word_v2_document", + "min_size": 8, + "magic": ((MAGIC, 0),), + "file_ext": ("doc",), + "description": "Microsoft Office Word Version 2.0 document", + } + endian = LITTLE_ENDIAN + + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) + + def validate(self): + if self.stream.readBytes(0,2) != self.MAGIC: + return "Invalid magic." + if self['FIB/nFib'].value not in (45,): + return "Unknown FIB version." 
+ return True + + def createFields(self): + yield FIB(self, "FIB", "File Information Block") + + padding = (self['FIB/fcMin'].value - self.current_size//8) + if padding: + yield NullBytes(self, "padding[]", padding) + if self['FIB/ccpText'].value: + yield Bytes(self, "text", self['FIB/ccpText'].value) + if self['FIB/ccpFtn'].value: + yield Bytes(self, "text_footnote", self['FIB/ccpFtn'].value) + if self['FIB/ccpHdr'].value: + yield Bytes(self, "text_header", self['FIB/ccpHdr'].value) + if self['FIB/ccpMcr'].value: + yield Bytes(self, "text_macro", self['FIB/ccpMcr'].value) + if self['FIB/ccpAtn'].value: + yield Bytes(self, "text_annotation", self['FIB/ccpAtn'].value) + + padding = (self['FIB/fcMax'].value - self.current_size//8) + if padding: + yield RawBytes(self, "padding[]", padding) + + sepx_size = (self['FIB/pnChpFirst'].value*512 - self.current_size//8) + if sepx_size: + yield SEPXGroup(self, "sepx", sepx_size) + diff --git a/lib/hachoir_parser/misc/word_doc.py b/lib/hachoir_parser/misc/word_doc.py index 8c9df549..36929d83 100644 --- a/lib/hachoir_parser/misc/word_doc.py +++ b/lib/hachoir_parser/misc/word_doc.py @@ -11,289 +11,417 @@ Documents: section. Revised Dec 21 1998, added missing Document Properties (section). """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, Enum, Bit, Bits, UInt8, Int16, UInt16, UInt32, Int32, - NullBytes, RawBytes, PascalString16, - DateTimeMSDOS32) -from lib.hachoir_core.endian import LITTLE_ENDIAN + NullBytes, Bytes, RawBytes, PascalString8, PascalString16, CString, String, + TimestampMac32, TimestampWin64) +from hachoir_core.text_handler import displayHandler +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_parser import guessParser +from hachoir_parser.misc.ole2_util import OLE2FragmentParser +from hachoir_parser.common.win32_lang_id import LANGUAGE_ID -TIMESTAMP = DateTimeMSDOS32 - -class BaseWordDocument: +CREATOR_ID={0x6A62: "Microsoft Word"} +class ShortArray(FieldSet): def createFields(self): - yield UInt16(self, "wIdent", 2) - yield UInt16(self, "nFib") - yield UInt16(self, "nProduct") - yield UInt16(self, "lid") + yield UInt16(self, "csw", "Count of fields in the array of shorts") + self._size = self['csw'].value*16+16 + yield Enum(UInt16(self, "wMagicCreated", "File creator ID"), CREATOR_ID) + yield Enum(UInt16(self, "wMagicRevised", "File last modifier ID"), CREATOR_ID) + yield UInt16(self, "wMagicCreatePrivate") + yield UInt16(self, "wMagicCreatedPrivate") + yield UInt16(self, "pnFbpChpFirst_W6") + yield UInt16(self, "pnChpFirst_W6") + yield UInt16(self, "cpnBteChp_W6") + yield UInt16(self, "pnFbpPapFirst_W6") + yield UInt16(self, "pnPapFirst_W6") + yield UInt16(self, "cpnBtePap_W6") + yield UInt16(self, "pnFbpLvcFirst_W6") + yield UInt16(self, "pnLvcFirst_W6") + yield UInt16(self, "cpnBteLvc_W6") + yield Enum(UInt16(self, "lidFE", "Language ID if a Far East version of Word was used"), LANGUAGE_ID) + while self.current_size < self.size: + yield Int16(self, "unknown[]") + +def buildDateHandler(v): + md,y=divmod(v,100) + m,d=divmod(md,100) + if y < 60: y=2000+y + else: y=1900+y + return "%04i-%02i-%02i"%(y,m,d) + +class LongArray(FieldSet): + def createFields(self): + yield UInt16(self, "clw", "Count of fields in the array of longs") + self._size = self['clw'].value*32+16 + yield Int32(self, "cbMax", "Stream offset of last byte + 1") + yield displayHandler(UInt32(self, "lProductCreated", "Date when the creator program was built"),buildDateHandler) + 
yield displayHandler(UInt32(self, "lProductRevised", "Date when the last modifier program was built"),buildDateHandler) + + yield UInt32(self, "ccpText", "Length of main document text stream") + yield Int32(self, "ccpFtn", "Length of footnote subdocument text stream") + yield Int32(self, "ccpHdr", "Length of header subdocument text stream") + yield Int32(self, "ccpMcr", "Length of macro subdocument text stream") + yield Int32(self, "ccpAtn", "Length of annotation subdocument text stream") + yield Int32(self, "ccpEdn", "Length of endnote subdocument text stream") + yield Int32(self, "ccpTxbx", "Length of textbox subdocument text stream") + yield Int32(self, "ccpHdrTxbx", "Length of header textbox subdocument text stream") + yield Int32(self, "pnFbpChpFirst", "Start of CHPX (Character Property) sector chain (sector = 512-byte 'page')") + yield Int32(self, "pnChpFirst", "First CHPX sector") + yield Int32(self, "cpnBteChp", "Number of CHPX sectors in the file") + yield Int32(self, "pnFbpPapFirst", "Start of PAPX (Paragraph Property) sector chain") + yield Int32(self, "pnPapFirst", "First PAPX sector") + yield Int32(self, "cpnBtePap", "Number of PAPX sectors in the file") + yield Int32(self, "pnFbpLvcFirst", "Start of LVC sector chain") + yield Int32(self, "pnLvcFirst", "First LVC sector") + yield Int32(self, "cpnBteLvc", "Number of LVC sectors in the file") + yield Int32(self, "fcIslandFirst") + yield Int32(self, "fcIslandLim") + while self.current_size < self.size: + yield Int32(self, "unknown[]") + +class FCLCB(FieldSet): + static_size=64 + def createFields(self): + yield Int32(self, "fc", "Table Stream Offset") + yield UInt32(self, "lcb", "Byte Count") + def createValue(self): + return (self['fc'].value,self['lcb'].value) + +class FCLCBArray(FieldSet): + def createFields(self): + yield UInt16(self, "cfclcb", "Count of fields in the array of FC/LCB pairs") + self._size = self['cfclcb'].value*64+16 + + yield FCLCB(self, "StshfOrig", "Original STSH allocation") + yield FCLCB(self, "Stshf", "Current STSH allocation") + yield FCLCB(self, "PlcffndRef", "Footnote reference (FRD) PLC") + yield FCLCB(self, "PlcffndTxt", "Footnote text PLC") + yield FCLCB(self, "PlcfandRef", "Annotation reference (ATRD) PLC") + yield FCLCB(self, "PlcfandTxt", "Annotation text PLC") + yield FCLCB(self, "Plcfsed", "Section descriptor (SED) PLC") + yield FCLCB(self, "Plcpad", "No longer used; used to be Plcfpgd (Page descriptor PLC)") + yield FCLCB(self, "Plcfphe", "Paragraph heights (PHE) PLC (only for Complex files)") + yield FCLCB(self, "Sttbfglsy", "Glossary string table") + yield FCLCB(self, "Plcfglsy", "Glossary PLC") + yield FCLCB(self, "Plcfhdd", "Header (HDD) PLC") + yield FCLCB(self, "PlcfbteChpx", "Character property bin table PLC") + yield FCLCB(self, "PlcfbtePapx", "Paragraph property bin table PLC") + yield FCLCB(self, "Plcfsea", "Private Use PLC") + yield FCLCB(self, "Sttbfffn", "Font information STTB") + yield FCLCB(self, "PlcffldMom", "Main document field position (FLD) PLC") + yield FCLCB(self, "PlcffldHdr", "Header subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldFtn", "Footnote subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldAtn", "Annotation subdocument field position (FLD) PLC") + yield FCLCB(self, "PlcffldMcr", "No longer used") + yield FCLCB(self, "Sttbfbkmk", "Bookmark names STTB") + yield FCLCB(self, "Plcfbkf", "Bookmark begin position (BKF) PLC") + yield FCLCB(self, "Plcfbkl", "Bookmark end position (BKL) PLC") + yield FCLCB(self, "Cmds", "Macro commands") 
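+        # Note: the FC/LCB array is positional, so entries documented as
+        # "No longer used" must still be read to keep later offsets aligned.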
+ yield FCLCB(self, "Plcmcr", "No longer used") + yield FCLCB(self, "Sttbfmcr", "No longer used") + yield FCLCB(self, "PrDrvr", "Printer Driver information") + yield FCLCB(self, "PrEnvPort", "Printer environment for Portrait mode") + yield FCLCB(self, "PrEnvLand", "Printer environment for Landscape mode") + yield FCLCB(self, "Wss", "Window Save State") + yield FCLCB(self, "Dop", "Document Property data") + yield FCLCB(self, "SttbfAssoc", "Associated strings STTB") + yield FCLCB(self, "Clx", "Complex file information") + yield FCLCB(self, "PlcfpgdFtn", "Not used") + yield FCLCB(self, "AutosaveSource", "Original filename for Autosave purposes") + yield FCLCB(self, "GrpXstAtnOwners", "String Group for Annotation Owner Names") + yield FCLCB(self, "SttbfAtnbkmk", "Annotation subdocument bookmark names STTB") + yield FCLCB(self, "PlcdoaMom", "No longer used") + yield FCLCB(self, "PlcdoaHdr", "No longer used") + yield FCLCB(self, "PlcspaMom", "Main document File Shape (FSPA) PLC") + yield FCLCB(self, "PlcspaHdr", "Header subdocument FSPA PLC") + yield FCLCB(self, "PlcfAtnbkf", "Annotation subdocument bookmark begin position (BKF) PLC") + yield FCLCB(self, "PlcfAtnbkl", "Annotation subdocument bookmark end position (BKL) PLC") + yield FCLCB(self, "Pms", "Print Merge State") + yield FCLCB(self, "FormFldSttbs", "Form field values STTB") + yield FCLCB(self, "PlcfendRef", "Endnote Reference (FRD) PLC") + yield FCLCB(self, "PlcfendTxt", "Endnote Text PLC") + yield FCLCB(self, "PlcffldEdn", "Endnote subdocument field position (FLD) PLC)") + yield FCLCB(self, "PlcfpgdEdn", "not used") + yield FCLCB(self, "DggInfo", "Office Art Object Table Data") + yield FCLCB(self, "SttbfRMark", "Editor Author Abbreviations STTB") + yield FCLCB(self, "SttbCaption", "Caption Title STTB") + yield FCLCB(self, "SttbAutoCaption", "Auto Caption Title STTB") + yield FCLCB(self, "Plcfwkb", "WKB PLC") + yield FCLCB(self, "Plcfspl", "Spell Check State PLC") + yield FCLCB(self, "PlcftxbxTxt", "Text Box Text PLC") + yield FCLCB(self, "PlcffldTxbx", "Text Box Reference (FLD) PLC") + yield FCLCB(self, "PlcfhdrtxbxTxt", "Header Text Box Text PLC") + yield FCLCB(self, "PlcffldHdrTxbx", "Header Text Box Reference (FLD) PLC") + yield FCLCB(self, "StwUser", "Macro User storage") + yield FCLCB(self, "Sttbttmbd", "Embedded TrueType Font Data") + yield FCLCB(self, "Unused") + yield FCLCB(self, "PgdMother", "Main text page descriptors PLF") + yield FCLCB(self, "BkdMother", "Main text break descriptors PLF") + yield FCLCB(self, "PgdFtn", "Footnote text page descriptors PLF") + yield FCLCB(self, "BkdFtn", "Footnote text break descriptors PLF") + yield FCLCB(self, "PgdEdn", "Endnote text page descriptors PLF") + yield FCLCB(self, "BkdEdn", "Endnote text break descriptors PLF") + yield FCLCB(self, "SttbfIntlFld", "Field keywords STTB") + yield FCLCB(self, "RouteSlip", "Mailer Routing Slip") + yield FCLCB(self, "SttbSavedBy", "STTB of names of users who have saved the document") + yield FCLCB(self, "SttbFnm", "STTB of filenames of documents referenced by this one") + yield FCLCB(self, "PlcfLst", "List Format information PLC") + yield FCLCB(self, "PlfLfo", "List Format Override information PLC") + yield FCLCB(self, "PlcftxbxBkd", "Main document textbox break table (BKD) PLC") + yield FCLCB(self, "PlcftxbxHdrBkd", "Header subdocument textbox break table (BKD) PLC") + yield FCLCB(self, "DocUndo", "Undo/Versioning data") + yield FCLCB(self, "Rgbuse", "Undo/Versioning data") + yield FCLCB(self, "Usp", "Undo/Versioning data") + yield FCLCB(self, "Uskf", 
"Undo/Versioning data") + yield FCLCB(self, "PlcupcRgbuse", "Undo/Versioning data") + yield FCLCB(self, "PlcupcUsp", "Undo/Versioning data") + yield FCLCB(self, "SttbGlsyStyle", "Glossary entry style names STTB") + yield FCLCB(self, "Plgosl", "Grammar options PL") + yield FCLCB(self, "Plcocx", "OCX data PLC") + yield FCLCB(self, "PlcfbteLvc", "Character property bin table PLC") + if self['../fMac'].value: + yield TimestampMac32(self, "ftModified", "Date last modified") + yield Int32(self, "padding[]") + else: + yield TimestampWin64(self, "ftModified", "Date last modified") + yield FCLCB(self, "Plcflvc", "LVC PLC") + yield FCLCB(self, "Plcasumy", "Autosummary PLC") + yield FCLCB(self, "Plcfgram", "Grammar check PLC") + yield FCLCB(self, "SttbListNames", "List names STTB") + yield FCLCB(self, "SttbfUssr", "Undo/Versioning data") + while self.current_size < self.size: + yield FCLCB(self, "unknown[]") + +class FIB(FieldSet): + def createFields(self): + yield UInt16(self, "wIdent", "Magic Number") + yield UInt16(self, "nFib", "File Information Block (FIB) Version") + yield UInt16(self, "nProduct", "Product Version") + yield Enum(UInt16(self, "lid", "Language ID"), LANGUAGE_ID) yield Int16(self, "pnNext") - yield Bit(self, "fDot") - yield Bit(self, "fGlsy") - yield Bit(self, "fComplex") - yield Bit(self, "fHasPic") - yield Bits(self, "cQuickSaves", 4) - yield Bit(self, "fEncrypted") - yield Bit(self, "fWhichTblStm") - yield Bit(self, "fReadOnlyRecommanded") - yield Bit(self, "fWriteReservation") - yield Bit(self, "fExtChar") + yield Bit(self, "fDot", "Is the document a document template?") + yield Bit(self, "fGlsy", "Is the document a glossary?") + yield Bit(self, "fComplex", "Is the document in Complex format?") + yield Bit(self, "fHasPic", "Does the document have embedded images?") + yield Bits(self, "cQuickSaves", 4, "Number of times the document was quick-saved") + yield Bit(self, "fEncrypted", "Is the document encrypted?") + yield Bits(self, "fWhichTblStm", 1, "Which table stream (0Table or 1Table) to use") + yield Bit(self, "fReadOnlyRecommended", "Should the file be opened read-only?") + yield Bit(self, "fWriteReservation", "Is the file write-reserved?") + yield Bit(self, "fExtChar", "Does the file use an extended character set?") yield Bit(self, "fLoadOverride") - yield Bit(self, "fFarEeast") + yield Bit(self, "fFarEast") yield Bit(self, "fCrypto") - yield UInt16(self, "nFibBack") - yield UInt32(self, "lKey") - yield UInt8(self, "envr") + yield UInt16(self, "nFibBack", "Document is backwards compatible down to this FIB version") + yield UInt32(self, "lKey", "File encryption key (only if fEncrypted)") + yield Enum(UInt8(self, "envr", "Document creation environment"), {0:'Word for Windows',1:'Macintosh Word'}) - yield Bit(self, "fMac") + yield Bit(self, "fMac", "Was this file last saved on a Mac?") yield Bit(self, "fEmptySpecial") yield Bit(self, "fLoadOverridePage") yield Bit(self, "fFutureSavedUndo") yield Bit(self, "fWord97Save") yield Bits(self, "fSpare0", 3) + CHARSET={0:'Windows ANSI',256:'Macintosh'} + yield Enum(UInt16(self, "chse", "Character set for document text"),CHARSET) + yield Enum(UInt16(self, "chsTables", "Character set for internal table text"),CHARSET) + yield UInt32(self, "fcMin", "File offset for the first character of text") + yield UInt32(self, "fcMax", "File offset for the last character of text + 1") - yield UInt16(self, "chse") - yield UInt16(self, "chsTables") - yield UInt32(self, "fcMin") - yield UInt32(self, "fcMac") + yield ShortArray(self, "array1", "Array of 
shorts") + yield LongArray(self, "array2", "Array of longs") + yield FCLCBArray(self, "array3", "Array of File Offset/Byte Count (FC/LCB) pairs") - yield PascalString16(self, "file_creator", strip="\0") +def getRootParser(ole2): + return guessParser(ole2["root[0]"].getSubIStream()) - yield NullBytes(self, "reserved[]", 12) +def getOLE2Parser(ole2, path): + name = path+"[0]" + if name in ole2: + fragment = ole2[name] + else: + fragment = getRootParser(ole2)[name] + return guessParser(fragment.getSubIStream()) - yield Int16(self, "lidFE") - yield UInt16(self, "clw") - yield Int32(self, "cbMac") - yield UInt32(self, "lProductCreated") - yield TIMESTAMP(self, "lProductRevised") - - yield UInt32(self, "ccpText") - yield Int32(self, "ccpFtn") - yield Int32(self, "ccpHdr") - yield Int32(self, "ccpMcr") - yield Int32(self, "ccpAtn") - yield Int32(self, "ccpEdn") - yield Int32(self, "ccpTxbx") - yield Int32(self, "ccpHdrTxbx") - yield Int32(self, "pnFbpChpFirst") - yield Int32(self, "pnChpFirst") - yield Int32(self, "cpnBteChp") - yield Int32(self, "pnFbpPapFirst") - yield Int32(self, "pnPapFirst") - yield Int32(self, "cpnBtePap") - yield Int32(self, "pnFbpLvcFirst") - yield Int32(self, "pnLvcFirst") - yield Int32(self, "cpnBteLvc") - yield Int32(self, "fcIslandFirst") - yield Int32(self, "fcIslandLim") - yield UInt16(self, "cfclcb") - yield Int32(self, "fcStshfOrig") - yield UInt32(self, "lcbStshfOrig") - yield Int32(self, "fcStshf") - yield UInt32(self, "lcbStshf") - - yield Int32(self, "fcPlcffndRef") - yield UInt32(self, "lcbPlcffndRef") - yield Int32(self, "fcPlcffndTxt") - yield UInt32(self, "lcbPlcffndTxt") - yield Int32(self, "fcPlcfandRef") - yield UInt32(self, "lcbPlcfandRef") - yield Int32(self, "fcPlcfandTxt") - yield UInt32(self, "lcbPlcfandTxt") - yield Int32(self, "fcPlcfsed") - yield UInt32(self, "lcbPlcfsed") - yield Int32(self, "fcPlcpad") - yield UInt32(self, "lcbPlcpad") - yield Int32(self, "fcPlcfphe") - yield UInt32(self, "lcbPlcfphe") - yield Int32(self, "fcSttbfglsy") - yield UInt32(self, "lcbSttbfglsy") - yield Int32(self, "fcPlcfglsy") - yield UInt32(self, "lcbPlcfglsy") - yield Int32(self, "fcPlcfhdd") - yield UInt32(self, "lcbPlcfhdd") - yield Int32(self, "fcPlcfbteChpx") - yield UInt32(self, "lcbPlcfbteChpx") - yield Int32(self, "fcPlcfbtePapx") - yield UInt32(self, "lcbPlcfbtePapx") - yield Int32(self, "fcPlcfsea") - yield UInt32(self, "lcbPlcfsea") - yield Int32(self, "fcSttbfffn") - yield UInt32(self, "lcbSttbfffn") - yield Int32(self, "fcPlcffldMom") - yield UInt32(self, "lcbPlcffldMom") - yield Int32(self, "fcPlcffldHdr") - yield UInt32(self, "lcbPlcffldHdr") - yield Int32(self, "fcPlcffldFtn") - yield UInt32(self, "lcbPlcffldFtn") - yield Int32(self, "fcPlcffldAtn") - yield UInt32(self, "lcbPlcffldAtn") - yield Int32(self, "fcPlcffldMcr") - yield UInt32(self, "lcbPlcffldMcr") - yield Int32(self, "fcSttbfbkmk") - yield UInt32(self, "lcbSttbfbkmk") - yield Int32(self, "fcPlcfbkf") - yield UInt32(self, "lcbPlcfbkf") - yield Int32(self, "fcPlcfbkl") - yield UInt32(self, "lcbPlcfbkl") - yield Int32(self, "fcCmds") - yield UInt32(self, "lcbCmds") - yield Int32(self, "fcPlcmcr") - yield UInt32(self, "lcbPlcmcr") - yield Int32(self, "fcSttbfmcr") - yield UInt32(self, "lcbSttbfmcr") - yield Int32(self, "fcPrDrvr") - yield UInt32(self, "lcbPrDrvr") - yield Int32(self, "fcPrEnvPort") - yield UInt32(self, "lcbPrEnvPort") - yield Int32(self, "fcPrEnvLand") - yield UInt32(self, "lcbPrEnvLand") - yield Int32(self, "fcWss") - yield UInt32(self, "lcbWss") - yield Int32(self, 
"fcDop") - yield UInt32(self, "lcbDop") - yield Int32(self, "fcSttbfAssoc") - yield UInt32(self, "lcbSttbfAssoc") - yield Int32(self, "fcClx") - yield UInt32(self, "lcbClx") - yield Int32(self, "fcPlcfpgdFtn") - yield UInt32(self, "lcbPlcfpgdFtn") - yield Int32(self, "fcAutosaveSource") - yield UInt32(self, "lcbAutosaveSource") - yield Int32(self, "fcGrpXstAtnOwners") - yield UInt32(self, "lcbGrpXstAtnOwners") - yield Int32(self, "fcSttbfAtnbkmk") - yield UInt32(self, "lcbSttbfAtnbkmk") - yield Int32(self, "fcPlcdoaMom") - yield UInt32(self, "lcbPlcdoaMom") - yield Int32(self, "fcPlcdoaHdr") - yield UInt32(self, "lcbPlcdoaHdr") - yield Int32(self, "fcPlcspaMom") - yield UInt32(self, "lcbPlcspaMom") - yield Int32(self, "fcPlcspaHdr") - yield UInt32(self, "lcbPlcspaHdr") - yield Int32(self, "fcPlcfAtnbkf") - yield UInt32(self, "lcbPlcfAtnbkf") - yield Int32(self, "fcPlcfAtnbkl") - yield UInt32(self, "lcbPlcfAtnbkl") - yield Int32(self, "fcPms") - yield UInt32(self, "lcbPms") - yield Int32(self, "fcFormFldSttbs") - yield UInt32(self, "lcbFormFldSttbs") - yield Int32(self, "fcPlcfendRef") - yield UInt32(self, "lcbPlcfendRef") - yield Int32(self, "fcPlcfendTxt") - yield UInt32(self, "lcbPlcfendTxt") - yield Int32(self, "fcPlcffldEdn") - yield UInt32(self, "lcbPlcffldEdn") - yield Int32(self, "fcPlcfpgdEdn") - yield UInt32(self, "lcbPlcfpgdEdn") - yield Int32(self, "fcDggInfo") - yield UInt32(self, "lcbDggInfo") - yield Int32(self, "fcSttbfRMark") - yield UInt32(self, "lcbSttbfRMark") - yield Int32(self, "fcSttbCaption") - yield UInt32(self, "lcbSttbCaption") - yield Int32(self, "fcSttbAutoCaption") - yield UInt32(self, "lcbSttbAutoCaption") - yield Int32(self, "fcPlcfwkb") - yield UInt32(self, "lcbPlcfwkb") - yield Int32(self, "fcPlcfspl") - yield UInt32(self, "lcbPlcfspl") - yield Int32(self, "fcPlcftxbxTxt") - yield UInt32(self, "lcbPlcftxbxTxt") - yield Int32(self, "fcPlcffldTxbx") - yield UInt32(self, "lcbPlcffldTxbx") - yield Int32(self, "fcPlcfhdrtxbxTxt") - yield UInt32(self, "lcbPlcfhdrtxbxTxt") - yield Int32(self, "fcPlcffldHdrTxbx") - yield UInt32(self, "lcbPlcffldHdrTxbx") - yield Int32(self, "fcStwUser") - yield UInt32(self, "lcbStwUser") - yield Int32(self, "fcSttbttmbd") - yield UInt32(self, "cbSttbttmbd") - yield Int32(self, "fcUnused") - yield UInt32(self, "lcbUnused") - yield Int32(self, "fcPgdMother") - yield UInt32(self, "lcbPgdMother") - yield Int32(self, "fcBkdMother") - yield UInt32(self, "lcbBkdMother") - yield Int32(self, "fcPgdFtn") - yield UInt32(self, "lcbPgdFtn") - yield Int32(self, "fcBkdFtn") - yield UInt32(self, "lcbBkdFtn") - yield Int32(self, "fcPgdEdn") - yield UInt32(self, "lcbPgdEdn") - yield Int32(self, "fcBkdEdn") - yield UInt32(self, "lcbBkdEdn") - yield Int32(self, "fcSttbfIntlFld") - yield UInt32(self, "lcbSttbfIntlFld") - yield Int32(self, "fcRouteSlip") - yield UInt32(self, "lcbRouteSlip") - yield Int32(self, "fcSttbSavedBy") - yield UInt32(self, "lcbSttbSavedBy") - yield Int32(self, "fcSttbFnm") - yield UInt32(self, "lcbSttbFnm") - yield Int32(self, "fcPlcfLst") - yield UInt32(self, "lcbPlcfLst") - yield Int32(self, "fcPlfLfo") - yield UInt32(self, "lcbPlfLfo") - yield Int32(self, "fcPlcftxbxBkd") - yield UInt32(self, "lcbPlcftxbxBkd") - yield Int32(self, "fcPlcftxbxHdrBkd") - yield UInt32(self, "lcbPlcftxbxHdrBkd") - yield Int32(self, "fcDocUndo") - yield UInt32(self, "lcbDocUndo") - yield Int32(self, "fcRgbuse") - yield UInt32(self, "lcbRgbuse") - yield Int32(self, "fcUsp") - yield UInt32(self, "lcbUsp") - yield Int32(self, "fcUskf") - yield 
UInt32(self, "lcbUskf") - yield Int32(self, "fcPlcupcRgbuse") - yield UInt32(self, "lcbPlcupcRgbuse") - yield Int32(self, "fcPlcupcUsp") - yield UInt32(self, "lcbPlcupcUsp") - yield Int32(self, "fcSttbGlsyStyle") - yield UInt32(self, "lcbSttbGlsyStyle") - yield Int32(self, "fcPlgosl") - yield UInt32(self, "lcbPlgosl") - yield Int32(self, "fcPlcocx") - yield UInt32(self, "lcbPlcocx") - yield Int32(self, "fcPlcfbteLvc") - yield UInt32(self, "lcbPlcfbteLvc") - yield TIMESTAMP(self, "ftModified") - yield Int32(self, "fcPlcflvc") - yield UInt32(self, "lcbPlcflvc") - yield Int32(self, "fcPlcasumy") - yield UInt32(self, "lcbPlcasumy") - yield Int32(self, "fcPlcfgram") - yield UInt32(self, "lcbPlcfgram") - yield Int32(self, "fcSttbListNames") - yield UInt32(self, "lcbSttbListNames") - yield Int32(self, "fcSttbfUssr") - yield UInt32(self, "lcbSttbfUssr") - - tail = (self.size - self.current_size) // 8 - if tail: - yield RawBytes(self, "tail", tail) - -class WordDocumentFieldSet(BaseWordDocument, FieldSet): - pass - -class WordDocumentParser(BaseWordDocument, Parser): +class WordDocumentParser(OLE2FragmentParser): + MAGIC='\xec\xa5' # 42476 PARSER_TAGS = { "id": "word_document", "min_size": 8, + "magic": ((MAGIC, 0),), "description": "Microsoft Office Word document", } endian = LITTLE_ENDIAN - def __init__(self, stream, **kw): - Parser.__init__(self, stream, **kw) + def __init__(self, stream, **args): + OLE2FragmentParser.__init__(self, stream, **args) def validate(self): + if self.stream.readBytes(0,2) != self.MAGIC: + return "Invalid magic." + if self['FIB/nFib'].value not in (192,193): + return "Unknown FIB version." return True + def createFields(self): + yield FIB(self, "FIB", "File Information Block") + table = getOLE2Parser(self.ole2, "table"+str(self["FIB/fWhichTblStm"].value)) + + padding = (self['FIB/fcMin'].value - self.current_size//8) + if padding: + yield NullBytes(self, "padding[]", padding) + + # Guess whether the file uses UTF16 encoding. 
+ is_unicode = False + if self['FIB/array2/ccpText'].value*2 == self['FIB/fcMax'].value - self['FIB/fcMin'].value: + is_unicode = True + for fieldname, textname in [('Text','text'),('Ftn','text_footnote'), + ('Hdr','text_header'),('Mcr','text_macro'),('Atn','text_annotation'), + ('Edn','text_endnote'),('Txbx','text_textbox'),('HdrTxbx','text_header_textbox')]: + size = self['FIB/array2/ccp'+fieldname].value + if size: + if is_unicode: + yield String(self, textname, size*2, charset="UTF-16-LE") + else: + yield Bytes(self, textname, size) + + padding = (self['FIB/fcMax'].value - self.current_size//8) + if padding: + yield RawBytes(self, "padding[]", padding) + +class WidePascalString16(String): + def __init__(self, parent, name, description=None, + strip=None, nbytes=None, truncate=None): + Bytes.__init__(self, parent, name, 1, description) + + self._format = "WidePascalString16" + self._strip = strip + self._truncate = truncate + self._character_size = 2 + self._charset = "UTF-16-LE" + self._content_offset = 2 + self._content_size = self._character_size * self._parent.stream.readBits( + self.absolute_address, self._content_offset*8, self._parent.endian) + self._size = (self._content_size + self.content_offset) * 8 + +class TableParsers(object): + class Bte(FieldSet): + 'Bin Table Entry' + static_size = 32 + def createFields(self): + yield Bits(self, "pn", 22, "Referenced page number") + yield Bits(self, "unused", 10) + + def createValue(self): + return self["pn"].value + + class Ffn(FieldSet): + 'Font Family Name' + def createFields(self): + yield UInt8(self, "size", "Total length of this FFN in bytes, minus 1") + self._size = self["size"].value * 8 + 8 + yield Bits(self, "prq", 2, "Pitch request") + yield Bit(self, "fTrueType", "Is font a TrueType font?") + yield Bits(self, "reserved[]", 1) + yield Bits(self, "ff", 3, "Font Family ID") + yield Bits(self, "reserved[]", 1) + yield UInt16(self, "wWeight", "Base weight of font") + yield UInt8(self, "chs", "Character set identifier") + yield UInt8(self, "ixchSzAlt", "Index into name to the name of the alternate font") + yield RawBytes(self, "panose", 10) + yield RawBytes(self, "fs", 24, "Font Signature") + yield CString(self, "name", charset="UTF-16-LE") + if self["ixchSzAlt"].value != 0: + yield CString(self, "nameAlt", charset="UTF-16-LE") + + def createValue(self): + return self["name"].value + + class Sttbf(FieldSet): + 'String Table stored in File' + SttbfAssocDESC = { + 0: "FileNext: unused", + 1: "Dot: filename of associated template", + 2: "Title: title of document", + 3: "Subject: subject of document", + 4: "KeyWords: keywords of document", + 5: "Comments: comments of document", + 6: "Author: author of document", + 7: "LastRevBy: name of person who last revised the document", + 8: "DataDoc: filename of data document", + 9: "HeaderDoc: filename of header document", + 10: "Criteria1: packed string used by print merge record selection", + 11: "Criteria2: packed string used by print merge record selection", + 12: "Criteria3: packed string used by print merge record selection", + 13: "Criteria4: packed string used by print merge record selection", + 14: "Criteria5: packed string used by print merge record selection", + 15: "Criteria6: packed string used by print merge record selection", + 16: "Criteria7: packed string used by print merge record selection", + 17: "Max: maximum number of strings in string table", + } + + def createFields(self): + if self.stream.readBytes(self.absolute_address, 2) == "\xff\xff": + yield Int16(self, 
"utf16_marker", "If this field is present, the Sttbf contains UTF-16 data.") + self.is_utf16 = True + else: + self.is_utf16 = False + yield UInt16(self, "count", "Number of strings in this Sttbf") + extra_data_field = UInt16(self, "extra_data_len", "Size of optional extra data after each string") + yield extra_data_field + extra_data_len = extra_data_field.value + for i in xrange(self["count"].value): + if self.name == "SttbfAssoc": + desc = self.SttbfAssocDESC.get(i, None) + else: + desc = None + if self.name == "Sttbfffn": + yield TableParsers.Ffn(self, "string[]", desc) + elif self.is_utf16: + yield WidePascalString16(self, "string[]", desc) + else: + yield PascalString8(self, "string[]", desc) + if extra_data_len: + yield RawBytes(self, "extra[]", extra_data_len) + + class Plcf(FieldSet): + 'Plex of CPs/FCs stored in file' + def createFields(self): + if self.size is None: + return + chunk_parser = None + size = None + if self.name.startswith("Plcfbte"): + chunk_parser = TableParsers.Bte + if not chunk_parser: + return + if size is None: + size = chunk_parser.static_size // 8 + n = (self.size / 8 - 4) / (4 + size) + for i in xrange(n+1): + yield UInt32(self, "cp_fc[]", "CP or FC value") + for i in xrange(n): + yield chunk_parser(self, "obj[]") + +class WordTableParser(OLE2FragmentParser): + def createFields(self): + word_doc = getOLE2Parser(self.ole2, "word_doc") + if word_doc["FIB/fWhichTblStm"].value != int(self.ole2name[0]): + yield RawBytes(self, "inactive_table", self.datasize) + return + for fclcb in word_doc["FIB/array3"]: + if not isinstance(fclcb, FCLCB): + continue + if fclcb["fc"].value < 0 or fclcb["lcb"].value <= 0: + continue + self.seekByte(fclcb["fc"].value, relative=False) + if fclcb.name.startswith("Sttb"): + yield TableParsers.Sttbf(self, fclcb.name, size=fclcb["lcb"].value * 8) + elif fclcb.name.startswith("Plc"): + yield TableParsers.Plcf(self, fclcb.name, size=fclcb["lcb"].value * 8) + else: + yield RawBytes(self, fclcb.name, fclcb["lcb"].value, fclcb.description) diff --git a/lib/hachoir_parser/network/__init__.py b/lib/hachoir_parser/network/__init__.py index 0781e00f..a7fe2473 100644 --- a/lib/hachoir_parser/network/__init__.py +++ b/lib/hachoir_parser/network/__init__.py @@ -1,2 +1,2 @@ -from lib.hachoir_parser.network.tcpdump import TcpdumpFile +from hachoir_parser.network.tcpdump import TcpdumpFile diff --git a/lib/hachoir_parser/network/common.py b/lib/hachoir_parser/network/common.py index bc049b6b..d6e9feaa 100644 --- a/lib/hachoir_parser/network/common.py +++ b/lib/hachoir_parser/network/common.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import FieldSet, Field, Bits -from lib.hachoir_core.bits import str2hex -from lib.hachoir_parser.network.ouid import REGISTERED_OUID -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.field import FieldSet, Field, Bits +from hachoir_core.bits import str2hex +from hachoir_parser.network.ouid import REGISTERED_OUID +from hachoir_core.endian import BIG_ENDIAN from socket import gethostbyaddr, herror as socket_host_error def ip2name(addr): diff --git a/lib/hachoir_parser/network/tcpdump.py b/lib/hachoir_parser/network/tcpdump.py index 1625c845..564e6189 100644 --- a/lib/hachoir_parser/network/tcpdump.py +++ b/lib/hachoir_parser/network/tcpdump.py @@ -12,16 +12,16 @@ Author: Victor Stinner Creation: 23 march 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, 
Enum, Bytes, NullBytes, RawBytes, UInt8, UInt16, UInt32, Int32, TimestampUnix32, Bit, Bits, NullBits) -from lib.hachoir_core.endian import NETWORK_ENDIAN, LITTLE_ENDIAN -from lib.hachoir_core.tools import humanDuration -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import createDict -from lib.hachoir_parser.network.common import MAC48_Address, IPv4_Address, IPv6_Address +from hachoir_core.endian import NETWORK_ENDIAN, LITTLE_ENDIAN +from hachoir_core.tools import humanDuration +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import createDict +from hachoir_parser.network.common import MAC48_Address, IPv4_Address, IPv6_Address def diff(field): return humanDuration(field.value*1000) diff --git a/lib/hachoir_parser/parser.py b/lib/hachoir_parser/parser.py index 833e8a79..e8124158 100644 --- a/lib/hachoir_parser/parser.py +++ b/lib/hachoir_parser/parser.py @@ -1,8 +1,8 @@ -import lib.hachoir_core.config as config -from lib.hachoir_core.field import Parser as GenericParser -from lib.hachoir_core.error import HACHOIR_ERRORS, HachoirError, error -from lib.hachoir_core.tools import makeUnicode -from lib.hachoir_core.i18n import _ +import hachoir_core.config as config +from hachoir_core.field import Parser as GenericParser +from hachoir_core.error import HACHOIR_ERRORS, HachoirError, error +from hachoir_core.tools import makeUnicode +from hachoir_core.i18n import _ from inspect import getmro diff --git a/lib/hachoir_parser/parser_list.py b/lib/hachoir_parser/parser_list.py index 88692f3c..38071550 100644 --- a/lib/hachoir_parser/parser_list.py +++ b/lib/hachoir_parser/parser_list.py @@ -1,8 +1,8 @@ import re import types -from lib.hachoir_core.error import error -from lib.hachoir_core.i18n import _ -from lib.hachoir_parser import Parser, HachoirParser +from hachoir_core.error import error +from hachoir_core.i18n import _ +from hachoir_parser import Parser, HachoirParser import sys ### Parser list ################################################################ @@ -198,8 +198,7 @@ class HachoirParserList(ParserList): return self.parser_list todo = [] - from lib import hachoir_parser - module = hachoir_parser + module = __import__("hachoir_parser") for attrname in dir(module): attr = getattr(module, attrname) if isinstance(attr, types.ModuleType): diff --git a/lib/hachoir_parser/program/__init__.py b/lib/hachoir_parser/program/__init__.py index 5dba92d3..261eaf15 100644 --- a/lib/hachoir_parser/program/__init__.py +++ b/lib/hachoir_parser/program/__init__.py @@ -1,6 +1,7 @@ -from lib.hachoir_parser.program.elf import ElfFile -from lib.hachoir_parser.program.exe import ExeFile -from lib.hachoir_parser.program.python import PythonCompiledFile -from lib.hachoir_parser.program.java import JavaCompiledClassFile -from lib.hachoir_parser.program.prc import PRCFile +from hachoir_parser.program.elf import ElfFile +from hachoir_parser.program.exe import ExeFile +from hachoir_parser.program.python import PythonCompiledFile +from hachoir_parser.program.java import JavaCompiledClassFile +from hachoir_parser.program.prc import PRCFile +from hachoir_parser.program.nds import NdsFile diff --git a/lib/hachoir_parser/program/elf.py b/lib/hachoir_parser/program/elf.py index 7c66ca28..4ddd6511 100644 --- a/lib/hachoir_parser/program/elf.py +++ b/lib/hachoir_parser/program/elf.py @@ -1,44 +1,98 @@ """ ELF (Unix/BSD executable file format) parser. 
-Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation date: 08 may 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, - UInt8, UInt16, UInt32, Enum, - String, Bytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_parser import HachoirParser +from hachoir_core.field import (RootSeekableFieldSet, FieldSet, ParserError, Bit, NullBits, RawBits, + UInt8, UInt16, UInt32, UInt64, Enum, + String, RawBytes, Bytes) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class ElfHeader(FieldSet): - static_size = 52*8 LITTLE_ENDIAN_ID = 1 BIG_ENDIAN_ID = 2 MACHINE_NAME = { + # e_machine, EM_ defines + 0: u"No machine", 1: u"AT&T WE 32100", 2: u"SPARC", 3: u"Intel 80386", 4: u"Motorola 68000", 5: u"Motorola 88000", + 6: u"Intel 80486", 7: u"Intel 80860", - 8: u"MIPS RS3000" + 8: u"MIPS I Architecture", + 9: u"Amdahl UTS on System/370", + 10: u"MIPS RS3000 Little-endian", + 11: u"IBM RS/6000 XXX reserved", + 15: u"Hewlett-Packard PA-RISC", + 16: u"NCube XXX reserved", + 17: u"Fujitsu VPP500", + 18: u"Enhanced instruction set SPARC", + 19: u"Intel 80960", + 20: u"PowerPC 32-bit", + 21: u"PowerPC 64-bit", + 36: u"NEC V800", + 37: u"Fujitsu FR20", + 38: u"TRW RH-32", + 39: u"Motorola RCE", + 40: u"Advanced RISC Machines (ARM)", + 41: u"DIGITAL Alpha", + 42: u"Hitachi Super-H", + 43: u"SPARC Version 9", + 44: u"Siemens Tricore", + 45: u"Argonaut RISC Core", + 46: u"Hitachi H8/300", + 47: u"Hitachi H8/300H", + 48: u"Hitachi H8S", + 49: u"Hitachi H8/500", + 50: u"Intel Merced (IA-64) Processor", + 51: u"Stanford MIPS-X", + 52: u"Motorola Coldfire", + 53: u"Motorola MC68HC12", + 62: u"Advanced Micro Devices x86-64", + 75: u"DIGITAL VAX", + 36902: u"used by NetBSD/alpha; obsolete", } CLASS_NAME = { + # e_ident[EI_CLASS], ELFCLASS defines 1: u"32 bits", 2: u"64 bits" } TYPE_NAME = { + # e_type, ET_ defines 0: u"No file type", 1: u"Relocatable file", 2: u"Executable file", 3: u"Shared object file", 4: u"Core file", 0xFF00: u"Processor-specific (0xFF00)", - 0xFFFF: u"Processor-specific (0xFFFF)" + 0xFFFF: u"Processor-specific (0xFFFF)", + } + OSABI_NAME = { + # e_ident[EI_OSABI], ELFOSABI_ defines + 0: u"UNIX System V ABI", + 1: u"HP-UX operating system", + 2: u"NetBSD", + 3: u"GNU/Linux", + 4: u"GNU/Hurd", + 5: u"86Open common IA32 ABI", + 6: u"Solaris", + 7: u"Monterey", + 8: u"IRIX", + 9: u"FreeBSD", + 10: u"TRU64 UNIX", + 11: u"Novell Modesto", + 12: u"OpenBSD", + 97: u"ARM", + 255: u"Standalone (embedded) application", } ENDIAN_NAME = { + # e_ident[EI_DATA], ELFDATA defines LITTLE_ENDIAN_ID: "Little endian", BIG_ENDIAN_ID: "Big endian", } @@ -46,23 +100,29 @@ class ElfHeader(FieldSet): def createFields(self): yield Bytes(self, "signature", 4, r'ELF signature ("\x7fELF")') yield Enum(UInt8(self, "class", "Class"), self.CLASS_NAME) + if self["class"].value == 1: + ElfLongWord = UInt32 + else: + ElfLongWord = UInt64 yield Enum(UInt8(self, "endian", "Endian"), self.ENDIAN_NAME) yield UInt8(self, "file_version", "File version") - yield String(self, "pad", 8, "Pad") - yield UInt8(self, "nb_ident", "Size of ident[]") + yield Enum(UInt8(self, "osabi_ident", "OS/syscall ABI identification"), self.OSABI_NAME) + yield UInt8(self, "abi_version", "syscall ABI version") + yield String(self, "pad", 7, "Pad") + yield Enum(UInt16(self, "type", "File type"), self.TYPE_NAME) yield 
Enum(UInt16(self, "machine", "Machine type"), self.MACHINE_NAME) yield UInt32(self, "version", "ELF format version") - yield UInt32(self, "entry", "Number of entries") - yield UInt32(self, "phoff", "Program header offset") - yield UInt32(self, "shoff", "Section header offset") - yield UInt32(self, "flags", "Flags") + yield textHandler(ElfLongWord(self, "entry", "Entry point"), hexadecimal) + yield ElfLongWord(self, "phoff", "Program header file offset") + yield ElfLongWord(self, "shoff", "Section header file offset") + yield UInt32(self, "flags", "Architecture-specific flags") yield UInt16(self, "ehsize", "Elf header size (this header)") yield UInt16(self, "phentsize", "Program header entry size") yield UInt16(self, "phnum", "Program header entry count") yield UInt16(self, "shentsize", "Section header entry size") - yield UInt16(self, "shnum", "Section header entre count") - yield UInt16(self, "shstrndx", "Section header strtab index") + yield UInt16(self, "shnum", "Section header entry count") + yield UInt16(self, "shstrndx", "Section header string table index") def isValid(self): if self["signature"].value != "\x7FELF": @@ -73,70 +133,154 @@ class ElfHeader(FieldSet): return "Unknown endian (%s)" % self["endian"].value return "" +class SectionFlags(FieldSet): + def createFields(self): + if self.root.endian == BIG_ENDIAN: + if self.root.is64bit: + yield RawBits(self, "reserved[]", 32) + yield RawBits(self, "processor_specific", 4, "Processor specific flags") + yield NullBits(self, "reserved[]", 17) + yield Bit(self, "is_tls", "Section contains TLS data?") + yield NullBits(self, "reserved[]", 7) + yield Bit(self, "is_exec", "Section contains executable instructions?") + yield Bit(self, "is_alloc", "Section occupies memory?") + yield Bit(self, "is_writable", "Section contains writable data?") + else: + yield Bit(self, "is_writable", "Section contains writable data?") + yield Bit(self, "is_alloc", "Section occupies memory?") + yield Bit(self, "is_exec", "Section contains executable instructions?") + yield NullBits(self, "reserved[]", 7) + yield Bit(self, "is_tls", "Section contains TLS data?") + yield RawBits(self, "processor_specific", 4, "Processor specific flags") + yield NullBits(self, "reserved[]", 17) + if self.root.is64bit: + yield RawBits(self, "reserved[]", 32) + +class SymbolStringTableOffset(UInt32): + def createDisplay(self): + section_index = self['/header/shstrndx'].value + section = self['/section['+str(section_index)+']'] + text = section.value[self.value:] + return text.split('\0',1)[0] + class SectionHeader32(FieldSet): static_size = 40*8 TYPE_NAME = { - 8: "BSS" + # sh_type, SHT_ defines + 0: "Inactive", + 1: "Program defined information", + 2: "Symbol table section", + 3: "String table section", + 4: "Relocation section with addends", + 5: "Symbol hash table section", + 6: "Dynamic section", + 7: "Note section", + 8: "Block started by symbol (BSS) or No space section", + 9: "Relocation section without addends", + 10:"Reserved - purpose unknown", + 11:"Dynamic symbol table section", } def createFields(self): - yield UInt32(self, "name", "Name") - yield Enum(UInt32(self, "type", "Type"), self.TYPE_NAME) - yield UInt32(self, "flags", "Flags") + yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)") + yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME) + yield SectionFlags(self, "flags", "Section flags") yield textHandler(UInt32(self, "VMA", "Virtual memory address"), hexadecimal) - yield 
textHandler(UInt32(self, "LMA", "Logical memory address (in file)"), hexadecimal) - yield textHandler(UInt32(self, "size", "Size"), hexadecimal) - yield UInt32(self, "link", "Link") - yield UInt32(self, "info", "Information") - yield UInt32(self, "addr_align", "Address alignment") - yield UInt32(self, "entry_size", "Entry size") + yield textHandler(UInt32(self, "LMA", "Logical memory address (offset in file)"), hexadecimal) + yield textHandler(UInt32(self, "size", "Section size (bytes)"), hexadecimal) + yield UInt32(self, "link", "Index of a related section") + yield UInt32(self, "info", "Type-dependent information") + yield UInt32(self, "addr_align", "Address alignment (bytes)") + yield UInt32(self, "entry_size", "Size of each entry in section") def createDescription(self): return "Section header (name: %s, type: %s)" % \ - (self["name"].value, self["type"].display) + (self["name"].display, self["type"].display) + +class SectionHeader64(SectionHeader32): + static_size = 64*8 + + def createFields(self): + yield SymbolStringTableOffset(self, "name", "Section name (index into section header string table)") + yield Enum(textHandler(UInt32(self, "type", "Section type"), hexadecimal), self.TYPE_NAME) + yield SectionFlags(self, "flags", "Section flags") + yield textHandler(UInt64(self, "VMA", "Virtual memory address"), hexadecimal) + yield textHandler(UInt64(self, "LMA", "Logical memory address (offset in file)"), hexadecimal) + yield textHandler(UInt64(self, "size", "Section size (bytes)"), hexadecimal) + yield UInt32(self, "link", "Index of a related section") + yield UInt32(self, "info", "Type-dependent information") + yield UInt64(self, "addr_align", "Address alignment (bytes)") + yield UInt64(self, "entry_size", "Size of each entry in section") + +class ProgramFlags(FieldSet): + static_size = 32 + FLAGS = (('pf_r','readable'),('pf_w','writable'),('pf_x','executable')) + + def createFields(self): + if self.root.endian == BIG_ENDIAN: + yield NullBits(self, "padding[]", 29) + for fld, desc in self.FLAGS: + yield Bit(self, fld, "Segment is " + desc) + else: + for fld, desc in reversed(self.FLAGS): + yield Bit(self, fld, "Segment is " + desc) + yield NullBits(self, "padding[]", 29) + + def createDescription(self): + attribs=[] + for fld, desc in self.FLAGS: + if self[fld].value: + attribs.append(desc) + return 'Segment is '+', '.join(attribs) class ProgramHeader32(FieldSet): TYPE_NAME = { - 3: "Dynamic library" + # p_type, PT_ defines + 0: u"Unused program header table entry", + 1: u"Loadable program segment", + 2: u"Dynamic linking information", + 3: u"Program interpreter", + 4: u"Auxiliary information", + 5: u"Reserved, unspecified semantics", + 6: u"Entry for header table itself", + 7: u"Thread Local Storage segment", + 0x70000000: u"MIPS_REGINFO", } static_size = 32*8 def createFields(self): - yield Enum(UInt16(self, "type", "Type"), ProgramHeader32.TYPE_NAME) - yield UInt16(self, "flags", "Flags") + yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME) yield UInt32(self, "offset", "Offset") yield textHandler(UInt32(self, "vaddr", "V. address"), hexadecimal) yield textHandler(UInt32(self, "paddr", "P. 
address"), hexadecimal) yield UInt32(self, "file_size", "File size") yield UInt32(self, "mem_size", "Memory size") - yield UInt32(self, "align", "Alignment") - yield UInt32(self, "xxx", "???") + yield ProgramFlags(self, "flags") + yield UInt32(self, "align", "Alignment padding") def createDescription(self): return "Program Header (%s)" % self["type"].display -def sortSection(a, b): - return int(a["offset"] - b["offset"]) +class ProgramHeader64(ProgramHeader32): + static_size = 56*8 -#class Sections(FieldSet): -# def createFields?(self, stream, parent, sections): -# for section in sections: -# ofs = section["offset"] -# size = section["file_size"] -# if size != 0: -# sub = stream.createSub(ofs, size) -# #yield DeflateFilter(self, "section[]", sub, size, Section, "Section")) -# chunk = self.doRead("section[]", "Section", (Section,), {"stream": sub}) -# else: -# chunk = self.doRead("section[]", "Section", (FormatChunk, "string[0]")) -# chunk.description = "ELF section (in file: %s..%s)" % (ofs, ofs+size) + def createFields(self): + yield Enum(UInt32(self, "type", "Segment type"), ProgramHeader32.TYPE_NAME) + yield ProgramFlags(self, "flags") + yield UInt64(self, "offset", "Offset") + yield textHandler(UInt64(self, "vaddr", "V. address"), hexadecimal) + yield textHandler(UInt64(self, "paddr", "P. address"), hexadecimal) + yield UInt64(self, "file_size", "File size") + yield UInt64(self, "mem_size", "Memory size") + yield UInt64(self, "align", "Alignment padding") -class ElfFile(Parser): +class ElfFile(HachoirParser, RootSeekableFieldSet): + MAGIC = "\x7FELF" PARSER_TAGS = { "id": "elf", "category": "program", "file_ext": ("so", ""), - "min_size": ElfHeader.static_size, # At least one program header + "min_size": 52*8, # At least one program header "mime": ( u"application/x-executable", u"application/x-object", @@ -148,7 +292,13 @@ class ElfFile(Parser): } endian = LITTLE_ENDIAN + def __init__(self, stream, **args): + RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) + HachoirParser.__init__(self, stream, **args) + def validate(self): + if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC: + return "Invalid magic" err = self["header"].isValid() if err: return err @@ -163,23 +313,27 @@ class ElfFile(Parser): # Parse header and program headers yield ElfHeader(self, "header", "Header") - for index in xrange(self["header/phnum"].value): - yield ProgramHeader32(self, "prg_header[]") + self.is64bit = (self["header/class"].value == 2) - if False: - raise ParserError("TODO: Parse sections...") - #sections = self.array("prg_header") - #size = self["header/shoff"].value - self.current_size//8 - #chunk = self.doRead("data", "Data", (DeflateFilter, stream, size, Sections, sections)) - #chunk.description = "Sections (use an evil hack to manage share same data on differents parts)" - #assert self.current_size//8 == self["header/shoff"].value - else: - raw = self.seekByte(self["header/shoff"].value, "raw[]", relative=False) - if raw: - yield raw + for index in xrange(self["header/phnum"].value): + if self.is64bit: + yield ProgramHeader64(self, "prg_header[]") + else: + yield ProgramHeader32(self, "prg_header[]") + + self.seekByte(self["header/shoff"].value, relative=False) for index in xrange(self["header/shnum"].value): - yield SectionHeader32(self, "section_header[]") + if self.is64bit: + yield SectionHeader64(self, "section_header[]") + else: + yield SectionHeader32(self, "section_header[]") + + for index in xrange(self["header/shnum"].value): + field = 
self["section_header["+str(index)+"]"] + if field['size'].value != 0: + self.seekByte(field['LMA'].value, relative=False) + yield RawBytes(self, "section["+str(index)+"]", field['size'].value) def createDescription(self): return "ELF Unix/BSD program/library: %s" % ( diff --git a/lib/hachoir_parser/program/exe.py b/lib/hachoir_parser/program/exe.py index 531b89fe..5a7bc727 100644 --- a/lib/hachoir_parser/program/exe.py +++ b/lib/hachoir_parser/program/exe.py @@ -9,15 +9,15 @@ Author: Victor Stinner Creation date: 2006-08-13 """ -from lib.hachoir_parser import HachoirParser -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.field import (FieldSet, RootSeekableFieldSet, +from hachoir_parser import HachoirParser +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.field import (FieldSet, RootSeekableFieldSet, UInt16, UInt32, String, RawBytes, PaddingBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_parser.program.exe_ne import NE_Header -from lib.hachoir_parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader -from lib.hachoir_parser.program.exe_res import PE_Resource, NE_VersionInfoNode +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser.program.exe_ne import NE_Header +from hachoir_parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader +from hachoir_parser.program.exe_res import PE_Resource, NE_VersionInfoNode MAX_NB_SECTION = 50 diff --git a/lib/hachoir_parser/program/exe_ne.py b/lib/hachoir_parser/program/exe_ne.py index b4dbe395..cf62e885 100644 --- a/lib/hachoir_parser/program/exe_ne.py +++ b/lib/hachoir_parser/program/exe_ne.py @@ -1,7 +1,7 @@ -from lib.hachoir_core.field import (FieldSet, +from hachoir_core.field import (FieldSet, Bit, UInt8, UInt16, UInt32, Bytes, PaddingBits, PaddingBytes, NullBits, NullBytes) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler class NE_Header(FieldSet): static_size = 64*8 diff --git a/lib/hachoir_parser/program/exe_pe.py b/lib/hachoir_parser/program/exe_pe.py index c8fa101c..d769e91d 100644 --- a/lib/hachoir_parser/program/exe_pe.py +++ b/lib/hachoir_parser/program/exe_pe.py @@ -1,9 +1,9 @@ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, Bit, UInt8, UInt16, UInt32, TimestampUnix32, Bytes, String, Enum, PaddingBytes, PaddingBits, NullBytes, NullBits) -from lib.hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler -from lib.hachoir_core.error import HACHOIR_ERRORS +from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler +from hachoir_core.error import HACHOIR_ERRORS class SectionHeader(FieldSet): static_size = 40 * 8 @@ -72,7 +72,7 @@ class SectionHeader(FieldSet): return "section_%s" % name except HACHOIR_ERRORS, err: self.warning(unicode(err)) - return "section[]" + return "section[]" class DataDirectory(FieldSet): def createFields(self): diff --git a/lib/hachoir_parser/program/exe_res.py b/lib/hachoir_parser/program/exe_res.py index 22b25d0e..850fcf01 100644 --- a/lib/hachoir_parser/program/exe_res.py +++ b/lib/hachoir_parser/program/exe_res.py @@ -9,15 +9,15 @@ Author: Victor Stinner Creation date: 2007-01-19 """ -from lib.hachoir_core.field import (FieldSet, ParserError, Enum, +from hachoir_core.field import (FieldSet, ParserError, Enum, Bit, Bits, SeekableFieldSet, UInt16, UInt32, 
TimestampUnix32, RawBytes, PaddingBytes, NullBytes, NullBits, CString, String) -from lib.hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal -from lib.hachoir_core.tools import createDict, paddingSize, alignValue, makePrintable -from lib.hachoir_core.error import HACHOIR_ERRORS -from lib.hachoir_parser.common.win32 import BitmapInfoHeader +from hachoir_core.text_handler import textHandler, filesizeHandler, hexadecimal +from hachoir_core.tools import createDict, paddingSize, alignValue, makePrintable +from hachoir_core.error import HACHOIR_ERRORS +from hachoir_parser.common.win32 import BitmapInfoHeader MAX_DEPTH = 5 MAX_INDEX_PER_HEADER = 300 diff --git a/lib/hachoir_parser/program/java.py b/lib/hachoir_parser/program/java.py index 2e58552c..7329cbe0 100644 --- a/lib/hachoir_parser/program/java.py +++ b/lib/hachoir_parser/program/java.py @@ -59,15 +59,15 @@ TODO/FIXME: should update the length field of it's entry, etc. Sounds like a huge work. """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import ( +from hachoir_parser import Parser +from hachoir_core.field import ( ParserError, FieldSet, StaticFieldSet, Enum, RawBytes, PascalString16, Float32, Float64, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, Bit, NullBits ) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.tools import paddingSize +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.tools import paddingSize ############################################################################### def parse_flags(flags, flags_dict, show_unknown_flags=True, separator=" "): diff --git a/lib/hachoir_parser/program/nds.py b/lib/hachoir_parser/program/nds.py new file mode 100644 index 00000000..bc6e5c44 --- /dev/null +++ b/lib/hachoir_parser/program/nds.py @@ -0,0 +1,359 @@ +""" +Nintendo DS .nds game file parser + +File format references: +- http://www.bottledlight.com/ds/index.php/FileFormats/NDSFormat +- http://imrannazar.com/The-Smallest-NDS-File +- http://darkfader.net/ds/files/ndstool.cpp +- http://crackerscrap.com/docs/dsromstructure.html +- http://nocash.emubase.de/gbatek.htm +""" + +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, + UInt8, UInt16, UInt32, UInt64, String, RawBytes, SubFile, FieldSet, NullBits, Bit, Bits, Bytes, + SeekableFieldSet, RootSeekableFieldSet) +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN + + +""" +CRC16 Calculation + +Modified from: +http://www.mail-archive.com/python-list@python.org/msg47844.html + +Original License: +crc16.py by Bryan G. Olson, 2005 +This module is free software and may be used and +distributed under the same terms as Python itself. 
+""" +class CRC16: + _table = None + + def _initTable (self): + from array import array + + # CRC-16 poly: p(x) = x**16 + x**15 + x**2 + 1 + # top bit implicit, reflected + poly = 0xa001 + CRC16._table = array('H') + for byte in range(256): + crc = 0 + for bit in range(8): + if (byte ^ crc) & 1: + crc = (crc >> 1) ^ poly + else: + crc >>= 1 + byte >>= 1 + CRC16._table.append(crc) + + def checksum (self, string, value): + if CRC16._table is None: + self._initTable() + + for ch in string: + value = self._table[ord(ch) ^ (value & 0xff)] ^ (value >> 8) + return value + + +class Crc16(UInt16): + "16 bit field for calculating and comparing CRC-16 of specified string" + def __init__(self, parent, name, targetBytes): + UInt16.__init__(self, parent, name) + self.targetBytes = targetBytes + + def createDescription(self): + crc = CRC16().checksum(self.targetBytes, 0xffff) + if crc == self.value: + return "matches CRC of %d bytes" % len(self.targetBytes) + else: + return "mismatch (calculated CRC %d for %d bytes)" % (crc, len(self.targetBytes)) + + +class FileNameDirTable(FieldSet): + static_size = (4+2+2)*8 + def createFields(self): + yield UInt32(self, "entry_start") + yield UInt16(self, "entry_file_id") + yield UInt16(self, "parent_id") + + def createDescription(self): + return "first file id: %d; parent directory id: %d (%d)" % (self["entry_file_id"].value, self["parent_id"].value, self["parent_id"].value & 0xFFF) + +class FileNameEntry(FieldSet): + def createFields(self): + yield Bits(self, "name_len", 7) + yield Bit(self, "is_directory") + yield String(self, "name", self["name_len"].value) + if self["is_directory"].value: + yield UInt16(self, "dir_id") + + def createDescription(self): + s = "" + if self["is_directory"].value: + s = "[D] " + return s + self["name"].value + +class Directory(FieldSet): + def createFields(self): + while True: + fne = FileNameEntry(self, "entry[]") + if fne["name_len"].value == 0: + yield UInt8(self, "end_marker") + break + yield fne + + +class FileNameTable(SeekableFieldSet): + def createFields(self): + self.startOffset = self.absolute_address / 8 + + # parent_id of first FileNameDirTable contains number of directories: + dt = FileNameDirTable(self, "dir_table[]") + numDirs = dt["parent_id"].value + yield dt + + for i in range(1, numDirs): + yield FileNameDirTable(self, "dir_table[]") + + for i in range(0, numDirs): + dt = self["dir_table[%d]" % i] + offset = self.startOffset + dt["entry_start"].value + self.seekByte(offset, relative=False) + yield Directory(self, "directory[]") + + +class FATFileEntry(FieldSet): + static_size = 2*4*8 + def createFields(self): + yield UInt32(self, "start") + yield UInt32(self, "end") + + def createDescription(self): + return "start: %d; size: %d" % (self["start"].value, self["end"].value - self["start"].value) + +class FATContent(FieldSet): + def createFields(self): + num_entries = self.parent["header"]["fat_size"].value / 8 + for i in range(0, num_entries): + yield FATFileEntry(self, "entry[]") + + + +class BannerTile(FieldSet): + static_size = 32*8 + def createFields(self): + for y in range(8): + for x in range(8): + yield Bits(self, "pixel[%d,%d]" % (x,y), 4) + +class BannerIcon(FieldSet): + static_size = 16*32*8 + def createFields(self): + for y in range(4): + for x in range(4): + yield BannerTile(self, "tile[%d,%d]" % (x,y)) + +class NdsColor(FieldSet): + static_size = 16 + def createFields(self): + yield Bits(self, "red", 5) + yield Bits(self, "green", 5) + yield Bits(self, "blue", 5) + yield NullBits(self, "pad", 1) + + def 
createDescription(self): + return "#%02x%02x%02x" % (self["red"].value << 3, self["green"].value << 3, self["blue"].value << 3) + +class Banner(FieldSet): + static_size = 2112*8 + def createFields(self): + yield UInt16(self, "version") + # CRC of this structure, excluding first 32 bytes: + yield Crc16(self, "crc", self.stream.readBytes(self.absolute_address+(32*8), (2112-32))) + yield RawBytes(self, "reserved", 28) + yield BannerIcon(self, "icon_data") + for i in range(0, 16): + yield NdsColor(self, "palette_color[]") + yield String(self, "title_jp", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_en", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_fr", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_de", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_it", 256, charset="UTF-16-LE", truncate="\0") + yield String(self, "title_es", 256, charset="UTF-16-LE", truncate="\0") + + +class Overlay(FieldSet): + static_size = 8*4*8 + def createFields(self): + yield UInt32(self, "id") + yield textHandler(UInt32(self, "ram_address"), hexadecimal) + yield UInt32(self, "ram_size") + yield UInt32(self, "bss_size") + yield textHandler(UInt32(self, "init_start_address"), hexadecimal) + yield textHandler(UInt32(self, "init_end_address"), hexadecimal) + yield UInt32(self, "file_id") + yield RawBytes(self, "reserved[]", 4) + + def createDescription(self): + return "file #%d, %d (+%d) bytes to 0x%08x" % ( + self["file_id"].value, self["ram_size"].value, self["bss_size"].value, self["ram_address"].value) + + +class SecureArea(FieldSet): + static_size=2048*8 + def createFields(self): + yield textHandler(UInt64(self, "id"), hexadecimal) + if self["id"].value == 0xe7ffdeffe7ffdeff: # indicates that secure area is decrypted + yield Bytes(self, "fixed[]", 6) # always \xff\xde\xff\xe7\xff\xde + yield Crc16(self, "header_crc16", self.stream.readBytes(self.absolute_address+(16*8), 2048-16)) + yield RawBytes(self, "unknown[]", 2048-16-2) + yield Bytes(self, "fixed[]", 2) # always \0\0 + else: + yield RawBytes(self, "encrypted[]", 2048-8) + + +class DeviceSize(UInt8): + def createDescription(self): + return "%d Mbit" % ((2**(20+self.value)) / (1024*1024)) + +class Header(FieldSet): + def createFields(self): + yield String(self, "game_title", 12, truncate="\0") + yield String(self, "game_code", 4) + yield String(self, "maker_code", 2) + yield UInt8(self, "unit_code") + yield UInt8(self, "device_code") + + yield DeviceSize(self, "card_size") + yield String(self, "card_info", 9) + yield UInt8(self, "rom_version") + yield Bits(self, "unknown_flags[]", 2) + yield Bit(self, "autostart_flag") + yield Bits(self, "unknown_flags[]", 5) + + yield UInt32(self, "arm9_source", "ARM9 ROM offset") + yield textHandler(UInt32(self, "arm9_execute_addr", "ARM9 entry address"), hexadecimal) + yield textHandler(UInt32(self, "arm9_copy_to_addr", "ARM9 RAM address"), hexadecimal) + yield UInt32(self, "arm9_bin_size", "ARM9 code size") + + yield UInt32(self, "arm7_source", "ARM7 ROM offset") + yield textHandler(UInt32(self, "arm7_execute_addr", "ARM7 entry address"), hexadecimal) + yield textHandler(UInt32(self, "arm7_copy_to_addr", "ARM7 RAM address"), hexadecimal) + yield UInt32(self, "arm7_bin_size", "ARM7 code size") + + yield UInt32(self, "filename_table_offset") + yield UInt32(self, "filename_table_size") + yield UInt32(self, "fat_offset") + yield UInt32(self, "fat_size") + + yield UInt32(self, "arm9_overlay_src") + yield UInt32(self, "arm9_overlay_size") + 
yield UInt32(self, "arm7_overlay_src") + yield UInt32(self, "arm7_overlay_size") + + yield textHandler(UInt32(self, "ctl_read_flags"), hexadecimal) + yield textHandler(UInt32(self, "ctl_init_flags"), hexadecimal) + yield UInt32(self, "banner_offset") + yield Crc16(self, "secure_crc16", self.stream.readBytes(0x4000*8, 0x4000)) + yield UInt16(self, "rom_timeout") + + yield UInt32(self, "arm9_unk_addr") + yield UInt32(self, "arm7_unk_addr") + yield UInt64(self, "unenc_mode_magic") + + yield UInt32(self, "rom_size") + yield UInt32(self, "header_size") + + yield RawBytes(self, "unknown[]", 36) + yield String(self, "passme_autoboot_detect", 4) + yield RawBytes(self, "unknown[]", 16) + + yield RawBytes(self, "gba_logo", 156) + yield Crc16(self, "logo_crc16", self.stream.readBytes(0xc0*8, 156)) + yield Crc16(self, "header_crc16", self.stream.readBytes(0, 350)) + + yield UInt32(self, "debug_rom_offset") + yield UInt32(self, "debug_size") + yield textHandler(UInt32(self, "debug_ram_address"), hexadecimal) + + +class NdsFile(Parser, RootSeekableFieldSet): + PARSER_TAGS = { + "id": "nds_file", + "category": "program", + "file_ext": ("nds",), + "mime": (u"application/octet-stream",), + "min_size": 352 * 8, # just a minimal header + "description": "Nintendo DS game file", + } + + endian = LITTLE_ENDIAN + + def validate(self): + try: + header = self["header"] + except Exception, e: + return False + + return (self.stream.readBytes(0, 1) != "\0" + and (header["device_code"].value & 7) == 0 + and header["header_size"].value >= 352 + and header["card_size"].value < 15 # arbitrary limit at 32Gbit + and header["arm9_bin_size"].value > 0 and header["arm9_bin_size"].value <= 0x3bfe00 + and header["arm7_bin_size"].value > 0 and header["arm7_bin_size"].value <= 0x3bfe00 + and header["arm9_source"].value + header["arm9_bin_size"].value < self._size + and header["arm7_source"].value + header["arm7_bin_size"].value < self._size + and header["arm9_execute_addr"].value >= 0x02000000 and header["arm9_execute_addr"].value <= 0x023bfe00 + and header["arm9_copy_to_addr"].value >= 0x02000000 and header["arm9_copy_to_addr"].value <= 0x023bfe00 + and header["arm7_execute_addr"].value >= 0x02000000 and header["arm7_execute_addr"].value <= 0x03807e00 + and header["arm7_copy_to_addr"].value >= 0x02000000 and header["arm7_copy_to_addr"].value <= 0x03807e00 + ) + + def createFields(self): + # Header + yield Header(self, "header") + + # Secure Area + if self["header"]["arm9_source"].value >= 0x4000 and self["header"]["arm9_source"].value < 0x8000: + secStart = self["header"]["arm9_source"].value & 0xfffff000 + self.seekByte(secStart, relative=False) + yield SecureArea(self, "secure_area", size=0x8000-secStart) + + # ARM9 binary + self.seekByte(self["header"]["arm9_source"].value, relative=False) + yield RawBytes(self, "arm9_bin", self["header"]["arm9_bin_size"].value) + + # ARM7 binary + self.seekByte(self["header"]["arm7_source"].value, relative=False) + yield RawBytes(self, "arm7_bin", self["header"]["arm7_bin_size"].value) + + # File Name Table + if self["header"]["filename_table_size"].value > 0: + self.seekByte(self["header"]["filename_table_offset"].value, relative=False) + yield FileNameTable(self, "filename_table", size=self["header"]["filename_table_size"].value*8) + + # FAT + if self["header"]["fat_size"].value > 0: + self.seekByte(self["header"]["fat_offset"].value, relative=False) + yield FATContent(self, "fat_content", size=self["header"]["fat_size"].value*8) + + # banner + if self["header"]["banner_offset"].value > 0: 
+ self.seekByte(self["header"]["banner_offset"].value, relative=False) + yield Banner(self, "banner") + + # ARM9 overlays + if self["header"]["arm9_overlay_src"].value > 0: + self.seekByte(self["header"]["arm9_overlay_src"].value, relative=False) + numOvls = self["header"]["arm9_overlay_size"].value / (8*4) + for i in range(numOvls): + yield Overlay(self, "arm9_overlay[]") + + # files + if self["header"]["fat_size"].value > 0: + for field in self["fat_content"]: + if field["end"].value > field["start"].value: + self.seekByte(field["start"].value, relative=False) + yield SubFile(self, "file[]", field["end"].value - field["start"].value) diff --git a/lib/hachoir_parser/program/prc.py b/lib/hachoir_parser/program/prc.py index 19db7885..f4db0254 100644 --- a/lib/hachoir_parser/program/prc.py +++ b/lib/hachoir_parser/program/prc.py @@ -5,11 +5,11 @@ Author: Sebastien Ponce Creation date: 29 october 2008 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt16, UInt32, TimestampMac32, String, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN +from hachoir_core.endian import BIG_ENDIAN class PRCHeader(FieldSet): static_size = 78*8 diff --git a/lib/hachoir_parser/program/python.py b/lib/hachoir_parser/program/python.py index 5075f7f6..f408fb2d 100644 --- a/lib/hachoir_parser/program/python.py +++ b/lib/hachoir_parser/program/python.py @@ -11,14 +11,14 @@ Creation: 25 march 2005 DISASSEMBLE = False -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, UInt8, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt16, Int32, UInt32, Int64, ParserError, Float64, Enum, Character, Bytes, RawBytes, PascalString8, TimestampUnix32) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.bits import long2raw -from lib.hachoir_core.text_handler import textHandler, hexadecimal -from lib.hachoir_core.i18n import ngettext +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.bits import long2raw +from hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.i18n import ngettext if DISASSEMBLE: from dis import dis @@ -268,6 +268,7 @@ class PythonCompiledFile(Parser): MAGIC = { # Python 1.x 20121: ("1.5", 0x1050000), + 50428: ("1.6", 0x1060000), # Python 2.x 50823: ("2.0", 0x2000000), @@ -286,6 +287,13 @@ class PythonCompiledFile(Parser): 62111: ("2.5b3", 0x2050000), 62121: ("2.5c1", 0x2050000), 62131: ("2.5c2", 0x2050000), + 62151: ("2.6a0", 0x2070000), + 62161: ("2.6a1", 0x2070000), + 62171: ("2.7a0", 0x2070000), + 62181: ("2.7a0", 0x2070000), + 62191: ("2.7a0", 0x2070000), + 62201: ("2.7a0", 0x2070000), + 62211: ("2.7a0", 0x2070000), # Python 3.x 3000: ("3.0 (3000)", 0x3000000), @@ -295,14 +303,20 @@ class PythonCompiledFile(Parser): 3040: ("3.0 (3040)", 0x3000000), 3050: ("3.0 (3050)", 0x3000000), 3060: ("3.0 (3060)", 0x3000000), - 3070: ("3.0 (3070)", 0x3000000), - 3080: ("3.0 (3080)", 0x3000000), - 3090: ("3.0 (3090)", 0x3000000), - 3100: ("3.0 (3100)", 0x3000000), - 3102: ("3.0 (3102)", 0x3000000), - 3110: ("3.0a4", 0x3000000), - 3130: ("3.0a5", 0x3000000), - 3131: ("3.0a5 unicode", 0x3000000), + 3061: ("3.0 (3061)", 0x3000000), + 3071: ("3.0 (3071)", 0x3000000), + 3081: ("3.0 (3081)", 0x3000000), + 3091: ("3.0 (3091)", 0x3000000), + 3101: ("3.0 (3101)", 0x3000000), + 3103: ("3.0 (3103)", 0x3000000), + 3111: ("3.0a4", 0x3000000), + 3131: ("3.0a5", 0x3000000), + 3141: 
("3.1a0", 0x3010000), + 3151: ("3.1a0", 0x3010000), + 3160: ("3.2a0", 0x3020000), + 3170: ("3.2a1", 0x3020000), + 3180: ("3.2a2", 0x3020000), + 3190: ("Python 3.3a0", 0x3030000), } # Dictionnary which associate the pyc signature (4-byte long string) diff --git a/lib/hachoir_parser/template.py b/lib/hachoir_parser/template.py index 0df480eb..2b75eb6e 100644 --- a/lib/hachoir_parser/template.py +++ b/lib/hachoir_parser/template.py @@ -13,17 +13,17 @@ Creation date: YYYY-mm-DD """ # TODO: Just keep what you need -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (ParserError, UInt8, UInt16, UInt32, String, RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN +from hachoir_core.endian import LITTLE_ENDIAN, BIG_ENDIAN class TODOFile(Parser): PARSER_TAGS = { "id": "TODO", "category": "TODO", # "archive", "audio", "container", ... "file_ext": ("TODO",), # TODO: Example ("bmp",) to parse the file "image.bmp" - "mime": (u"TODO"), # TODO: Example: "image/png" + "mime": (u"TODO",), # TODO: Example: "image/png" "min_size": 0, # TODO: Minimum file size (x bits, or x*8 in bytes) "description": "TODO", # TODO: Example: "A bitmap picture" } diff --git a/lib/hachoir_parser/version.py b/lib/hachoir_parser/version.py index 28d1e616..6571743e 100644 --- a/lib/hachoir_parser/version.py +++ b/lib/hachoir_parser/version.py @@ -1,4 +1,4 @@ -__version__ = "1.3.4" +__version__ = "1.3.5" PACKAGE = "hachoir-parser" WEBSITE = "http://bitbucket.org/haypo/hachoir/wiki/hachoir-parser" LICENSE = 'GNU GPL v2' diff --git a/lib/hachoir_parser/video/__init__.py b/lib/hachoir_parser/video/__init__.py index 1fabf92a..26f787e9 100644 --- a/lib/hachoir_parser/video/__init__.py +++ b/lib/hachoir_parser/video/__init__.py @@ -1,6 +1,6 @@ -from lib.hachoir_parser.video.asf import AsfFile -from lib.hachoir_parser.video.flv import FlvFile -from lib.hachoir_parser.video.mov import MovFile -from lib.hachoir_parser.video.mpeg_video import MPEGVideoFile -from lib.hachoir_parser.video.mpeg_ts import MPEG_TS +from hachoir_parser.video.asf import AsfFile +from hachoir_parser.video.flv import FlvFile +from hachoir_parser.video.mov import MovFile +from hachoir_parser.video.mpeg_video import MPEGVideoFile +from hachoir_parser.video.mpeg_ts import MPEG_TS diff --git a/lib/hachoir_parser/video/amf.py b/lib/hachoir_parser/video/amf.py index 963f3207..496c5c1d 100644 --- a/lib/hachoir_parser/video/amf.py +++ b/lib/hachoir_parser/video/amf.py @@ -10,9 +10,9 @@ Author: Victor Stinner Creation date: 4 november 2006 """ -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_core.field import (FieldSet, ParserError, UInt8, UInt16, UInt32, PascalString16, Float64) -from lib.hachoir_core.tools import timestampUNIX +from hachoir_core.tools import timestampUNIX def parseUTF8(parent): yield PascalString16(parent, "value", charset="UTF-8") diff --git a/lib/hachoir_parser/video/asf.py b/lib/hachoir_parser/video/asf.py index 35711d09..39205ea6 100644 --- a/lib/hachoir_parser/video/asf.py +++ b/lib/hachoir_parser/video/asf.py @@ -10,20 +10,20 @@ Author: Victor Stinner Creation: 5 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, UInt16, UInt32, UInt64, TimestampWin64, TimedeltaWin64, String, PascalString16, Enum, Bit, Bits, PaddingBits, PaddingBytes, NullBytes, 
RawBytes) -from lib.hachoir_core.endian import LITTLE_ENDIAN -from lib.hachoir_core.text_handler import ( +from hachoir_core.endian import LITTLE_ENDIAN +from hachoir_core.text_handler import ( displayHandler, filesizeHandler) -from lib.hachoir_core.tools import humanBitRate +from hachoir_core.tools import humanBitRate from itertools import izip -from lib.hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name -from lib.hachoir_parser.common.win32 import BitmapInfoHeader, GUID +from hachoir_parser.video.fourcc import audio_codec_name, video_fourcc_name +from hachoir_parser.common.win32 import BitmapInfoHeader, GUID MAX_HEADER_SIZE = 100 * 1024 # bytes diff --git a/lib/hachoir_parser/video/flv.py b/lib/hachoir_parser/video/flv.py index c2723d9f..5edbe7ab 100644 --- a/lib/hachoir_parser/video/flv.py +++ b/lib/hachoir_parser/video/flv.py @@ -12,14 +12,14 @@ Author: Victor Stinner Creation date: 4 november 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, UInt8, UInt24, UInt32, NullBits, NullBytes, Bit, Bits, String, RawBytes, Enum) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_parser.audio.mpeg_audio import Frame -from lib.hachoir_parser.video.amf import AMFObject -from lib.hachoir_core.tools import createDict +from hachoir_core.endian import BIG_ENDIAN +from hachoir_parser.audio.mpeg_audio import Frame +from hachoir_parser.video.amf import AMFObject +from hachoir_core.tools import createDict SAMPLING_RATE = { 0: ( 5512, "5.5 kHz"), diff --git a/lib/hachoir_parser/video/mov.py b/lib/hachoir_parser/video/mov.py index 32a81d09..1ab6ac51 100644 --- a/lib/hachoir_parser/video/mov.py +++ b/lib/hachoir_parser/video/mov.py @@ -10,28 +10,48 @@ Documents: http://wiki.multimedia.cx/index.php?title=Apple_QuickTime - File type (ftyp): http://www.ftyps.com/ +- MPEG4 standard + http://neuron2.net/library/avc/c041828_ISO_IEC_14496-12_2005%28E%29.pdf -Author: Victor Stinner +Author: Victor Stinner, Robert Xiao Creation: 2 august 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (ParserError, FieldSet, MissingField, - UInt8, Int16, UInt16, UInt32, TimestampMac32, - String, PascalString8, CString, - RawBytes, PaddingBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_parser import Parser +from hachoir_parser.common.win32 import GUID +from hachoir_core.field import (ParserError, FieldSet, MissingField, + Enum, + Bit, NullBits, Bits, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, TimestampMac32, + String, PascalString8, PascalString16, CString, + RawBytes, NullBytes, PaddingBytes) +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal -class QTFloat32(FieldSet): - static_size = 32 - def createFields(self): - yield Int16(self, "int_part") - yield UInt16(self, "float_part") - def createValue(self): - return self["int_part"].value + float(self["float_part"].value) / 65535 - def createDescription(self): - return str(self.value) +from hachoir_core.tools import MAC_TIMESTAMP_T0, timedelta +def timestampMac64(value): + if not isinstance(value, (float, int, long)): + raise TypeError("an integer or float is required") + return MAC_TIMESTAMP_T0 + timedelta(seconds=value) +from hachoir_core.field.timestamp import timestampFactory +TimestampMac64 = timestampFactory("TimestampMac64", 
timestampMac64, 64) + +def fixedFloatFactory(name, int_bits, float_bits, doc): + size = int_bits + float_bits + class Float(FieldSet): + static_size = size + __doc__ = doc + def createFields(self): + yield Bits(self, "int_part", int_bits) + yield Bits(self, "float_part", float_bits) + def createValue(self): + return self["int_part"].value + float(self["float_part"].value) / (1< 0: + yield RawBytes(self, "extra_data", size) + +class SampleDescription(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Total entries in table") + for i in xrange(self['count'].value): + yield SampleEntry(self, "sample_entry[]") + +class SyncSampleTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Number of sync samples") + for i in xrange(self['count'].value): + yield UInt32(self, "sample_number[]") + +class SampleSizeTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "uniform_size", description="Uniform size of each sample (0 if non-uniform)") + yield UInt32(self, "count", description="Number of samples") + if self['uniform_size'].value == 0: + for i in xrange(self['count'].value): + yield UInt32(self, "sample_size[]") + +class CompactSampleSizeTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield NullBits(self, "reserved[]", 24) + yield UInt8(self, "field_size", "Size of each entry in this table, in bits") + yield UInt32(self, "count", description="Number of samples") + bitsize = self['field_size'].value + for i in xrange(self['count'].value): + yield Bits(self, "sample_size[]", bitsize) + if self.current_size % 8 != 0: + yield NullBits(self, "padding[]", 8 - (self.current_size % 8)) + +class SampleToChunkTable(FieldSet): + def createFields(self): + yield UInt8(self, "version") + yield NullBits(self, "flags", 24) + yield UInt32(self, "count", description="Number of samples") + for i in xrange(self['count'].value): + yield UInt32(self, "first_chunk[]") + yield UInt32(self, "samples_per_chunk[]") + yield UInt32(self, "sample_description_index[]") + class Atom(FieldSet): tag_info = { - # TODO: Use dictionnary of dictionnary, like Matroska parser does - # "elst" is a child of "edts", but not of "moov" for example - "moov": (AtomList, "movie", "Movie"), - "trak": (AtomList, "track", "Track"), - "mdia": (AtomList, "media", "Media"), - "edts": (AtomList, "edts", ""), - "minf": (AtomList, "minf", ""), - "stbl": (AtomList, "stbl", ""), - "dinf": (AtomList, "dinf", ""), - "elst": (ELST, "edts", ""), - "tkhd": (TrackHeader, "track_hdr", "Track header"), - "hdlr": (HDLR, "hdlr", ""), - "mdhd": (MediaHeader, "media_hdr", "Media header"), - "load": (Load, "load", ""), - "mvhd": (MovieHeader, "movie_hdr", "Movie header"), - "ftyp": (FileType, "file_type", "File type"), + "ftyp": (FileType, "file_type", "File type and compatibility"), + # pdin: progressive download information + # pnot: movie preview (old QT spec) + "moov": (AtomList, "movie", "Container for all metadata"), + "mvhd": (MovieHeader, "movie_hdr", "Movie header, overall declarations"), + # clip: movie clipping (old QT spec) + # crgn: movie clipping region (old QT spec) + "trak": (AtomList, "track", "Container for an individual track or stream"), + "tkhd": (TrackHeader, "track_hdr", "Track header, overall information about the 
track"), + # matt: track matte (old QT spec) + # kmat: compressed matte (old QT spec) + "tref": (AtomList, "tref", "Track reference container"), + "hint": (TrackReferenceType, "hint", "Original media track(s) for this hint track"), + "cdsc": (TrackReferenceType, "cdsc", "Reference to track described by this track"), + "edts": (AtomList, "edts", "Edit list container"), + "elst": (EditList, "elst", "Edit list"), + "load": (Load, "load", "Track loading settings (old QT spec)"), + # imap: Track input map (old QT spec) + "mdia": (AtomList, "media", "Container for the media information in a track"), + "mdhd": (MediaHeader, "media_hdr", "Media header, overall information about the media"), + "hdlr": (Handler, "hdlr", "Handler, declares the media or metadata (handler) type"), + "minf": (AtomList, "minf", "Media information container"), + "vmhd": (VideoMediaHeader, "vmhd", "Video media header, overall information (video track only)"), + "smhd": (SoundMediaHeader, "smhd", "Sound media header, overall information (sound track only)"), + "hmhd": (HintMediaHeader, "hmhd", "Hint media header, overall information (hint track only)"), + # nmhd: Null media header, overall information (some tracks only) (unparsed) + "dinf": (AtomList, "dinf", "Data information, container"), + "dref": (DataReference, "dref", "Data reference, declares source(s) of media data in track"), + "url ": (DataEntryUrl, "url", "URL data reference"), + "urn ": (DataEntryUrn, "urn", "URN data reference"), + "stbl": (AtomList, "stbl", "Sample table, container for the time/space map"), + "stsd": (SampleDescription, "stsd", "Sample descriptions (codec types, initialization etc.)"), + "stts": (SampleDecodeTimeTable, "stts", "decoding time-to-sample delta table"), + "ctts": (SampleCompositionTimeTable, "ctts", "composition time-to-sample offset table"), + "stsc": (SampleToChunkTable, "stsc", "sample-to-chunk, partial data-offset information"), + "stsz": (SampleSizeTable, "stsz", "Sample size table (framing)"), + "stz2": (CompactSampleSizeTable, "stz2", "Compact sample size table (framing)"), + "stco": (ChunkOffsetTable, "stco", "Chunk offset, partial data-offset information"), + "co64": (ChunkOffsetTable64, "co64", "64-bit chunk offset"), + "stss": (SyncSampleTable, "stss", "Sync sample table (random access points)"), + # stsh: shadow sync sample table + # padb: sample padding bits + # stdp: sample degradation priority + # sdtp: independent and disposable samples + # sbgp: sample-to-group + # sgpd: sample group description + # subs: sub-sample information + # ctab color table (old QT spec) + # mvex: movie extends + # mehd: movie extends header + # trex: track extends defaults + # ipmc: IPMP control + "moof": (AtomList, "moof", "movie fragment"), + "mfhd": (MovieFragmentHeader, "mfhd", "movie fragment header"), + # traf: track fragment + # tfhd: track fragment header + # trun: track fragment run + # sdtp: independent and disposable samples + # sbgp: sample-to-group + # subs: sub-sample information + "mfra": (AtomList, "mfra", "movie fragment random access"), + "tfra": (TrackFragmentRandomAccess, "tfra", "track fragment random access"), + "mfro": (MovieFragmentRandomAccessOffset, "mfro", "movie fragment random access offset"), + # mdat: media data container + # free: free space (unparsed) + # skip: free space (unparsed) + "udta": (AtomList, "udta", "User data"), + "meta": (META, "meta", "File metadata"), + "keys": (KeyList, "keys", "Metadata keys"), + ## hdlr + ## dinf + ## dref: data reference, declares source(s) of metadata items + ## ipmc: 
IPMP control + # iloc: item location + # ipro: item protection + # sinf: protection scheme information + # frma: original format + # imif: IPMP information + # schm: scheme type + # schi: scheme information + # iinf: item information + # xml : XML container + # bxml: binary XML container + # pitm: primary item reference + ## other tags + "ilst": (ItemList, "ilst", "Item list"), + "trkn": (AtomList, "trkn", "Metadata: Track number"), + "disk": (AtomList, "disk", "Metadata: Disk number"), + "tmpo": (AtomList, "tempo", "Metadata: Tempo"), + "cpil": (AtomList, "cpil", "Metadata: Compilation"), + "gnre": (AtomList, "gnre", "Metadata: Genre"), + "\xa9cpy": (AtomList, "copyright", "Metadata: Copyright statement"), + "\xa9day": (AtomList, "date", "Metadata: Date of content creation"), + "\xa9dir": (AtomList, "director", "Metadata: Movie director"), + "\xa9ed1": (AtomList, "edit1", "Metadata: Edit date and description (1)"), + "\xa9ed2": (AtomList, "edit2", "Metadata: Edit date and description (2)"), + "\xa9ed3": (AtomList, "edit3", "Metadata: Edit date and description (3)"), + "\xa9ed4": (AtomList, "edit4", "Metadata: Edit date and description (4)"), + "\xa9ed5": (AtomList, "edit5", "Metadata: Edit date and description (5)"), + "\xa9ed6": (AtomList, "edit6", "Metadata: Edit date and description (6)"), + "\xa9ed7": (AtomList, "edit7", "Metadata: Edit date and description (7)"), + "\xa9ed8": (AtomList, "edit8", "Metadata: Edit date and description (8)"), + "\xa9ed9": (AtomList, "edit9", "Metadata: Edit date and description (9)"), + "\xa9fmt": (AtomList, "format", "Metadata: Movie format (CGI, digitized, etc.)"), + "\xa9inf": (AtomList, "info", "Metadata: Information about the movie"), + "\xa9prd": (AtomList, "producer", "Metadata: Movie producer"), + "\xa9prf": (AtomList, "performers", "Metadata: Performer names"), + "\xa9req": (AtomList, "requirements", "Metadata: Special hardware and software requirements"), + "\xa9src": (AtomList, "source", "Metadata: Credits for those who provided movie source content"), + "\xa9nam": (AtomList, "name", "Metadata: Name of song or video"), + "\xa9des": (AtomList, "description", "Metadata: File description"), + "\xa9cmt": (AtomList, "comment", "Metadata: General comment"), + "\xa9alb": (AtomList, "album", "Metadata: Album name"), + "\xa9gen": (AtomList, "genre", "Metadata: Custom genre"), + "\xa9ART": (AtomList, "artist", "Metadata: Artist name"), + "\xa9too": (AtomList, "encoder", "Metadata: Encoder"), + "\xa9wrt": (AtomList, "writer", "Metadata: Writer"), + "covr": (AtomList, "cover", "Metadata: Cover art"), + "----": (AtomList, "misc", "Metadata: Miscellaneous"), + "tags": (AtomList, "tags", "File tags"), + "tseg": (AtomList, "tseg", "tseg"), + "chpl": (NeroChapters, "chpl", "Nero chapter data"), } tag_handler = [ item[0] for item in tag_info ] tag_desc = [ item[1] for item in tag_info ] def createFields(self): yield UInt32(self, "size") - yield String(self, "tag", 4) + yield RawBytes(self, "tag", 4) size = self["size"].value if size == 1: - raise ParserError("Extended size is not supported!") - #yield UInt64(self, "size64") - size = self["size64"].value + # 64-bit size + yield UInt64(self, "size64") + size = self["size64"].value - 16 elif size == 0: - #size = (self.root.size - self.root.current_size - self.current_size) / 8 + # Unbounded atom if self._size is None: - size = (self.parent.size - self.current_size) / 8 - 8 + size = (self.parent.size - self.parent.current_size) / 8 - 8 else: size = (self.size - self.current_size) / 8 else: size = size - 8 - if 0 < 
size: + if self['tag'].value == 'uuid': + yield GUID(self, "usertag") + tag = self["usertag"].value + size -= 16 + else: tag = self["tag"].value + if size > 0: if tag in self.tag_info: handler, name, desc = self.tag_info[tag] yield handler(self, name, desc, size=size*8) @@ -191,6 +820,8 @@ class Atom(FieldSet): yield RawBytes(self, "data", size) def createDescription(self): + if self["tag"].value == "uuid": + return "Atom: uuid: "+self["usertag"].value return "Atom: %s" % self["tag"].value class MovFile(Parser): @@ -207,12 +838,16 @@ class MovFile(Parser): # File type brand => MIME type 'mp41': u'video/mp4', 'mp42': u'video/mp4', + 'avc1': u'video/mp4', + 'isom': u'video/mp4', + 'iso2': u'video/mp4', } endian = BIG_ENDIAN def __init__(self, *args, **kw): Parser.__init__(self, *args, **kw) - self.is_mpeg4 = False + + is_mpeg4 = property(lambda self:self.mime_type==u'video/mp4') def validate(self): # TODO: Write better code, erk! @@ -242,5 +877,5 @@ class MovFile(Parser): return self.BRANDS[brand] except MissingField: pass - return None + return u'video/quicktime' diff --git a/lib/hachoir_parser/video/mpeg_ts.py b/lib/hachoir_parser/video/mpeg_ts.py index c158dc9e..ed8724a3 100644 --- a/lib/hachoir_parser/video/mpeg_ts.py +++ b/lib/hachoir_parser/video/mpeg_ts.py @@ -9,11 +9,11 @@ Author: Victor Stinner Creation date: 13 january 2007 """ -from lib.hachoir_parser import Parser -from lib.hachoir_core.field import (FieldSet, ParserError, MissingField, +from hachoir_parser import Parser +from hachoir_core.field import (FieldSet, ParserError, MissingField, UInt8, Enum, Bit, Bits, RawBytes) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.text_handler import textHandler, hexadecimal class Packet(FieldSet): def __init__(self, *args): @@ -92,11 +92,11 @@ class MPEG_TS(Parser): return True def createFields(self): - sync = self.stream.searchBytes("\x47", 0, 204*8) - if sync is None: - raise ParserError("Unable to find synchronization byte") - elif sync: - yield RawBytes(self, "incomplete_packet", sync//8) while not self.eof: + sync = self.stream.searchBytes("\x47", self.current_size, self.current_size+204*8) + if sync is None: + raise ParserError("Unable to find synchronization byte") + elif sync: + yield RawBytes(self, "incomplete_packet[]", (sync-self.current_size)//8) yield Packet(self, "packet[]") diff --git a/lib/hachoir_parser/video/mpeg_video.py b/lib/hachoir_parser/video/mpeg_video.py index 9545681e..5a5d51c8 100644 --- a/lib/hachoir_parser/video/mpeg_video.py +++ b/lib/hachoir_parser/video/mpeg_video.py @@ -13,17 +13,17 @@ Author: Victor Stinner Creation date: 15 september 2006 """ -from lib.hachoir_parser import Parser -from lib.hachoir_parser.audio.mpeg_audio import MpegAudioFile -from lib.hachoir_core.field import (FieldSet, +from hachoir_parser import Parser +from hachoir_parser.audio.mpeg_audio import MpegAudioFile +from hachoir_core.field import (FieldSet, FieldError, ParserError, Bit, Bits, Bytes, RawBits, PaddingBits, NullBits, UInt8, UInt16, RawBytes, PaddingBytes, Enum) -from lib.hachoir_core.endian import BIG_ENDIAN -from lib.hachoir_core.stream import StringInputStream -from lib.hachoir_core.text_handler import textHandler, hexadecimal +from hachoir_core.endian import BIG_ENDIAN +from hachoir_core.stream import StringInputStream +from hachoir_core.text_handler import textHandler, hexadecimal class FragmentGroup: def __init__(self, parser): From 
92bf6bb6d6a86884a21009eddb02e716e98bd8c4 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 15 Aug 2015 15:26:07 +0100 Subject: [PATCH 12/95] Change configure quiet option in Hachoir to suppress warnings. --- CHANGES.md | 2 ++ HACKS.txt | 7 ++++--- lib/hachoir_core/config.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index dad0436f..79956f12 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,8 @@ * Remove legacy anime split home option from anime settings tab (new option located in general/interface tab) * Remove "Manage Torrents" * Update Beautiful Soup 4.3.2 to 4.4.0 (r390) +* Update Hachoir library 1.3.3 to 1.3.4 (r1383) +* Change configure quiet option in Hachoir to suppress warnings (add ref:hacks.txt) ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/HACKS.txt b/HACKS.txt index 5aea55dc..75c4b37f 100644 --- a/HACKS.txt +++ b/HACKS.txt @@ -1,7 +1,8 @@ Libs with customisations... -/tornado +/lib/cachecontrol/caches/file_cache.py +/lib/hachoir_core/config.py +/lib/pynma/pynma.py /lib/requests/packages/urllib3/connectionpool.py /lib/requests/packages/urllib3/util/ssl_.py -/lib/cachecontrol/caches/file_cache.py -/lib/pynma/pynma.py \ No newline at end of file +/tornado diff --git a/lib/hachoir_core/config.py b/lib/hachoir_core/config.py index 9250b627..e9bade06 100644 --- a/lib/hachoir_core/config.py +++ b/lib/hachoir_core/config.py @@ -14,7 +14,7 @@ unicode_stdout = True # Replace stdout and stderr with Unicode compatible ob # Global options debug = False # Display many informations usefull to debug verbose = False # Display more informations -quiet = False # Don't display warnings +quiet = True # Don't display warnings # Use internationalization and localization (gettext)? if os.name == "nt": From 70a8b47b446221b761f43b832cf1f655733fea76 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Fri, 21 Aug 2015 03:32:27 +0100 Subject: [PATCH 13/95] Add parse media content to determine quality. Determine quality before making final assumptions during re-scan, force update, pp and other processes. Add a postprocess folder name validation. 
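
In outline: when a release name alone cannot settle quality, probe the media
file itself with Hachoir and map the extracted frame height onto a quality
constant, with a small tolerance so cropped or slightly short encodes still
match. The sketch below illustrates the idea as standalone code; the helper
names and string labels are illustrative only, the real implementation is
Quality.fileQuality() in sickbeard/common.py.

    # Illustrative sketch: derive a quality label from frame height via Hachoir.
    import re

    from hachoir_core.stream import InputStreamError
    from hachoir_metadata import extractMetadata
    from hachoir_parser import createParser

    def probe_height(filename):
        # returns the video frame height, or None if it cannot be determined
        try:
            parser = createParser(filename)
        except InputStreamError:
            return None
        if not parser:
            return None
        extract = extractMetadata(parser)
        if not extract:
            return None
        try:
            return extract.get('height')
        except (AttributeError, ValueError):
            try:
                # some containers only expose height on a nested video group
                for group in extract.iterGroups():
                    if re.search('(?i)video', group.header):
                        return group.get('height')
            except (AttributeError, ValueError):
                pass
        return None

    def quality_from_height(height):
        # accept heights a few percent under the nominal line count
        tolerance = lambda value, percent: int(round(value * (100 - percent) / 100.0))
        if not height or height < tolerance(352, 5):
            return 'UNKNOWN'
        if height <= tolerance(720, 2):
            return 'SDTV'
        return ('HDTV', 'FULLHDTV')[height >= tolerance(1080, 1)]

The tolerance keeps, for example, a 1072-line crop of a 1080p source in the
FULLHDTV bucket rather than demoting it to HDTV.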
--- CHANGES.md | 2 + sickbeard/common.py | 50 ++++++++++- sickbeard/postProcessor.py | 8 +- sickbeard/processTV.py | 4 + sickbeard/tv.py | 164 ++++++++++++++++++------------------- 5 files changed, 141 insertions(+), 87 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 79956f12..f449fa55 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,8 @@ * Update Beautiful Soup 4.3.2 to 4.4.0 (r390) * Update Hachoir library 1.3.3 to 1.3.4 (r1383) * Change configure quiet option in Hachoir to suppress warnings (add ref:hacks.txt) +* Add parse media content to determine quality before making final assumptions during re-scan, update, pp +* Add a postprocess folder name validation ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/sickbeard/common.py b/sickbeard/common.py index 41c32c12..602bc58d 100644 --- a/sickbeard/common.py +++ b/sickbeard/common.py @@ -230,6 +230,45 @@ class Quality: else: return Quality.UNKNOWN + @staticmethod + def fileQuality(filename): + + from sickbeard import encodingKludge as ek + if ek.ek(os.path.isfile, filename): + + from hachoir_parser import createParser + from hachoir_metadata import extractMetadata + from hachoir_core.stream import InputStreamError + + parser = height = None + try: + parser = createParser(filename) + except InputStreamError as e: + logger.log('Hachoir can\'t parse file content quality because it found error: %s' % e.text, logger.WARNING) + + if parser: + extract = extractMetadata(parser) + if extract: + try: + height = extract.get('height') + except (AttributeError, ValueError): + try: + for metadata in extract.iterGroups(): + if re.search('(?i)video', metadata.header): + height = metadata.get('height') + break + except (AttributeError, ValueError): + pass + + parser.stream._input.close() + + tolerance = lambda value, percent: int(round(value - (value * percent / 100.0))) + if height >= tolerance(352, 5): + if height <= tolerance(720, 2): + return Quality.SDTV + return (Quality.HDTV, Quality.FULLHDTV)[height >= tolerance(1080, 1)] + return Quality.UNKNOWN + @staticmethod def assumeQuality(name): if name.lower().endswith(('.avi', '.mp4')): @@ -262,10 +301,19 @@ class Quality: @staticmethod def statusFromName(name, assume=True, anime=False): quality = Quality.nameQuality(name, anime) - if assume and quality == Quality.UNKNOWN: + if assume and Quality.UNKNOWN == quality: quality = Quality.assumeQuality(name) return Quality.compositeStatus(DOWNLOADED, quality) + @staticmethod + def statusFromNameOrFile(file_path, assume=True, anime=False): + quality = Quality.nameQuality(file_path, anime) + if Quality.UNKNOWN == quality: + quality = Quality.fileQuality(file_path) + if assume and Quality.UNKNOWN == quality: + quality = Quality.assumeQuality(file_path) + return Quality.compositeStatus(DOWNLOADED, quality) + DOWNLOADED = None SNATCHED = None SNATCHED_PROPER = None diff --git a/sickbeard/postProcessor.py b/sickbeard/postProcessor.py index 08543eb9..6ed1a27e 100644 --- a/sickbeard/postProcessor.py +++ b/sickbeard/postProcessor.py @@ -662,7 +662,7 @@ class PostProcessor(object): continue ep_quality = common.Quality.nameQuality(cur_name, ep_obj.show.is_anime) - quality_log = u' "%s" quality from the %s %s' % (common.Quality.qualityStrings[ep_quality], thing, cur_name) + quality_log = u' "%s" quality parsed from the %s %s' % (common.Quality.qualityStrings[ep_quality], thing, cur_name) # if we find a good one then use it if common.Quality.UNKNOWN != ep_quality: @@ -671,6 +671,12 @@ class PostProcessor(object): else: self._log(u'Found' + quality_log, 
logger.DEBUG) + ep_quality = common.Quality.fileQuality(self.file_path) + if common.Quality.UNKNOWN != ep_quality: + self._log(u'Using "%s" quality parsed from the metadata file content of %s' + % (common.Quality.qualityStrings[ep_quality], self.file_name), logger.DEBUG) + return ep_quality + # Try guessing quality from the file name ep_quality = common.Quality.assumeQuality(self.file_name) self._log(u'Using guessed "%s" quality from the file name %s' diff --git a/sickbeard/processTV.py b/sickbeard/processTV.py index 5abd23c5..5baad72f 100644 --- a/sickbeard/processTV.py +++ b/sickbeard/processTV.py @@ -434,6 +434,10 @@ class ProcessTVShow(object): try_scene_exceptions=True, convert=True).parse( dir_name, cache_result=False) + # check we parsed id, ep and season + if not (0 < len(parse_result.episode_numbers) and isinstance(parse_result.show.indexerid, int) + and isinstance(parse_result.season_number, int)): + return False except (InvalidNameException, InvalidShowException): # If the filename doesn't parse, then return false as last # resort. We can assume that unparseable filenames are not diff --git a/sickbeard/tv.py b/sickbeard/tv.py index 65ebea27..2ed8fe57 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -613,141 +613,137 @@ class TVShow(object): def makeEpFromFile(self, file): if not ek.ek(os.path.isfile, file): - logger.log(str(self.indexerid) + u": That isn't even a real file dude... " + file) + logger.log(u'%s: Not a real file... %s' % (self.indexerid, file)) return None - logger.log(str(self.indexerid) + u": Creating episode object from " + file, logger.DEBUG) + logger.log(u'%s: Creating episode object from %s' % (self.indexerid, file), logger.DEBUG) try: - myParser = NameParser(showObj=self, try_indexers=True) - parse_result = myParser.parse(file) + my_parser = NameParser(showObj=self, try_indexers=True) + parse_result = my_parser.parse(file) except InvalidNameException: - logger.log(u"Unable to parse the filename " + file + " into a valid episode", logger.DEBUG) + logger.log(u'Unable to parse the filename %s into a valid episode' % file, logger.DEBUG) return None except InvalidShowException: - logger.log(u"Unable to parse the filename " + file + " into a valid show", logger.DEBUG) + logger.log(u'Unable to parse the filename %s into a valid show' % file, logger.DEBUG) return None if not len(parse_result.episode_numbers): - logger.log("parse_result: " + str(parse_result)) - logger.log(u"No episode number found in " + file + ", ignoring it", logger.ERROR) + logger.log(u'parse_result: %s' % parse_result) + logger.log(u'No episode number found in %s, ignoring it' % file, logger.ERROR) return None # for now lets assume that any episode in the show dir belongs to that show - season = parse_result.season_number if parse_result.season_number != None else 1 + season = parse_result.season_number if None is not parse_result.season_number else 1 episodes = parse_result.episode_numbers - rootEp = None + root_ep = None sql_l = [] - for curEpNum in episodes: + for cur_ep_num in episodes: - episode = int(curEpNum) + episode = int(cur_ep_num) - logger.log( - str(self.indexerid) + ": " + file + " parsed to " + self.name + " " + str(season) + "x" + str(episode), - logger.DEBUG) + logger.log(u'%s: %s parsed to %s %sx%s' % (self.indexerid, file, self.name, season, episode), logger.DEBUG) - checkQualityAgain = False + check_quality_again = False same_file = False - curEp = self.getEpisode(season, episode) + cur_ep = self.getEpisode(season, episode) - if curEp == None: + if None is cur_ep: try: - 
curEp = self.getEpisode(season, episode, file) + cur_ep = self.getEpisode(season, episode, file) except exceptions.EpisodeNotFoundException: - logger.log(str(self.indexerid) + u": Unable to figure out what this file is, skipping", - logger.ERROR) + logger.log(u'%s: Unable to figure out what this file is, skipping' % self.indexerid, logger.ERROR) continue else: # if there is a new file associated with this ep then re-check the quality - if curEp.location and ek.ek(os.path.normpath, curEp.location) != ek.ek(os.path.normpath, file): + if cur_ep.location and ek.ek(os.path.normpath, cur_ep.location) != ek.ek(os.path.normpath, file): logger.log( - u"The old episode had a different file associated with it, I will re-check the quality based on the new filename " + file, + u'The old episode had a different file associated with it, re-checking the quality based on the new filename ' + file, logger.DEBUG) - checkQualityAgain = True + check_quality_again = True - with curEp.lock: - old_size = curEp.file_size - curEp.location = file + with cur_ep.lock: + old_size = cur_ep.file_size + cur_ep.location = file # if the sizes are the same then it's probably the same file - if old_size and curEp.file_size == old_size: + if old_size and cur_ep.file_size == old_size: same_file = True else: same_file = False - curEp.checkForMetaFiles() + cur_ep.checkForMetaFiles() - if rootEp == None: - rootEp = curEp + if None is root_ep: + root_ep = cur_ep else: - if curEp not in rootEp.relatedEps: - rootEp.relatedEps.append(curEp) + if cur_ep not in root_ep.relatedEps: + root_ep.relatedEps.append(cur_ep) # if it's a new file then if not same_file: - curEp.release_name = '' + cur_ep.release_name = '' # if they replace a file on me I'll make some attempt at re-checking the quality unless I know it's the same file - if checkQualityAgain and not same_file: - newQuality = Quality.nameQuality(file, self.is_anime) - logger.log(u"Since this file has been renamed, I checked " + file + " and found quality " + - Quality.qualityStrings[newQuality], logger.DEBUG) - if newQuality != Quality.UNKNOWN: - curEp.status = Quality.compositeStatus(DOWNLOADED, newQuality) - + if check_quality_again and not same_file: + new_quality = Quality.nameQuality(file, self.is_anime) + if Quality.UNKNOWN == new_quality: + new_quality = Quality.fileQuality(file) + logger.log(u'Since this file was renamed, file %s was checked and quality "%s" found' + % (file, Quality.qualityStrings[new_quality]), logger.DEBUG) + if Quality.UNKNOWN != new_quality: + cur_ep.status = Quality.compositeStatus(DOWNLOADED, new_quality) # check for status/quality changes as long as it's a new file - elif not same_file and sickbeard.helpers.isMediaFile(file) and curEp.status not in Quality.DOWNLOADED + [ - ARCHIVED, IGNORED]: + elif not same_file and sickbeard.helpers.isMediaFile(file)\ + and cur_ep.status not in Quality.DOWNLOADED + [ARCHIVED, IGNORED]: - oldStatus, oldQuality = Quality.splitCompositeStatus(curEp.status) - newQuality = Quality.nameQuality(file, self.is_anime) - if newQuality == Quality.UNKNOWN: - newQuality = Quality.assumeQuality(file) + old_status, old_quality = Quality.splitCompositeStatus(cur_ep.status) + new_quality = Quality.nameQuality(file, self.is_anime) + if Quality.UNKNOWN == new_quality: + new_quality = Quality.fileQuality(file) + if Quality.UNKNOWN == new_quality: + new_quality = Quality.assumeQuality(file) - newStatus = None + new_status = None # if it was snatched and now exists then set the status correctly - if oldStatus == SNATCHED and oldQuality <= 
newQuality: - logger.log(u"STATUS: this episode used to be snatched with quality " + Quality.qualityStrings[ - oldQuality] + u" but a file exists with quality " + Quality.qualityStrings[ - newQuality] + u" so I'm setting the status to DOWNLOADED", logger.DEBUG) - newStatus = DOWNLOADED + if SNATCHED == old_status and old_quality <= new_quality: + logger.log(u'STATUS: this episode used to be snatched with quality %s but a file exists with quality %s so setting the status to DOWNLOADED' + % (Quality.qualityStrings[old_quality], Quality.qualityStrings[new_quality]), logger.DEBUG) + new_status = DOWNLOADED # if it was snatched proper and we found a higher quality one then allow the status change - elif oldStatus == SNATCHED_PROPER and oldQuality < newQuality: - logger.log(u"STATUS: this episode used to be snatched proper with quality " + Quality.qualityStrings[ - oldQuality] + u" but a file exists with quality " + Quality.qualityStrings[ - newQuality] + u" so I'm setting the status to DOWNLOADED", logger.DEBUG) - newStatus = DOWNLOADED + elif SNATCHED_PROPER == old_status and old_quality < new_quality: + logger.log(u'STATUS: this episode used to be snatched proper with quality %s but a file exists with quality %s so setting the status to DOWNLOADED' + % (Quality.qualityStrings[old_quality], Quality.qualityStrings[new_quality]), logger.DEBUG) + new_status = DOWNLOADED - elif oldStatus not in (SNATCHED, SNATCHED_PROPER): - newStatus = DOWNLOADED + elif old_status not in (SNATCHED, SNATCHED_PROPER): + new_status = DOWNLOADED - if newStatus != None: - with curEp.lock: - logger.log(u"STATUS: we have an associated file, so setting the status from " + str( - curEp.status) + u" to DOWNLOADED/" + str(Quality.statusFromName(file, anime=self.is_anime)), - logger.DEBUG) - curEp.status = Quality.compositeStatus(newStatus, newQuality) + if None is not new_status: + with cur_ep.lock: + logger.log(u'STATUS: we have an associated file, so setting the status from %s to DOWNLOADED/%s' + % (cur_ep.status, Quality.compositeStatus(Quality.DOWNLOADED, new_quality)), logger.DEBUG) + cur_ep.status = Quality.compositeStatus(new_status, new_quality) - with curEp.lock: - result = curEp.get_sql() + with cur_ep.lock: + result = cur_ep.get_sql() if None is not result: sql_l.append(result) if 0 < len(sql_l): - myDB = db.DBConnection() - myDB.mass_action(sql_l) - + my_db = db.DBConnection() + my_db.mass_action(sql_l) # creating metafiles on the root should be good enough - if sickbeard.USE_FAILED_DOWNLOADS and rootEp is not None: - with rootEp.lock: - rootEp.createMetaFiles() + if sickbeard.USE_FAILED_DOWNLOADS and root_ep is not None: + with root_ep.lock: + root_ep.createMetaFiles() - return rootEp + return root_ep def loadFromDB(self, skipNFO=False): @@ -1812,14 +1808,13 @@ class TVEpisode(object): elif sickbeard.helpers.isMediaFile(self.location): # leave propers alone, you have to either post-process them or manually change them back if self.status not in Quality.SNATCHED_PROPER + Quality.DOWNLOADED + Quality.SNATCHED + [ARCHIVED]: - logger.log( - u"5 Status changes from " + str(self.status) + " to " + str(Quality.statusFromName(self.location)), - logger.DEBUG) - self.status = Quality.statusFromName(self.location, anime=self.show.is_anime) + status_quality = Quality.statusFromNameOrFile(self.location, anime=self.show.is_anime) + logger.log(u'(1) Status changes from %s to %s' % (self.status, status_quality), logger.DEBUG) + self.status = status_quality # shouldn't get here probably else: - logger.log(u"6 Status changes 
from " + str(self.status) + " to " + str(UNKNOWN), logger.DEBUG) + logger.log(u"(2) Status changes from " + str(self.status) + " to " + str(UNKNOWN), logger.DEBUG) self.status = UNKNOWN def loadFromNFO(self, location): @@ -1837,11 +1832,10 @@ class TVEpisode(object): if self.location != "": - if self.status == UNKNOWN: - if sickbeard.helpers.isMediaFile(self.location): - logger.log(u"7 Status changes from " + str(self.status) + " to " + str( - Quality.statusFromName(self.location, anime=self.show.is_anime)), logger.DEBUG) - self.status = Quality.statusFromName(self.location, anime=self.show.is_anime) + if UNKNOWN == self.status and sickbeard.helpers.isMediaFile(self.location): + status_quality = Quality.statusFromNameOrFile(self.location, anime=self.show.is_anime) + logger.log(u'(3) Status changes from %s to %s' % (self.status, status_quality), logger.DEBUG) + self.status = status_quality nfoFile = sickbeard.helpers.replaceExtension(self.location, "nfo") logger.log(str(self.show.indexerid) + u": Using NFO name " + nfoFile, logger.DEBUG) From 5cf79fb3b924b7053420bd0fdb327f7ab570afdd Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 22 Aug 2015 01:07:00 +0100 Subject: [PATCH 14/95] Update Requests library to 2.7.0 (5d6d1bc). --- CHANGES.md | 1 + lib/requests/adapters.py | 10 ++++------ lib/requests/api.py | 4 ++-- lib/requests/models.py | 4 ++-- lib/requests/sessions.py | 21 +++++++++++++-------- lib/requests/utils.py | 7 ++++--- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f449fa55..a10983c0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -19,6 +19,7 @@ * Change configure quiet option in Hachoir to suppress warnings (add ref:hacks.txt) * Add parse media content to determine quality before making final assumptions during re-scan, update, pp * Add a postprocess folder name validation +* Update Requests library to 2.7.0 (5d6d1bc) ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index 02e0dd1f..cdc5744c 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -35,6 +35,7 @@ from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 +DEFAULT_POOL_TIMEOUT = None class BaseAdapter(object): @@ -326,8 +327,8 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send - data before giving up, as a float, or a (`connect timeout, read - timeout `_) tuple. + data before giving up, as a float, or a :ref:`(connect timeout, + read timeout) ` tuple. :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. @@ -375,7 +376,7 @@ class HTTPAdapter(BaseAdapter): if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool - low_conn = conn._get_conn(timeout=timeout) + low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: low_conn.putrequest(request.method, @@ -407,9 +408,6 @@ class HTTPAdapter(BaseAdapter): # Then, reraise so that we can handle the actual exception. low_conn.close() raise - else: - # All is well, return the connection to the pool. 
- conn._put_conn(low_conn) except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) diff --git a/lib/requests/api.py b/lib/requests/api.py index d40fa380..72a777b2 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -27,8 +27,8 @@ def request(method, url, **kwargs): :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) How long to wait for the server to send data - before giving up, as a float, or a (`connect timeout, read timeout - `_) tuple. + before giving up, as a float, or a :ref:`(connect timeout, read + timeout) ` tuple. :type timeout: float or tuple :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :type allow_redirects: bool diff --git a/lib/requests/models.py b/lib/requests/models.py index 7ab21f78..4270c647 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -828,10 +828,10 @@ class Response(object): http_error_msg = '' if 400 <= self.status_code < 500: - http_error_msg = '%s Client Error: %s' % (self.status_code, self.reason) + http_error_msg = '%s Client Error: %s for url: %s' % (self.status_code, self.reason, self.url) elif 500 <= self.status_code < 600: - http_error_msg = '%s Server Error: %s' % (self.status_code, self.reason) + http_error_msg = '%s Server Error: %s for url: %s' % (self.status_code, self.reason, self.url) if http_error_msg: raise HTTPError(http_error_msg, response=self) diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 820919ee..c3ef363c 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -62,12 +62,11 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): merged_setting = dict_class(to_key_val_list(session_setting)) merged_setting.update(to_key_val_list(request_setting)) - # Remove keys that are set to None. - for (k, v) in request_setting.items(): - if v is None: - del merged_setting[k] - - merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + # Remove keys that are set to None. Extract keys first to avoid altering + # the dictionary during iteration. + none_keys = [k for (k, v) in merged_setting.items() if v is None] + for key in none_keys: + del merged_setting[key] return merged_setting @@ -275,6 +274,12 @@ class Session(SessionRedirectMixin): >>> s = requests.Session() >>> s.get('http://httpbin.org/get') 200 + + Or as a context manager:: + + >>> with requests.Session() as s: + >>> s.get('http://httpbin.org/get') + 200 """ __attrs__ = [ @@ -418,8 +423,8 @@ class Session(SessionRedirectMixin): :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) How long to wait for the server to send - data before giving up, as a float, or a (`connect timeout, read - timeout `_) tuple. + data before giving up, as a float, or a :ref:`(connect timeout, + read timeout) ` tuple. :type timeout: float or tuple :param allow_redirects: (optional) Set to True by default. 
:type allow_redirects: bool diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 8fba62dd..3fd0e41f 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -67,7 +67,7 @@ def super_len(o): return len(o.getvalue()) -def get_netrc_auth(url): +def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" try: @@ -105,8 +105,9 @@ def get_netrc_auth(url): return (_netrc[login_i], _netrc[2]) except (NetrcParseError, IOError): # If there was a parsing error or a permissions issue reading the file, - # we'll just skip netrc auth - pass + # we'll just skip netrc auth unless explicitly asked to raise errors. + if raise_errors: + raise # AppEngine hackiness. except (ImportError, AttributeError): From 9469bb050f45d4c6373a402293848c4eef03ad91 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 22 Aug 2015 01:15:45 +0100 Subject: [PATCH 15/95] Update SimpleJSON library 3.7.3 to 3.8.0 (a37a9bd). --- CHANGES.md | 1 + lib/simplejson/__init__.py | 31 +++++++++++++++++++++---------- lib/simplejson/_speedups.c | 21 +++++++++++++++++---- lib/simplejson/encoder.py | 23 ++++++++++++++++++++--- 4 files changed, 59 insertions(+), 17 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a10983c0..5689c856 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,7 @@ * Add parse media content to determine quality before making final assumptions during re-scan, update, pp * Add a postprocess folder name validation * Update Requests library to 2.7.0 (5d6d1bc) +* Update SimpleJSON library 3.7.3 to 3.8.0 (a37a9bd) ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/lib/simplejson/__init__.py b/lib/simplejson/__init__.py index b8d50978..ac1e5cb5 100644 --- a/lib/simplejson/__init__.py +++ b/lib/simplejson/__init__.py @@ -5,9 +5,8 @@ interchange format. :mod:`simplejson` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. +compatibility back to Python 2.5 and (currently) has significant performance +advantages, even without using the optional C extension for speedups. Encoding basic Python object hierarchies:: @@ -98,7 +97,7 @@ Using simplejson.tool from the shell to validate and pretty-print:: Expecting property name: line 1 column 3 (char 2) """ from __future__ import absolute_import -__version__ = '3.7.3' +__version__ = '3.8.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -140,6 +139,7 @@ _default_encoder = JSONEncoder( use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, + iterable_as_array=False, bigint_as_string=False, item_sort_key=None, for_json=False, @@ -152,7 +152,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, - for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). 
@@ -204,6 +205,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher or lower than -2**53 will be encoded as strings. This is to avoid the rounding that happens in Javascript otherwise. Note that this is still a @@ -242,7 +247,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array + and namedtuple_as_object and tuple_as_array and not iterable_as_array and not bigint_as_string and not sort_keys and not item_sort_key and not for_json and not ignore_nan and int_as_string_bitcount is None @@ -258,6 +263,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, default=default, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, bigint_as_string=bigint_as_string, sort_keys=sort_keys, item_sort_key=item_sort_key, @@ -276,7 +282,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, - for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -324,6 +331,10 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + If *bigint_as_string* is true (not the default), ints 2**53 and higher or lower than -2**53 will be encoded as strings. This is to avoid the rounding that happens in Javascript otherwise. 
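For illustration, a minimal sketch of the ``iterable_as_array`` flag documented above, assuming the bundled library is importable as ``simplejson``::

    import simplejson as json

    def numbers():
        # a generator: not a list or tuple, but it implements __iter__()
        yield 1
        yield 2

    # by default a bare generator raises TypeError; with the new flag any
    # object implementing __iter__() is encoded as a JSON array
    print(json.dumps(numbers(), iterable_as_array=True))  # prints [1, 2]
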
@@ -356,12 +367,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder - if ( - not skipkeys and ensure_ascii and + if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array + and namedtuple_as_object and tuple_as_array and not iterable_as_array and not bigint_as_string and not sort_keys and not item_sort_key and not for_json and not ignore_nan and int_as_string_bitcount is None @@ -377,6 +387,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, bigint_as_string=bigint_as_string, sort_keys=sort_keys, item_sort_key=item_sort_key, diff --git a/lib/simplejson/_speedups.c b/lib/simplejson/_speedups.c index bc1648ae..fb68e359 100644 --- a/lib/simplejson/_speedups.c +++ b/lib/simplejson/_speedups.c @@ -169,6 +169,7 @@ typedef struct _PyEncoderObject { int use_decimal; int namedtuple_as_object; int tuple_as_array; + int iterable_as_array; PyObject *max_long_size; PyObject *min_long_size; PyObject *item_sort_key; @@ -2581,7 +2582,6 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { - /* initialize Encoder object */ static char *kwlist[] = { "markers", "default", @@ -2596,30 +2596,32 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) "use_decimal", "namedtuple_as_object", "tuple_as_array", + "iterable_as_array" "int_as_string_bitcount", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", + "iterable_as_array", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; - PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array; PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; PyObject *ignore_nan, *Decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, &namedtuple_as_object, &tuple_as_array, &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, - &ignore_nan, &Decimal)) + &ignore_nan, &Decimal, &iterable_as_array)) return -1; Py_INCREF(markers); @@ -2649,6 +2651,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->use_decimal = PyObject_IsTrue(use_decimal); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + s->iterable_as_array = PyObject_IsTrue(iterable_as_array); if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8; int int_as_string_bitcount_val = (int)PyLong_AsLong(int_as_string_bitcount); @@ -2936,6 +2939,16 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss else { PyObject *ident = NULL; PyObject *newobj; + if (s->iterable_as_array) { + newobj = 
PyObject_GetIter(obj); + if (newobj == NULL) + PyErr_Clear(); + else { + rv = encoder_listencode_list(s, rval, newobj, indent_level); + Py_DECREF(newobj); + break; + } + } if (s->markers != Py_None) { int has_key; ident = PyLong_FromVoidPtr(obj); diff --git a/lib/simplejson/encoder.py b/lib/simplejson/encoder.py index d240438e..d771bb48 100644 --- a/lib/simplejson/encoder.py +++ b/lib/simplejson/encoder.py @@ -124,7 +124,7 @@ class JSONEncoder(object): use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, item_sort_key=None, for_json=False, ignore_nan=False, - int_as_string_bitcount=None): + int_as_string_bitcount=None, iterable_as_array=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -179,6 +179,10 @@ class JSONEncoder(object): If tuple_as_array is true (the default), tuple (and subclasses) will be encoded as JSON arrays. + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + If bigint_as_string is true (not the default), ints 2**53 and higher or lower than -2**53 will be encoded as strings. This is to avoid the rounding that happens in Javascript otherwise. @@ -210,6 +214,7 @@ class JSONEncoder(object): self.use_decimal = use_decimal self.namedtuple_as_object = namedtuple_as_object self.tuple_as_array = tuple_as_array + self.iterable_as_array = iterable_as_array self.bigint_as_string = bigint_as_string self.item_sort_key = item_sort_key self.for_json = for_json @@ -338,7 +343,7 @@ class JSONEncoder(object): self.namedtuple_as_object, self.tuple_as_array, int_as_string_bitcount, self.item_sort_key, self.encoding, self.for_json, - self.ignore_nan, decimal.Decimal) + self.ignore_nan, decimal.Decimal, self.iterable_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, @@ -347,7 +352,7 @@ class JSONEncoder(object): self.namedtuple_as_object, self.tuple_as_array, int_as_string_bitcount, self.item_sort_key, self.encoding, self.for_json, - Decimal=decimal.Decimal) + self.iterable_as_array, Decimal=decimal.Decimal) try: return _iterencode(o, 0) finally: @@ -386,6 +391,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _use_decimal, _namedtuple_as_object, _tuple_as_array, _int_as_string_bitcount, _item_sort_key, _encoding,_for_json, + _iterable_as_array, ## HACK: hand-optimized bytecode; turn globals into locals _PY3=PY3, ValueError=ValueError, @@ -399,6 +405,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, list=list, str=str, tuple=tuple, + iter=iter, ): if _use_decimal and Decimal is None: Decimal = decimal.Decimal @@ -646,6 +653,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif _use_decimal and isinstance(o, Decimal): yield str(o) else: + while _iterable_as_array: + # Markers are not checked here because it is valid for + # an iterable to return self. + try: + o = iter(o) + except TypeError: + break + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + return if markers is not None: markerid = id(o) if markerid in markers: From 10114fa8951e9907ece6d200a8872ff0c6943d02 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Fri, 21 Aug 2015 19:08:34 +0100 Subject: [PATCH 16/95] Update Tornado Web Server 4.2 to 4.3.dev1 (1b6157d). 
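A minimal sketch of the Python 3.5 interop this snapshot brings in (requires Python 3.5; the coroutine names below are illustrative)::

    from tornado import gen
    from tornado.ioloop import IOLoop
    from tornado.locks import Semaphore

    sem = Semaphore(1)

    async def work():
        # 'async with' drives the new Semaphore.__aenter__/__aexit__
        async with sem:
            await gen.sleep(0.01)  # Tornado Futures are awaitable via __await__
            return 'done'

    @gen.coroutine
    def main():
        # convert_yielded() now accepts awaitables, so a decorated
        # coroutine can yield a native coroutine object directly
        result = yield work()
        print(result)

    IOLoop.current().run_sync(main)
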
--- CHANGES.md | 7 +- tornado/__init__.py | 4 +- tornado/_locale_data.py | 92 ++++++++++++++++ tornado/auth.py | 129 +++++++++++++++------- tornado/autoreload.py | 11 +- tornado/concurrent.py | 18 ++- tornado/gen.py | 76 +++++++++++-- tornado/http1connection.py | 5 +- tornado/httpserver.py | 1 - tornado/httputil.py | 9 ++ tornado/ioloop.py | 2 +- tornado/iostream.py | 39 ++++--- tornado/locale.py | 102 ++++++----------- tornado/locks.py | 88 ++++++++++++--- tornado/options.py | 50 ++++++--- tornado/platform/asyncio.py | 23 +++- tornado/platform/twisted.py | 13 ++- tornado/queues.py | 48 +++++++- tornado/simple_httpclient.py | 5 +- tornado/template.py | 206 +++++++++++++++++++++++++++-------- tornado/testing.py | 48 ++++++-- tornado/util.py | 10 +- tornado/web.py | 72 ++++++++---- tornado/websocket.py | 3 +- 24 files changed, 776 insertions(+), 285 deletions(-) create mode 100644 tornado/_locale_data.py diff --git a/CHANGES.md b/CHANGES.md index 5689c856..f1d6c2e0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -21,11 +21,12 @@ * Add a postprocess folder name validation * Update Requests library to 2.7.0 (5d6d1bc) * Update SimpleJSON library 3.7.3 to 3.8.0 (a37a9bd) +* Update Tornado Web Server 4.2 to 4.3.dev1 (1b6157d) ### 0.10.0 (2015-08-06 11:05:00 UTC) * Remove EZRSS provider -* Update Tornado webserver to 4.2 (fdfaf3d) +* Update Tornado Web Server to 4.2 (fdfaf3d) * Update change to suppress reporting of Tornado exception error 1 to updated package (ref:hacks.txt) * Update fix for API response header for JSON content type and the return of JSONP data to updated package (ref:hacks.txt) * Update Requests library 2.6.2 to 2.7.0 (8b5e457) @@ -141,7 +142,7 @@ ### 0.9.0 (2015-05-18 14:33:00 UTC) -* Update Tornado webserver to 4.2.dev1 (609dbb9) +* Update Tornado Web Server to 4.2.dev1 (609dbb9) * Update change to suppress reporting of Tornado exception error 1 to updated package as listed in hacks.txt * Update fix for API response header for JSON content type and the return of JSONP data to updated package as listed in hacks.txt * Change network names to only display on top line of Day by Day layout on Episode View @@ -655,7 +656,7 @@ * Add return code from hardlinking error to log * Fix ABD regex for certain filenames * Change miscellaneous UI fixes -* Update Tornado webserver to 4.1dev1 and add the certifi lib dependency +* Update Tornado Web Server to 4.1dev1 and add the certifi lib dependency * Fix trending shows page from loading full size poster images * Add "Archive on first match" to Manage, Mass Update, Edit Selected page * Fix searching IPTorrentsProvider diff --git a/tornado/__init__.py b/tornado/__init__.py index 29e87041..bf3e0f7e 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -25,5 +25,5 @@ from __future__ import absolute_import, division, print_function, with_statement # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "4.2" -version_info = (4, 2, 0, 0) +version = "4.3.dev1" +version_info = (4, 3, 0, -100) diff --git a/tornado/_locale_data.py b/tornado/_locale_data.py new file mode 100644 index 00000000..caf0f060 --- /dev/null +++ b/tornado/_locale_data.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# coding: utf-8 +# +# Copyright 2012 Facebook +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Data used by the tornado.locale module.""" + +# NOTE: This file is supposed to contain unicode strings, which is +# exactly what you'd get with e.g. u"Español" in most python versions. +# However, Python 3.2 doesn't support the u"" syntax, so we use a u() +# function instead. tornado.util.u cannot be used because it doesn't +# support non-ascii characters on python 2. +# When we drop support for Python 3.2, we can remove the parens +# and make these plain unicode strings. +from tornado.escape import to_unicode as u + +LOCALE_NAMES = { + "af_ZA": {"name_en": u("Afrikaans"), "name": u("Afrikaans")}, + "am_ET": {"name_en": u("Amharic"), "name": u("አማርኛ")}, + "ar_AR": {"name_en": u("Arabic"), "name": u("العربية")}, + "bg_BG": {"name_en": u("Bulgarian"), "name": u("Български")}, + "bn_IN": {"name_en": u("Bengali"), "name": u("বাংলা")}, + "bs_BA": {"name_en": u("Bosnian"), "name": u("Bosanski")}, + "ca_ES": {"name_en": u("Catalan"), "name": u("Català")}, + "cs_CZ": {"name_en": u("Czech"), "name": u("Čeština")}, + "cy_GB": {"name_en": u("Welsh"), "name": u("Cymraeg")}, + "da_DK": {"name_en": u("Danish"), "name": u("Dansk")}, + "de_DE": {"name_en": u("German"), "name": u("Deutsch")}, + "el_GR": {"name_en": u("Greek"), "name": u("Ελληνικά")}, + "en_GB": {"name_en": u("English (UK)"), "name": u("English (UK)")}, + "en_US": {"name_en": u("English (US)"), "name": u("English (US)")}, + "es_ES": {"name_en": u("Spanish (Spain)"), "name": u("Español (España)")}, + "es_LA": {"name_en": u("Spanish"), "name": u("Español")}, + "et_EE": {"name_en": u("Estonian"), "name": u("Eesti")}, + "eu_ES": {"name_en": u("Basque"), "name": u("Euskara")}, + "fa_IR": {"name_en": u("Persian"), "name": u("فارسی")}, + "fi_FI": {"name_en": u("Finnish"), "name": u("Suomi")}, + "fr_CA": {"name_en": u("French (Canada)"), "name": u("Français (Canada)")}, + "fr_FR": {"name_en": u("French"), "name": u("Français")}, + "ga_IE": {"name_en": u("Irish"), "name": u("Gaeilge")}, + "gl_ES": {"name_en": u("Galician"), "name": u("Galego")}, + "he_IL": {"name_en": u("Hebrew"), "name": u("עברית")}, + "hi_IN": {"name_en": u("Hindi"), "name": u("हिन्दी")}, + "hr_HR": {"name_en": u("Croatian"), "name": u("Hrvatski")}, + "hu_HU": {"name_en": u("Hungarian"), "name": u("Magyar")}, + "id_ID": {"name_en": u("Indonesian"), "name": u("Bahasa Indonesia")}, + "is_IS": {"name_en": u("Icelandic"), "name": u("Íslenska")}, + "it_IT": {"name_en": u("Italian"), "name": u("Italiano")}, + "ja_JP": {"name_en": u("Japanese"), "name": u("日本語")}, + "ko_KR": {"name_en": u("Korean"), "name": u("한국어")}, + "lt_LT": {"name_en": u("Lithuanian"), "name": u("Lietuvių")}, + "lv_LV": {"name_en": u("Latvian"), "name": u("Latviešu")}, + "mk_MK": {"name_en": u("Macedonian"), "name": u("Македонски")}, + "ml_IN": {"name_en": u("Malayalam"), "name": u("മലയാളം")}, + "ms_MY": {"name_en": u("Malay"), "name": u("Bahasa Melayu")}, + "nb_NO": {"name_en": u("Norwegian (bokmal)"), "name": u("Norsk (bokmål)")}, + "nl_NL": {"name_en": u("Dutch"), "name": u("Nederlands")}, + "nn_NO": {"name_en": u("Norwegian (nynorsk)"), "name": u("Norsk (nynorsk)")}, + "pa_IN": {"name_en": 
u("Punjabi"), "name": u("ਪੰਜਾਬੀ")}, + "pl_PL": {"name_en": u("Polish"), "name": u("Polski")}, + "pt_BR": {"name_en": u("Portuguese (Brazil)"), "name": u("Português (Brasil)")}, + "pt_PT": {"name_en": u("Portuguese (Portugal)"), "name": u("Português (Portugal)")}, + "ro_RO": {"name_en": u("Romanian"), "name": u("Română")}, + "ru_RU": {"name_en": u("Russian"), "name": u("Русский")}, + "sk_SK": {"name_en": u("Slovak"), "name": u("Slovenčina")}, + "sl_SI": {"name_en": u("Slovenian"), "name": u("Slovenščina")}, + "sq_AL": {"name_en": u("Albanian"), "name": u("Shqip")}, + "sr_RS": {"name_en": u("Serbian"), "name": u("Српски")}, + "sv_SE": {"name_en": u("Swedish"), "name": u("Svenska")}, + "sw_KE": {"name_en": u("Swahili"), "name": u("Kiswahili")}, + "ta_IN": {"name_en": u("Tamil"), "name": u("தமிழ்")}, + "te_IN": {"name_en": u("Telugu"), "name": u("తెలుగు")}, + "th_TH": {"name_en": u("Thai"), "name": u("ภาษาไทย")}, + "tl_PH": {"name_en": u("Filipino"), "name": u("Filipino")}, + "tr_TR": {"name_en": u("Turkish"), "name": u("Türkçe")}, + "uk_UA": {"name_en": u("Ukraini "), "name": u("Українська")}, + "vi_VN": {"name_en": u("Vietnamese"), "name": u("Tiếng Việt")}, + "zh_CN": {"name_en": u("Chinese (Simplified)"), "name": u("中文(简体)")}, + "zh_TW": {"name_en": u("Chinese (Traditional)"), "name": u("中文(繁體)")}, +} diff --git a/tornado/auth.py b/tornado/auth.py index 800b10af..32d0e226 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -621,6 +621,72 @@ class OAuth2Mixin(object): args.update(extra_params) return url_concat(url, args) + @_auth_return_future + def oauth2_request(self, url, callback, access_token=None, + post_args=None, **args): + """Fetches the given URL auth an OAuth2 access token. + + If the request is a POST, ``post_args`` should be provided. Query + string arguments should be given as keyword arguments. + + Example usage: + + ..testcode:: + + class MainHandler(tornado.web.RequestHandler, + tornado.auth.FacebookGraphMixin): + @tornado.web.authenticated + @tornado.gen.coroutine + def get(self): + new_entry = yield self.oauth2_request( + "https://graph.facebook.com/me/feed", + post_args={"message": "I am posting from my Tornado application!"}, + access_token=self.current_user["access_token"]) + + if not new_entry: + # Call failed; perhaps missing permission? + yield self.authorize_redirect() + return + self.finish("Posted a message!") + + .. testoutput:: + :hide: + + .. versionadded:: 4.3 + """ + all_args = {} + if access_token: + all_args["access_token"] = access_token + all_args.update(args) + + if all_args: + url += "?" + urllib_parse.urlencode(all_args) + callback = functools.partial(self._on_oauth2_request, callback) + http = self.get_auth_http_client() + if post_args is not None: + http.fetch(url, method="POST", body=urllib_parse.urlencode(post_args), + callback=callback) + else: + http.fetch(url, callback=callback) + + def _on_oauth2_request(self, future, response): + if response.error: + future.set_exception(AuthError("Error response %s fetching %s" % + (response.error, response.request.url))) + return + + future.set_result(escape.json_decode(response.body)) + + def get_auth_http_client(self): + """Returns the `.AsyncHTTPClient` instance to be used for auth requests. + + May be overridden by subclasses to use an HTTP client other than + the default. + + .. versionadded:: 4.3 + """ + return httpclient.AsyncHTTPClient() + class TwitterMixin(OAuthMixin): """Twitter OAuth authentication. 
@@ -791,12 +857,21 @@ class GoogleOAuth2Mixin(OAuth2Mixin): """ _OAUTH_AUTHORIZE_URL = "https://accounts.google.com/o/oauth2/auth" _OAUTH_ACCESS_TOKEN_URL = "https://accounts.google.com/o/oauth2/token" + _OAUTH_USERINFO_URL = "https://www.googleapis.com/oauth2/v1/userinfo" _OAUTH_NO_CALLBACKS = False _OAUTH_SETTINGS_KEY = 'google_oauth' @_auth_return_future def get_authenticated_user(self, redirect_uri, code, callback): - """Handles the login for the Google user, returning a user object. + """Handles the login for the Google user, returning an access token. + + The result is a dictionary containing an ``access_token`` field + ([among others](https://developers.google.com/identity/protocols/OAuth2WebServer#handlingtheresponse)). + Unlike other ``get_authenticated_user`` methods in this package, + this method does not return any additional information about the user. + The returned access token can be used with `OAuth2Mixin.oauth2_request` + to request additional information (perhaps from + ``https://www.googleapis.com/oauth2/v2/userinfo``) Example usage: @@ -807,10 +882,14 @@ class GoogleOAuth2Mixin(OAuth2Mixin): @tornado.gen.coroutine def get(self): if self.get_argument('code', False): - user = yield self.get_authenticated_user( + access = yield self.get_authenticated_user( redirect_uri='http://your.site.com/auth/google', code=self.get_argument('code')) - # Save the user with e.g. set_secure_cookie + user = yield self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=access["access_token"]) + # Save the user and access token with + # e.g. set_secure_cookie. else: yield self.authorize_redirect( redirect_uri='http://your.site.com/auth/google', @@ -845,14 +924,6 @@ class GoogleOAuth2Mixin(OAuth2Mixin): args = escape.json_decode(response.body) future.set_result(args) - def get_auth_http_client(self): - """Returns the `.AsyncHTTPClient` instance to be used for auth requests. - - May be overridden by subclasses to use an HTTP client other than - the default. - """ - return httpclient.AsyncHTTPClient() - class FacebookGraphMixin(OAuth2Mixin): """Facebook authentication using the new Graph API and OAuth2.""" @@ -983,40 +1054,16 @@ class FacebookGraphMixin(OAuth2Mixin): The given path is relative to ``self._FACEBOOK_BASE_URL``, by default "https://graph.facebook.com". + This method is a wrapper around `OAuth2Mixin.oauth2_request`; + the only difference is that this method takes a relative path, + while ``oauth2_request`` takes a complete url. + .. versionchanged:: 3.1 Added the ability to override ``self._FACEBOOK_BASE_URL``. """ url = self._FACEBOOK_BASE_URL + path - all_args = {} - if access_token: - all_args["access_token"] = access_token - all_args.update(args) - - if all_args: - url += "?" + urllib_parse.urlencode(all_args) - callback = functools.partial(self._on_facebook_request, callback) - http = self.get_auth_http_client() - if post_args is not None: - http.fetch(url, method="POST", body=urllib_parse.urlencode(post_args), - callback=callback) - else: - http.fetch(url, callback=callback) - - def _on_facebook_request(self, future, response): - if response.error: - future.set_exception(AuthError("Error response %s fetching %s" % - (response.error, response.request.url))) - return - - future.set_result(escape.json_decode(response.body)) - - def get_auth_http_client(self): - """Returns the `.AsyncHTTPClient` instance to be used for auth requests. - - May be overridden by subclasses to use an HTTP client other than - the default. 
- """ - return httpclient.AsyncHTTPClient() + return self.oauth2_request(url, callback, access_token, + post_args, **args) def _oauth_signature(consumer_token, method, url, parameters={}, token=None): diff --git a/tornado/autoreload.py b/tornado/autoreload.py index a52ddde4..1cbf26c6 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -289,11 +289,16 @@ def main(): runpy.run_module(module, run_name="__main__", alter_sys=True) elif mode == "script": with open(script) as f: + # Execute the script in our namespace instead of creating + # a new one so that something that tries to import __main__ + # (e.g. the unittest module) will see names defined in the + # script instead of just those defined in this module. global __file__ __file__ = script - # Use globals as our "locals" dictionary so that - # something that tries to import __main__ (e.g. the unittest - # module) will see the right things. + # If __package__ is defined, imports may be incorrectly + # interpreted as relative to this module. + global __package__ + del __package__ exec_in(f.read(), globals(), globals()) except SystemExit as e: logging.basicConfig() diff --git a/tornado/concurrent.py b/tornado/concurrent.py index 479ca022..f491bd09 100644 --- a/tornado/concurrent.py +++ b/tornado/concurrent.py @@ -16,16 +16,16 @@ """Utilities for working with threads and ``Futures``. ``Futures`` are a pattern for concurrent programming introduced in -Python 3.2 in the `concurrent.futures` package (this package has also -been backported to older versions of Python and can be installed with -``pip install futures``). Tornado will use `concurrent.futures.Future` if -it is available; otherwise it will use a compatible class defined in this -module. +Python 3.2 in the `concurrent.futures` package. This package defines +a mostly-compatible `Future` class designed for use from coroutines, +as well as some utility functions for interacting with the +`concurrent.futures` package. """ from __future__ import absolute_import, division, print_function, with_statement import functools import platform +import textwrap import traceback import sys @@ -170,6 +170,14 @@ class Future(object): self._callbacks = [] + # Implement the Python 3.5 Awaitable protocol if possible + # (we can't use return and yield together until py33). + if sys.version_info >= (3, 3): + exec(textwrap.dedent(""" + def __await__(self): + return (yield self) + """)) + def cancel(self): """Cancel the operation, if possible. diff --git a/tornado/gen.py b/tornado/gen.py index 91457689..78ddce75 100644 --- a/tornado/gen.py +++ b/tornado/gen.py @@ -80,8 +80,8 @@ import collections import functools import itertools import sys +import textwrap import types -import weakref from tornado.concurrent import Future, TracebackFuture, is_future, chain_future from tornado.ioloop import IOLoop @@ -98,6 +98,22 @@ except ImportError as e: singledispatch = None +try: + from collections.abc import Generator as GeneratorType # py35+ +except ImportError: + from types import GeneratorType + +try: + from inspect import isawaitable # py35+ +except ImportError: + def isawaitable(x): return False + +try: + import builtins # py3 +except ImportError: + import __builtin__ as builtins + + class KeyReuseError(Exception): pass @@ -202,6 +218,10 @@ def _make_coroutine_wrapper(func, replace_callback): argument, so we cannot simply implement ``@engine`` in terms of ``@coroutine``. """ + # On Python 3.5, set the coroutine flag on our generator, to allow it + # to be used with 'await'. 
+ if hasattr(types, 'coroutine'): + func = types.coroutine(func) @functools.wraps(func) def wrapper(*args, **kwargs): future = TracebackFuture() @@ -219,7 +239,7 @@ def _make_coroutine_wrapper(func, replace_callback): future.set_exc_info(sys.exc_info()) return future else: - if isinstance(result, types.GeneratorType): + if isinstance(result, GeneratorType): # Inline the first iteration of Runner.run. This lets us # avoid the cost of creating a Runner when the coroutine # never actually yields, which in turn allows us to @@ -318,7 +338,22 @@ class WaitIterator(object): arguments were used in the construction of the `WaitIterator`, ``current_index`` will use the corresponding keyword). + On Python 3.5, `WaitIterator` implements the async iterator + protocol, so it can be used with the ``async for`` statement (note + that in this version the entire iteration is aborted if any value + raises an exception, while the previous example can continue past + individual errors):: + + async for result in gen.WaitIterator(future1, future2): + print("Result {} received from {} at {}".format( + result, wait_iterator.current_future, + wait_iterator.current_index)) + .. versionadded:: 4.1 + + .. versionchanged:: 4.3 + Added ``async for`` support in Python 3.5. + """ def __init__(self, *args, **kwargs): if args and kwargs: @@ -375,6 +410,16 @@ class WaitIterator(object): self.current_future = done self.current_index = self._unfinished.pop(done) + @coroutine + def __aiter__(self): + raise Return(self) + + def __anext__(self): + if self.done(): + # Lookup by name to silence pyflakes on older versions. + raise getattr(builtins, 'StopAsyncIteration')() + return self.next() + class YieldPoint(object): """Base class for objects that may be yielded from the generator. @@ -609,11 +654,12 @@ class Multi(YieldPoint): def multi_future(children, quiet_exceptions=()): """Wait for multiple asynchronous futures in parallel. - Takes a list of ``Futures`` (but *not* other ``YieldPoints``) and returns - a new Future that resolves when all the other Futures are done. - If all the ``Futures`` succeeded, the returned Future's result is a list - of their results. If any failed, the returned Future raises the exception - of the first one to fail. + Takes a list of ``Futures`` or other yieldable objects (with the + exception of the legacy `.YieldPoint` interfaces) and returns a + new Future that resolves when all the other Futures are done. If + all the ``Futures`` succeeded, the returned Future's result is a + list of their results. If any failed, the returned Future raises + the exception of the first one to fail. Instead of a list, the argument may also be a dictionary whose values are Futures, in which case a parallel dictionary is returned mapping the same @@ -634,12 +680,16 @@ def multi_future(children, quiet_exceptions=()): If multiple ``Futures`` fail, any exceptions after the first (which is raised) will be logged. Added the ``quiet_exceptions`` argument to suppress this logging for selected exception types. + + .. versionchanged:: 4.3 + Added support for other yieldable objects. 
""" if isinstance(children, dict): keys = list(children.keys()) children = children.values() else: keys = None + children = list(map(convert_yielded, children)) assert all(is_future(i) for i in children) unfinished_children = set(children) @@ -1001,6 +1051,16 @@ def _argument_adapter(callback): callback(None) return wrapper +if sys.version_info >= (3, 3): + exec(textwrap.dedent(""" + @coroutine + def _wrap_awaitable(x): + return (yield from x) + """)) +else: + def _wrap_awaitable(x): + raise NotImplementedError() + def convert_yielded(yielded): """Convert a yielded object into a `.Future`. @@ -1022,6 +1082,8 @@ def convert_yielded(yielded): return multi_future(yielded) elif is_future(yielded): return yielded + elif isawaitable(yielded): + return _wrap_awaitable(yielded) else: raise BadYieldError("yielded unknown object %r" % (yielded,)) diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 5d6f4c21..6226ef7a 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -698,9 +698,8 @@ class HTTP1ServerConnection(object): # This exception was already logged. conn.close() return - except Exception as e: - if 1 != e.errno: - gen_log.error("Uncaught exception", exc_info=True) + except Exception: + gen_log.error("Uncaught exception", exc_info=True) conn.close() return if not ret: diff --git a/tornado/httpserver.py b/tornado/httpserver.py index 2dd04dd7..ff235fe4 100644 --- a/tornado/httpserver.py +++ b/tornado/httpserver.py @@ -188,7 +188,6 @@ class HTTPServer(TCPServer, Configurable, class _HTTPRequestContext(object): def __init__(self, stream, address, protocol): self.address = address - self.protocol = protocol # Save the socket's address family now so we know how to # interpret self.address even after the stream is closed # and its socket attribute replaced with None. diff --git a/tornado/httputil.py b/tornado/httputil.py index fa5e697c..747dfc40 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -242,6 +242,15 @@ class HTTPHeaders(dict): # effectively a deep copy. return self.copy() + def __reduce_ex__(self, v): + # We must override dict.__reduce_ex__ to pickle ourselves + # correctly. + return HTTPHeaders, (), list(self.get_all()) + + def __setstate__(self, state): + for k, v in state: + self.add(k, v) + class HTTPServerRequest(object): """A single HTTP request. diff --git a/tornado/ioloop.py b/tornado/ioloop.py index 67e33b52..87d4168e 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -249,7 +249,7 @@ class IOLoop(Configurable): if IOLoop.current(instance=False) is None: self.make_current() elif make_current: - if IOLoop.current(instance=False) is None: + if IOLoop.current(instance=False) is not None: raise RuntimeError("current IOLoop already exists") self.make_current() diff --git a/tornado/iostream.py b/tornado/iostream.py index f255dc1f..c5d3e2c9 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -89,8 +89,16 @@ class StreamClosedError(IOError): Note that the close callback is scheduled to run *after* other callbacks on the stream (to allow for buffered data to be processed), so you may see this error before you see the close callback. + + The ``real_error`` attribute contains the underlying error that caused + the stream to close (if any). + + .. versionchanged:: 4.3 + Added the ``real_error`` attribute. 
""" - pass + def __init__(self, real_error=None): + super(StreamClosedError, self).__init__('Stream is closed') + self.real_error = real_error class UnsatisfiableReadError(Exception): @@ -344,7 +352,8 @@ class BaseIOStream(object): try: self._try_inline_read() except: - future.add_done_callback(lambda f: f.exception()) + if future is not None: + future.add_done_callback(lambda f: f.exception()) raise return future @@ -446,13 +455,7 @@ class BaseIOStream(object): futures.append(self._ssl_connect_future) self._ssl_connect_future = None for future in futures: - if self._is_connreset(self.error): - # Treat connection resets as closed connections so - # clients only have to catch one kind of exception - # to avoid logging. - future.set_exception(StreamClosedError()) - else: - future.set_exception(self.error or StreamClosedError()) + future.set_exception(StreamClosedError(real_error=self.error)) if self._close_callback is not None: cb = self._close_callback self._close_callback = None @@ -645,8 +648,7 @@ class BaseIOStream(object): except UnsatisfiableReadError: raise except Exception as e: - if 1 != e.errno: - gen_log.warning("error on read", exc_info=True) + gen_log.warning("error on read: %s" % e) self.close(exc_info=True) return if pos is not None: @@ -876,7 +878,7 @@ class BaseIOStream(object): def _check_closed(self): if self.closed(): - raise StreamClosedError("Stream is closed") + raise StreamClosedError(real_error=self.error) def _maybe_add_error_listener(self): # This method is part of an optimization: to detect a connection that @@ -1149,6 +1151,15 @@ class IOStream(BaseIOStream): def close_callback(): if not future.done(): + # Note that unlike most Futures returned by IOStream, + # this one passes the underlying error through directly + # instead of wrapping everything in a StreamClosedError + # with a real_error attribute. This is because once the + # connection is established it's more helpful to raise + # the SSLError directly than to hide it behind a + # StreamClosedError (and the client is expecting SSL + # issues rather than network issues since this method is + # named start_tls). future.set_exception(ssl_stream.error or StreamClosedError()) if orig_close_callback is not None: orig_close_callback() @@ -1312,8 +1323,8 @@ class SSLIOStream(IOStream): return False try: ssl_match_hostname(peercert, self._server_hostname) - except SSLCertificateError: - gen_log.warning("Invalid SSL certificate", exc_info=True) + except SSLCertificateError as e: + gen_log.warning("Invalid SSL certificate: %s" % e) return False else: return True diff --git a/tornado/locale.py b/tornado/locale.py index a668765b..a1f2b81b 100644 --- a/tornado/locale.py +++ b/tornado/locale.py @@ -41,8 +41,10 @@ the `Locale.translate` method will simply return the original string. from __future__ import absolute_import, division, print_function, with_statement +import codecs import csv import datetime +from io import BytesIO import numbers import os import re @@ -51,13 +53,14 @@ from tornado import escape from tornado.log import gen_log from tornado.util import u +from tornado._locale_data import LOCALE_NAMES + _default_locale = "en_US" _translations = {} _supported_locales = frozenset([_default_locale]) _use_gettext = False CONTEXT_SEPARATOR = "\x04" - def get(*locale_codes): """Returns the closest match for the given locale codes. 
@@ -86,7 +89,7 @@ def set_default_locale(code): _supported_locales = frozenset(list(_translations.keys()) + [_default_locale]) -def load_translations(directory): +def load_translations(directory, encoding=None): """Loads translations from CSV files in a directory. Translations are strings with optional Python-style named placeholders @@ -106,12 +109,20 @@ def load_translations(directory): The file is read using the `csv` module in the default "excel" dialect. In this format there should not be spaces after the commas. + If no ``encoding`` parameter is given, the encoding will be + detected automatically (among UTF-8 and UTF-16) if the file + contains a byte-order marker (BOM), defaulting to UTF-8 if no BOM + is present. + Example translation ``es_LA.csv``:: "I love you","Te amo" "%(name)s liked this","A %(name)s les gustó esto","plural" "%(name)s liked this","A %(name)s le gustó esto","singular" + .. versionchanged:: 4.3 + Added ``encoding`` parameter. Added support for BOM-based encoding + detection, UTF-16, and UTF-8-with-BOM. """ global _translations global _supported_locales @@ -125,13 +136,29 @@ def load_translations(directory): os.path.join(directory, path)) continue full_path = os.path.join(directory, path) + if encoding is None: + # Try to autodetect encoding based on the BOM. + with open(full_path, 'rb') as f: + data = f.read(len(codecs.BOM_UTF16_LE)) + if data in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): + encoding = 'utf-16' + else: + # utf-8-sig is "utf-8 with optional BOM". It's discouraged + # in most cases but is common with CSV files because Excel + # cannot read utf-8 files without a BOM. + encoding = 'utf-8-sig' try: # python 3: csv.reader requires a file open in text mode. # Force utf8 to avoid dependence on $LANG environment variable. - f = open(full_path, "r", encoding="utf-8") + f = open(full_path, "r", encoding=encoding) except TypeError: - # python 2: files return byte strings, which are decoded below. - f = open(full_path, "r") + # python 2: csv can only handle byte strings (in ascii-compatible + # encodings), which we decode below. Transcode everything into + # utf8 before passing it to csv.reader. 
+ f = BytesIO() + with codecs.open(full_path, "r", encoding=encoding) as infile: + f.write(escape.utf8(infile.read())) + f.seek(0) _translations[locale] = {} for i, row in enumerate(csv.reader(f)): if not row or len(row) < 2: @@ -491,68 +518,3 @@ class GettextLocale(Locale): # Translation not found result = message return result - -LOCALE_NAMES = { - "af_ZA": {"name_en": u("Afrikaans"), "name": u("Afrikaans")}, - "am_ET": {"name_en": u("Amharic"), "name": u('\u12a0\u121b\u122d\u129b')}, - "ar_AR": {"name_en": u("Arabic"), "name": u("\u0627\u0644\u0639\u0631\u0628\u064a\u0629")}, - "bg_BG": {"name_en": u("Bulgarian"), "name": u("\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438")}, - "bn_IN": {"name_en": u("Bengali"), "name": u("\u09ac\u09be\u0982\u09b2\u09be")}, - "bs_BA": {"name_en": u("Bosnian"), "name": u("Bosanski")}, - "ca_ES": {"name_en": u("Catalan"), "name": u("Catal\xe0")}, - "cs_CZ": {"name_en": u("Czech"), "name": u("\u010ce\u0161tina")}, - "cy_GB": {"name_en": u("Welsh"), "name": u("Cymraeg")}, - "da_DK": {"name_en": u("Danish"), "name": u("Dansk")}, - "de_DE": {"name_en": u("German"), "name": u("Deutsch")}, - "el_GR": {"name_en": u("Greek"), "name": u("\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac")}, - "en_GB": {"name_en": u("English (UK)"), "name": u("English (UK)")}, - "en_US": {"name_en": u("English (US)"), "name": u("English (US)")}, - "es_ES": {"name_en": u("Spanish (Spain)"), "name": u("Espa\xf1ol (Espa\xf1a)")}, - "es_LA": {"name_en": u("Spanish"), "name": u("Espa\xf1ol")}, - "et_EE": {"name_en": u("Estonian"), "name": u("Eesti")}, - "eu_ES": {"name_en": u("Basque"), "name": u("Euskara")}, - "fa_IR": {"name_en": u("Persian"), "name": u("\u0641\u0627\u0631\u0633\u06cc")}, - "fi_FI": {"name_en": u("Finnish"), "name": u("Suomi")}, - "fr_CA": {"name_en": u("French (Canada)"), "name": u("Fran\xe7ais (Canada)")}, - "fr_FR": {"name_en": u("French"), "name": u("Fran\xe7ais")}, - "ga_IE": {"name_en": u("Irish"), "name": u("Gaeilge")}, - "gl_ES": {"name_en": u("Galician"), "name": u("Galego")}, - "he_IL": {"name_en": u("Hebrew"), "name": u("\u05e2\u05d1\u05e8\u05d9\u05ea")}, - "hi_IN": {"name_en": u("Hindi"), "name": u("\u0939\u093f\u0928\u094d\u0926\u0940")}, - "hr_HR": {"name_en": u("Croatian"), "name": u("Hrvatski")}, - "hu_HU": {"name_en": u("Hungarian"), "name": u("Magyar")}, - "id_ID": {"name_en": u("Indonesian"), "name": u("Bahasa Indonesia")}, - "is_IS": {"name_en": u("Icelandic"), "name": u("\xcdslenska")}, - "it_IT": {"name_en": u("Italian"), "name": u("Italiano")}, - "ja_JP": {"name_en": u("Japanese"), "name": u("\u65e5\u672c\u8a9e")}, - "ko_KR": {"name_en": u("Korean"), "name": u("\ud55c\uad6d\uc5b4")}, - "lt_LT": {"name_en": u("Lithuanian"), "name": u("Lietuvi\u0173")}, - "lv_LV": {"name_en": u("Latvian"), "name": u("Latvie\u0161u")}, - "mk_MK": {"name_en": u("Macedonian"), "name": u("\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438")}, - "ml_IN": {"name_en": u("Malayalam"), "name": u("\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02")}, - "ms_MY": {"name_en": u("Malay"), "name": u("Bahasa Melayu")}, - "nb_NO": {"name_en": u("Norwegian (bokmal)"), "name": u("Norsk (bokm\xe5l)")}, - "nl_NL": {"name_en": u("Dutch"), "name": u("Nederlands")}, - "nn_NO": {"name_en": u("Norwegian (nynorsk)"), "name": u("Norsk (nynorsk)")}, - "pa_IN": {"name_en": u("Punjabi"), "name": u("\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40")}, - "pl_PL": {"name_en": u("Polish"), "name": u("Polski")}, - "pt_BR": {"name_en": u("Portuguese (Brazil)"), "name": u("Portugu\xeas (Brasil)")}, - "pt_PT": 
{"name_en": u("Portuguese (Portugal)"), "name": u("Portugu\xeas (Portugal)")}, - "ro_RO": {"name_en": u("Romanian"), "name": u("Rom\xe2n\u0103")}, - "ru_RU": {"name_en": u("Russian"), "name": u("\u0420\u0443\u0441\u0441\u043a\u0438\u0439")}, - "sk_SK": {"name_en": u("Slovak"), "name": u("Sloven\u010dina")}, - "sl_SI": {"name_en": u("Slovenian"), "name": u("Sloven\u0161\u010dina")}, - "sq_AL": {"name_en": u("Albanian"), "name": u("Shqip")}, - "sr_RS": {"name_en": u("Serbian"), "name": u("\u0421\u0440\u043f\u0441\u043a\u0438")}, - "sv_SE": {"name_en": u("Swedish"), "name": u("Svenska")}, - "sw_KE": {"name_en": u("Swahili"), "name": u("Kiswahili")}, - "ta_IN": {"name_en": u("Tamil"), "name": u("\u0ba4\u0bae\u0bbf\u0bb4\u0bcd")}, - "te_IN": {"name_en": u("Telugu"), "name": u("\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41")}, - "th_TH": {"name_en": u("Thai"), "name": u("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22")}, - "tl_PH": {"name_en": u("Filipino"), "name": u("Filipino")}, - "tr_TR": {"name_en": u("Turkish"), "name": u("T\xfcrk\xe7e")}, - "uk_UA": {"name_en": u("Ukraini "), "name": u("\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430")}, - "vi_VN": {"name_en": u("Vietnamese"), "name": u("Ti\u1ebfng Vi\u1ec7t")}, - "zh_CN": {"name_en": u("Chinese (Simplified)"), "name": u("\u4e2d\u6587(\u7b80\u4f53)")}, - "zh_TW": {"name_en": u("Chinese (Traditional)"), "name": u("\u4e2d\u6587(\u7e41\u9ad4)")}, -} diff --git a/tornado/locks.py b/tornado/locks.py index 4b0bdb38..a1817728 100644 --- a/tornado/locks.py +++ b/tornado/locks.py @@ -12,13 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -""" -.. testsetup:: * - - from tornado import ioloop, gen, locks - io_loop = ioloop.IOLoop.current() -""" - from __future__ import absolute_import, division, print_function, with_statement __all__ = ['Condition', 'Event', 'Semaphore', 'BoundedSemaphore', 'Lock'] @@ -61,7 +54,11 @@ class Condition(_TimeoutGarbageCollector): .. testcode:: - condition = locks.Condition() + from tornado import gen + from tornado.ioloop import IOLoop + from tornado.locks import Condition + + condition = Condition() @gen.coroutine def waiter(): @@ -80,7 +77,7 @@ class Condition(_TimeoutGarbageCollector): # Yield two Futures; wait for waiter() and notifier() to finish. yield [waiter(), notifier()] - io_loop.run_sync(runner) + IOLoop.current().run_sync(runner) .. testoutput:: @@ -92,7 +89,7 @@ class Condition(_TimeoutGarbageCollector): `wait` takes an optional ``timeout`` argument, which is either an absolute timestamp:: - io_loop = ioloop.IOLoop.current() + io_loop = IOLoop.current() # Wait up to 1 second for a notification. yield condition.wait(timeout=io_loop.time() + 1) @@ -161,7 +158,11 @@ class Event(object): .. testcode:: - event = locks.Event() + from tornado import gen + from tornado.ioloop import IOLoop + from tornado.locks import Event + + event = Event() @gen.coroutine def waiter(): @@ -180,7 +181,7 @@ class Event(object): def runner(): yield [waiter(), setter()] - io_loop.run_sync(runner) + IOLoop.current().run_sync(runner) .. testoutput:: @@ -210,7 +211,7 @@ class Event(object): def clear(self): """Reset the internal flag to ``False``. - + Calls to `.wait` will block until `.set` is called. 
""" if self._future.done(): @@ -261,7 +262,8 @@ class Semaphore(_TimeoutGarbageCollector): from collections import deque - from tornado import gen, ioloop + from tornado import gen + from tornado.ioloop import IOLoop from tornado.concurrent import Future # Ensure reliable doctest output: resolve Futures one at a time. @@ -273,14 +275,18 @@ class Semaphore(_TimeoutGarbageCollector): yield gen.moment f.set_result(None) - ioloop.IOLoop.current().add_callback(simulator, list(futures_q)) + IOLoop.current().add_callback(simulator, list(futures_q)) def use_some_resource(): return futures_q.popleft() .. testcode:: semaphore - sem = locks.Semaphore(2) + from tornado import gen + from tornado.ioloop import IOLoop + from tornado.locks import Semaphore + + sem = Semaphore(2) @gen.coroutine def worker(worker_id): @@ -297,7 +303,7 @@ class Semaphore(_TimeoutGarbageCollector): # Join all workers. yield [worker(i) for i in range(3)] - io_loop.run_sync(runner) + IOLoop.current().run_sync(runner) .. testoutput:: semaphore @@ -321,6 +327,20 @@ class Semaphore(_TimeoutGarbageCollector): # Now the semaphore has been released. print("Worker %d is done" % worker_id) + + In Python 3.5, the semaphore itself can be used as an async context + manager:: + + async def worker(worker_id): + async with sem: + print("Worker %d is working" % worker_id) + await use_some_resource() + + # Now the semaphore has been released. + print("Worker %d is done" % worker_id) + + .. versionchanged:: 4.3 + Added ``async with`` support in Python 3.5. """ def __init__(self, value=1): super(Semaphore, self).__init__() @@ -383,6 +403,14 @@ class Semaphore(_TimeoutGarbageCollector): __exit__ = __enter__ + @gen.coroutine + def __aenter__(self): + yield self.acquire() + + @gen.coroutine + def __aexit__(self, typ, value, tb): + self.release() + class BoundedSemaphore(Semaphore): """A semaphore that prevents release() being called too many times. @@ -412,7 +440,7 @@ class Lock(object): Releasing an unlocked lock raises `RuntimeError`. - `acquire` supports the context manager protocol: + `acquire` supports the context manager protocol in all Python versions: >>> from tornado import gen, locks >>> lock = locks.Lock() @@ -424,6 +452,22 @@ class Lock(object): ... pass ... ... # Now the lock is released. + + In Python 3.5, `Lock` also supports the async context manager + protocol. Note that in this case there is no `acquire`, because + ``async with`` includes both the ``yield`` and the ``acquire`` + (just as it does with `threading.Lock`): + + >>> async def f(): # doctest: +SKIP + ... async with lock: + ... # Do something holding the lock. + ... pass + ... + ... # Now the lock is released. + + .. versionchanged:: 3.5 + Added ``async with`` support in Python 3.5. + """ def __init__(self): self._block = BoundedSemaphore(value=1) @@ -458,3 +502,11 @@ class Lock(object): "Use Lock like 'with (yield lock)', not like 'with lock'") __exit__ = __enter__ + + @gen.coroutine + def __aenter__(self): + yield self.acquire() + + @gen.coroutine + def __aexit__(self, typ, value, tb): + self.release() diff --git a/tornado/options.py b/tornado/options.py index 89a9e432..961bab15 100644 --- a/tornado/options.py +++ b/tornado/options.py @@ -68,6 +68,12 @@ instances to define isolated sets of options, such as for subcommands. from tornado.options import options, parse_command_line options.logging = None parse_command_line() + +.. 
versionchanged:: 4.3 + Dashes and underscores are fully interchangeable in option names; + options can be defined, set, and read with any mix of the two. + Dashes are typical for command-line usage while config files require + underscores. """ from __future__ import absolute_import, division, print_function, with_statement @@ -103,28 +109,38 @@ class OptionParser(object): self.define("help", type=bool, help="show this help information", callback=self._help_callback) + def _normalize_name(self, name): + return name.replace('_', '-') + def __getattr__(self, name): + name = self._normalize_name(name) if isinstance(self._options.get(name), _Option): return self._options[name].value() raise AttributeError("Unrecognized option %r" % name) def __setattr__(self, name, value): + name = self._normalize_name(name) if isinstance(self._options.get(name), _Option): return self._options[name].set(value) raise AttributeError("Unrecognized option %r" % name) def __iter__(self): - return iter(self._options) + return (opt.name for opt in self._options.values()) - def __getitem__(self, item): - return self._options[item].value() + def __contains__(self, name): + name = self._normalize_name(name) + return name in self._options + + def __getitem__(self, name): + name = self._normalize_name(name) + return self._options[name].value() def items(self): """A sequence of (name, value) pairs. .. versionadded:: 3.1 """ - return [(name, opt.value()) for name, opt in self._options.items()] + return [(opt.name, opt.value()) for name, opt in self._options.items()] def groups(self): """The set of option-groups created by ``define``. @@ -151,7 +167,7 @@ class OptionParser(object): .. versionadded:: 3.1 """ return dict( - (name, opt.value()) for name, opt in self._options.items() + (opt.name, opt.value()) for name, opt in self._options.items() if not group or group == opt.group_name) def as_dict(self): @@ -160,7 +176,7 @@ class OptionParser(object): .. 
versionadded:: 3.1 """ return dict( - (name, opt.value()) for name, opt in self._options.items()) + (opt.name, opt.value()) for name, opt in self._options.items()) def define(self, name, default=None, type=None, help=None, metavar=None, multiple=False, group=None, callback=None): @@ -223,11 +239,13 @@ class OptionParser(object): group_name = group else: group_name = file_name - self._options[name] = _Option(name, file_name=file_name, - default=default, type=type, help=help, - metavar=metavar, multiple=multiple, - group_name=group_name, - callback=callback) + normalized = self._normalize_name(name) + option = _Option(name, file_name=file_name, + default=default, type=type, help=help, + metavar=metavar, multiple=multiple, + group_name=group_name, + callback=callback) + self._options[normalized] = option def parse_command_line(self, args=None, final=True): """Parses all options given on the command line (defaults to @@ -255,7 +273,7 @@ class OptionParser(object): break arg = args[i].lstrip("-") name, equals, value = arg.partition("=") - name = name.replace('-', '_') + name = self._normalize_name(name) if name not in self._options: self.print_help() raise Error('Unrecognized command line option: %r' % name) @@ -287,8 +305,9 @@ class OptionParser(object): with open(path, 'rb') as f: exec_in(native_str(f.read()), config, config) for name in config: - if name in self._options: - self._options[name].set(config[name]) + normalized = self._normalize_name(name) + if normalized in self._options: + self._options[normalized].set(config[name]) if final: self.run_parse_callbacks() @@ -308,7 +327,8 @@ class OptionParser(object): print("\n%s options:\n" % os.path.normpath(filename), file=file) o.sort(key=lambda option: option.name) for option in o: - prefix = option.name + # Always print names with dashes in a CLI context. + prefix = self._normalize_name(option.name) if option.metavar: prefix += "=" + option.metavar description = option.help or "" diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 8f3dbff6..cfeadc98 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -35,7 +35,6 @@ class BaseAsyncIOLoop(IOLoop): super(BaseAsyncIOLoop, self).initialize(**kwargs) self.asyncio_loop = asyncio_loop self.close_loop = close_loop - self.asyncio_loop.call_soon(self.make_current) # Maps fd to (fileobj, handler function) pair (as in IOLoop.add_handler) self.handlers = {} # Set of fds listening for reads/writes @@ -105,8 +104,16 @@ class BaseAsyncIOLoop(IOLoop): handler_func(fileobj, events) def start(self): - self._setup_logging() - self.asyncio_loop.run_forever() + old_current = IOLoop.current(instance=False) + try: + self._setup_logging() + self.make_current() + self.asyncio_loop.run_forever() + finally: + if old_current is None: + IOLoop.clear_current() + else: + old_current.make_current() def stop(self): self.asyncio_loop.stop() @@ -140,8 +147,14 @@ class AsyncIOMainLoop(BaseAsyncIOLoop): class AsyncIOLoop(BaseAsyncIOLoop): def initialize(self, **kwargs): - super(AsyncIOLoop, self).initialize(asyncio.new_event_loop(), - close_loop=True, **kwargs) + loop = asyncio.new_event_loop() + try: + super(AsyncIOLoop, self).initialize(loop, close_loop=True, **kwargs) + except Exception: + # If initialize() does not succeed (taking ownership of the loop), + # we have to close it. 
+ loop.close() + raise def to_tornado_future(asyncio_future): diff --git a/tornado/platform/twisted.py b/tornado/platform/twisted.py index 7b3c8ca5..272955a8 100644 --- a/tornado/platform/twisted.py +++ b/tornado/platform/twisted.py @@ -423,7 +423,6 @@ class TwistedIOLoop(tornado.ioloop.IOLoop): reactor = twisted.internet.reactor self.reactor = reactor self.fds = {} - self.reactor.callWhenRunning(self.make_current) def close(self, all_fds=False): fds = self.fds @@ -477,8 +476,16 @@ class TwistedIOLoop(tornado.ioloop.IOLoop): del self.fds[fd] def start(self): - self._setup_logging() - self.reactor.run() + old_current = IOLoop.current(instance=False) + try: + self._setup_logging() + self.make_current() + self.reactor.run() + finally: + if old_current is None: + IOLoop.clear_current() + else: + old_current.make_current() def stop(self): self.reactor.crash() diff --git a/tornado/queues.py b/tornado/queues.py index 55ab4834..129b204e 100644 --- a/tornado/queues.py +++ b/tornado/queues.py @@ -44,6 +44,14 @@ def _set_timeout(future, timeout): lambda _: io_loop.remove_timeout(timeout_handle)) +class _QueueIterator(object): + def __init__(self, q): + self.q = q + + def __anext__(self): + return self.q.get() + + class Queue(object): """Coordinate producer and consumer coroutines. @@ -51,7 +59,11 @@ class Queue(object): .. testcode:: - q = queues.Queue(maxsize=2) + from tornado import gen + from tornado.ioloop import IOLoop + from tornado.queues import Queue + + q = Queue(maxsize=2) @gen.coroutine def consumer(): @@ -71,19 +83,20 @@ class Queue(object): @gen.coroutine def main(): - consumer() # Start consumer. + # Start consumer without waiting (since it never finishes). + IOLoop.current().spawn_callback(consumer) yield producer() # Wait for producer to put all tasks. yield q.join() # Wait for consumer to finish all tasks. print('Done') - io_loop.run_sync(main) + IOLoop.current().run_sync(main) .. testoutput:: Put 0 Put 1 - Put 2 Doing work on 0 + Put 2 Doing work on 1 Put 3 Doing work on 2 @@ -91,6 +104,21 @@ class Queue(object): Doing work on 3 Doing work on 4 Done + + In Python 3.5, `Queue` implements the async iterator protocol, so + ``consumer()`` could be rewritten as:: + + async def consumer(): + async for item in q: + try: + print('Doing work on %s' % item) + yield gen.sleep(0.01) + finally: + q.task_done() + + .. versionchanged:: 4.3 + Added ``async for`` support in Python 3.5. + """ def __init__(self, maxsize=0): if maxsize is None: @@ -215,6 +243,10 @@ class Queue(object): """ return self._finished.wait(timeout) + @gen.coroutine + def __aiter__(self): + return _QueueIterator(self) + # These three are overridable in subclasses. def _init(self): self._queue = collections.deque() @@ -266,7 +298,9 @@ class PriorityQueue(Queue): .. testcode:: - q = queues.PriorityQueue() + from tornado.queues import PriorityQueue + + q = PriorityQueue() q.put((1, 'medium-priority item')) q.put((0, 'high-priority item')) q.put((10, 'low-priority item')) @@ -296,7 +330,9 @@ class LifoQueue(Queue): .. 
testcode:: - q = queues.LifoQueue() + from tornado.queues import LifoQueue + + q = LifoQueue() q.put(3) q.put(2) q.put(1) diff --git a/tornado/simple_httpclient.py b/tornado/simple_httpclient.py index cf58e162..81ed8873 100644 --- a/tornado/simple_httpclient.py +++ b/tornado/simple_httpclient.py @@ -427,7 +427,10 @@ class _HTTPConnection(httputil.HTTPMessageDelegate): if self.final_callback: self._remove_timeout() if isinstance(value, StreamClosedError): - value = HTTPError(599, "Stream closed") + if value.real_error is None: + value = HTTPError(599, "Stream closed") + else: + value = value.real_error self._run_callback(HTTPResponse(self.request, 599, error=value, request_time=self.io_loop.time() - self.start_time, )) diff --git a/tornado/template.py b/tornado/template.py index 3882ed02..959b191d 100644 --- a/tornado/template.py +++ b/tornado/template.py @@ -186,6 +186,11 @@ with ``{# ... #}``. ``{% while *condition* %}... {% end %}`` Same as the python ``while`` statement. ``{% break %}`` and ``{% continue %}`` may be used inside the loop. + +``{% whitespace *mode* %}`` + Sets the whitespace mode for the remainder of the current file + (or until the next ``{% whitespace %}`` directive). See + `filter_whitespace` for available options. New in Tornado 4.3. """ from __future__ import absolute_import, division, print_function, with_statement @@ -210,6 +215,31 @@ _DEFAULT_AUTOESCAPE = "xhtml_escape" _UNSET = object() +def filter_whitespace(mode, text): + """Transform whitespace in ``text`` according to ``mode``. + + Available modes are: + + * ``all``: Return all whitespace unmodified. + * ``single``: Collapse consecutive whitespace with a single whitespace + character, preserving newlines. + * ``oneline``: Collapse all runs of whitespace into a single space + character, removing all newlines in the process. + + .. versionadded:: 4.3 + """ + if mode == 'all': + return text + elif mode == 'single': + text = re.sub(r"([\t ]+)", " ", text) + text = re.sub(r"(\s*\n\s*)", "\n", text) + return text + elif mode == 'oneline': + return re.sub(r"(\s+)", " ", text) + else: + raise Exception("invalid whitespace mode %s" % mode) + + class Template(object): """A compiled template. @@ -220,21 +250,58 @@ class Template(object): # autodoc because _UNSET looks like garbage. When changing # this signature update website/sphinx/template.rst too. def __init__(self, template_string, name="", loader=None, - compress_whitespace=None, autoescape=_UNSET): + compress_whitespace=_UNSET, autoescape=_UNSET, + whitespace=None): + """Construct a Template. + + :arg str template_string: the contents of the template file. + :arg str name: the filename from which the template was loaded + (used for error message). + :arg tornado.template.BaseLoader loader: the `~tornado.template.BaseLoader` responsible for this template, + used to resolve ``{% include %}`` and ``{% extend %}`` + directives. + :arg bool compress_whitespace: Deprecated since Tornado 4.3. + Equivalent to ``whitespace="single"`` if true and + ``whitespace="all"`` if false. + :arg str autoescape: The name of a function in the template + namespace, or ``None`` to disable escaping by default. + :arg str whitespace: A string specifying treatment of whitespace; + see `filter_whitespace` for options. + + .. versionchanged:: 4.3 + Added ``whitespace`` parameter; deprecated ``compress_whitespace``. 
+ """ self.name = name - if compress_whitespace is None: - compress_whitespace = name.endswith(".html") or \ - name.endswith(".js") + + if compress_whitespace is not _UNSET: + # Convert deprecated compress_whitespace (bool) to whitespace (str). + if whitespace is not None: + raise Exception("cannot set both whitespace and compress_whitespace") + whitespace = "single" if compress_whitespace else "all" + if whitespace is None: + if loader and loader.whitespace: + whitespace = loader.whitespace + else: + # Whitespace defaults by filename. + if name.endswith(".html") or name.endswith(".js"): + whitespace = "single" + else: + whitespace = "all" + # Validate the whitespace setting. + filter_whitespace(whitespace, '') + if autoescape is not _UNSET: self.autoescape = autoescape elif loader: self.autoescape = loader.autoescape else: self.autoescape = _DEFAULT_AUTOESCAPE + self.namespace = loader.namespace if loader else {} - reader = _TemplateReader(name, escape.native_str(template_string)) + reader = _TemplateReader(name, escape.native_str(template_string), + whitespace) self.file = _File(self, _parse(reader, self)) - self.code = self._generate_python(loader, compress_whitespace) + self.code = self._generate_python(loader) self.loader = loader try: # Under python2.5, the fake filename used here must match @@ -277,7 +344,7 @@ class Template(object): linecache.clearcache() return execute() - def _generate_python(self, loader, compress_whitespace): + def _generate_python(self, loader): buffer = StringIO() try: # named_blocks maps from names to _NamedBlock objects @@ -286,8 +353,8 @@ class Template(object): ancestors.reverse() for ancestor in ancestors: ancestor.find_named_blocks(loader, named_blocks) - writer = _CodeWriter(buffer, named_blocks, loader, ancestors[0].template, - compress_whitespace) + writer = _CodeWriter(buffer, named_blocks, loader, + ancestors[0].template) ancestors[0].generate(writer) return buffer.getvalue() finally: @@ -312,12 +379,26 @@ class BaseLoader(object): ``{% extends %}`` and ``{% include %}``. The loader caches all templates after they are loaded the first time. """ - def __init__(self, autoescape=_DEFAULT_AUTOESCAPE, namespace=None): - """``autoescape`` must be either None or a string naming a function - in the template namespace, such as "xhtml_escape". + def __init__(self, autoescape=_DEFAULT_AUTOESCAPE, namespace=None, + whitespace=None): + """Construct a template loader. + + :arg str autoescape: The name of a function in the template + namespace, such as "xhtml_escape", or ``None`` to disable + autoescaping by default. + :arg dict namespace: A dictionary to be added to the default template + namespace, or ``None``. + :arg str whitespace: A string specifying default behavior for + whitespace in templates; see `filter_whitespace` for options. + Default is "single" for files ending in ".html" and ".js" and + "all" for other files. + + .. versionchanged:: 4.3 + Added ``whitespace`` parameter. """ self.autoescape = autoescape self.namespace = namespace or {} + self.whitespace = whitespace self.templates = {} # self.lock protects self.templates. It's a reentrant lock # because templates may load other templates via `include` or @@ -558,37 +639,49 @@ class _Module(_Expression): class _Text(_Node): - def __init__(self, value, line): + def __init__(self, value, line, whitespace): self.value = value self.line = line + self.whitespace = whitespace def generate(self, writer): value = self.value - # Compress lots of white space to a single character. 
If the whitespace
-        # breaks a line, have it continue to break a line, but just with a
-        # single \n character
-        if writer.compress_whitespace and "<pre>" not in value:
-            value = re.sub(r"([\t ]+)", " ", value)
-            value = re.sub(r"(\s*\n\s*)", "\n", value)
+        # Compress whitespace if requested, with a crude heuristic to avoid
+        # altering preformatted whitespace.
+        if "<pre>" not in value:
+            value = filter_whitespace(self.whitespace, value)
     
             if value:
                 writer.write_line('_tt_append(%r)' % escape.utf8(value), self.line)
     
     
     class ParseError(Exception):
    -    """Raised for template syntax errors."""
    -    pass
    +    """Raised for template syntax errors.
    +
    +    ``ParseError`` instances have ``filename`` and ``lineno`` attributes
    +    indicating the position of the error.
    +
    +    .. versionchanged:: 4.3
    +       Added ``filename`` and ``lineno`` attributes.
    +    """
    +    def __init__(self, message, filename, lineno):
    +        self.message = message
    +        # The names "filename" and "lineno" are chosen for consistency
    +        # with python SyntaxError.
    +        self.filename = filename
    +        self.lineno = lineno
    +
    +    def __str__(self):
    +        return '%s at %s:%d' % (self.message, self.filename, self.lineno)
     
     
     class _CodeWriter(object):
    -    def __init__(self, file, named_blocks, loader, current_template,
    -                 compress_whitespace):
    +    def __init__(self, file, named_blocks, loader, current_template):
             self.file = file
             self.named_blocks = named_blocks
             self.loader = loader
             self.current_template = current_template
    -        self.compress_whitespace = compress_whitespace
             self.apply_counter = 0
             self.include_stack = []
             self._indent = 0
    @@ -633,9 +726,10 @@ class _CodeWriter(object):
     
     
     class _TemplateReader(object):
    -    def __init__(self, name, text):
    +    def __init__(self, name, text, whitespace):
             self.name = name
             self.text = text
    +        self.whitespace = whitespace
             self.line = 1
             self.pos = 0
     
    @@ -687,6 +781,9 @@ class _TemplateReader(object):
         def __str__(self):
             return self.text[self.pos:]
     
    +    def raise_parse_error(self, msg):
    +        raise ParseError(msg, self.name, self.line)
    +
     
     def _format_code(code):
         lines = code.splitlines()
    @@ -704,9 +801,10 @@ def _parse(reader, template, in_block=None, in_loop=None):
                 if curly == -1 or curly + 1 == reader.remaining():
                     # EOF
                     if in_block:
    -                    raise ParseError("Missing {%% end %%} block for %s" %
    -                                     in_block)
    -                body.chunks.append(_Text(reader.consume(), reader.line))
    +                    reader.raise_parse_error(
    +                        "Missing {%% end %%} block for %s" % in_block)
    +                body.chunks.append(_Text(reader.consume(), reader.line,
    +                                         reader.whitespace))
                     return body
                 # If the first curly brace is not the start of a special token,
                 # start searching from the character after it
    @@ -725,7 +823,8 @@ def _parse(reader, template, in_block=None, in_loop=None):
             # Append any text before the special token
             if curly > 0:
                 cons = reader.consume(curly)
    -            body.chunks.append(_Text(cons, reader.line))
    +            body.chunks.append(_Text(cons, reader.line,
    +                                     reader.whitespace))
     
             start_brace = reader.consume(2)
             line = reader.line
    @@ -736,14 +835,15 @@ def _parse(reader, template, in_block=None, in_loop=None):
             # which also use double braces.
             if reader.remaining() and reader[0] == "!":
                 reader.consume(1)
    -            body.chunks.append(_Text(start_brace, line))
    +            body.chunks.append(_Text(start_brace, line,
    +                                     reader.whitespace))
                 continue
     
             # Comment
             if start_brace == "{#":
                 end = reader.find("#}")
                 if end == -1:
    -                raise ParseError("Missing end expression #} on line %d" % line)
    +                reader.raise_parse_error("Missing end comment #}")
                 contents = reader.consume(end).strip()
                 reader.consume(2)
                 continue
    @@ -752,11 +852,11 @@ def _parse(reader, template, in_block=None, in_loop=None):
             if start_brace == "{{":
                 end = reader.find("}}")
                 if end == -1:
    -                raise ParseError("Missing end expression }} on line %d" % line)
    +                reader.raise_parse_error("Missing end expression }}")
                 contents = reader.consume(end).strip()
                 reader.consume(2)
                 if not contents:
    -                raise ParseError("Empty expression on line %d" % line)
    +                reader.raise_parse_error("Empty expression")
                 body.chunks.append(_Expression(contents, line))
                 continue
     
    @@ -764,11 +864,11 @@ def _parse(reader, template, in_block=None, in_loop=None):
             assert start_brace == "{%", start_brace
             end = reader.find("%}")
             if end == -1:
    -            raise ParseError("Missing end block %%} on line %d" % line)
    +            reader.raise_parse_error("Missing end block %}")
             contents = reader.consume(end).strip()
             reader.consume(2)
             if not contents:
    -            raise ParseError("Empty block tag ({%% %%}) on line %d" % line)
    +            reader.raise_parse_error("Empty block tag ({% %})")
     
             operator, space, suffix = contents.partition(" ")
             suffix = suffix.strip()
    @@ -783,40 +883,43 @@ def _parse(reader, template, in_block=None, in_loop=None):
             allowed_parents = intermediate_blocks.get(operator)
             if allowed_parents is not None:
                 if not in_block:
    -                raise ParseError("%s outside %s block" %
    -                                 (operator, allowed_parents))
    +                reader.raise_parse_error("%s outside %s block" %
    +                                         (operator, allowed_parents))
                 if in_block not in allowed_parents:
    -                raise ParseError("%s block cannot be attached to %s block" % (operator, in_block))
    +                reader.raise_parse_error(
    +                    "%s block cannot be attached to %s block" %
    +                    (operator, in_block))
                 body.chunks.append(_IntermediateControlBlock(contents, line))
                 continue
     
             # End tag
             elif operator == "end":
                 if not in_block:
    -                raise ParseError("Extra {%% end %%} block on line %d" % line)
    +                reader.raise_parse_error("Extra {% end %} block")
                 return body
     
             elif operator in ("extends", "include", "set", "import", "from",
    -                          "comment", "autoescape", "raw", "module"):
    +                          "comment", "autoescape", "whitespace", "raw",
    +                          "module"):
                 if operator == "comment":
                     continue
                 if operator == "extends":
                     suffix = suffix.strip('"').strip("'")
                     if not suffix:
    -                    raise ParseError("extends missing file path on line %d" % line)
    +                    reader.raise_parse_error("extends missing file path")
                     block = _ExtendsBlock(suffix)
                 elif operator in ("import", "from"):
                     if not suffix:
    -                    raise ParseError("import missing statement on line %d" % line)
    +                    reader.raise_parse_error("import missing statement")
                     block = _Statement(contents, line)
                 elif operator == "include":
                     suffix = suffix.strip('"').strip("'")
                     if not suffix:
    -                    raise ParseError("include missing file path on line %d" % line)
    +                    reader.raise_parse_error("include missing file path")
                     block = _IncludeBlock(suffix, reader, line)
                 elif operator == "set":
                     if not suffix:
    -                    raise ParseError("set missing statement on line %d" % line)
    +                    reader.raise_parse_error("set missing statement")
                     block = _Statement(suffix, line)
                 elif operator == "autoescape":
                     fn = suffix.strip()
    @@ -824,6 +927,12 @@ def _parse(reader, template, in_block=None, in_loop=None):
                         fn = None
                     template.autoescape = fn
                     continue
    +            elif operator == "whitespace":
    +                mode = suffix.strip()
    +                # Validate the selected mode
    +                filter_whitespace(mode, '')
    +                reader.whitespace = mode
    +                continue
                 elif operator == "raw":
                     block = _Expression(suffix, line, raw=True)
                 elif operator == "module":
    @@ -844,11 +953,11 @@ def _parse(reader, template, in_block=None, in_loop=None):
     
                 if operator == "apply":
                     if not suffix:
    -                    raise ParseError("apply missing method name on line %d" % line)
    +                    reader.raise_parse_error("apply missing method name")
                     block = _ApplyBlock(suffix, line, block_body)
                 elif operator == "block":
                     if not suffix:
    -                    raise ParseError("block missing name on line %d" % line)
    +                    reader.raise_parse_error("block missing name")
                     block = _NamedBlock(suffix, block_body, template, line)
                 else:
                     block = _ControlBlock(contents, line, block_body)
    @@ -857,9 +966,10 @@ def _parse(reader, template, in_block=None, in_loop=None):
     
             elif operator in ("break", "continue"):
                 if not in_loop:
    -                raise ParseError("%s outside %s block" % (operator, set(["for", "while"])))
    +                reader.raise_parse_error("%s outside %s block" %
    +                                         (operator, set(["for", "while"])))
                 body.chunks.append(_Statement(contents, line))
                 continue
     
             else:
    -            raise ParseError("unknown operator: %r" % operator)
    +            reader.raise_parse_error("unknown operator: %r" % operator)
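
[editor's sketch] The template changes above introduce ``filter_whitespace``, a
``whitespace`` parameter on ``Template``, and ``filename``/``lineno`` attributes on
``ParseError``. A minimal sketch exercising them, assuming the patched
``tornado.template`` from this diff is importable (the strings and modes are
illustrative only)::

    from tornado.template import Template, ParseError, filter_whitespace

    # The three modes validated by filter_whitespace():
    print(repr(filter_whitespace('oneline', 'a\n  b\tc')))   # 'a b c'
    print(repr(filter_whitespace('single', 'a  b\n\n  c')))  # 'a b\nc'
    print(repr(filter_whitespace('all', 'a  b\n\n  c')))     # unchanged

    # whitespace= overrides the filename-based default ("single" for
    # .html/.js templates, "all" for everything else).
    t = Template('{% for i in items %}\n  {{ i }}\n{% end %}',
                 whitespace='oneline')
    print(repr(t.generate(items=[1, 2])))

    # ParseError now carries the position of the error.
    try:
        Template('{% if x %}no end tag')
    except ParseError as e:
        print(e.filename, e.lineno)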
    diff --git a/tornado/testing.py b/tornado/testing.py
    index 93f0dbe1..f3cfb773 100644
    --- a/tornado/testing.py
    +++ b/tornado/testing.py
    @@ -47,6 +47,11 @@ try:
     except ImportError:
         from io import StringIO  # py3
     
    +try:
    +    from collections.abc import Generator as GeneratorType  # py35+
    +except ImportError:
    +    from types import GeneratorType
    +
     # Tornado's own test suite requires the updated unittest module
     # (either py27+ or unittest2) so tornado.test.util enforces
     # this requirement, but for other users of tornado.testing we want
    @@ -118,7 +123,7 @@ class _TestMethodWrapper(object):
     
         def __call__(self, *args, **kwargs):
             result = self.orig_method(*args, **kwargs)
    -        if isinstance(result, types.GeneratorType):
    +        if isinstance(result, GeneratorType):
                 raise TypeError("Generator test methods should be decorated with "
                                 "tornado.testing.gen_test")
             elif result is not None:
    @@ -331,20 +336,29 @@ class AsyncHTTPTestCase(AsyncTestCase):
         Tests will typically use the provided ``self.http_client`` to fetch
         URLs from this server.
     
    -    Example::
    +    Example, assuming the "Hello, world" example from the user guide is in
    +    ``hello.py``::
     
    -        class MyHTTPTest(AsyncHTTPTestCase):
    +        import hello
    +
    +        class TestHelloApp(AsyncHTTPTestCase):
                 def get_app(self):
    -                return Application([('/', MyHandler)...])
    +                return hello.make_app()
     
                 def test_homepage(self):
    -                # The following two lines are equivalent to
    -                #   response = self.fetch('/')
    -                # but are shown in full here to demonstrate explicit use
    -                # of self.stop and self.wait.
    -                self.http_client.fetch(self.get_url('/'), self.stop)
    -                response = self.wait()
    -                # test contents of response
    +                response = self.fetch('/')
    +                self.assertEqual(response.code, 200)
    +                self.assertEqual(response.body, 'Hello, world')
    +
    +    That call to ``self.fetch()`` is equivalent to ::
    +
    +        self.http_client.fetch(self.get_url('/'), self.stop)
    +        response = self.wait()
    +
    +    which illustrates how AsyncTestCase can turn an asynchronous operation,
    +    like ``http_client.fetch()``, into a synchronous operation. If you need
    +    to do other asynchronous operations in tests, you'll probably need to use
    +    ``stop()`` and ``wait()`` yourself.
         """
         def setUp(self):
             super(AsyncHTTPTestCase, self).setUp()
    @@ -485,7 +499,7 @@ def gen_test(func=None, timeout=None):
             @functools.wraps(f)
             def pre_coroutine(self, *args, **kwargs):
                 result = f(self, *args, **kwargs)
    -            if isinstance(result, types.GeneratorType):
    +            if isinstance(result, GeneratorType):
                     self._test_generator = result
                 else:
                     self._test_generator = None
    @@ -575,10 +589,16 @@ class ExpectLog(logging.Filter):
         Useful to make tests of error conditions less noisy, while still
         leaving unexpected log entries visible.  *Not thread safe.*
     
    +    The attribute ``logged_stack`` is set to true if any exception
    +    stack trace was logged.
    +
         Usage::
     
             with ExpectLog('tornado.application', "Uncaught exception"):
                 error_response = self.fetch("/some_page")
    +
    +    .. versionchanged:: 4.3
    +       Added the ``logged_stack`` attribute.
         """
         def __init__(self, logger, regex, required=True):
             """Constructs an ExpectLog context manager.
    @@ -596,8 +616,11 @@ class ExpectLog(logging.Filter):
             self.regex = re.compile(regex)
             self.required = required
             self.matched = False
    +        self.logged_stack = False
     
         def filter(self, record):
    +        if record.exc_info:
    +            self.logged_stack = True
             message = record.getMessage()
             if self.regex.match(message):
                 self.matched = True
    @@ -606,6 +629,7 @@ class ExpectLog(logging.Filter):
     
         def __enter__(self):
             self.logger.addFilter(self)
    +        return self
     
         def __exit__(self, typ, value, tb):
             self.logger.removeFilter(self)
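
[editor's sketch] Because ``__enter__`` now returns the context manager, a test
can assert on the new ``logged_stack`` attribute directly. A hedged sketch of
that usage; the logger name and messages are placeholders, not from the diff::

    import logging
    from tornado.testing import ExpectLog

    logging.basicConfig()
    app_log = logging.getLogger('myapp')

    with ExpectLog('myapp', 'Uncaught exception') as expect_log:
        try:
            raise ValueError('boom')
        except ValueError:
            # exc_info=True attaches a stack trace to the record.
            app_log.error('Uncaught exception in handler', exc_info=True)

    assert expect_log.matched
    assert expect_log.logged_stack  # a stack trace was logged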
    diff --git a/tornado/util.py b/tornado/util.py
    index 606ced19..ea4da876 100644
    --- a/tornado/util.py
    +++ b/tornado/util.py
    @@ -13,7 +13,6 @@ and `.Resolver`.
     from __future__ import absolute_import, division, print_function, with_statement
     
     import array
    -import inspect
     import os
     import sys
     import zlib
    @@ -24,6 +23,13 @@ try:
     except NameError:
         xrange = range  # py3
     
    +# inspect.getargspec() raises DeprecationWarnings in Python 3.5.
    +# The two functions have compatible interfaces for the parts we need.
    +try:
    +    from inspect import getfullargspec as getargspec  # py3
    +except ImportError:
    +    from inspect import getargspec  # py2
    +
     
     class ObjectDict(dict):
         """Makes a dictionary behave like an object, with attribute-style access.
    @@ -284,7 +290,7 @@ class ArgReplacer(object):
         def __init__(self, func, name):
             self.name = name
             try:
    -            self.arg_pos = inspect.getargspec(func).args.index(self.name)
    +            self.arg_pos = getargspec(func).args.index(self.name)
             except ValueError:
                 # Not a positional parameter
                 self.arg_pos = None
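
[editor's sketch] The shim above works because ``getfullargspec`` (py3) and
``getargspec`` (py2) agree on the ``.args`` list, which is the only part
``ArgReplacer`` uses. A standalone sketch; the sample function is illustrative::

    try:
        from inspect import getfullargspec as getargspec  # py3
    except ImportError:
        from inspect import getargspec  # py2

    def fetch(url, callback=None, raise_error=True):
        pass

    # ArgReplacer-style lookup: position of a named positional parameter.
    print(getargspec(fetch).args.index('callback'))  # -> 1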
    diff --git a/tornado/web.py b/tornado/web.py
    index aa5d02e1..b6197ce7 100644
    --- a/tornado/web.py
    +++ b/tornado/web.py
    @@ -362,10 +362,8 @@ class RequestHandler(object):
             else:
                 raise TypeError("Unsupported header value %r" % value)
             # If \n is allowed into the header, it is possible to inject
    -        # additional headers or split the request. Also cap length to
    -        # prevent obviously erroneous values.
    -        if (len(value) > 4000 or
    -                RequestHandler._INVALID_HEADER_CHAR_RE.search(value)):
    +        # additional headers or split the request.
    +        if RequestHandler._INVALID_HEADER_CHAR_RE.search(value):
                 raise ValueError("Unsafe header value %r", value)
             return value
     
    @@ -694,10 +692,7 @@ class RequestHandler(object):
                     message += ". Lists not accepted for security reasons; see http://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write"
                 raise TypeError(message)
             if isinstance(chunk, dict):
    -            if 'unwrap_json' in chunk:
    -                chunk = chunk['unwrap_json']
    -            else:
    -                chunk = escape.json_encode(chunk)
    +            chunk = escape.json_encode(chunk)
                 self.set_header("Content-Type", "application/json; charset=UTF-8")
             chunk = utf8(chunk)
             self._write_buffer.append(chunk)
    @@ -841,8 +836,9 @@ class RequestHandler(object):
     
             May be overridden by subclasses.  By default returns a
             directory-based loader on the given path, using the
    -        ``autoescape`` application setting.  If a ``template_loader``
    -        application setting is supplied, uses that instead.
    +        ``autoescape`` and ``template_whitespace`` application
    +        settings.  If a ``template_loader`` application setting is
    +        supplied, uses that instead.
             """
             settings = self.application.settings
             if "template_loader" in settings:
    @@ -852,6 +848,8 @@ class RequestHandler(object):
                 # autoescape=None means "no escaping", so we have to be sure
                 # to only pass this kwarg if the user asked for it.
                 kwargs["autoescape"] = settings["autoescape"]
    +        if "template_whitespace" in settings:
    +            kwargs["whitespace"] = settings["template_whitespace"]
             return template.Loader(template_path, **kwargs)
     
         def flush(self, include_footers=False, callback=None):
    @@ -1391,10 +1389,8 @@ class RequestHandler(object):
                     self.check_xsrf_cookie()
     
                 result = self.prepare()
    -            if is_future(result):
    -                result = yield result
                 if result is not None:
    -                raise TypeError("Expected None, got %r" % result)
    +                result = yield result
                 if self._prepared_future is not None:
                     # Tell the Application we've finished with prepare()
                     # and are ready for the body to arrive.
    @@ -1414,10 +1410,8 @@ class RequestHandler(object):
     
                 method = getattr(self, self.request.method.lower())
                 result = method(*self.path_args, **self.path_kwargs)
    -            if is_future(result):
    -                result = yield result
                 if result is not None:
    -                raise TypeError("Expected None, got %r" % result)
    +                result = yield result
                 if self._auto_finish and not self._finished:
                     self.finish()
             except Exception as e:
    @@ -2151,6 +2145,11 @@ class StaticFileHandler(RequestHandler):
         the ``path`` argument to the get() method (different than the constructor
         argument above); see `URLSpec` for details.
     
    +    To serve a file like ``index.html`` automatically when a directory is
    +    requested, set ``static_handler_args=dict(default_filename="index.html")``
    +    in your application settings, or add ``default_filename`` as an initializer
    +    argument for your ``StaticFileHandler``.
    +
         To maximize the effectiveness of browser caching, this class supports
         versioned urls (by default using the argument ``?v=``).  If a version
         is given, we instruct the browser to cache this file indefinitely.
    @@ -2162,8 +2161,7 @@ class StaticFileHandler(RequestHandler):
         a dedicated static file server (such as nginx or Apache).  We support
         the HTTP ``Accept-Ranges`` mechanism to return partial content (because
         some browsers require this functionality to be present to seek in
    -    HTML5 audio or video), but this handler should not be used with
    -    files that are too large to fit comfortably in memory.
    +    HTML5 audio or video).
     
         **Subclassing notes**
     
    @@ -2379,9 +2377,13 @@ class StaticFileHandler(RequestHandler):
     
             .. versionadded:: 3.1
             """
    -        root = os.path.abspath(root)
    -        # os.path.abspath strips a trailing /
    -        # it needs to be temporarily added back for requests to root/
    +        # os.path.abspath strips a trailing /.
    +        # We must add it back to `root` so that we only match files
    +        # in a directory named `root` instead of files starting with
    +        # that prefix.
    +        root = os.path.abspath(root) + os.path.sep
    +        # The trailing slash also needs to be temporarily added back
+        # to the requested path so a request to root/ will match.
             if not (absolute_path + os.path.sep).startswith(root):
                 raise HTTPError(403, "%s is not in root static directory",
                                 self.path)
    @@ -2493,7 +2495,19 @@ class StaticFileHandler(RequestHandler):
             .. versionadded:: 3.1
             """
             mime_type, encoding = mimetypes.guess_type(self.absolute_path)
    -        return mime_type
    +        # per RFC 6713, use the appropriate type for a gzip compressed file
    +        if encoding == "gzip":
    +            return "application/gzip"
    +        # As of 2015-07-21 there is no bzip2 encoding defined at
    +        # http://www.iana.org/assignments/media-types/media-types.xhtml
    +        # So for that (and any other encoding), use octet-stream.
    +        elif encoding is not None:
    +            return "application/octet-stream"
    +        elif mime_type is not None:
    +            return mime_type
    +        # if mime_type not detected, use application/octet-stream
    +        else:
    +            return "application/octet-stream"
     
         def set_extra_headers(self, path):
             """For subclass to add extra headers to the response"""
    @@ -2644,7 +2658,16 @@ class GZipContentEncoding(OutputTransform):
         CONTENT_TYPES = set(["application/javascript", "application/x-javascript",
                              "application/xml", "application/atom+xml",
                              "application/json", "application/xhtml+xml"])
    -    MIN_LENGTH = 5
    +    # Python's GzipFile defaults to level 9, while most other gzip
    +    # tools (including gzip itself) default to 6, which is probably a
    +    # better CPU/size tradeoff.
    +    GZIP_LEVEL = 6
    +    # Responses that are too short are unlikely to benefit from gzipping
    +    # after considering the "Content-Encoding: gzip" header and the header
    +    # inside the gzip encoding.
    +    # Note that responses written in multiple chunks will be compressed
    +    # regardless of size.
    +    MIN_LENGTH = 1024
     
         def __init__(self, request):
             self._gzipping = "gzip" in request.headers.get("Accept-Encoding", "")
    @@ -2665,7 +2688,8 @@ class GZipContentEncoding(OutputTransform):
             if self._gzipping:
                 headers["Content-Encoding"] = "gzip"
                 self._gzip_value = BytesIO()
    -            self._gzip_file = gzip.GzipFile(mode="w", fileobj=self._gzip_value)
    +            self._gzip_file = gzip.GzipFile(mode="w", fileobj=self._gzip_value,
    +                                            compresslevel=self.GZIP_LEVEL)
                 chunk = self.transform_chunk(chunk, finishing)
                 if "Content-Length" in headers:
                     # The original content length is no longer correct.
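
[editor's sketch] Two of the web.py changes above are driven by application
settings: ``template_whitespace`` (forwarded to ``template.Loader``) and the
documented ``static_handler_args``/``default_filename`` combination. A sketch of
how they might be wired together; the paths and handler are placeholders::

    import tornado.web

    class MainHandler(tornado.web.RequestHandler):
        def get(self):
            self.render('index.html')

    application = tornado.web.Application(
        [(r'/', MainHandler)],
        template_path='templates',
        template_whitespace='single',  # becomes Loader(whitespace='single')
        static_path='static',
        # Serve static/index.html when a bare directory is requested.
        static_handler_args=dict(default_filename='index.html'),
    )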
    diff --git a/tornado/websocket.py b/tornado/websocket.py
    index 2f57b990..d688295f 100644
    --- a/tornado/websocket.py
    +++ b/tornado/websocket.py
    @@ -444,7 +444,8 @@ class _PerMessageDeflateCompressor(object):
                 self._compressor = None
     
         def _create_compressor(self):
    -        return zlib.compressobj(-1, zlib.DEFLATED, -self._max_wbits)
    +        return zlib.compressobj(tornado.web.GZipContentEncoding.GZIP_LEVEL,
    +                                zlib.DEFLATED, -self._max_wbits)
     
         def compress(self, data):
             compressor = self._compressor or self._create_compressor()
    
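[editor's sketch] The websocket change reuses ``GZipContentEncoding.GZIP_LEVEL``
(6) for per-message deflate instead of zlib's implementation-defined default
level (-1). A minimal sketch of the kind of raw-deflate compressor
``_create_compressor`` builds; the window-bits value is illustrative::

    import zlib

    max_wbits = zlib.MAX_WBITS  # 15; negated below for raw deflate (no zlib header)

    compressor = zlib.compressobj(6, zlib.DEFLATED, -max_wbits)
    data = b'hello hello hello hello'
    frame = compressor.compress(data) + compressor.flush(zlib.Z_SYNC_FLUSH)
    print(len(frame))
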
    From 9d4f5cbf9116ad7502b462dc176877cd94543c45 Mon Sep 17 00:00:00 2001
    From: JackDandy 
    Date: Mon, 18 May 2015 19:11:45 +0100
    Subject: [PATCH 17/95] Update change to suppress reporting of Tornado
     exception error 1 to updated package (ref:hacks.txt).
    
    ---
     CHANGES.md                 | 1 +
     tornado/http1connection.py | 5 +++--
     tornado/iostream.py        | 3 ++-
     3 files changed, 6 insertions(+), 3 deletions(-)
    
    diff --git a/CHANGES.md b/CHANGES.md
    index f1d6c2e0..150f06c7 100644
    --- a/CHANGES.md
    +++ b/CHANGES.md
    @@ -22,6 +22,7 @@
     * Update Requests library to 2.7.0 (5d6d1bc)
     * Update SimpleJSON library 3.7.3 to 3.8.0 (a37a9bd)
     * Update Tornado Web Server 4.2 to 4.3.dev1 (1b6157d)
    +* Update change to suppress reporting of Tornado exception error 1 to updated package (ref:hacks.txt)
     
     
     ### 0.10.0 (2015-08-06 11:05:00 UTC)
    diff --git a/tornado/http1connection.py b/tornado/http1connection.py
    index 6226ef7a..5d6f4c21 100644
    --- a/tornado/http1connection.py
    +++ b/tornado/http1connection.py
    @@ -698,8 +698,9 @@ class HTTP1ServerConnection(object):
                         # This exception was already logged.
                         conn.close()
                         return
    -                except Exception:
    -                    gen_log.error("Uncaught exception", exc_info=True)
    +                except Exception as e:
    +                    if 1 != e.errno:
    +                        gen_log.error("Uncaught exception", exc_info=True)
                         conn.close()
                         return
                     if not ret:
    diff --git a/tornado/iostream.py b/tornado/iostream.py
    index c5d3e2c9..706d3938 100644
    --- a/tornado/iostream.py
    +++ b/tornado/iostream.py
    @@ -648,7 +648,8 @@ class BaseIOStream(object):
             except UnsatisfiableReadError:
                 raise
             except Exception as e:
    -            gen_log.warning("error on read: %s" % e)
    +            if 1 != e.errno:
    +                gen_log.warning("error on read: %s" % e)
                 self.close(exc_info=True)
                 return
             if pos is not None:
    
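[editor's sketch] For reference, errno 1 is ``EPERM``; the hack above simply
skips logging when the caught exception carries it. A hedged sketch of the
pattern in isolation (note the patched code assumes the exception has an
``errno`` attribute, while ``getattr`` is used defensively here)::

    import errno

    def should_log(exc):
        return getattr(exc, 'errno', None) != errno.EPERM  # errno 1

    try:
        raise OSError(errno.EPERM, 'Operation not permitted')
    except OSError as e:
        if should_log(e):
            print('Uncaught exception: %s' % e)
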
    From 58436231360173c6f20c99c7a5636dd1229f8baf Mon Sep 17 00:00:00 2001
    From: JackDandy 
    Date: Mon, 18 May 2015 19:20:58 +0100
    Subject: [PATCH 18/95] Update fix for API response header for JSON content
     type and the return of JSONP data to updated package (ref:hacks.txt).
    
    ---
     CHANGES.md     | 1 +
     tornado/web.py | 5 ++++-
     2 files changed, 5 insertions(+), 1 deletion(-)
    
    diff --git a/CHANGES.md b/CHANGES.md
    index 150f06c7..8ae244f8 100644
    --- a/CHANGES.md
    +++ b/CHANGES.md
    @@ -23,6 +23,7 @@
     * Update SimpleJSON library 3.7.3 to 3.8.0 (a37a9bd)
     * Update Tornado Web Server 4.2 to 4.3.dev1 (1b6157d)
     * Update change to suppress reporting of Tornado exception error 1 to updated package (ref:hacks.txt)
    +* Update fix for API response header for JSON content type and the return of JSONP data to updated package (ref:hacks.txt)
     
     
     ### 0.10.0 (2015-08-06 11:05:00 UTC)
    diff --git a/tornado/web.py b/tornado/web.py
    index b6197ce7..039853d5 100644
    --- a/tornado/web.py
    +++ b/tornado/web.py
    @@ -692,7 +692,10 @@ class RequestHandler(object):
                     message += ". Lists not accepted for security reasons; see http://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write"
                 raise TypeError(message)
             if isinstance(chunk, dict):
    -            chunk = escape.json_encode(chunk)
    +            if 'unwrap_json' in chunk:
    +                chunk = chunk['unwrap_json']
    +            else:
    +                chunk = escape.json_encode(chunk)
                 self.set_header("Content-Type", "application/json; charset=UTF-8")
             chunk = utf8(chunk)
             self._write_buffer.append(chunk)
    
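[editor's sketch] The restored ``unwrap_json`` key lets a handler pass a
pre-encoded body (e.g. JSONP) through ``write()`` while still getting the JSON
content-type header. A hypothetical handler using it with the patched
``tornado.web``; the ``callback`` argument name is illustrative::

    from tornado import escape, web

    class ApiHandler(web.RequestHandler):
        def get(self):
            data = {'result': 'ok'}
            callback = self.get_argument('callback', None)
            if callback:
                # Written through as-is by the patched write().
                payload = '%s(%s)' % (callback, escape.json_encode(data))
                self.write({'unwrap_json': payload})
            else:
                self.write(data)
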
    From 0d2ede9e9390617578dbac6f72fd0888fc7937df Mon Sep 17 00:00:00 2001
    From: JackDandy 
    Date: Sat, 22 Aug 2015 10:35:03 +0100
    Subject: [PATCH 19/95] Fix post processing season pack folders.
    
    ---
     CHANGES.md             |  1 +
     sickbeard/processTV.py | 27 +++++++++++++--------------
     2 files changed, 14 insertions(+), 14 deletions(-)
    
    diff --git a/CHANGES.md b/CHANGES.md
    index 8ae244f8..4a95b1df 100644
    --- a/CHANGES.md
    +++ b/CHANGES.md
    @@ -24,6 +24,7 @@
     * Update Tornado Web Server 4.2 to 4.3.dev1 (1b6157d)
     * Update change to suppress reporting of Tornado exception error 1 to updated package (ref:hacks.txt)
     * Update fix for API response header for JSON content type and the return of JSONP data to updated package (ref:hacks.txt)
    +* Fix post processing season pack folders
     
     
     ### 0.10.0 (2015-08-06 11:05:00 UTC)
    diff --git a/sickbeard/processTV.py b/sickbeard/processTV.py
    index 5baad72f..78fa50d2 100644
    --- a/sickbeard/processTV.py
    +++ b/sickbeard/processTV.py
    @@ -434,10 +434,6 @@ class ProcessTVShow(object):
                                               try_scene_exceptions=True,
                                               convert=True).parse(
                                                   dir_name, cache_result=False)
    -                # check we parsed id, ep and season
    -                if not (0 < len(parse_result.episode_numbers) and isinstance(parse_result.show.indexerid, int)
    -                        and isinstance(parse_result.season_number, int)):
    -                    return False
                 except (InvalidNameException, InvalidShowException):
                     # If the filename doesn't parse, then return false as last
                     # resort. We can assume that unparseable filenames are not
    @@ -448,7 +444,7 @@ class ProcessTVShow(object):
                         parse_result.show.name)[self.any_vid_processed]
     
             ep_detail_sql = ''
    -        if parse_result.show.indexerid and parse_result.episode_numbers and parse_result.season_number:
    +        if parse_result.show.indexerid and 0 < len(parse_result.episode_numbers) and parse_result.season_number:
                 ep_detail_sql = " and tv_episodes.showid='%s' and tv_episodes.season='%s' and tv_episodes.episode='%s'"\
                                 % (str(parse_result.show.indexerid),
                                     str(parse_result.season_number),
    @@ -460,9 +456,10 @@ class ProcessTVShow(object):
             if sql_result:
                self._log_helper(u'Found a release directory%s that has already been processed,<br />.. skipping: %s' % (showlink, dir_name))
-                reset_status(parse_result.show.indexerid,
-                             parse_result.season_number,
-                             parse_result.episode_numbers[0])
+                if ep_detail_sql:
+                    reset_status(parse_result.show.indexerid,
+                                 parse_result.season_number,
+                                 parse_result.episode_numbers[0])
                 return True
         else:
@@ -474,9 +471,10 @@ class ProcessTVShow(object):
         if sql_result:
             self._log_helper(u'Found a video, but that release%s was already processed,<br />.. skipping: %s' % (showlink, videofile))
-            reset_status(parse_result.show.indexerid,
-                         parse_result.season_number,
-                         parse_result.episode_numbers[0])
+            if ep_detail_sql:
+                reset_status(parse_result.show.indexerid,
+                             parse_result.season_number,
+                             parse_result.episode_numbers[0])
             return True

         # Needed if we have downloaded the same episode @ different quality
@@ -491,9 +489,10 @@ class ProcessTVShow(object):
         if sql_result:
             self._log_helper(u'Found a video, but the episode%s is already processed,<br />
    .. skipping: %s' % (showlink, videofile)) - reset_status(parse_result.show.indexerid, - parse_result.season_number, - parse_result.episode_numbers[0]) + if ep_detail_sql: + reset_status(parse_result.show.indexerid, + parse_result.season_number, + parse_result.episode_numbers[0]) return True return False From 30ff8177d4687a6d594383eec65fd3cea3e77e7b Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 25 Aug 2015 00:55:23 +0100 Subject: [PATCH 20/95] Fix saving torrent provider option "Seed until ratio" after recent refactor. --- CHANGES.md | 1 + sickbeard/__init__.py | 36 ++++++++++++++++++------------------ sickbeard/webserve.py | 6 +++--- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4a95b1df..98cc559b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -25,6 +25,7 @@ * Update change to suppress reporting of Tornado exception error 1 to updated package (ref:hacks.txt) * Update fix for API response header for JSON content type and the return of JSONP data to updated package (ref:hacks.txt) * Fix post processing season pack folders +* Fix saving torrent provider option "Seed until ratio" after recent refactor ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index d29171a0..9f86517b 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -1034,23 +1034,23 @@ def initialize(consoleLogging=True): torrent_prov.confirmed = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_confirmed', 0)) if hasattr(torrent_prov, 'options'): torrent_prov.options = check_setting_str(CFG, prov_id_uc, prov_id + '_options', '') - if hasattr(torrent_prov, 'ratio'): - torrent_prov.ratio = check_setting_str(CFG, prov_id_uc, prov_id + '_ratio', '') + if hasattr(torrent_prov, '_seed_ratio'): + torrent_prov._seed_ratio = check_setting_str(CFG, prov_id_uc, prov_id + '_seed_ratio', '') if hasattr(torrent_prov, 'minseed'): torrent_prov.minseed = check_setting_int(CFG, prov_id_uc, prov_id + '_minseed', 0) if hasattr(torrent_prov, 'minleech'): torrent_prov.minleech = check_setting_int(CFG, prov_id_uc, prov_id + '_minleech', 0) if hasattr(torrent_prov, 'freeleech'): torrent_prov.freeleech = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_freeleech', 0)) - if hasattr(torrent_prov, 'search_mode'): - torrent_prov.search_mode = check_setting_str(CFG, prov_id_uc, prov_id + '_search_mode', 'eponly') - if hasattr(torrent_prov, 'search_fallback'): - torrent_prov.search_fallback = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_search_fallback', 0)) if hasattr(torrent_prov, 'enable_recentsearch'): torrent_prov.enable_recentsearch = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_enable_recentsearch', 1)) if hasattr(torrent_prov, 'enable_backlog'): torrent_prov.enable_backlog = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_enable_backlog', 1)) + if hasattr(torrent_prov, 'search_mode'): + torrent_prov.search_mode = check_setting_str(CFG, prov_id_uc, prov_id + '_search_mode', 'eponly') + if hasattr(torrent_prov, 'search_fallback'): + torrent_prov.search_fallback = bool(check_setting_int(CFG, prov_id_uc, prov_id + '_search_fallback', 0)) for nzb_prov in [curProvider for curProvider in providers.sortedProviderList() if GenericProvider.NZB == curProvider.providerType]: @@ -1519,28 +1519,28 @@ def save_config(): new_config[prov_id_uc][prov_id + '_passkey'] = torrent_prov.passkey if hasattr(torrent_prov, 'confirmed'): new_config[prov_id_uc][prov_id + '_confirmed'] = int(torrent_prov.confirmed) - if hasattr(torrent_prov, 
'ratio'): - new_config[prov_id_uc][prov_id + '_ratio'] = torrent_prov.ratio + if hasattr(torrent_prov, '_seed_ratio'): + new_config[prov_id_uc][prov_id + '_seed_ratio'] = torrent_prov.seed_ratio() if hasattr(torrent_prov, 'minseed'): new_config[prov_id_uc][prov_id + '_minseed'] = int(torrent_prov.minseed) if hasattr(torrent_prov, 'minleech'): new_config[prov_id_uc][prov_id + '_minleech'] = int(torrent_prov.minleech) + if hasattr(torrent_prov, 'freeleech'): + new_config[prov_id_uc][prov_id + '_freeleech'] = int(torrent_prov.freeleech) + if hasattr(torrent_prov, 'enable_recentsearch'): + new_config[prov_id_uc][prov_id + '_enable_recentsearch'] = int(torrent_prov.enable_recentsearch) + if hasattr(torrent_prov, 'enable_backlog'): + new_config[prov_id_uc][prov_id + '_enable_backlog'] = int(torrent_prov.enable_backlog) + if hasattr(torrent_prov, 'search_mode'): + new_config[prov_id_uc][prov_id + '_search_mode'] = torrent_prov.search_mode + if hasattr(torrent_prov, 'search_fallback'): + new_config[prov_id_uc][prov_id + '_search_fallback'] = int(torrent_prov.search_fallback) if hasattr(torrent_prov, 'options'): new_config[prov_id_uc][prov_id + '_options'] = torrent_prov.options if hasattr(torrent_prov, 'proxy'): new_config[prov_id_uc][prov_id + '_proxy'] = int(torrent_prov.proxy.enabled) if hasattr(torrent_prov.proxy, 'url'): new_config[prov_id_uc][prov_id + '_proxy_url'] = torrent_prov.proxy.url - if hasattr(torrent_prov, 'freeleech'): - new_config[prov_id_uc][prov_id + '_freeleech'] = int(torrent_prov.freeleech) - if hasattr(torrent_prov, 'search_mode'): - new_config[prov_id_uc][prov_id + '_search_mode'] = torrent_prov.search_mode - if hasattr(torrent_prov, 'search_fallback'): - new_config[prov_id_uc][prov_id + '_search_fallback'] = int(torrent_prov.search_fallback) - if hasattr(torrent_prov, 'enable_recentsearch'): - new_config[prov_id_uc][prov_id + '_enable_recentsearch'] = int(torrent_prov.enable_recentsearch) - if hasattr(torrent_prov, 'enable_backlog'): - new_config[prov_id_uc][prov_id + '_enable_backlog'] = int(torrent_prov.enable_backlog) for nzb_prov in [curProvider for curProvider in providers.sortedProviderList() if GenericProvider.NZB == curProvider.providerType]: diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index b75ea6de..be552373 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -4155,11 +4155,11 @@ class ConfigProviders(Config): except: curTorrentProvider.minleech = 0 - if hasattr(curTorrentProvider, 'ratio'): + if hasattr(curTorrentProvider, '_seed_ratio'): try: - curTorrentProvider.ratio = str(kwargs[curTorrentProvider.get_id() + '_ratio']).strip() + curTorrentProvider._seed_ratio = str(kwargs[curTorrentProvider.get_id() + '_ratio']).strip() except: - curTorrentProvider.ratio = None + curTorrentProvider._seed_ratio = None if hasattr(curTorrentProvider, 'digest'): try: From 9aa0637fa15892f2b2cd8e7737d24ef174ccc06b Mon Sep 17 00:00:00 2001 From: Amelandbor Date: Tue, 1 Sep 2015 14:52:14 +0200 Subject: [PATCH 21/95] Change white text in light theme on Manage / Episode Status Management page to black for better readability --- CHANGES.md | 1 + gui/slick/css/light.css | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 98cc559b..3955615d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,7 @@ * Update fix for API response header for JSON content type and the return of JSONP data to updated package (ref:hacks.txt) * Fix post processing season pack folders * Fix saving torrent provider option "Seed until 
ratio" after recent refactor +* Change white text in light theme on Manage / Episode Status Management page to black for better readability ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/gui/slick/css/light.css b/gui/slick/css/light.css index a5605538..f44be5e6 100644 --- a/gui/slick/css/light.css +++ b/gui/slick/css/light.css @@ -610,7 +610,7 @@ manage*.tmpl } a.whitelink{ - color:#fff + color:#000 } /* ======================================================================= @@ -1378,4 +1378,4 @@ jquery.confirm.css #confirmBox .red:hover{ background-color:#A13331 -} \ No newline at end of file +} From 44746bbb07850a8efe689c865e0f3626adf4731e Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 7 Sep 2015 08:38:21 +0800 Subject: [PATCH 22/95] Change default de-referrer url to blank dereferrer.org is no longer active, so we will clear the config entry from users settings so external links will work. --- sickbeard/__init__.py | 4 ++-- sickbeard/config.py | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index 9f86517b..80f7acf6 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -60,7 +60,7 @@ CFG = None CONFIG_FILE = None # This is the version of the config we EXPECT to find -CONFIG_VERSION = 12 +CONFIG_VERSION = 13 # Default encryption version (0 for None) ENCRYPTION_VERSION = 0 @@ -641,7 +641,7 @@ def initialize(consoleLogging=True): CPU_PRESET = check_setting_str(CFG, 'General', 'cpu_preset', 'NORMAL') - ANON_REDIRECT = check_setting_str(CFG, 'General', 'anon_redirect', 'http://dereferer.org/?') + ANON_REDIRECT = check_setting_str(CFG, 'General', 'anon_redirect', '') PROXY_SETTING = check_setting_str(CFG, 'General', 'proxy_setting', '') PROXY_INDEXERS = bool(check_setting_int(CFG, 'General', 'proxy_indexers', 1)) # attempt to help prevent users from breaking links by using a bad url diff --git a/sickbeard/config.py b/sickbeard/config.py index 58b5242b..151af6a3 100644 --- a/sickbeard/config.py +++ b/sickbeard/config.py @@ -444,7 +444,8 @@ class ConfigMigrator(): 9: 'Rename pushbullet variables', 10: 'Reset backlog frequency to default', 11: 'Migrate anime split view to new layout', - 12: 'Add "hevc" and some non-english languages to ignore words if not found'} + 12: 'Add "hevc" and some non-english languages to ignore words if not found', + 13: 'Change default dereferrer url to blank'} def migrate_config(self): """ Calls each successive migration until the config is the same version as SG expects """ @@ -768,3 +769,8 @@ class ConfigMigrator(): new_list += [new_word] sickbeard.IGNORE_WORDS = ', '.join(sorted(new_list)) + + def _migrate_v13(self): + # change dereferrer.org urls to blank, but leave any other url untouched + if sickbeard.ANON_REDIRECT == 'http://dereferer.org/?': + sickbeard.ANON_REDIRECT = '' From 1b957fe6079dbc0ba2b6d140a6472b85c8a751e6 Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 6 Sep 2015 10:56:09 +0800 Subject: [PATCH 23/95] Change various sickbeard/__init__.py obsolete code cleanups --- sickbeard/__init__.py | 82 +++++-------------------------------------- sickbeard/config.py | 5 +-- 2 files changed, 12 insertions(+), 75 deletions(-) diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index 80f7acf6..3dd4ba4a 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -32,23 +32,18 @@ import os.path import uuid import base64 sys.path.insert(1, os.path.abspath('../lib')) -from sickbeard import providers, metadata, config, webserveInit +from sickbeard import providers, 
metadata, config, webserveInit, searchBacklog, showUpdater, versionChecker, \ + autoPostProcesser, subtitles, traktChecker, helpers, db, exceptions, show_queue, search_queue, scheduler, \ + show_name_helpers, logger, naming, searchRecent, searchProper, scene_numbering, scene_exceptions, name_cache from sickbeard.providers.generic import GenericProvider from providers import btn, newznab, womble, thepiratebay, torrentleech, kat, iptorrents, grabtheinfo, \ omgwtfnzbs, scc, torrentday, hdbits, speedcd, nyaatorrents, torrentbytes, beyondhd, gftracker, transmithe_net, \ freshontv, bitsoup, tokyotoshokan, animenzb, totv, rarbg, morethan, alpharatio, pisexy, strike, torrentshack from sickbeard.config import CheckSection, check_setting_int, check_setting_str, check_setting_float, ConfigMigrator, \ naming_ep_type, minimax -from sickbeard import searchBacklog, showUpdater, versionChecker, autoPostProcesser, \ - subtitles, traktChecker -from sickbeard import helpers, db, exceptions, show_queue, search_queue, scheduler, show_name_helpers -from sickbeard import logger -from sickbeard import naming -from sickbeard import searchRecent, searchProper -from sickbeard import scene_numbering, scene_exceptions, name_cache from indexers.indexer_api import indexerApi -from indexers.indexer_exceptions import indexer_shownotfound, indexer_exception, indexer_error, indexer_episodenotfound, \ - indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts +from indexers.indexer_exceptions import indexer_shownotfound, indexer_exception, indexer_error, \ + indexer_episodenotfound, indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts from sickbeard.common import SD, SKIPPED, NAMING_REPEAT from sickbeard.databases import mainDB, cache_db, failed_db @@ -71,11 +66,6 @@ MY_NAME = None MY_ARGS = [] SYS_ENCODING = '' DATA_DIR = '' -CREATEPID = False -PIDFILE = '' - -DAEMON = None -NO_RESIZE = False # system events events = None @@ -92,7 +82,6 @@ subtitlesFinderScheduler = None traktCheckerScheduler = None showList = None -loadingShowList = None UPDATE_SHOWS_ON_START = False SHOW_UPDATE_HOUR = 3 @@ -101,7 +90,6 @@ newznabProviderList = [] torrentRssProviderList = [] metadata_provider_dict = {} -NEWEST_VERSION = None NEWEST_VERSION_STRING = None VERSION_NOTIFY = False AUTO_UPDATE = False @@ -132,8 +120,6 @@ HANDLE_REVERSE_PROXY = False PROXY_SETTING = None PROXY_INDEXERS = True -LOCALHOST_IP = None - CPU_PRESET = 'NORMAL' ANON_REDIRECT = None @@ -161,7 +147,6 @@ SHOW_TAGS = [] DEFAULT_SHOW_TAG = '' SHOWLIST_TAGVIEW = '' -USE_LISTVIEW = False METADATA_XBMC = None METADATA_XBMC_12PLUS = None METADATA_MEDIABROWSER = None @@ -244,16 +229,6 @@ TV_DOWNLOAD_DIR = None UNPACK = False SKIP_REMOVED_FILES = False -NZBS = False -NZBS_UID = None -NZBS_HASH = None - -WOMBLE = False - -OMGWTFNZBS = False -OMGWTFNZBS_USERNAME = None -OMGWTFNZBS_APIKEY = None - SAB_USERNAME = None SAB_PASSWORD = None SAB_APIKEY = None @@ -500,7 +475,7 @@ def initialize(consoleLogging=True): USE_PLEX, PLEX_NOTIFY_ONSNATCH, PLEX_NOTIFY_ONDOWNLOAD, PLEX_NOTIFY_ONSUBTITLEDOWNLOAD, PLEX_UPDATE_LIBRARY, \ PLEX_SERVER_HOST, PLEX_HOST, PLEX_USERNAME, PLEX_PASSWORD, DEFAULT_BACKLOG_FREQUENCY, MIN_BACKLOG_FREQUENCY, MAX_BACKLOG_FREQUENCY, BACKLOG_STARTUP, SKIP_REMOVED_FILES, \ showUpdateScheduler, __INITIALIZED__, LAUNCH_BROWSER, TRASH_REMOVE_SHOW, TRASH_ROTATE_LOGS, HOME_SEARCH_FOCUS, SORT_ARTICLE, showList, loadingShowList, UPDATE_SHOWS_ON_START, SHOW_UPDATE_HOUR, \ - NEWZNAB_DATA, NZBS, NZBS_UID, NZBS_HASH, 
INDEXER_DEFAULT, INDEXER_TIMEOUT, USENET_RETENTION, TORRENT_DIR, \ + NEWZNAB_DATA, INDEXER_DEFAULT, INDEXER_TIMEOUT, USENET_RETENTION, TORRENT_DIR, \ QUALITY_DEFAULT, FLATTEN_FOLDERS_DEFAULT, SUBTITLES_DEFAULT, STATUS_DEFAULT, WANTED_BEGIN_DEFAULT, WANTED_LATEST_DEFAULT, RECENTSEARCH_STARTUP, \ GROWL_NOTIFY_ONSNATCH, GROWL_NOTIFY_ONDOWNLOAD, GROWL_NOTIFY_ONSUBTITLEDOWNLOAD, TWITTER_NOTIFY_ONSNATCH, TWITTER_NOTIFY_ONDOWNLOAD, TWITTER_NOTIFY_ONSUBTITLEDOWNLOAD, \ USE_GROWL, GROWL_HOST, GROWL_PASSWORD, USE_PROWL, PROWL_NOTIFY_ONSNATCH, PROWL_NOTIFY_ONDOWNLOAD, PROWL_NOTIFY_ONSUBTITLEDOWNLOAD, PROWL_API, PROWL_PRIORITY, PROG_DIR, \ @@ -513,20 +488,20 @@ def initialize(consoleLogging=True): showQueueScheduler, searchQueueScheduler, ROOT_DIRS, CACHE_DIR, ACTUAL_CACHE_DIR, TIMEZONE_DISPLAY, \ NAMING_PATTERN, NAMING_MULTI_EP, NAMING_ANIME_MULTI_EP, NAMING_FORCE_FOLDERS, NAMING_ABD_PATTERN, NAMING_CUSTOM_ABD, NAMING_SPORTS_PATTERN, NAMING_CUSTOM_SPORTS, NAMING_ANIME_PATTERN, NAMING_CUSTOM_ANIME, NAMING_STRIP_YEAR, \ RENAME_EPISODES, AIRDATE_EPISODES, properFinderScheduler, PROVIDER_ORDER, autoPostProcesserScheduler, \ - WOMBLE, OMGWTFNZBS, OMGWTFNZBS_USERNAME, OMGWTFNZBS_APIKEY, providerList, newznabProviderList, torrentRssProviderList, \ + providerList, newznabProviderList, torrentRssProviderList, \ EXTRA_SCRIPTS, USE_TWITTER, TWITTER_USERNAME, TWITTER_PASSWORD, TWITTER_PREFIX, RECENTSEARCH_FREQUENCY, \ USE_BOXCAR2, BOXCAR2_ACCESSTOKEN, BOXCAR2_NOTIFY_ONDOWNLOAD, BOXCAR2_NOTIFY_ONSUBTITLEDOWNLOAD, BOXCAR2_NOTIFY_ONSNATCH, BOXCAR2_SOUND, \ USE_PUSHOVER, PUSHOVER_USERKEY, PUSHOVER_APIKEY, PUSHOVER_NOTIFY_ONDOWNLOAD, PUSHOVER_NOTIFY_ONSUBTITLEDOWNLOAD, PUSHOVER_NOTIFY_ONSNATCH, PUSHOVER_PRIORITY, PUSHOVER_DEVICE, PUSHOVER_SOUND, \ USE_LIBNOTIFY, LIBNOTIFY_NOTIFY_ONSNATCH, LIBNOTIFY_NOTIFY_ONDOWNLOAD, LIBNOTIFY_NOTIFY_ONSUBTITLEDOWNLOAD, USE_NMJ, NMJ_HOST, NMJ_DATABASE, NMJ_MOUNT, USE_NMJv2, NMJv2_HOST, NMJv2_DATABASE, NMJv2_DBLOC, USE_SYNOINDEX, \ USE_SYNOLOGYNOTIFIER, SYNOLOGYNOTIFIER_NOTIFY_ONSNATCH, SYNOLOGYNOTIFIER_NOTIFY_ONDOWNLOAD, SYNOLOGYNOTIFIER_NOTIFY_ONSUBTITLEDOWNLOAD, \ USE_EMAIL, EMAIL_HOST, EMAIL_PORT, EMAIL_TLS, EMAIL_USER, EMAIL_PASSWORD, EMAIL_FROM, EMAIL_NOTIFY_ONSNATCH, EMAIL_NOTIFY_ONDOWNLOAD, EMAIL_NOTIFY_ONSUBTITLEDOWNLOAD, EMAIL_LIST, \ - USE_LISTVIEW, METADATA_XBMC, METADATA_XBMC_12PLUS, METADATA_MEDIABROWSER, METADATA_PS3, METADATA_KODI, metadata_provider_dict, \ + METADATA_XBMC, METADATA_XBMC_12PLUS, METADATA_MEDIABROWSER, METADATA_PS3, METADATA_KODI, metadata_provider_dict, \ GIT_PATH, MOVE_ASSOCIATED_FILES, POSTPONE_IF_SYNC_FILES, recentSearchScheduler, NFO_RENAME, \ GUI_NAME, DEFAULT_HOME, HOME_LAYOUT, HISTORY_LAYOUT, DISPLAY_SHOW_SPECIALS, EPISODE_VIEW_LAYOUT, EPISODE_VIEW_SORT, EPISODE_VIEW_DISPLAY_PAUSED, EPISODE_VIEW_MISSED_RANGE, FUZZY_DATING, TRIM_ZERO, DATE_PRESET, TIME_PRESET, TIME_PRESET_W_SECONDS, THEME_NAME, \ POSTER_SORTBY, POSTER_SORTDIR, \ METADATA_WDTV, METADATA_TIVO, METADATA_MEDE8ER, IGNORE_WORDS, REQUIRE_WORDS, CALENDAR_UNPROTECTED, CREATE_MISSING_SHOW_DIRS, \ ADD_SHOWS_WO_DIR, USE_SUBTITLES, SUBTITLES_LANGUAGES, SUBTITLES_DIR, SUBTITLES_SERVICES_LIST, SUBTITLES_SERVICES_ENABLED, SUBTITLES_HISTORY, SUBTITLES_FINDER_FREQUENCY, subtitlesFinderScheduler, \ - USE_FAILED_DOWNLOADS, DELETE_FAILED, ANON_REDIRECT, LOCALHOST_IP, TMDB_API_KEY, DEBUG, PROXY_SETTING, PROXY_INDEXERS, \ + USE_FAILED_DOWNLOADS, DELETE_FAILED, ANON_REDIRECT, TMDB_API_KEY, DEBUG, PROXY_SETTING, PROXY_INDEXERS, \ AUTOPOSTPROCESSER_FREQUENCY, DEFAULT_AUTOPOSTPROCESSER_FREQUENCY, 
MIN_AUTOPOSTPROCESSER_FREQUENCY, \ ANIME_DEFAULT, NAMING_ANIME, USE_ANIDB, ANIDB_USERNAME, ANIDB_PASSWORD, ANIDB_USE_MYLIST, \ SCENE_DEFAULT, BACKLOG_DAYS, SEARCH_UNAIRED, ANIME_TREAT_AS_HDTV, \ @@ -637,8 +612,6 @@ def initialize(consoleLogging=True): WEB_PASSWORD = check_setting_str(CFG, 'General', 'web_password', '') LAUNCH_BROWSER = bool(check_setting_int(CFG, 'General', 'launch_browser', 1)) - LOCALHOST_IP = check_setting_str(CFG, 'General', 'localhost_ip', '') - CPU_PRESET = check_setting_str(CFG, 'General', 'cpu_preset', 'NORMAL') ANON_REDIRECT = check_setting_str(CFG, 'General', 'anon_redirect', '') @@ -758,10 +731,6 @@ def initialize(consoleLogging=True): CREATE_MISSING_SHOW_DIRS = bool(check_setting_int(CFG, 'General', 'create_missing_show_dirs', 0)) ADD_SHOWS_WO_DIR = bool(check_setting_int(CFG, 'General', 'add_shows_wo_dir', 0)) - NZBS = bool(check_setting_int(CFG, 'NZBs', 'nzbs', 0)) - NZBS_UID = check_setting_str(CFG, 'NZBs', 'nzbs_uid', '') - NZBS_HASH = check_setting_str(CFG, 'NZBs', 'nzbs_hash', '') - SAB_USERNAME = check_setting_str(CFG, 'SABnzbd', 'sab_username', '') SAB_PASSWORD = check_setting_str(CFG, 'SABnzbd', 'sab_password', '') SAB_APIKEY = check_setting_str(CFG, 'SABnzbd', 'sab_apikey', '') @@ -970,8 +939,6 @@ def initialize(consoleLogging=True): EXTRA_SCRIPTS = [x.strip() for x in check_setting_str(CFG, 'General', 'extra_scripts', '').split('|') if x.strip()] - USE_LISTVIEW = bool(check_setting_int(CFG, 'General', 'use_listview', 0)) - USE_ANIDB = bool(check_setting_int(CFG, 'ANIDB', 'use_anidb', 0)) ANIDB_USERNAME = check_setting_str(CFG, 'ANIDB', 'anidb_username', '') ANIDB_PASSWORD = check_setting_str(CFG, 'ANIDB', 'anidb_password', '') @@ -1398,7 +1365,6 @@ def save_config(): new_config['General']['web_root'] = WEB_ROOT new_config['General']['web_username'] = WEB_USERNAME new_config['General']['web_password'] = helpers.encrypt(WEB_PASSWORD, ENCRYPTION_VERSION) - new_config['General']['localhost_ip'] = LOCALHOST_IP new_config['General']['cpu_preset'] = CPU_PRESET new_config['General']['anon_redirect'] = ANON_REDIRECT new_config['General']['use_api'] = int(USE_API) @@ -1460,7 +1426,6 @@ def save_config(): new_config['General']['display_background_transparent'] = DISPLAY_BACKGROUND_TRANSPARENT new_config['General']['display_all_seasons'] = int(DISPLAY_ALL_SEASONS) - new_config['General']['use_listview'] = int(USE_LISTVIEW) new_config['General']['metadata_xbmc'] = METADATA_XBMC new_config['General']['metadata_xbmc_12plus'] = METADATA_XBMC_12PLUS new_config['General']['metadata_mediabrowser'] = METADATA_MEDIABROWSER @@ -1562,11 +1527,6 @@ def save_config(): if hasattr(nzb_prov, 'enable_backlog'): new_config[prov_id_uc][prov_id + '_enable_backlog'] = int(nzb_prov.enable_backlog) - new_config['NZBs'] = {} - new_config['NZBs']['nzbs'] = int(NZBS) - new_config['NZBs']['nzbs_uid'] = NZBS_UID - new_config['NZBs']['nzbs_hash'] = NZBS_HASH - new_config['SABnzbd'] = {} new_config['SABnzbd']['sab_username'] = SAB_USERNAME new_config['SABnzbd']['sab_password'] = helpers.encrypt(SAB_PASSWORD, ENCRYPTION_VERSION) @@ -1832,27 +1792,3 @@ def launchBrowser(start_port=None): webbrowser.open(browser_url, 1, 1) except: logger.log(u'Unable to launch a browser', logger.ERROR) - - -def getEpList(ep_ids, showid=None): - if None is ep_ids or 0 == len(ep_ids): - return [] - - query = 'SELECT * FROM tv_episodes WHERE indexerid in (%s)' % (','.join(['?'] * len(ep_ids)),) - params = ep_ids - - if None is not showid: - query += ' AND showid = ?' 
- params.append(showid) - - my_db = db.DBConnection() - sql_results = my_db.select(query, params) - - ep_list = [] - - for curEp in sql_results: - cur_show_obj = helpers.findCertainShow(showList, int(curEp['showid'])) - cur_ep_obj = cur_show_obj.getEpisode(int(curEp['season']), int(curEp['episode'])) - ep_list.append(cur_ep_obj) - - return ep_list diff --git a/sickbeard/config.py b/sickbeard/config.py index 151af6a3..93979a16 100644 --- a/sickbeard/config.py +++ b/sickbeard/config.py @@ -600,8 +600,9 @@ class ConfigMigrator(): Reads in the old naming settings from your config and generates a new config template from them. """ # get the old settings from the file and store them in the new variable names - sickbeard.OMGWTFNZBS_USERNAME = check_setting_str(self.config_obj, 'omgwtfnzbs', 'omgwtfnzbs_uid', '') - sickbeard.OMGWTFNZBS_APIKEY = check_setting_str(self.config_obj, 'omgwtfnzbs', 'omgwtfnzbs_key', '') + for prov in [curProvider for curProvider in providers.sortedProviderList() if curProvider.name == 'omgwtfnzbs']: + prov.username = check_setting_str(self.config_obj, 'omgwtfnzbs', 'omgwtfnzbs_uid', '') + prov.api_key = check_setting_str(self.config_obj, 'omgwtfnzbs', 'omgwtfnzbs_key', '') # Migration v4: Add default newznab cat_ids def _migrate_v4(self): From 32706c2a0b46e139566493d620cdbe60a85d197b Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sun, 6 Sep 2015 21:05:24 +0100 Subject: [PATCH 24/95] Change TtN provider to parse new layout. Improve recognition of SD quality. --- CHANGES.md | 2 ++ sickbeard/common.py | 2 +- sickbeard/providers/transmithe_net.py | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3955615d..eb463908 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,6 +27,8 @@ * Fix post processing season pack folders * Fix saving torrent provider option "Seed until ratio" after recent refactor * Change white text in light theme on Manage / Episode Status Management page to black for better readability +* Change TtN provider to parse new layout +* Improve recognition of SD quality ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/sickbeard/common.py b/sickbeard/common.py index 602bc58d..1d7e4dc3 100644 --- a/sickbeard/common.py +++ b/sickbeard/common.py @@ -205,7 +205,7 @@ class Quality: else: return Quality.UNKNOWN - if checkName(['(pdtv|hdtv|dsr|tvrip).((aac|ac3|dd).?\d\.?\d.)*(xvid|x264|h.?264)'], all) and not checkName(['(720|1080)[pi]'], all) \ + if checkName(['(pdtv|hdtv|dsr|tvrip)([-]|.((aac|ac3|dd).?\d\.?\d.)*(xvid|x264|h.?264))'], all) and not checkName(['(720|1080)[pi]'], all) \ and not checkName(['hr.ws.pdtv.(x264|h.?264)'], any): return Quality.SDTV elif checkName(['web.?dl|web.?rip', 'xvid|x264|h.?264'], all) and not checkName(['(720|1080)[pi]'], all): diff --git a/sickbeard/providers/transmithe_net.py b/sickbeard/providers/transmithe_net.py index bfa25791..8bc48a5b 100644 --- a/sickbeard/providers/transmithe_net.py +++ b/sickbeard/providers/transmithe_net.py @@ -93,6 +93,8 @@ class TransmithenetProvider(generic.TorrentProvider): raise generic.HaltParseException for tr in torrent_rows[1:]: + if tr.find('td', class_='header'): + continue downlink = tr.find('a', href=rc['get']) if None is downlink: continue @@ -102,7 +104,8 @@ class TransmithenetProvider(generic.TorrentProvider): continue info = tr.find('a', href=rc['info']) - title = ('title' in info.attrs and info['title']) or info.get_text().strip() + title = ('data-src' in info.attrs and info['data-src']) or\ + ('title' in info.attrs and info['title']) or 
info.get_text().strip() download_url = self.urls['get'] % str(downlink['href']).lstrip('/') except (AttributeError, TypeError): From 042ad5cafc263d929e7144503802808b2fdbf03a Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 12 Sep 2015 02:29:35 +0100 Subject: [PATCH 25/95] Fix halting in mid flow of Add Existing Show which resulted in failure to scan statuses and filesizes. --- CHANGES.md | 1 + sickbeard/tv.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index eb463908..e8e2f743 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -29,6 +29,7 @@ * Change white text in light theme on Manage / Episode Status Management page to black for better readability * Change TtN provider to parse new layout * Improve recognition of SD quality +* Fix halting in mid flow of Add Existing Show which resulted in failure to scan statuses and filesizes ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/sickbeard/tv.py b/sickbeard/tv.py index 2ed8fe57..5a4d5783 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -726,7 +726,7 @@ class TVShow(object): if None is not new_status: with cur_ep.lock: logger.log(u'STATUS: we have an associated file, so setting the status from %s to DOWNLOADED/%s' - % (cur_ep.status, Quality.compositeStatus(Quality.DOWNLOADED, new_quality)), logger.DEBUG) + % (cur_ep.status, Quality.compositeStatus(new_status, new_quality)), logger.DEBUG) cur_ep.status = Quality.compositeStatus(new_status, new_quality) with cur_ep.lock: From f3fa30a015147956fbbbfecd55c7a0dab7f881e2 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 12 Sep 2015 18:59:33 +0100 Subject: [PATCH 26/95] Add provider SceneTime. --- CHANGES.md | 1 + gui/slick/images/providers/scenetime.png | Bin 0 -> 583 bytes sickbeard/__init__.py | 2 +- sickbeard/providers/__init__.py | 1 + sickbeard/providers/scenetime.py | 150 +++++++++++++++++++++++ 5 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 gui/slick/images/providers/scenetime.png create mode 100644 sickbeard/providers/scenetime.py diff --git a/CHANGES.md b/CHANGES.md index e8e2f743..0f7ee085 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,6 +27,7 @@ * Fix post processing season pack folders * Fix saving torrent provider option "Seed until ratio" after recent refactor * Change white text in light theme on Manage / Episode Status Management page to black for better readability +* Add SceneTime torrent provider * Change TtN provider to parse new layout * Improve recognition of SD quality * Fix halting in mid flow of Add Existing Show which resulted in failure to scan statuses and filesizes diff --git a/gui/slick/images/providers/scenetime.png b/gui/slick/images/providers/scenetime.png new file mode 100644 index 0000000000000000000000000000000000000000..0982d8d54bc7e00096e8f098e9a67bc63d659092 GIT binary patch literal 583 zcmV-N0=WH&P)C$X&Y*DVw-`O+0_Gaec(o|cJgek*zcN^KdCYFOW5|aSn zN`^+Ke$iVg_c^au3nLE}CfdTlLzJ$n)!D)Q-Yb*_rulYylA8xFS8H(?;%#oUQ&Pij zVWpT;{-$+nL#Cc5$8XTn(pk;bnGA&59C$ksFdYNC+m-BO|2;N-p&bnV=q;!qk#LQSnqpcs8QYE5ytttZ7v6yp+U zQ2*A`yZ0!A{TDda(VzYoRkOy(;G$zh$JJbu)5w`4XD!YPH+Wtcw>SXN;CO*o#z0jy zM`^|2t@Er_eqeD4n1FI7>hQaa&@Ed#(y^vB+se9E;#u)2L$?Q+oEuLyHX4$T+>^Rf z?4p4gX{KESQp-*`|Cr->7R(p=#26!Px3p8wur!iGX#Q^#14NXPpX4bs9Dg^. + +import re +import datetime +import traceback + +from . 
import generic +from sickbeard import logger, tvcache, helpers +from sickbeard.bs4_parser import BS4Parser +from lib.unidecode import unidecode + + +class SceneTimeProvider(generic.TorrentProvider): + + def __init__(self): + generic.TorrentProvider.__init__(self, 'SceneTime') + +# https://www.scenetime.com/download.php/860856/Whose.Line.is.it.Anyway.US.S11E13.HDTV.x264-BAJSKORV.torrent +# Whose Line is it Anyway US S11E13 HDTV x264-BAJSKORV + self.url_base = 'https://www.scenetime.com/' + self.urls = {'config_provider_home_uri': self.url_base, + 'login': self.url_base + 'takelogin.php', + 'search': self.url_base + 'browse.php?%ssearch=%s', + 'get': self.url_base + 'download.php/%(id)s/%(title)s.torrent'} + + self.categories = 'c2=1&c43=1&c9=1&c63=1&c77=1&c79=1&c101=1&cata=yes&' + + self.url = self.urls['config_provider_home_uri'] + + self.username, self.password, self.minseed, self.minleech = 4 * [None] + self.cache = SceneTimeCache(self) + + def _do_login(self): + + logged_in = lambda: 'uid' in self.session.cookies and 'pass' in self.session.cookies + if logged_in(): + return True + + if self._check_auth(): + login_params = {'username': self.username, 'password': self.password, 'submit': 'Log in'} + response = helpers.getURL(self.urls['login'], post_data=login_params, session=self.session) + if response and logged_in(): + return True + + msg = u'Failed to authenticate with %s, abort provider' + if response and 'Username or password incorrect' in response: + msg = u'Invalid username or password for %s. Check settings' + logger.log(msg % self.name, logger.ERROR) + + return False + + def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0): + + results = [] + if not self._do_login(): + return results + + items = {'Season': [], 'Episode': [], 'Cache': []} + + rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': '.*id=(\d+).*', + 'cats': 'cat=(?:2|9|43|63|77|79|101)'}.items()) + for mode in search_params.keys(): + for search_string in search_params[mode]: + if isinstance(search_string, unicode): + search_string = unidecode(search_string) + + search_url = self.urls['search'] % (self.categories, search_string) + html = self.get_url(search_url) + + cnt = len(items[mode]) + try: + if not html or self._has_no_results(html): + raise generic.HaltParseException + + with BS4Parser(html, features=['html5lib', 'permissive']) as soup: + torrent_table = soup.find('div', id='torrenttable').find('table') + torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + + if 2 > len(torrent_rows): + raise generic.HaltParseException + + for tr in torrent_rows[1:]: + try: + seeders, leechers = [int(tr.find_all('td')[x].get_text().strip()) for x in (-2, -1)] + if None is tr.find('a', href=rc['cats'])\ + or ('Cache' != mode and (seeders < self.minseed or leechers < self.minleech)): + continue + + info = tr.find('a', href=rc['info']) + title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip() + + download_url = self.urls['get'] % {'id': re.sub(rc['get'], r'\1', str(info.attrs['href'])), + 'title': str(title).replace(' ', '.')} + except (AttributeError, TypeError): + continue + + if title and download_url: + items[mode].append((title, download_url, seeders)) + + except generic.HaltParseException: + pass + except Exception: + logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) + + self._log_result(mode, len(items[mode]) - cnt, search_url) + + # For each search mode sort all the items by seeders + items[mode].sort(key=lambda tup: tup[2], reverse=True) + + results += items[mode] + + return results + + def find_propers(self, search_date=datetime.datetime.today()): + + return self._find_propers(search_date) + + def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs): + + return generic.TorrentProvider._get_episode_search_strings(self, ep_obj, add_string, use_or=False) + + +class SceneTimeCache(tvcache.TVCache): + + def __init__(self, this_provider): + tvcache.TVCache.__init__(self, this_provider) + + self.minTime = 15 # cache update frequency + + def _getRSSData(self): + + return self.provider.get_cache_data() + + +provider = SceneTimeProvider() From a019e13a7518c05c576337505dab38ae6cafe820 Mon Sep 17 00:00:00 2001 From: Adam Date: Sat, 12 Sep 2015 15:17:26 +0800 Subject: [PATCH 27/95] Change javascript urls in templates to allow proper caching --- CHANGES.md | 4 +++- gui/slick/interfaces/default/apiBuilder.tmpl | 4 ++-- gui/slick/interfaces/default/config_anime.tmpl | 2 +- gui/slick/interfaces/default/config_general.tmpl | 4 ++-- .../interfaces/default/config_notifications.tmpl | 4 ++-- .../default/config_postProcessing.tmpl | 4 ++-- .../interfaces/default/config_providers.tmpl | 4 ++-- gui/slick/interfaces/default/config_search.tmpl | 4 ++-- .../interfaces/default/config_subtitles.tmpl | 2 +- gui/slick/interfaces/default/displayShow.tmpl | 16 ++++++++-------- gui/slick/interfaces/default/editShow.tmpl | 4 ++-- gui/slick/interfaces/default/episodeView.tmpl | 6 +++--- .../interfaces/default/home_addExistingShow.tmpl | 8 ++++---- gui/slick/interfaces/default/home_newShow.tmpl | 12 ++++++------ .../default/home_recommendedShows.tmpl | 10 +++++----- .../interfaces/default/home_trendingShows.tmpl | 2 +- gui/slick/interfaces/default/manage.tmpl | 2 +- .../default/manage_episodeStatuses.tmpl | 2 +- .../default/manage_failedDownloads.tmpl | 2 +- .../default/manage_manageSearches.tmpl | 4 ++-- .../interfaces/default/manage_massEdit.tmpl | 4 ++-- .../default/manage_showQueueOverview.tmpl | 2 +- .../default/manage_subtitleMissed.tmpl | 2 +- gui/slick/interfaces/default/restart_bare.tmpl | 4 ++-- 24 files changed, 57 insertions(+), 55 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0f7ee085..67c5be21 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -30,7 +30,9 @@ * Add SceneTime torrent provider * Change TtN provider to parse new layout * Improve recognition of SD quality -* Fix halting in mid flow of Add Existing Show which resulted in failure to scan statuses and filesizes +* Fix halting in mid flow of Add Existing Show which resulted in failure to scan statuses and filesizes +* Change default de-referrer url to blank +* Change javascript urls in templates to allow proper caching ### 0.10.0 (2015-08-06 11:05:00 UTC) diff --git a/gui/slick/interfaces/default/apiBuilder.tmpl b/gui/slick/interfaces/default/apiBuilder.tmpl index 750d5936..c3660035 100644 --- a/gui/slick/interfaces/default/apiBuilder.tmpl +++ b/gui/slick/interfaces/default/apiBuilder.tmpl @@ -8,8 +8,8 @@ sbRoot = "$sbRoot"; //--> - - + + + + #end if   -  * - -   - +  * + + #if 1 < $len($indexers) + + #end if +   +

    Enter show name, TVDB ID, IMDb URL, or IMDb ID.  *SickGear supports English; the language selection is used for show/episode data

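[The help text above reflects the widened show search input: a single box accepts plain text, a TVDB id, an IMDb id, or an IMDb URL. A minimal sketch of classifying such input, assuming simple regular expressions; the actual parsing in sickbeard/webserve.py may differ:]

    import re

    def classify_search_term(term):
        """Illustrative only: split a search box value into the kinds of
        input the page accepts (IMDb URL, IMDb id, TVDB id, plain text)."""
        term = term.strip()
        # IMDb URL, e.g. http://www.imdb.com/title/tt0944947/
        url_match = re.search(r'imdb\.com/title/(tt\d{7,8})', term, re.I)
        if url_match:
            return 'imdb_id', url_match.group(1).lower()
        # bare IMDb id, e.g. tt0944947
        if re.match(r'(?i)tt\d{7,8}$', term):
            return 'imdb_id', term.lower()
        # an all-digit entry is taken to be a TVDB id
        if term.isdigit():
            return 'tvdb_id', int(term)
        return 'text', term

    # classify_search_term('http://www.imdb.com/title/tt0944947/')
    # -> ('imdb_id', 'tt0944947')
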
    diff --git a/sickbeard/__init__.py b/sickbeard/__init__.py index 0158628f..bb0b6eab 100755 --- a/sickbeard/__init__.py +++ b/sickbeard/__init__.py @@ -38,6 +38,7 @@ from sickbeard import helpers, logger, db, naming, metadata, providers, scene_ex from sickbeard.config import CheckSection, check_setting_int, check_setting_str, ConfigMigrator, minimax from sickbeard.common import SD, SKIPPED from sickbeard.databases import mainDB, cache_db, failed_db +from indexers.indexer_config import INDEXER_TVDB from indexers.indexer_api import indexerApi from indexers.indexer_exceptions import indexer_shownotfound, indexer_exception, indexer_error, \ indexer_episodenotfound, indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts @@ -667,6 +668,8 @@ def initialize(consoleLogging=True): NOTIFY_ON_UPDATE = bool(check_setting_int(CFG, 'General', 'notify_on_update', 1)) FLATTEN_FOLDERS_DEFAULT = bool(check_setting_int(CFG, 'General', 'flatten_folders_default', 0)) INDEXER_DEFAULT = check_setting_int(CFG, 'General', 'indexer_default', 0) + if INDEXER_DEFAULT and not indexerApi(INDEXER_DEFAULT).config['active']: + INDEXER_DEFAULT = INDEXER_TVDB INDEXER_TIMEOUT = check_setting_int(CFG, 'General', 'indexer_timeout', 20) ANIME_DEFAULT = bool(check_setting_int(CFG, 'General', 'anime_default', 0)) SCENE_DEFAULT = bool(check_setting_int(CFG, 'General', 'scene_default', 0)) diff --git a/sickbeard/indexers/indexer_api.py b/sickbeard/indexers/indexer_api.py index f2adff71..1b29e8d6 100644 --- a/sickbeard/indexers/indexer_api.py +++ b/sickbeard/indexers/indexer_api.py @@ -17,10 +17,63 @@ # along with SickGear. If not, see . import os import sickbeard +import time from indexer_config import initConfig, indexerConfig from sickbeard.helpers import proxy_setting + +class ShowContainer(dict): + """Simple dict that holds a series of Show instances + """ + + def __init__(self): + self._stack = [] + self._lastgc = time.time() + + def __setitem__(self, key, value): + self._stack.append(key) + + # keep only the 100th latest results + if time.time() - self._lastgc > 20: + for o in self._stack[:-100]: + del self[o] + + self._stack = self._stack[-100:] + + self._lastgc = time.time() + + super(ShowContainer, self).__setitem__(key, value) + + +class DummyIndexer: + def __init__(self, *args, **kwargs): + self.config = { + 'apikey': '', + 'debug_enabled': False, + 'custom_ui': None, + 'proxy': None, + 'cache_enabled': False, + 'cache_location': '', + 'valid_languages': [], + 'langabbv_to_id': {}, + 'language': 'en', + 'base_url': '', + } + + self.corrections = {} + self.shows = ShowContainer() + + def __getitem__(self, key): + return None + + def __repr__(self): + return str(self.shows) + + def search(self, series): + return [] + + class indexerApi(object): def __init__(self, indexerID=None): self.indexerID = int(indexerID) if indexerID else None @@ -30,7 +83,10 @@ class indexerApi(object): def indexer(self, *args, **kwargs): if self.indexerID: - return indexerConfig[self.indexerID]['module'](*args, **kwargs) + if indexerConfig[self.indexerID]['active']: + return indexerConfig[self.indexerID]['module'](*args, **kwargs) + else: + return DummyIndexer(*args, **kwargs) @property def config(self): diff --git a/sickbeard/indexers/indexer_config.py b/sickbeard/indexers/indexer_config.py index 8764dcbc..68e9895a 100644 --- a/sickbeard/indexers/indexer_config.py +++ b/sickbeard/indexers/indexer_config.py @@ -25,6 +25,7 @@ indexerConfig[INDEXER_TVDB] = { 'language': 'en', 'useZip': True, }, + 'active': True, } 
indexerConfig[INDEXER_TVRAGE] = { @@ -34,6 +35,7 @@ indexerConfig[INDEXER_TVRAGE] = { 'api_params': {'apikey': 'Uhewg1Rr0o62fvZvUIZt', 'language': 'en', }, + 'active': False, } # TVDB Indexer Settings diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index 0539399e..db018ccb 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -556,7 +556,7 @@ class GenericProvider: value *= 1024 ** ['b', 'k', 'm', 'g', 't'].index(re.findall('(t|g|m|k)[i]?b', size_dim.lower())[0]) except IndexError: pass - return int(math.ceil(value)) + return long(math.ceil(value)) class NZBProvider(object, GenericProvider): diff --git a/sickbeard/show_queue.py b/sickbeard/show_queue.py index c2ff930b..907602f1 100644 --- a/sickbeard/show_queue.py +++ b/sickbeard/show_queue.py @@ -578,6 +578,11 @@ class QueueItemUpdate(ShowQueueItem): ShowQueueItem.run(self) + if not sickbeard.indexerApi(self.show.indexer).config['active']: + logger.log(u'Indexer %s is marked inactive, aborting update for show %s and continue with refresh.' % (sickbeard.indexerApi(self.show.indexer).config['name'], self.show.name)) + sickbeard.showQueueScheduler.action.refreshShow(self.show, self.force, self.scheduled_update, after_update=True) + return + logger.log(u'Beginning update of ' + self.show.name) logger.log(u'Retrieving show info from ' + sickbeard.indexerApi(self.show.indexer).name + '', logger.DEBUG) diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index d894a24b..50c0cc6f 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -2316,7 +2316,8 @@ class NewHomeAddShows(Home): t.provided_show_dir = show_dir t.other_shows = other_shows t.provided_indexer = int(indexer or sickbeard.INDEXER_DEFAULT) - t.indexers = sickbeard.indexerApi().indexers + t.indexers = dict([(i, sickbeard.indexerApi().indexers[i]) for i in sickbeard.indexerApi().indexers + if sickbeard.indexerApi(i).config['active']]) t.whitelist = [] t.blacklist = [] t.groups = [] @@ -3744,6 +3745,8 @@ class ConfigGeneral(Config): t = PageTemplate(headers=self.request.headers, file='config_general.tmpl') t.submenu = self.ConfigMenu t.show_tags = ', '.join(sickbeard.SHOW_TAGS) + t.indexers = dict([(i, sickbeard.indexerApi().indexers[i]) for i in sickbeard.indexerApi().indexers + if sickbeard.indexerApi(i).config['active']]) return t.respond() def saveRootDirs(self, rootDirString=None): @@ -3876,6 +3879,8 @@ class ConfigGeneral(Config): if indexer_default: sickbeard.INDEXER_DEFAULT = config.to_int(indexer_default) + if not sickbeard.indexerApi(sickbeard.INDEXER_DEFAULT).config['active']: + sickbeard.INDEXER_DEFAULT = INDEXER_TVDB if indexer_timeout: sickbeard.INDEXER_TIMEOUT = config.to_int(indexer_timeout) From b3be940d445e8393b4d1fe2fe10f206b974261d8 Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Thu, 19 Nov 2015 23:05:19 +0100 Subject: [PATCH 86/95] Add multiple Trakt account support to Config/Notifications/Social. Add setting to Trakt notification to update collection with downloaded episode info. Add Most Watched, Collected during the last month on Trakt. Change Add from Trakt/"Shows:" with Anticipated, Popular views. Change improve robustness of Trakt communications. Change Trakt notifier logo. Change pep8 and cleanup. 
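[The PIN authentication referred to above is Trakt's OAuth authorization-code flow: the user pastes a PIN from trakt.tv and it is exchanged for an access/refresh token pair per account. A minimal sketch of that exchange using requests, with placeholder client credentials and the current api.trakt.tv host (the 2015-era host differed); this is not the libtrakt code added by this patch:]

    import requests

    CLIENT_ID = 'your-client-id'          # hypothetical; registered at trakt.tv
    CLIENT_SECRET = 'your-client-secret'  # hypothetical

    def exchange_pin_for_tokens(pin):
        """Swap a user-entered Trakt PIN for OAuth tokens (illustrative)."""
        resp = requests.post(
            'https://api.trakt.tv/oauth/token',
            json={'code': pin,
                  'client_id': CLIENT_ID,
                  'client_secret': CLIENT_SECRET,
                  'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
                  'grant_type': 'authorization_code'},
            timeout=30)
        resp.raise_for_status()
        data = resp.json()
        # the access_token authenticates API calls for this account; the
        # refresh_token renews it once expires_in seconds have passed
        return data['access_token'], data['refresh_token'], data['expires_in']
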
--- CHANGES.md | 11 +- gui/slick/css/dark.css | 14 +- gui/slick/css/light.css | 14 +- gui/slick/css/style.css | 26 ++ gui/slick/images/notifiers/trakt.png | Bin 3589 -> 3662 bytes .../default/config_notifications.tmpl | 74 ++++- .../interfaces/default/home_browseShows.tmpl | 14 +- gui/slick/js/configNotifications.js | 166 ++++++++++- gui/slick/js/formwizard.js | 5 +- lib/libtrakt/__init__.py | 2 +- lib/libtrakt/exceptions.py | 6 +- lib/libtrakt/trakt.py | 276 +++++++++++++----- sickbeard/__init__.py | 36 +-- sickbeard/config.py | 32 +- sickbeard/notifiers/__init__.py | 5 +- sickbeard/notifiers/trakt.py | 131 ++++----- sickbeard/postProcessor.py | 2 +- sickbeard/show_queue.py | 14 +- sickbeard/trakt_helpers.py | 57 ++++ sickbeard/tv.py | 12 +- sickbeard/webserve.py | 134 +++++++-- 21 files changed, 762 insertions(+), 269 deletions(-) create mode 100644 sickbeard/trakt_helpers.py diff --git a/CHANGES.md b/CHANGES.md index 14271fc9..04de6adf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -64,17 +64,22 @@ * Change increase frequency of updating show data * Remove Animenzb provider * Change increase the scope and number of non release group text that is identified and removed -* Add a general config setting to allow adding incomplete show data +* Add general config setting to allow adding incomplete show data * Change to throttle connection rate on thread initiation for adba library * Change default manage episodes selector to Snatched episodes if items exist else Wanted on Episode Status Manage page * Change snatched row colour on Episode Status Manage page to match colour used on the show details page * Change replace trakt with libtrakt for API v2 -* Change Trakt notification config to only handle PIN authentication with the service +* Change improve robustness of Trakt communications +* Change Trakt notification config to use PIN authentication with the service +* Add multiple Trakt account support to Config/Notifications/Social +* Add setting to Trakt notification to update collection with downloaded episode info +* Change trakt notifier logo * Remove all other Trakt deprecated API V1 service features pending reconsideration * Change increase show search capability when using plain text and also add TVDB id, IMDb id and IMDb url search * Change improve existing show page and the handling when an attempt to add a show to an existing location * Change consolidate Trakt Trending and Recommended views into an "Add From Trakt" view which defaults to trending -* Change Trakt view drop down "Show" to reveal Brand-new Shows, Season Premieres, Recommendations and Trending views +* Change Add from Trakt/"Shows:" with Anticipated, New Seasons, New Shows, Popular, Recommendations, and Trending views +* Change Add from Trakt/"Shows:" with Most Watched, Collected during the last month on Trakt * Change add season info to "Show: Trakt New Seasons" view on the Add from Trakt page * Change increase number of displayed Trakt shows to 100 * Add genres and rating to all Trakt shows diff --git a/gui/slick/css/dark.css b/gui/slick/css/dark.css index 5b35a874..69da66e5 100644 --- a/gui/slick/css/dark.css +++ b/gui/slick/css/dark.css @@ -635,6 +635,14 @@ div.metadataDiv .disabled{ background:url("../images/warning16.png") no-repeat right 5px center #fff } +.solid-border{ + border:1px solid #555 +} + +.solid-border-top{ + border-top:1px solid #555 +} + /* ======================================================================= manage*.tmpl ========================================================================== */ 
@@ -1190,7 +1198,7 @@ browser.css #fileBrowserDialog ul li a:hover{ color:#09a2ff; - background:none + background: rgb(61, 61, 61) none } .ui-menu .ui-menu-item{ @@ -1224,6 +1232,10 @@ div.stepsguide .step p{ color:#646464 } +#newShowPortal #addShowForm .stepsguide .disabledstep:hover > .smalltext{ + color:#ccc; +} + div.stepsguide .disabledstep p{ border-color:#1178B3 } diff --git a/gui/slick/css/light.css b/gui/slick/css/light.css index 2b99452f..0b30169b 100644 --- a/gui/slick/css/light.css +++ b/gui/slick/css/light.css @@ -610,6 +610,14 @@ div.metadataDiv .disabled{ background:url("../images/warning16.png") no-repeat right 5px center #fff } +.solid-border{ + border:1px solid #ccc +} + +.solid-border-top{ + border-top:1px solid #ccc +} + /* ======================================================================= manage*.tmpl ========================================================================== */ @@ -1152,7 +1160,7 @@ browser.css #fileBrowserDialog ul li a:hover{ color:#00f; - background:none + background: rgb(220, 220, 220) none } .ui-menu .ui-menu-item{ @@ -1190,6 +1198,10 @@ div.stepsguide .disabledstep p{ border-color:#8a775e } +#newShowPortal #addShowForm .stepsguide .disabledstep:hover > .smalltext{ + color:#8a775e; +} + div.formpaginate .prev, div.formpaginate .next{ color:#fff; background:#57442b diff --git a/gui/slick/css/style.css b/gui/slick/css/style.css index ee1b3765..9e591042 100644 --- a/gui/slick/css/style.css +++ b/gui/slick/css/style.css @@ -2559,6 +2559,32 @@ div.metadataDiv .disabled{ margin:6px 4px 0 0 } +#trakt-collection th,#trakt-collection td{ + padding:3px 5px +} + +#trakt-collection .col-1{ + text-align:left +} + +#trakt-collection th,#trakt-collection td.opt{ + text-align:center +} + +#trakt-collection .col-1{ + width:192px +} + +#config #trakt-collection input{ + float:none; + margin:0; + vertical-align:middle +} + +#config .trakt.component-desc{ + margin-left:0 +} + /* ======================================================================= manage*.tmpl ========================================================================== */ diff --git a/gui/slick/images/notifiers/trakt.png b/gui/slick/images/notifiers/trakt.png index a767eea4b8b24f951714fa497119c3f2460fe0a6..e91f57f1fd64d5a54d87e17db6277c5fd03b3503 100644 GIT binary patch delta 960 zcmV;x13&zQ9L^jeiBL{Q4GJ0x0000DNk~Le0000O0000O2nGNE0N{5$_^~0M3V(S? zL_t(|+LV@Eh?I2{#@{ZYGR!g|qyO%{^USm3t|PT-lSTw3iKaTt(@^8qykk=sJjJy*0 z9^_}^d>r}ZW}vCWE6Cp>|Ac%e@`cDkz8?8uh~0F!ljJ@+JS#H%7kQ`|pn`k?c|G#k$a|&`t;aSx+&2Ukz6BroHj3_;J&!8G8ynB@)+`& z$Zxg~-w|c0lDv@Q4J21c{v{yCX!i`{*N{I(J}pC@jr=R}?Z~|?#COu+Zjy&Xyh?J3 zzYk7p?OqWI794Ee5TwSV)F549*undJLPUP|&KB%9=Y4dOMD|EAq@Ba2-b@{7^O zg{I=UGm_Vn+)wgak_#m7pF+GIcOds6f1e?Lh1`$4VhZu?$lb^z8S+F-=s<(`bI4`n2a%T`4+UiFG~zXqKOlDr`G+jL<$osRb(6$Lkh_pK zAYX>ODJFN2+}$K^n+9-@+?~irGvx1(?+wUFx(8qMwP`8Vb`2AK3 z%cvp07nux30}PY9LYpt4;A`Wa-bTA#9tW%A2@jnDq$@IBjDOq~nV&#@s@cL>6|K4v zc_#AR(Zjo5_MQ(gLV zV>ZTPKW{`{MWNVBp}34fab*Ax#5&!Fy!8KkRn()1ypCM&Q0)9>th@a4SapihVSRm_SS&`lT&7Sc zkVqtc1KtPT4t@yaOw+V8nas=OvbH*`4s5*^~%qGwk+#RR4SEcsZ{Fg!NEaH)5I|D z*ilMh+cy3E{VXgjkV>UcN`KL8Hrd$+TCUUGDF#LdmkkOWrCEZLl~Qy%9e--I+7G}NfQ(4X z@b#>GRm6Qx19T4%ycJcW8HBk zU(>;H(=-u61T<-bHx&N^{}tFL-FrxKD45&T4Jjn -#if $varExists('header') +#if $varExists('header')
 $header
-#else
+#else
 $title
 #end if
[The HTML markup of these config_notifications.tmpl hunks was stripped during extraction; only Cheetah directives and visible captions survive. Recoverable: a "Kodi" section heading near @@ -155,7 +156,7 @@, a one-line change at @@ -275,7 +276,7 @@, and the new Trakt collection table added at @@ -1481,16 +1482,77 @@, whose surviving template logic is kept below.]
+#set num_accounts = len($trakt_accounts)
+#set $num_columns = (1, num_accounts)[1 < num_accounts]
+#if not len($trakt_accounts)
+#end if
+#for $void, $account in $trakt_accounts.items()
+#end for
+#if not $root_dirs:
+    #set $root_dirs = [{'root_def': False, 'loc': 'all folders. Multiple parent folders will appear here.', 'b64': ''}]
+#end if
+#for $root_info in $root_dirs:
+    #if not len($trakt_accounts)
+    #end if
+    #for $void, $account in $trakt_accounts.items()
+        #set $cur_selected = ('', ' checked="checked"')[$root_info['loc'] in $sickbeard.TRAKT_UPDATE_COLLECTION.get($account.account_id, '')]
+        #set $id_loc = "update_trakt_%s_%s" % ($account.account_id, $root_info['b64'])
+    #end for
+#end for
[Captions recovered from the stripped table markup: heading "Update multiple accounts with downloaded episode info"; a header cell reading #echo not len($trakt_accounts) and 'Connect New Pin' or 1 < len($trakt_accounts) and 'Trakt accounts' or 'Account'#; per-account column headers "..$account.name" with an "(inactive)" marker; "Update collection.."; and per-row labels "for $root_info['loc']", where "*" flags the default root dir.]
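[In the recovered table logic above, each checkbox id is built as "update_trakt_%s_%s" % (account_id, b64): the parent folder path is base64-encoded so it can travel inside a DOM id and form key. The helpers this patch adds in sickbeard/trakt_helpers.py are not visible in these hunks, so the names below are illustrative; a sketch of the round trip, assuming TRAKT_UPDATE_COLLECTION maps an account id to its selected folder locations:]

    import base64

    def encode_loc(path):
        # urlsafe so '/' and '+' cannot leak into ids or query strings;
        # real code may also strip the trailing '=' padding
        return base64.urlsafe_b64encode(path.encode('utf-8')).decode('ascii')

    def decode_loc(b64):
        return base64.urlsafe_b64decode(b64.encode('ascii')).decode('utf-8')

    def checkbox_id(account_id, path):
        # mirrors the template's "update_trakt_%s_%s" % (account_id, b64)
        return 'update_trakt_%s_%s' % (account_id, encode_loc(path))

    # checkbox_id(1234, '/mnt/tv') -> 'update_trakt_1234_L21udC90dg=='
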
    -#include $os.path.join($sickbeard.PROG_DIR, 'gui/slick/interfaces/default/inc_bottom.tmpl') \ No newline at end of file +#include $os.path.join($sickbeard.PROG_DIR, 'gui/slick/interfaces/default/inc_bottom.tmpl') diff --git a/gui/slick/interfaces/default/home_browseShows.tmpl b/gui/slick/interfaces/default/home_browseShows.tmpl index 1a6c7cb3..2c565f9f 100644 --- a/gui/slick/interfaces/default/home_browseShows.tmpl +++ b/gui/slick/interfaces/default/home_browseShows.tmpl @@ -128,10 +128,18 @@ #if 'Trakt' == $browse_type #set $mode = $kwargs and $kwargs.get('mode', None) #set $selected = ' class="selected"' - - + - + + + + + + #for $account in $sickbeard.TRAKT_ACCOUNTS + #if $sickbeard.TRAKT_ACCOUNTS[$account].active and $sickbeard.TRAKT_ACCOUNTS[$account].name + + #end if + #end for #end if diff --git a/gui/slick/js/configNotifications.js b/gui/slick/js/configNotifications.js index 1f47dcbd..9bfde006 100644 --- a/gui/slick/js/configNotifications.js +++ b/gui/slick/js/configNotifications.js @@ -1,4 +1,4 @@ -$(document).ready(function(){ +$(document).ready(function(){ var loading = ''; $('#testGrowl').click(function () { @@ -353,28 +353,162 @@ $(document).ready(function(){ }); var elTraktAuth = $('#trakt-authenticate'), elTraktAuthResult = $('#trakt-authentication-result'); - elTraktAuth.click(function() { - var elTrakt = $('#trakt_pin'), traktPin = $.trim(elTrakt.val()); - if(!traktPin) { - elTrakt.addClass('warning'); + + function trakt_send_auth(){ + var elAccountSelect = $('#trakt_accounts'), strCurAccountId = elAccountSelect.find('option:selected').val(), + elTraktPin = $('#trakt_pin'), strPin = $.trim(elTraktPin.val()); + + elTraktAuthResult.html(loading); + + $.get(sbRoot + '/home/trakt_authenticate', {'pin': strPin, 'account': strCurAccountId}) + .done(function(data) { + elTraktAuth.prop('disabled', !1); + elTraktPin.val(''); + + var JSONData = $.parseJSON(data); + + elTraktAuthResult.html('Success' == JSONData.result + ? JSONData.result + ' account: ' + JSONData.account_name + : JSONData.result + ' ' + JSONData.error_message); + + if ('Success' == JSONData.result) { + var elUpdateRows = $('#trakt-collection').find('tr'); + if ('new' == strCurAccountId) { + elAccountSelect.append($('