SickGear/lib/hachoir/parser/parser_list.py
JackDandy 980e05cc99 Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo.
Backported 400 revisions from rev 1de4961-8897c5b (2018-2014).
Move core/benchmark, core/cmd_line, core/memory, core/profiler and core/timeout to core/optional/*
Remove metadata/qt*

PORT: Version 2.0a3 (inline with 3.0a3 @ f80c7d5).
Basic Support for XMP Packets.
tga: improvements to adhere more closely to the spec.
pdf: slightly improved parsing.
rar: fix TypeError on unknown block types.
Add MacRoman win32 codepage.
tiff/exif: support SubIFDs and tiled images.
Add method to export metadata in dictionary.
mpeg_video: don't attempt to parse Stream past length.
mpeg_video: parse ESCR correctly, add SCR value.
Change centralise CustomFragments.
field: don't set parser class if class is None, to enable autodetect.
field: add value/display for CustomFragment.
parser: inline warning to enable tracebacks in debug mode.
Fix empty bytestrings in makePrintable.
Fix contentSize in jpeg.py to account for image_data blocks.
Fix the ELF parser.
Enhance the AR archive parser.
elf parser: fix wrong wrong fields order in parsing little endian section flags.
elf parser: add s390 as a machine type.
Flesh out mp4 parser.

PORT: Version 2.0a1 (inline with 3.0a1).
Major refactoring and PEP8.
Fix ResourceWarning warnings on files. Add a close() method and support for the context manager protocol ("with obj: ...") to parsers, input and output streams.
metadata: get comment from ZIP.
Support for InputIOStream.read(0).
Fix sizeGe when size is None.
Remove unused new_seekable_field_set file.
Remove parser Mapsforge .map.
Remove parser Parallel Realities Starfighter .pak files.
sevenzip: fix for newer archives.
java: update access flags and modifiers for Java 1.7 and update description text for most recent Java.
Support ustar prefix field in tar archives.
Remove file_system* parsers.
Remove misc parsers 3d0, 3ds, gnome_keyring, msoffice*, mstask, ole*, word*.
Remove program parsers macho, nds, prc.
Support non-8bit Character subclasses.
Python parser supports Python 3.7.
Enhance mpeg_ts parser to support MTS/M2TS.
Support for creation date in tiff.
Change don't hardcode errno constant.

PORT: 1.9.1
Internal Only: The following are legacy reference to upstream commit messages.
Relevant changes up to b0a115f8.
Use integer division.
Replace HACHOIR_ERRORS with Exception.
Fix metadata.Data: make it sortable.
Import fixes from e7de492.
PORT: Version 2.0a1 (inline with 3.0a1 @ e9f8fad).
Replace hachoir.core.field with hachoir.field
Replace hachoir.core.stream with hachoir.stream
Remove the compatibility module for PY1.5 to PY2.5.
metadata: support TIFF picture.
metadata: fix string normalization.
metadata: fix datetime regex Fix hachoir bug #57.
FileFromInputStream: fix comparison between None and an int.
InputIOStream: open the file in binary mode.
2018-03-28 00:43:11 +01:00

221 lines
7.9 KiB
Python

from __future__ import print_function
import re
import types
from hachoir.core.error import error
from hachoir.core.i18n import _
from hachoir.parser import Parser, HachoirParser
import sys
# Parser list ################################################################
class ParserList(object):
VALID_CATEGORY = ("archive", "audio", "container",
"image", "misc", "program", "video")
ID_REGEX = re.compile("^[a-z0-9][a-z0-9_]{2,}$")
def __init__(self):
self.parser_list = []
self.bytag = {"id": {}, "category": {}}
def translate(self, name, value):
if name in ("magic",):
return True
elif name == "min_size":
return - value < 0 or "Invalid minimum size (min_size)"
elif name == "description":
return isinstance(value, (str, unicode)) and bool(value) or "Invalid description"
elif name == "category":
if value not in self.VALID_CATEGORY:
return "Invalid category: %r" % value
elif name == "id":
if type(value) is not str or not self.ID_REGEX.match(value):
return "Invalid identifier: %r" % value
parser = self.bytag[name].get(value)
if parser:
return "Duplicate parser id: %s already used by %s" % \
(value, parser[0].__name__)
# TODO: lists should be forbidden
if isinstance(value, list):
value = tuple(value)
elif not isinstance(value, tuple):
value = value,
return name, value
def validParser(self, parser, tags):
if "id" not in tags:
return "No identifier"
if "description" not in tags:
return "No description"
# TODO: Allow simple strings for file_ext/mime ?
# (see also HachoirParser.createFilenameSuffix)
file_ext = tags.get("file_ext", ())
if not isinstance(file_ext, (tuple, list)):
return "File extension is not a tuple or list"
mimes = tags.get("mime", ())
if not isinstance(mimes, tuple):
return "MIME type is not a tuple"
for mime in mimes:
if not isinstance(mime, unicode):
return "MIME type %r is not an unicode string" % mime
return ""
def add(self, parser):
tags = parser.getParserTags()
err = self.validParser(parser, tags)
if err:
error("Skip parser %s: %s" % (parser.__name__, err))
return
_tags = []
for tag in tags.iteritems():
tag = self.translate(*tag)
if isinstance(tag, tuple):
_tags.append(tag)
elif tag is not True:
error("[%s] %s" % (parser.__name__, tag))
return
self.parser_list.append(parser)
for name, values in _tags:
byname = self.bytag.setdefault(name, {})
for value in values:
byname.setdefault(value, []).append(parser)
def __iter__(self):
return iter(self.parser_list)
def print_(self, title=None, out=None, verbose=False, format="one-line"):
"""Display a list of parser with its title
* out: output file
* title : title of the list to display
* format: "rest", "trac", "file-ext", "mime" or "one_line" (default)
"""
if out is None:
out = sys.stdout
if format in ("file-ext", "mime"):
# Create file extension set
extensions = set()
for parser in self:
file_ext = parser.getParserTags().get(format, ())
file_ext = list(file_ext)
try:
file_ext.remove("")
except ValueError:
pass
extensions |= set(file_ext)
# Remove empty extension
extensions -= set(('',))
# Convert to list and sort by ASCII order
extensions = list(extensions)
extensions.sort()
# Print list
text = ", ".join(str(item) for item in extensions)
if format == "file-ext":
print("File extensions: %s." % text, file=out)
print(file=out)
print("Total: %s file extensions." % len(extensions), file=out)
else:
print("MIME types: %s." % text, file=out)
print(file=out)
print("Total: %s MIME types." % len(extensions), file=out)
return
if format == "trac":
print("== List of parsers ==", file=out)
print(file=out)
print("Total: %s parsers" % len(self.parser_list), file=out)
print(file=out)
elif format == "one_line":
if title:
print(title, file=out)
else:
print("List of Hachoir parsers.", file=out)
print(file=out)
# Create parser list sorted by module
bycategory = self.bytag["category"]
for category in sorted(bycategory.iterkeys()):
if format == "one_line":
parser_list = [parser.PARSER_TAGS["id"]
for parser in bycategory[category]]
parser_list.sort()
print("- %s: %s" %
(category.title(), ", ".join(parser_list)), file=out)
else:
if format == "rest":
print(category.replace("_", " ").title(), file=out)
print("-" * len(category), file=out)
print(file=out)
elif format == "trac":
print("=== %s ===" % category.replace(
"_", " ").title(), file=out)
print(file=out)
else:
print("[%s]" % category, file=out)
parser_list = sorted(bycategory[category],
key=lambda parser: parser.PARSER_TAGS["id"])
if format == "rest":
for parser in parser_list:
tags = parser.getParserTags()
print("* %s: %s" %
(tags["id"], tags["description"]), file=out)
elif format == "trac":
for parser in parser_list:
tags = parser.getParserTags()
desc = tags["description"]
desc = re.sub(r"([A-Z][a-z]+[A-Z][^ ]+)", r"!\1", desc)
print(" * %s: %s" % (tags["id"], desc), file=out)
else:
for parser in parser_list:
parser.print_(out, verbose)
print(file=out)
if format != "trac":
print("Total: %s parsers" % len(self.parser_list), file=out)
class HachoirParserList(ParserList):
_instance = None
@classmethod
def getInstance(cls):
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
ParserList.__init__(self)
self._load()
def _load(self):
"""
Load all parsers from "hachoir.parser" module.
Return the list of loaded parsers.
"""
# Parser list is already loaded?
if self.parser_list:
return self.parser_list
todo = []
module = __import__("hachoir.parser").parser
for attrname in dir(module):
attr = getattr(module, attrname)
if isinstance(attr, types.ModuleType):
todo.append(attr)
for module in todo:
for name in dir(module):
attr = getattr(module, name)
if isinstance(attr, type) \
and issubclass(attr, HachoirParser) \
and attr not in (Parser, HachoirParser):
self.add(attr)
assert 1 <= len(self.parser_list)
return self.parser_list