SickGear/lib/hachoir/metadata/csv.py

84 lines
2.7 KiB
Python
Raw Permalink Normal View History

from hachoir.parser import createParser
from hachoir.core.tools import makePrintable
from hachoir.metadata import extractMetadata
from hachoir.core.i18n import initLocale
from sys import argv, stderr, exit
from os import walk
from os.path import join as path_join
from fnmatch import fnmatch
import codecs
OUTPUT_FILENAME = "metadata.csv"
class Extractor:
def __init__(self, directory, fields):
self.directory = directory
self.fields = fields
self.charset = "UTF-8"
self.total = 0
self.invalid = 0
def main(self):
output = codecs.open(OUTPUT_FILENAME, "w", self.charset)
for filename in self.findFiles(self.directory, '*.doc'):
self.total += 1
line = self.processFile(filename)
if line:
print(line, file=output)
else:
self.invalid += 1
output.close()
self.summary()
def summary(self):
print(file=stderr)
print("Valid files: %s" % (self.total - self.invalid), file=stderr)
print("Invalid files: %s" % self.invalid, file=stderr)
print("Total files: %s" % self.total, file=stderr)
print(file=stderr)
print("Result written into %s" % OUTPUT_FILENAME, file=stderr)
def findFiles(self, directory, pattern):
for dirpath, dirnames, filenames in walk(directory):
for filename in filenames:
if not fnmatch(filename.lower(), pattern):
continue
yield path_join(dirpath, filename)
def processFile(self, filename):
print("[%s] Process file %s..." % (self.total, filename))
parser = createParser(filename)
if not parser:
print("Unable to parse file", file=stderr)
return None
try:
metadata = extractMetadata(parser)
except Exception as err:
print("Metadata extraction error: %s" % str(err), file=stderr)
return None
if not metadata:
print("Unable to extract metadata", file=stderr)
return None
filename = makePrintable(filename, self.charset)
line = [filename]
for field in self.fields:
value = metadata.getText(field, '')
value = makePrintable(value, self.charset)
line.append(value)
return '; '.join(line)
def main():
initLocale()
if len(argv) != 3:
print("usage: %s directory fields" % argv[0], file=stderr)
print(file=stderr)
print("eg. %s . title,creation_date" % argv[0], file=stderr)
exit(1)
directory = argv[1]
fields = [field.strip() for field in argv[2].split(",")]
Extractor(directory, fields).main()