mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-07 10:33:38 +00:00
Remove unused libraries fuzzywuzzy and pysrt
This commit is contained in:
parent
89990d1085
commit
c4908144e1
16 changed files with 1 additions and 1460 deletions
|
@ -1,6 +1,7 @@
|
|||
### 0.x.x (2015-xx-xx xx:xx:xx UTC)
|
||||
|
||||
* Add requirements file for pip (port from midgetspy/sick-beard)
|
||||
* Remove unused libraries fuzzywuzzy and pysrt
|
||||
|
||||
[develop changelog]
|
||||
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
"""
|
||||
StringMatcher.py
|
||||
|
||||
ported from python-Levenshtein
|
||||
[https://github.com/miohtama/python-Levenshtein]
|
||||
"""
|
||||
|
||||
from Levenshtein import *
|
||||
from warnings import warn
|
||||
|
||||
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    Results of the Levenshtein helpers (``ratio``, ``distance``, ``opcodes``,
    ``editops``, ``matching_blocks``) are cached per pair of strings and the
    cache is dropped whenever either string is replaced.
    """

    def _reset_cache(self):
        """Forget every cached result."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Store the two strings to compare.

        ``isjunk`` is accepted for signature compatibility only; a truthy
        value triggers a warning and is otherwise ignored.
        """
        if isjunk:
            warn("isjunk not NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both strings and invalidate the cache."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first string and invalidate the cache."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second string and invalidate the cache."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the (cached) opcodes, derived from cached editops if any."""
        # ``is None`` rather than truthiness: an empty result is still a
        # valid cached result and must not be recomputed on every call.
        if self._opcodes is None:
            if self._editops:
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the (cached) editops, derived from cached opcodes if any."""
        if self._editops is None:
            if self._opcodes:
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the (cached) matching blocks computed from the opcodes."""
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (cached) Levenshtein ratio of the two strings."""
        # A ratio of 0.0 is falsy, so the cache must be tested against None.
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Return the same value as ratio(); it shares the same cache."""
        # This is usually quick enough :o)
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def real_quick_ratio(self):
        """Return a ratio computed from the two string lengths only."""
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if not total:
            # Two empty strings: avoid ZeroDivisionError and report a perfect
            # match, as difflib.SequenceMatcher does for empty sequences.
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (cached) Levenshtein distance of the two strings."""
        # A distance of 0 is falsy, so the cache must be tested against None.
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
|
|
@ -1,263 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
"""
|
||||
fuzz.py
|
||||
|
||||
Copyright (c) 2011 Adam Cohen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
from StringMatcher import StringMatcher as SequenceMatcher
|
||||
except:
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from . import utils
|
||||
|
||||
|
||||
###########################
|
||||
# Basic Scoring Functions #
|
||||
###########################
|
||||
|
||||
|
||||
def ratio(s1, s2):
    """Score the similarity of two strings on a 0-100 scale.

    Raises TypeError when either argument is None. Returns 0 when either
    string is empty; otherwise returns the SequenceMatcher ratio multiplied
    by 100 and rounded with utils.intr.
    """
    for label, value in (("s1", s1), ("s2", s2)):
        if value is None:
            raise TypeError("%s is None" % label)

    s1, s2 = utils.make_type_consistent(s1, s2)
    if not (len(s1) and len(s2)):
        return 0

    matcher = SequenceMatcher(None, s1, s2)
    similarity = matcher.ratio()
    return utils.intr(100 * similarity)
|
||||
|
||||
|
||||
# todo: skip duplicate indexes for a little more speed
|
||||
def partial_ratio(s1, s2):
    """Score the best alignment of the shorter string inside the longer one.

    Raises TypeError when either argument is None and returns 0 when either
    string is empty. Otherwise the shorter string is compared against
    same-length windows of the longer string, one window per matching block,
    and the best ratio is returned on a 0-100 scale.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")
    s1, s2 = utils.make_type_consistent(s1, s2)
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter, longer = s1, s2
    else:
        shorter, longer = s2, s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
    #   block = (1,3,3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align the window so the block's offset in `shorter` lines up with
        # its offset in `longer`, clamped to the start of `longer`.
        long_start = max(block[1] - block[0], 0)
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        r = SequenceMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            return 100
        scores.append(r)

    # Round like ratio() does instead of truncating, so both scorers agree
    # on how a fractional score becomes an integer.
    return utils.intr(100 * max(scores))
|
||||
|
||||
|
||||
##############################
|
||||
# Advanced Scoring Functions #
|
||||
##############################
|
||||
|
||||
# Sorted Token
|
||||
# find all alphanumeric tokens in the string
|
||||
# sort those tokens and take ratio of resulting joined strings
|
||||
# controls for unordered string elements
|
||||
def _token_sort(s1, s2, partial=True, force_ascii=True):
|
||||
|
||||
if s1 is None:
|
||||
raise TypeError("s1 is None")
|
||||
if s2 is None:
|
||||
raise TypeError("s2 is None")
|
||||
|
||||
# pull tokens
|
||||
tokens1 = utils.full_process(s1, force_ascii=force_ascii).split()
|
||||
tokens2 = utils.full_process(s2, force_ascii=force_ascii).split()
|
||||
|
||||
# sort tokens and join
|
||||
sorted1 = " ".join(sorted(tokens1))
|
||||
sorted2 = " ".join(sorted(tokens2))
|
||||
|
||||
sorted1 = sorted1.strip()
|
||||
sorted2 = sorted2.strip()
|
||||
|
||||
if partial:
|
||||
return partial_ratio(sorted1, sorted2)
|
||||
else:
|
||||
return ratio(sorted1, sorted2)
|
||||
|
||||
|
||||
def token_sort_ratio(s1, s2, force_ascii=True):
    """Return the sorted-token score of s1 and s2 using the full ratio."""
    use_partial = False
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||
|
||||
|
||||
def partial_token_sort_ratio(s1, s2, force_ascii=True):
    """Return the sorted-token score of s1 and s2 using the partial ratio."""
    use_partial = True
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||
|
||||
|
||||
# Token Set
|
||||
# find all alphanumeric tokens in each string...treat them as a set
|
||||
# construct two strings of the form
|
||||
# <sorted_intersection><sorted_remainder>
|
||||
# take ratios of those two strings
|
||||
# controls for unordered partial matches
|
||||
def _token_set(s1, s2, partial=True, force_ascii=True):
|
||||
|
||||
if s1 is None:
|
||||
raise TypeError("s1 is None")
|
||||
if s2 is None:
|
||||
raise TypeError("s2 is None")
|
||||
|
||||
p1 = utils.full_process(s1, force_ascii=force_ascii)
|
||||
p2 = utils.full_process(s2, force_ascii=force_ascii)
|
||||
|
||||
if not utils.validate_string(p1):
|
||||
return 0
|
||||
if not utils.validate_string(p2):
|
||||
return 0
|
||||
|
||||
# pull tokens
|
||||
tokens1 = set(utils.full_process(p1).split())
|
||||
tokens2 = set(utils.full_process(p2).split())
|
||||
|
||||
intersection = tokens1.intersection(tokens2)
|
||||
diff1to2 = tokens1.difference(tokens2)
|
||||
diff2to1 = tokens2.difference(tokens1)
|
||||
|
||||
sorted_sect = " ".join(sorted(intersection))
|
||||
sorted_1to2 = " ".join(sorted(diff1to2))
|
||||
sorted_2to1 = " ".join(sorted(diff2to1))
|
||||
|
||||
combined_1to2 = sorted_sect + " " + sorted_1to2
|
||||
combined_2to1 = sorted_sect + " " + sorted_2to1
|
||||
|
||||
# strip
|
||||
sorted_sect = sorted_sect.strip()
|
||||
combined_1to2 = combined_1to2.strip()
|
||||
combined_2to1 = combined_2to1.strip()
|
||||
|
||||
pairwise = [
|
||||
ratio(sorted_sect, combined_1to2),
|
||||
ratio(sorted_sect, combined_2to1),
|
||||
ratio(combined_1to2, combined_2to1)
|
||||
]
|
||||
return max(pairwise)
|
||||
|
||||
|
||||
def token_set_ratio(s1, s2, force_ascii=True):
    """Return the token-set score of s1 and s2, requesting the full ratio."""
    use_partial = False
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||
|
||||
|
||||
def partial_token_set_ratio(s1, s2, force_ascii=True):
    """Return the token-set score of s1 and s2, requesting the partial ratio."""
    use_partial = True
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||
|
||||
|
||||
# TODO: numerics
|
||||
|
||||
###################
|
||||
# Combination API #
|
||||
###################
|
||||
|
||||
# q is for quick
|
||||
def QRatio(s1, s2, force_ascii=True):
    """Quick ratio: process both strings, then score them with ratio().

    Returns 0 when either processed string fails utils.validate_string.
    """
    left = utils.full_process(s1, force_ascii=force_ascii)
    right = utils.full_process(s2, force_ascii=force_ascii)

    if utils.validate_string(left) and utils.validate_string(right):
        return ratio(left, right)
    return 0
|
||||
|
||||
|
||||
def UQRatio(s1, s2):
    """Return QRatio of s1 and s2 without forcing the inputs to ASCII."""
    keep_unicode = False
    return QRatio(s1, s2, force_ascii=keep_unicode)
|
||||
|
||||
|
||||
# w is for weighted
|
||||
def WRatio(s1, s2, force_ascii=True):
    """Weighted ratio: combine several scorers into a single 0-100 score.

    Both strings are processed with utils.full_process; 0 is returned when
    either processed string is empty. The plain ratio is always computed.
    When the longer string is at least 1.5 times the shorter one, the partial
    scorers are used and scaled down (more strongly above a factor of 8);
    otherwise the token sort and token set ratios are used. The best of the
    scaled scores is returned.
    """
    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    if not utils.validate_string(p1):
        return 0
    if not utils.validate_string(p2):
        return 0

    # should we look at partials?
    try_partial = True
    unbase_scale = .95
    partial_scale = .90

    base = ratio(p1, p2)
    # Both strings are non-empty here, so the divisor cannot be zero.
    len_ratio = float(max(len(p1), len(p2))) / min(len(p1), len(p2))

    # if strings are similar length, don't use partials
    if len_ratio < 1.5:
        try_partial = False

    # if one string is much much shorter than the other
    if len_ratio > 8:
        partial_scale = .6

    if try_partial:
        partial = partial_ratio(p1, p2) * partial_scale
        ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \
            * unbase_scale * partial_scale
        ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \
            * unbase_scale * partial_scale

        # Round (as ratio() does) rather than truncate the scaled score.
        return utils.intr(max(base, partial, ptsor, ptser))
    else:
        tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
        tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale

        return utils.intr(max(base, tsor, tser))
|
||||
|
||||
|
||||
def UWRatio(s1, s2):
    """Return WRatio of s1 and s2 without forcing the inputs to ASCII."""
    keep_unicode = False
    return WRatio(s1, s2, force_ascii=keep_unicode)
|
|
@ -1,119 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
"""
|
||||
process.py
|
||||
|
||||
Copyright (c) 2011 Adam Cohen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
import itertools
|
||||
|
||||
from . import fuzz
|
||||
from . import utils
|
||||
|
||||
|
||||
def extract(query, choices, processor=None, scorer=None, limit=5):
    """Find best matches in a list of choices, return a list of tuples
       containing the match and its score.

    Arguments:
        query       -- an object representing the thing we want to find
        choices     -- a list of objects we are attempting to extract
                       values from
        scorer      -- f(QUERY, OBJ) --> INT, called with the query and the
                       processed choice. By default we use fuzz.WRatio(),
                       in which case both arguments should be strings
        processor   -- f(OBJ_A) --> OBJ_B, where the output is an input
                       to scorer. For example, "processor = lambda x:
                       x[0]" would return the first element in a
                       collection x (of, say, strings). By default we
                       use utils.full_process()
        limit       -- maximum number of (choice, score) tuples returned

    Returns the tuples sorted by descending score; choices with equal scores
    keep their original relative order. Returns [] when choices is None or
    empty.
    """
    if choices is None or len(choices) == 0:
        return []

    # default, turn whatever the choice is into a workable string
    if processor is None:
        processor = utils.full_process

    # default: wratio
    if scorer is None:
        scorer = fuzz.WRatio

    scored = [(choice, scorer(query, processor(choice))) for choice in choices]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]
|
||||
|
||||
|
||||
def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
    """Return the leading matches from extract() that beat score_cutoff.

    Convenience wrapper around extract(); see it for the meaning of
    query, choices, processor, scorer and limit.

    Optional parameter: score_cutoff.
        Walking the results from best to worst, collection stops at the
        first choice whose score is less than or equal to score_cutoff.
    """
    candidates = extract(query, choices, processor, scorer, limit)
    if not candidates:
        return []

    def beats_cutoff(pair):
        return pair[1] > score_cutoff

    return list(itertools.takewhile(beats_cutoff, candidates))
|
||||
|
||||
|
||||
def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
    """Return the single best (choice, score) tuple, or None.

    Convenience wrapper around extract() with limit=1; see it for the
    meaning of query, choices, processor and scorer.

    Optional parameter: score_cutoff.
        If the best choice has a score of less than or equal to
        score_cutoff, None is returned (intuition: not a good enough
        match). None is also returned when there are no choices.
    """
    top = extract(query, choices, processor, scorer, limit=1)
    if top and top[0][1] > score_cutoff:
        return top[0]
    return None
|
|
@ -1,41 +0,0 @@
|
|||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
|
||||
class StringProcessor(object):
    """
    This class defines method to process strings in the most
    efficient way. Ideally all the methods below use unicode strings
    for both input and output.
    """

    # Compiled once at class creation instead of on every call.
    _NON_WORD = re.compile(r"(?ui)\W")

    @classmethod
    def replace_non_letters_non_numbers_with_whitespace(cls, a_string):
        """
        This function replaces every character matched by the regex
        class ``\\W`` with one space. Runs are not collapsed: N consecutive
        such characters become N spaces. Underscores are kept because
        ``\\W`` does not match them.
        """
        return cls._NON_WORD.sub(" ", a_string)

    @classmethod
    def strip(cls, a_string):
        """
        This function strips leading and trailing white space.
        """
        return a_string.strip()

    @classmethod
    def to_lower_case(cls, a_string):
        """
        This function returns the lower-cased version of the string given.
        """
        return a_string.lower()

    @classmethod
    def to_upper_case(cls, a_string):
        """
        This function returns the upper-cased version of the string given.
        """
        return a_string.upper()
|
|
@ -1,76 +0,0 @@
|
|||
from __future__ import unicode_literals
|
||||
import sys
|
||||
|
||||
from fuzzywuzzy.string_processing import StringProcessor
|
||||
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
|
||||
def validate_string(s):
    """Return True when ``s`` has a length greater than zero.

    Anything whose length cannot be taken (for example None or a number)
    is reported as invalid instead of raising.
    """
    try:
        return len(s) > 0
    except Exception:
        # Best-effort check: any failure to measure `s` means "not valid".
        # Unlike a bare `except:`, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
|
||||
|
||||
# ascii dammit!
# String made of chr(128) .. chr(255); str('') keeps it a native `str`
# even though this module enables unicode_literals.
bad_chars = str('').join(chr(code) for code in range(128, 256))

if PY3:
    # Maps each bad character's ordinal to None for use with s.translate().
    translation_table = dict.fromkeys(ord(char) for char in bad_chars)
|
||||
|
||||
|
||||
def asciionly(s):
    """Return ``s`` with every character listed in bad_chars removed."""
    if not PY3:
        # Python 2 signature: translate(table, deletechars).
        return s.translate(None, bad_chars)
    return s.translate(translation_table)
|
||||
|
||||
|
||||
def asciidammit(s):
    """Coerce ``s`` to text and strip the characters listed in bad_chars.

    A native ``str`` goes straight to asciionly(). On Python 2 a ``unicode``
    value is first encoded to ASCII, dropping characters that cannot be
    encoded. Any other object is converted to text and processed again.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the function
    # does not raise NameError on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if type(s) is str:
        return asciionly(s)
    elif type(s) is text_type:
        return asciionly(s.encode('ascii', 'ignore'))
    else:
        return asciidammit(text_type(s))
|
||||
|
||||
|
||||
def make_type_consistent(s1, s2):
    """Return both values with a common text type.

    The inputs are returned unchanged when both are ``str`` or, on Python 2,
    both are ``unicode``. Otherwise both are converted to the text type
    (``unicode`` on Python 2, ``str`` on Python 3).
    """
    # `unicode` only exists on Python 2; fall back to `str` so mixed or
    # non-string input does not raise NameError on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if isinstance(s1, str) and isinstance(s2, str):
        return s1, s2

    elif isinstance(s1, text_type) and isinstance(s2, text_type):
        return s1, s2

    else:
        return text_type(s1), text_type(s2)
|
||||
|
||||
|
||||
def full_process(s, force_ascii=False):
    """Normalize a string for comparison.

    Steps, in order:
        -- if force_ascii is true, convert with asciidammit()
        -- replace non-word characters with spaces
        -- force to lower case
        -- trim leading and trailing whitespace

    Returns "" when ``s`` is None.
    """
    if s is None:
        return ""

    text = asciidammit(s) if force_ascii else s

    # Keep only letters and numbers (see Unicode docs).
    spaced = StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
    lowered = StringProcessor.to_lower_case(spaced)
    return StringProcessor.strip(lowered)
|
||||
|
||||
|
||||
def intr(n):
    """Round ``n`` with the builtin round() and return the result as an int."""
    rounded = round(n)
    return int(rounded)
|
|
@ -1,18 +0,0 @@
|
|||
from pysrt.srttime import SubRipTime
|
||||
from pysrt.srtitem import SubRipItem
|
||||
from pysrt.srtfile import SubRipFile
|
||||
from pysrt.srtexc import Error, InvalidItem, InvalidTimeString
|
||||
from pysrt.version import VERSION, VERSION_STRING
|
||||
|
||||
# Names exported by ``from pysrt import *``. Every entry must exist in this
# module: the previous list named SUPPORT_UTF_32_LE / SUPPORT_UTF_32_BE,
# which are not defined here and made the star-import fail, and it listed
# 'SubRipFile' twice.
__all__ = [
    'SubRipFile', 'SubRipItem', 'SubRipTime',
    'InvalidItem', 'InvalidTimeString'
]

# Module-level shortcuts to the SubRipFile error-handling constants.
ERROR_PASS = SubRipFile.ERROR_PASS
ERROR_LOG = SubRipFile.ERROR_LOG
ERROR_RAISE = SubRipFile.ERROR_RAISE

# Module-level shortcuts to the SubRipFile constructors. `open` deliberately
# shadows the builtin inside this package namespace (used as pysrt.open).
open = SubRipFile.open
stream = SubRipFile.stream
from_string = SubRipFile.from_string
|
|
@ -1,218 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable-all
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
import shutil
|
||||
import argparse
|
||||
from textwrap import dedent
|
||||
|
||||
from chardet import detect
|
||||
from pysrt import SubRipFile, SubRipTime, VERSION_STRING
|
||||
|
||||
def underline(string):
    """Wrap ``string`` in the escape sequences ``\\033[4m`` and ``\\033[0m``."""
    template = "\033[4m%s\033[0m"
    return template % string
|
||||
|
||||
|
||||
class TimeAwareArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that accepts time offsets starting with a dash.

    A value such as ``-1s500ms`` would normally be taken for an option.
    Before parsing, ``--`` is inserted in front of the first argument that
    looks like a time representation so that argparse treats it, and
    everything after it, as positional.
    """

    RE_TIME_REPRESENTATION = re.compile(r'^\-?(\d+[hms]{0,2}){1,4}$')

    def parse_args(self, args=None, namespace=None):
        """Parse ``args`` (default: sys.argv[1:]) after protecting the first
        time-like argument with ``--``.

        The caller's list is never modified.
        """
        # Honour argparse's documented default and work on a private copy:
        # the original crashed on args=None and mutated the caller's list.
        args = list(sys.argv[1:] if args is None else args)

        for index, arg in enumerate(args):
            if self.RE_TIME_REPRESENTATION.match(arg):
                args.insert(index, '--')
                break

        return super(TimeAwareArgumentParser, self).parse_args(args, namespace)
|
||||
|
||||
|
||||
class SubRipShifter(object):
    """Command-line driver offering shift, rate, split and break commands
    on an srt subtitle file.

    run() parses the arguments and dispatches to the method registered for
    the chosen sub-command. The input file is opened lazily through the
    ``input_file`` property; output goes to stdout, or to the original path
    when editing in place (the input is then read from a ``.bak`` copy).
    """

    BACKUP_EXTENSION = '.bak'
    RE_TIME_STRING = re.compile(r'(\d+)([hms]{0,2})')
    # Multiplier turning a value with the given unit suffix into the ordinal
    # unit used by SubRipTime (an empty suffix is treated like seconds).
    UNIT_RATIOS = {
        'ms': 1,
        '': SubRipTime.SECONDS_RATIO,
        's': SubRipTime.SECONDS_RATIO,
        'm': SubRipTime.MINUTES_RATIO,
        'h': SubRipTime.HOURS_RATIO,
    }
    DESCRIPTION = dedent("""\
        Srt subtitle editor

        It can either shift, split or change the frame rate.
    """)
    TIMESTAMP_HELP = "A timestamp in the form: [-][Hh][Mm]S[s][MSms]"
    SHIFT_EPILOG = dedent("""\

        Examples:
            1 minute and 12 seconds foreward (in place):
                $ srt -i shift 1m12s movie.srt

            half a second foreward:
                $ srt shift 500ms movie.srt > othername.srt

            1 second and half backward:
                $ srt -i shift -1s500ms movie.srt

            3 seconds backward:
                $ srt -i shift -3 movie.srt
    """)
    RATE_EPILOG = dedent("""\

        Examples:
            Convert 23.9fps subtitles to 25fps:
                $ srt -i rate 23.9 25 movie.srt
    """)
    LIMITS_HELP = "Each parts duration in the form: [Hh][Mm]S[s][MSms]"
    SPLIT_EPILOG = dedent("""\

        Examples:
            For a movie in 2 parts with the first part 48 minutes and 18 seconds long:
                $ srt split 48m18s movie.srt
                => creates movie.1.srt and movie.2.srt

            For a movie in 3 parts of 20 minutes each:
                $ srt split 20m 20m movie.srt
                => creates movie.1.srt, movie.2.srt and movie.3.srt
    """)
    FRAME_RATE_HELP = "A frame rate in fps (commonly 23.9 or 25)"
    ENCODING_HELP = dedent("""\
        Change file encoding. Useful for players accepting only latin1 subtitles.
        List of supported encodings: http://docs.python.org/library/codecs.html#standard-encodings
    """)
    BREAK_EPILOG = dedent("""\
        Break lines longer than defined length
    """)
    LENGTH_HELP = "Maximum number of characters per line"

    def __init__(self):
        # Set by create_backup() when editing in place; None means stdout.
        self.output_file_path = None

    def build_parser(self):
        """Build and return the argument parser with its four sub-commands."""
        parser = TimeAwareArgumentParser(description=self.DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter)
        parser.add_argument('-i', '--in-place', action='store_true', dest='in_place',
            help="Edit file in-place, saving a backup as file.bak (do not works for the split command)")
        parser.add_argument('-e', '--output-encoding', metavar=underline('encoding'), action='store', dest='output_encoding',
            type=self.parse_encoding, help=self.ENCODING_HELP)
        parser.add_argument('-v', '--version', action='version', version='%%(prog)s %s' % VERSION_STRING)
        subparsers = parser.add_subparsers(title='commands')

        shift_parser = subparsers.add_parser('shift', help="Shift subtitles by specified time offset", epilog=self.SHIFT_EPILOG, formatter_class=argparse.RawTextHelpFormatter)
        shift_parser.add_argument('time_offset', action='store', metavar=underline('offset'),
            type=self.parse_time, help=self.TIMESTAMP_HELP)
        shift_parser.set_defaults(action=self.shift)

        rate_parser = subparsers.add_parser('rate', help="Convert subtitles from a frame rate to another", epilog=self.RATE_EPILOG, formatter_class=argparse.RawTextHelpFormatter)
        rate_parser.add_argument('initial', action='store', type=float, help=self.FRAME_RATE_HELP)
        rate_parser.add_argument('final', action='store', type=float, help=self.FRAME_RATE_HELP)
        rate_parser.set_defaults(action=self.rate)

        split_parser = subparsers.add_parser('split', help="Split a file in multiple parts", epilog=self.SPLIT_EPILOG, formatter_class=argparse.RawTextHelpFormatter)
        split_parser.add_argument('limits', action='store', nargs='+', type=self.parse_time, help=self.LIMITS_HELP)
        split_parser.set_defaults(action=self.split)

        break_parser = subparsers.add_parser('break', help="Break long lines", epilog=self.BREAK_EPILOG, formatter_class=argparse.RawTextHelpFormatter)
        break_parser.add_argument('length', action='store', type=int, help=self.LENGTH_HELP)
        break_parser.set_defaults(action=self.break_lines)

        parser.add_argument('file', action='store')

        return parser

    def run(self, args):
        """Parse ``args`` and execute the selected sub-command."""
        self.arguments = self.build_parser().parse_args(args)
        if self.arguments.in_place:
            self.create_backup()
        self.arguments.action()

    def parse_time(self, time_string):
        """Convert a string such as ``-1m12s`` into a signed ordinal.

        Each ``<digits><unit>`` group found by RE_TIME_STRING is multiplied
        by the matching UNIT_RATIOS entry and the products are summed.
        """
        negative = time_string.startswith('-')
        if negative:
            time_string = time_string[1:]
        ordinal = sum(int(value) * self.UNIT_RATIOS[unit] for value, unit
                        in self.RE_TIME_STRING.findall(time_string))
        return -ordinal if negative else ordinal

    def parse_encoding(self, encoding_name):
        """Validate an encoding name for argparse and return it unchanged.

        Raises argparse.ArgumentTypeError when codecs does not know it.
        """
        try:
            codecs.lookup(encoding_name)
        except LookupError as error:
            # str(error) instead of error.message: the `message` attribute
            # does not exist on Python 3 exceptions.
            raise argparse.ArgumentTypeError(str(error))
        return encoding_name

    def shift(self):
        """Shift every subtitle by the requested offset and write the result."""
        self.input_file.shift(milliseconds=self.arguments.time_offset)
        self.input_file.write_into(self.output_file)

    def rate(self):
        """Rescale timings by final/initial frame rate and write the result."""
        ratio = self.arguments.final / self.arguments.initial
        self.input_file.shift(ratio=ratio)
        self.input_file.write_into(self.output_file)

    def split(self):
        """Write one numbered file per part delimited by the given limits."""
        # Consecutive limits are paired as (start, end); the final bound is
        # one past the end of the last subtitle.
        limits = [0] + self.arguments.limits + [self.input_file[-1].end.ordinal + 1]
        base_name, extension = os.path.splitext(self.arguments.file)
        for index, (start, end) in enumerate(zip(limits[:-1], limits[1:])):
            file_name = '%s.%s%s' % (base_name, index + 1, extension)
            part_file = self.input_file.slice(ends_after=start, starts_before=end)
            part_file.shift(milliseconds=-start)
            part_file.clean_indexes()
            part_file.save(path=file_name, encoding=self.output_encoding)

    def create_backup(self):
        """Copy the input to ``<file>.bak`` (unless that copy already exists),
        then read from the backup and write to the original path."""
        backup_file = self.arguments.file + self.BACKUP_EXTENSION
        if not os.path.exists(backup_file):
            shutil.copy2(self.arguments.file, backup_file)
        self.output_file_path = self.arguments.file
        self.arguments.file = backup_file

    def break_lines(self):
        """Re-wrap each subtitle text to the requested maximum line length."""
        split_re = re.compile(r'(.{,%i})(?:\s+|$)' % self.arguments.length)
        for item in self.input_file:
            # With one capture group, re.split() returns the captured chunks
            # at the odd indexes.
            item.text = '\n'.join(split_re.split(item.text)[1::2])
        self.input_file.write_into(self.output_file)

    @property
    def output_encoding(self):
        """Encoding requested with -e, else the input file's encoding."""
        return self.arguments.output_encoding or self.input_file.encoding

    @property
    def input_file(self):
        """The parsed input file, opened on first access and then cached."""
        if not hasattr(self, '_source_file'):
            with open(self.arguments.file, 'rb') as f:
                content = f.read()
                encoding = detect(content).get('encoding')
                # NOTE(review): if detection yields None, normalize_encoding
                # raises AttributeError; confirm how that case should be
                # handled.
                encoding = self.normalize_encoding(encoding)

            self._source_file = SubRipFile.open(self.arguments.file,
                encoding=encoding, error_handling=SubRipFile.ERROR_LOG)
        return self._source_file

    @property
    def output_file(self):
        """The output stream, created on first access and then cached."""
        if not hasattr(self, '_output_file'):
            if self.output_file_path:
                self._output_file = codecs.open(self.output_file_path, 'w+', encoding=self.output_encoding)
            else:
                self._output_file = sys.stdout
        return self._output_file

    def normalize_encoding(self, encoding):
        """Lower-case an encoding name and replace dashes with underscores."""
        return encoding.lower().replace('-', '_')
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: run a SubRipShifter on sys.argv[1:]."""
    shifter = SubRipShifter()
    cli_args = sys.argv[1:]
    shifter.run(cli_args)


# Only run the command when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|
@ -1,26 +0,0 @@
|
|||
class ComparableMixin(object):
    """Mixin providing the six rich comparison methods.

    Each comparison is applied to the values returned by ``_cmpkey()`` on
    both operands; the class using the mixin must supply ``_cmpkey()``.
    """

    def _compare(self, other, method):
        """Apply ``method`` to both comparison keys.

        Returns NotImplemented when a key is unavailable or the keys cannot
        be compared (AttributeError or TypeError).
        """
        try:
            mine = self._cmpkey()
            theirs = other._cmpkey()
            return method(mine, theirs)
        except (AttributeError, TypeError):
            # _cmpkey not implemented, or return different type,
            # so I can't compare with "other".
            return NotImplemented

    def __lt__(self, other):
        return self._compare(other, lambda left, right: left < right)

    def __le__(self, other):
        return self._compare(other, lambda left, right: left <= right)

    def __eq__(self, other):
        return self._compare(other, lambda left, right: left == right)

    def __ge__(self, other):
        return self._compare(other, lambda left, right: left >= right)

    def __gt__(self, other):
        return self._compare(other, lambda left, right: left > right)

    def __ne__(self, other):
        return self._compare(other, lambda left, right: left != right)
|
|
@ -1,24 +0,0 @@
|
|||
|
||||
import sys
|
||||
|
||||
# Syntax sugar.
|
||||
# Syntax sugar.
_ver = sys.version_info

#: True when the interpreter's major version is 2.
is_py2 = _ver[0] == 2

#: True when the interpreter's major version is 3.
is_py3 = _ver[0] == 3

from io import open as io_open

# Expose one set of names (builtin_str, basestring, str, open) on both
# major versions. The self-assignments in the Python 3 branch are needed so
# that the names exist as attributes of this module and can be imported
# from it.
if is_py2:
    builtin_str = str
    basestring = basestring
    str = unicode
    open = io_open
elif is_py3:
    builtin_str = str
    basestring = (str, bytes)
    str = str
    open = open
|
|
@ -1,31 +0,0 @@
|
|||
"""
|
||||
Exception classes
|
||||
"""
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for the exceptions defined by pysrt."""
|
||||
|
||||
|
||||
class InvalidTimeString(Error):
    """Raised when the parser fails on a badly formatted time string."""
|
||||
|
||||
|
||||
class InvalidItem(Error):
    """Raised when the parser fails to parse a subtitle item."""
|
||||
|
||||
|
||||
class InvalidIndex(InvalidItem):
    """Raised when the parser fails to parse a subtitle index."""
|
|
@ -1,312 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
|
||||
try:
|
||||
from collections import UserList
|
||||
except ImportError:
|
||||
from UserList import UserList
|
||||
|
||||
from itertools import chain
|
||||
from copy import copy
|
||||
|
||||
from pysrt.srtexc import Error
|
||||
from pysrt.srtitem import SubRipItem
|
||||
from pysrt.compat import str
|
||||
|
||||
# (byte-order mark, codec name) pairs; the 4-byte UTF-32 marks are listed
# before the UTF-16 marks.
BOMS = (
    (codecs.BOM_UTF32_LE, 'utf_32_le'),
    (codecs.BOM_UTF32_BE, 'utf_32_be'),
    (codecs.BOM_UTF16_LE, 'utf_16_le'),
    (codecs.BOM_UTF16_BE, 'utf_16_be'),
    (codecs.BOM_UTF8, 'utf_8'),
)
# Codec name -> its byte-order mark decoded with that codec.
CODECS_BOMS = dict((name, str(mark, name)) for mark, name in BOMS)
# Length in bytes of the longest byte-order mark.
BIGGER_BOM = max(len(mark) for mark, _name in BOMS)
|
||||
|
||||
|
||||
class SubRipFile(UserList, object):
    """
    SubRip file descriptor.

    Provide a pure Python mapping on all metadata.

    SubRipFile(items, eol, path, encoding)

    items -> list of SubRipItem. Default to [].
    eol -> str: end of line character. Default to linesep used in opened file
        if any else to os.linesep.
    path -> str: path where file will be saved. To open an existing file see
        SubRipFile.open.
    encoding -> str: encoding used at file save. Default to utf-8.
    """
    # Accepted values for the `error_handling` argument of the parsers.
    ERROR_PASS = 0
    ERROR_LOG = 1
    ERROR_RAISE = 2

    # Encoding assumed by `_detect_encoding` when no BOM is found.
    DEFAULT_ENCODING = 'utf_8'

    def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
        UserList.__init__(self, items or [])
        self._eol = eol
        self.path = path
        self.encoding = encoding

    def _get_eol(self):
        """Return the stored end of line, falling back to os.linesep."""
        return self._eol or os.linesep

    def _set_eol(self, eol):
        """Store `eol` only when no end of line has been set yet."""
        self._eol = self._eol or eol

    eol = property(_get_eol, _set_eol)

    def slice(self, starts_before=None, starts_after=None, ends_before=None,
              ends_after=None):
        """
        slice([starts_before][, starts_after][, ends_before][, ends_after])
        -> SubRipFile clone

        All arguments are optional, and should be coercible to SubRipTime
        object.

        It reduces the set of subtitles to those that match the given time
        constraints.

        The returned set is a clone, but still contains references to original
        subtitles. So if you shift this returned set, subs contained in the
        original SubRipFile instance will be altered too.

        Example:
            >>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
        """
        clone = copy(self)

        # Each filter wraps the previous one lazily; the chain is only
        # materialized once, by the final list() call.
        if starts_before:
            clone.data = (i for i in clone.data if i.start < starts_before)
        if starts_after:
            clone.data = (i for i in clone.data if i.start > starts_after)
        if ends_before:
            clone.data = (i for i in clone.data if i.end < ends_before)
        if ends_after:
            clone.data = (i for i in clone.data if i.end > ends_after)

        clone.data = list(clone.data)
        return clone

    def at(self, timestamp=None, **kwargs):
        """
        at(timestamp) -> SubRipFile clone

        timestamp argument should be coercible to SubRipTime object.

        A specialization of slice. Return all subtitles visible at the
        timestamp mark.

        Example:
            >>> subs.at((0, 0, 20, 0)).shift(seconds=2)
            >>> subs.at(seconds=20).shift(seconds=2)
        """
        time = timestamp or kwargs
        return self.slice(starts_before=time, ends_after=time)

    def shift(self, *args, **kwargs):
        """shift(hours, minutes, seconds, milliseconds, ratio)

        Shift `start` and `end` attributes of each item of the file either by
        applying a ratio or by adding an offset.

        `ratio` should be either an int or a float.
        Example to convert subtitles from 23.9 fps to 25 fps:
            >>> subs.shift(ratio=25/23.9)

        All "time" arguments are optional and have a default value of 0.
        Example to delay all subs by two and a half seconds:
            >>> subs.shift(seconds=2, milliseconds=500)
        """
        for item in self:
            item.shift(*args, **kwargs)

    def clean_indexes(self):
        """
        clean_indexes()

        Sort subs and reset their index attribute. Should be called after
        destructive operations like split or such.
        """
        self.sort()
        for index, item in enumerate(self):
            item.index = index + 1

    @property
    def text(self):
        """Text of every item, joined with newlines."""
        return '\n'.join(i.text for i in self)

    @classmethod
    def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
        """
        open([path, [encoding]])

        If you do not provide any encoding, it can be detected if the file
        contains a byte order mark, otherwise it defaults to utf-8.
        """
        # Resolve the encoding first so the new instance remembers it; leaving
        # it as None made a later save() run without any encoding at all.
        encoding = encoding or cls._detect_encoding(path)
        new_file = cls(path=path, encoding=encoding)
        source_file = cls._open_unicode_file(path, claimed_encoding=encoding)
        try:
            new_file.read(source_file, error_handling=error_handling)
        finally:
            # Close the handle even when parsing raises (ERROR_RAISE mode).
            source_file.close()
        return new_file

    @classmethod
    def from_string(cls, source, **kwargs):
        """
        from_string(source, **kwargs) -> SubRipFile

        `source` -> a unicode instance or at least a str instance encoded with
        `sys.getdefaultencoding()`

        An optional `error_handling` keyword is forwarded to `read`; every
        other keyword goes to the constructor.
        """
        error_handling = kwargs.pop('error_handling', cls.ERROR_PASS)
        new_file = cls(**kwargs)
        new_file.read(source.splitlines(True), error_handling=error_handling)
        return new_file

    def read(self, source_file, error_handling=ERROR_PASS):
        """
        read(source_file, [error_handling])

        This method parses subtitles contained in `source_file` and appends
        them to the current instance.

        `source_file` -> Any iterable that yields unicode strings, like a file
        opened with `codecs.open()` or an array of unicode.
        """
        self.eol = self._guess_eol(source_file)
        self.extend(self.stream(source_file, error_handling=error_handling))
        return self

    @classmethod
    def stream(cls, source_file, error_handling=ERROR_PASS):
        """
        stream(source_file, [error_handling])

        This method yields SubRipItem instances as soon as they have been
        parsed without storing them. It is a kind of SAX parser for .srt
        files.

        `source_file` -> Any iterable that yields unicode strings, like a file
        opened with `codecs.open()` or an array of unicode.

        Example:
            >>> import pysrt
            >>> import codecs
            >>> file = codecs.open('movie.srt', encoding='utf-8')
            >>> for sub in pysrt.stream(file):
            ...     sub.text += "\\nHello !"
            ...     print(sub)
        """
        string_buffer = []
        # The extra '\n' guarantees that the last item gets flushed even when
        # the source does not end with a blank line.
        for index, line in enumerate(chain(source_file, '\n')):
            if line.strip():
                string_buffer.append(line)
            else:
                source = string_buffer
                string_buffer = []
                if source and all(source):
                    try:
                        yield SubRipItem.from_lines(source)
                    except Error as error:
                        # Attach the offending text so handlers can report it.
                        error.args += (''.join(source), )
                        cls._handle_error(error, error_handling, index)

    def save(self, path=None, encoding=None, eol=None):
        """
        save([path][, encoding][, eol])

        Use initial path if no other provided.
        Use initial encoding if no other provided.
        Use initial eol if no other provided.
        """
        path = path or self.path
        encoding = encoding or self.encoding

        save_file = codecs.open(path, 'w+', encoding=encoding)
        try:
            self.write_into(save_file, eol=eol)
        finally:
            # Do not leak the handle when serialization fails midway.
            save_file.close()

    def write_into(self, output_file, eol=None):
        """
        write_into(output_file [, eol])

        Serialize current state into `output_file`.

        `output_file` -> Any instance that responds to `write()`, typically a
        file object
        """
        output_eol = eol or self.eol

        for item in self:
            string_repr = str(item)
            if output_eol != '\n':
                string_repr = string_repr.replace('\n', output_eol)
            output_file.write(string_repr)
            # Only add trailing eol if it's not already present.
            # It was kept in the SubRipItem's text before but it really
            # belongs here. Existing applications might give us subtitles
            # which already contain a trailing eol though.
            if not string_repr.endswith(2 * output_eol):
                output_file.write(output_eol)

    @classmethod
    def _guess_eol(cls, string_iterable):
        """Return the line ending of the first line, else os.linesep."""
        first_line = cls._get_first_line(string_iterable)
        # '\r\n' must be tested before '\n', which it also ends with.
        for eol in ('\r\n', '\r', '\n'):
            if first_line.endswith(eol):
                return eol
        return os.linesep

    @classmethod
    def _get_first_line(cls, string_iterable):
        """
        Return the first line of `string_iterable` ('' when it is empty).

        The read position is restored afterwards when the object supports
        both `tell` and `seek`.
        """
        # Require both methods: restoring a position needs one that was
        # recorded first (seek without tell used to raise NameError).
        can_rewind = (hasattr(string_iterable, 'tell')
                      and hasattr(string_iterable, 'seek'))
        if can_rewind:
            previous_position = string_iterable.tell()

        try:
            first_line = next(iter(string_iterable))
        except StopIteration:
            return ''
        if can_rewind:
            string_iterable.seek(previous_position)

        return first_line

    @classmethod
    def _detect_encoding(cls, path):
        """Guess the encoding of `path` from its byte order mark, if any."""
        file_descriptor = open(path, 'rb')
        try:
            first_chars = file_descriptor.read(BIGGER_BOM)
        finally:
            file_descriptor.close()

        for bom, encoding in BOMS:
            if first_chars.startswith(bom):
                return encoding

        # TODO: maybe a chardet integration
        return cls.DEFAULT_ENCODING

    @classmethod
    def _open_unicode_file(cls, path, claimed_encoding=None):
        """
        Open `path` for decoded reading, positioned after the BOM if any.

        Uses `claimed_encoding` when given, else the detected encoding.
        """
        encoding = claimed_encoding or cls._detect_encoding(path)
        # Plain 'r': the 'U' flag is rejected by Python 3.11+, and codecs.open
        # reads the underlying file in binary mode anyway.
        source_file = codecs.open(path, 'r', encoding=encoding)

        try:
            # get rid of BOM if any
            possible_bom = CODECS_BOMS.get(encoding, None)
            if possible_bom:
                file_bom = source_file.read(len(possible_bom))
                if not file_bom == possible_bom:
                    source_file.seek(0)  # if not rewind
        except Exception:
            # The caller never receives the handle, so release it here.
            source_file.close()
            raise
        return source_file

    @classmethod
    def _handle_error(cls, error, error_handling, index):
        """
        Apply the `error_handling` policy to a parsing `error`.

        ERROR_RAISE -> re-raise with `index` prepended to the error args.
        ERROR_LOG -> report the error on sys.stderr.
        Any other value -> ignore the error.
        """
        if error_handling == cls.ERROR_RAISE:
            error.args = (index, ) + error.args
            raise error
        if error_handling == cls.ERROR_LOG:
            name = type(error).__name__
            sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
            message = error.args[0] if error.args else ''
            # Degrade to ASCII but hand text, not bytes, to the stream:
            # writing bytes to sys.stderr raises TypeError on Python 3.
            printable = message.encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(printable)
            sys.stderr.write('\n')
|
|
@ -1,76 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
SubRip's subtitle parser
|
||||
"""
|
||||
from pysrt.srtexc import InvalidItem, InvalidIndex
|
||||
from pysrt.srttime import SubRipTime
|
||||
from pysrt.comparablemixin import ComparableMixin
|
||||
from pysrt.compat import str
|
||||
|
||||
class SubRipItem(ComparableMixin):
    """
    SubRipItem(index, start, end, text, position)

    index -> int: index of item in file. 0 by default.
    start, end -> SubRipTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw srt/vtt "display coordinates" string
    """
    # Fields, in order: index, start, end, optional position, text.
    ITEM_PATTERN = '%s\n%s --> %s%s\n%s\n'
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        # An integer index is preferred but not mandatory: anything that
        # cannot be converted is stored untouched.
        try:
            numeric_index = int(index)
        except (TypeError, ValueError):
            numeric_index = index
        self.index = numeric_index

        self.start = SubRipTime.coerce(start or 0)
        self.end = SubRipTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)

    def __str__(self):
        # The position is only rendered when it holds something visible.
        if self.position.strip():
            suffix = ' %s' % self.position
        else:
            suffix = ''
        fields = (self.index, self.start, self.end, suffix, self.text)
        return self.ITEM_PATTERN % fields

    def _cmpkey(self):
        """Comparison key: the (start, end) pair."""
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Apply the same shift to both the start and the end attributes.
        All arguments are optional and have a default value of 0.
        """
        self.start.shift(*args, **kwargs)
        self.end.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from a single string holding all of its lines."""
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        """
        Build an item from its lines: an optional index line, the timestamp
        line, then the text.

        Raise InvalidItem when fewer than two lines are given.
        """
        if len(lines) < 2:
            raise InvalidItem()
        stripped = [line.rstrip() for line in lines]

        # The index line is optional: when the first line already carries the
        # timestamps, there is no index to consume.
        if cls.TIMESTAMP_SEPARATOR in stripped[0]:
            index = None
        else:
            index = stripped.pop(0)

        start, end, position = cls.split_timestamps(stripped[0])
        text = '\n'.join(stripped[1:])
        return cls(index, start, end, text, position)

    @classmethod
    def split_timestamps(cls, line):
        """
        Split a timestamp line into stripped (start, end, position) strings.

        Raise InvalidItem unless the separator occurs exactly once.
        """
        halves = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(halves) != 2:
            raise InvalidItem()
        start, remainder = halves
        # Whatever follows the first space after the end time is the
        # position string.
        end, _, position = remainder.lstrip().partition(' ')
        return (part.strip() for part in (start, end, position))
|
|
@ -1,176 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
SubRip's time format parser: HH:MM:SS,mmm
|
||||
"""
|
||||
import re
|
||||
from datetime import time
|
||||
|
||||
from pysrt.srtexc import InvalidTimeString
|
||||
from pysrt.comparablemixin import ComparableMixin
|
||||
from pysrt.compat import str, basestring
|
||||
|
||||
class TimeItemDescriptor(object):
    """
    Descriptor exposing one component of an instance's `ordinal` counter.

    ratio -> size of one unit of the component, in ordinal units.
    super_ratio -> size of one unit of the next larger component; 0 means
        there is no larger component to wrap around.
    """
    # pylint: disable-msg=R0903
    def __init__(self, ratio, super_ratio=0):
        self.ratio, self.super_ratio = int(ratio), int(super_ratio)

    def _get_ordinal(self, instance):
        """Return the ordinal with the larger components stripped off."""
        total = instance.ordinal
        return total % self.super_ratio if self.super_ratio else total

    def __get__(self, instance, klass):
        # Class-level access is refused; only instances carry an ordinal.
        if instance is None:
            raise AttributeError
        return self._get_ordinal(instance) // self.ratio

    def __set__(self, instance, value):
        # Current contribution of this component to the ordinal.
        current = self._get_ordinal(instance) - instance.ordinal % self.ratio
        # Swap that contribution for the new value.
        instance.ordinal = instance.ordinal + (value * self.ratio - current)
|
||||
|
||||
|
||||
class SubRipTime(ComparableMixin):
    """Time value stored as a single `ordinal` count of milliseconds."""
    TIME_PATTERN = '%02d:%02d:%02d,%03d'
    TIME_REPR = 'SubRipTime(%d, %d, %d, %d)'
    RE_TIME_SEP = re.compile(r'\:|\.|\,')
    RE_INTEGER = re.compile(r'^(\d+)')
    # Number of milliseconds held by one unit of each component.
    SECONDS_RATIO = 1000
    MINUTES_RATIO = SECONDS_RATIO * 60
    HOURS_RATIO = MINUTES_RATIO * 60

    hours = TimeItemDescriptor(HOURS_RATIO)
    minutes = TimeItemDescriptor(MINUTES_RATIO, HOURS_RATIO)
    seconds = TimeItemDescriptor(SECONDS_RATIO, MINUTES_RATIO)
    milliseconds = TimeItemDescriptor(1, SECONDS_RATIO)

    def __init__(self, hours=0, minutes=0, seconds=0, milliseconds=0):
        """
        SubRipTime(hours, minutes, seconds, milliseconds)

        All arguments are optional and have a default value of 0.
        """
        super(SubRipTime, self).__init__()
        self.ordinal = (hours * self.HOURS_RATIO
                        + minutes * self.MINUTES_RATIO
                        + seconds * self.SECONDS_RATIO
                        + milliseconds)

    def __repr__(self):
        return self.TIME_REPR % tuple(self)

    def __str__(self):
        if self.ordinal >= 0:
            return self.TIME_PATTERN % tuple(self)
        # Represent negative times as zero
        return str(SubRipTime.from_ordinal(0))

    def _compare(self, other, method):
        # Coerce first so that strings, ints, dicts... can be compared too.
        coerced = self.coerce(other)
        return super(SubRipTime, self)._compare(coerced, method)

    def _cmpkey(self):
        """Comparison key: the millisecond count."""
        return self.ordinal

    def __add__(self, other):
        total = self.ordinal + self.coerce(other).ordinal
        return self.from_ordinal(total)

    def __iadd__(self, other):
        self.ordinal += self.coerce(other).ordinal
        return self

    def __sub__(self, other):
        difference = self.ordinal - self.coerce(other).ordinal
        return self.from_ordinal(difference)

    def __isub__(self, other):
        self.ordinal -= self.coerce(other).ordinal
        return self

    def __mul__(self, ratio):
        scaled = int(round(self.ordinal * ratio))
        return self.from_ordinal(scaled)

    def __imul__(self, ratio):
        self.ordinal = int(round(self.ordinal * ratio))
        return self

    @classmethod
    def coerce(cls, other):
        """
        Coerce many types to SubRipTime instance.
        Supported types:
          - str/unicode
          - int/long
          - datetime.time
          - any iterable
          - dict
        """
        if isinstance(other, SubRipTime):
            return other
        if isinstance(other, basestring):
            return cls.from_string(other)
        if isinstance(other, int):
            return cls.from_ordinal(other)
        if isinstance(other, time):
            return cls.from_time(other)
        # Last resort: try the value as keyword arguments, then as
        # positional ones.
        try:
            return cls(**other)
        except TypeError:
            return cls(*other)

    def __iter__(self):
        for component in (self.hours, self.minutes,
                          self.seconds, self.milliseconds):
            yield component

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds)

        All arguments are optional and have a default value of 0.
        A `ratio` keyword, when present, is applied before the offset.
        """
        if 'ratio' in kwargs:
            self *= kwargs.pop('ratio')
        offset = self.__class__(*args, **kwargs)
        self += offset

    @classmethod
    def from_ordinal(cls, ordinal):
        """
        int -> SubRipTime corresponding to a total count of milliseconds
        """
        return cls(milliseconds=int(ordinal))

    @classmethod
    def from_string(cls, source):
        """
        str/unicode(HH:MM:SS,mmm) -> SubRipTime corresponding to serial
        raise InvalidTimeString
        """
        fields = cls.RE_TIME_SEP.split(source)
        if len(fields) != 4:
            raise InvalidTimeString
        return cls(*[cls.parse_int(field) for field in fields])

    @classmethod
    def parse_int(cls, digits):
        """
        Convert `digits` to an int, falling back to its leading run of
        digits and finally to 0.
        """
        try:
            return int(digits)
        except ValueError:
            leading = cls.RE_INTEGER.match(digits)
            return int(leading.group()) if leading else 0

    @classmethod
    def from_time(cls, source):
        """
        datetime.time -> SubRipTime corresponding to time object
        """
        return cls(
            hours=source.hour,
            minutes=source.minute,
            seconds=source.second,
            milliseconds=source.microsecond // 1000,
        )

    def to_time(self):
        """
        Convert SubRipTime instance into a pure datetime.time object
        """
        microseconds = self.milliseconds * 1000
        return time(self.hours, self.minutes, self.seconds, microseconds)
|
|
@ -1,2 +0,0 @@
|
|||
# Release number as a tuple, and the same number rendered as a dotted string.
VERSION = (1, 0, 1)
VERSION_STRING = '.'.join(map(str, VERSION))
|
Loading…
Reference in a new issue