SickGear/lib/js2py/translators/jsregexps.py
JackDandy d97bb7174d Remove Torrentshack.
Change improve provider title processing.
Change improve handling erroneous JSON responses.
Change improve find show with unicode characters.
Change improve result for providers Omgwtf, SpeedCD, Transmithenet, Zoogle.
Change validate .torrent files that contain optional header data.
Fix case where an episode status was not restored on failure.
Add raise log error if no wanted qualities are found.
Change add un/pw to Config/Media providers/Options for BTN API graceful fallback (can remove Api key for security).
Change only download torrent once when using blackhole.
Add Cloudflare module 1.6.8 (be0a536) to mitigate CloudFlare (IUAM) access validator.
Add Js2Py 0.43 (c1442f1) Cloudflare dependency.
Add pyjsparser 2.4.5 (cd5b829) Js2Py dependency.
2017-02-17 04:48:49 +00:00

219 lines
No EOL
6.7 KiB
Python

from pyjsparser.pyjsparserdata import *
REGEXP_SPECIAL_SINGLE = {'\\', '^', '$', '*', '+', '?', '.'}
NOT_PATTERN_CHARS = {'^', '$', '\\', '.', '*', '+', '?', '(', ')', '[', ']', '|'} # what about '{', '}', ???
CHAR_CLASS_ESCAPE = {'d', 'D', 's', 'S', 'w', 'W'}
CONTROL_ESCAPE_CHARS = {'f', 'n', 'r', 't', 'v'}
CONTROL_LETTERS = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}
def SpecialChar(char):
return {'type': 'SpecialChar',
'content': char}
def isPatternCharacter(char):
return char not in NOT_PATTERN_CHARS
class JsRegExpParser:
def __init__(self, source, flags):
self.source = source
self.flags = flags
self.index = 0
self.length = len(source)
self.lineNumber = 0
self.lineStart = 0
def parsePattern(self):
'''Perform sctring escape - for regexp literals'''
return {'type': 'Pattern',
'contents': self.parseDisjunction()}
def parseDisjunction(self):
alternatives = []
while True:
alternatives.append(self.parseAlternative())
if not self.isEOF():
self.expect_character('|')
else:
break
return {'type': 'Disjunction',
'contents': alternatives}
def isEOF(self):
if self.index>=self.length:
return True
return False
def expect_character(self, character):
if self.source[self.index]!=character:
self.throwUnexpected(character)
self.index += 1
def parseAlternative(self):
contents = []
while not self.isEOF() and self.source[self.index]!='|':
contents.append(self.parseTerm())
return {'type': 'Alternative',
'contents': contents}
def follows(self, chars):
for i, c in enumerate(chars):
if self.index+i>=self.length or self.source[self.index+i] != c:
return False
return True
def parseTerm(self):
assertion = self.parseAssertion()
if assertion:
return assertion
else:
return {'type': 'Term',
'contents': self.parseAtom()} # quantifier will go inside atom!
def parseAssertion(self):
if self.follows('$'):
content = SpecialChar('$')
self.index += 1
elif self.follows('^'):
content = SpecialChar('^')
self.index += 1
elif self.follows('\\b'):
content = SpecialChar('\\b')
self.index += 2
elif self.follows('\\B'):
content = SpecialChar('\\B')
self.index += 2
elif self.follows('(?='):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = {'type': 'Lookached',
'contents': dis,
'negated': False}
elif self.follows('(?!'):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = {'type': 'Lookached',
'contents': dis,
'negated': True}
else:
return None
return {'type': 'Assertion',
'content': content}
def parseAtom(self):
if self.follows('.'):
content = SpecialChar('.')
self.index += 1
elif self.follows('\\'):
self.index += 1
content = self.parseAtomEscape()
elif self.follows('['):
content = self.parseCharacterClass()
elif self.follows('(?:'):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = 'idk'
elif self.follows('('):
self.index += 1
dis = self.parseDisjunction()
self.expect_character(')')
content = 'idk'
elif isPatternCharacter(self.source[self.index]):
content = self.source[self.index]
self.index += 1
else:
return None
quantifier = self.parseQuantifier()
return {'type': 'Atom',
'content': content,
'quantifier': quantifier}
def parseQuantifier(self):
prefix = self.parseQuantifierPrefix()
if not prefix:
return None
greedy = True
if self.follows('?'):
self.index += 1
greedy = False
return {'type': 'Quantifier',
'contents': prefix,
'greedy': greedy}
def parseQuantifierPrefix(self):
if self.isEOF():
return None
if self.follows('+'):
content = '+'
self.index += 1
elif self.follows('?'):
content = '?'
self.index += 1
elif self.follows('*'):
content = '*'
self.index += 1
elif self.follows('{'): # try matching otherwise return None and restore the state
i = self.index
self.index += 1
digs1 = self.scanDecimalDigs()
# if no minimal number of digs provided then return no quantifier
if not digs1:
self.index = i
return None
# scan char limit if provided
if self.follows(','):
self.index += 1
digs2 = self.scanDecimalDigs()
else:
digs2 = ''
# must be valid!
if not self.follows('}'):
self.index = i
return None
else:
self.expect_character('}')
content = int(digs1), int(digs2) if digs2 else None
else:
return None
return content
def parseAtomEscape(self):
ch = self.source[self.index]
if isDecimalDigit(ch) and ch!=0:
digs = self.scanDecimalDigs()
elif ch in CHAR_CLASS_ESCAPE:
self.index += 1
return SpecialChar('\\' + ch)
else:
return self.parseCharacterEscape()
def parseCharacterEscape(self):
ch = self.source[self.index]
if ch in CONTROL_ESCAPE_CHARS:
return SpecialChar('\\' + ch)
if ch=='c':
'ok, fuck this shit.'
def scanDecimalDigs(self):
s = self.index
while not self.isEOF() and isDecimalDigit(self.source[self.index]):
self.index += 1
return self.source[s:self.index]
a = JsRegExpParser('a(?=x)', '')
print(a.parsePattern())