SickGear/lib/js2py/translators/jsregexps.py

from pyjsparser.pyjsparserdata import *

# Single characters with regexp-specific meaning.
# NOTE(review): not referenced anywhere in the visible part of this file.
REGEXP_SPECIAL_SINGLE = set('\\^$*+?.')

# Characters that isPatternCharacter() refuses to treat as a literal atom.
# NOTE(review): '{' and '}' are not listed; the original author left that as an open question.
NOT_PATTERN_CHARS = set('^$\\.*+?()[]|')

# Letters that form a character-class escape when they follow a backslash (\d, \D, ...).
CHAR_CLASS_ESCAPE = set('dDsSwW')
# Letters that form a control escape when they follow a backslash (\f, \n, ...).
CONTROL_ESCAPE_CHARS = set('fnrtv')
# ASCII letters, lower and upper case.
CONTROL_LETTERS = set('abcdefghijklmnopqrstuvwxyz'
                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

def SpecialChar(char):
    '''Build a ``SpecialChar`` node dict whose ``content`` is *char*.'''
    node = dict(type='SpecialChar', content=char)
    return node


def isPatternCharacter(char):
    '''Return True unless *char* is a member of NOT_PATTERN_CHARS.'''
    if char in NOT_PATTERN_CHARS:
        return False
    return True

class JsRegExpParser:
    '''Recursive-descent parser that turns a JavaScript regexp source into nested dicts.

    Every node is a plain dict with a ``'type'`` key.  The parser keeps a
    cursor (``self.index``) into ``self.source``; each ``parse*`` method
    consumes the text it recognises and returns the corresponding node.
    Malformed input raises ``SyntaxError`` (see ``throwUnexpected``).
    '''

    # ASCII digits accepted in quantifier bounds and decimal escapes.
    _DECIMAL_DIGITS = '0123456789'
    # Digits accepted in \xHH and \uHHHH escapes.
    _HEX_DIGITS = '0123456789abcdefABCDEF'

    def __init__(self, source, flags):
        '''Store the regexp *source* text and its *flags*; start at offset 0.'''
        self.source = source
        self.flags = flags
        self.index = 0
        self.length = len(source)
        self.lineNumber = 0
        self.lineStart = 0

    def throwUnexpected(self, expected=None):
        '''Raise ``SyntaxError`` describing the text at the current position.

        *expected*, when given, is the character the caller was looking for
        and is appended to the message.
        '''
        if self.isEOF():
            found = 'end of pattern'
        else:
            found = repr(self.source[self.index])
        message = 'Invalid regular expression: unexpected %s at index %d' % (found, self.index)
        if expected is not None:
            message += ' (expected %r)' % (expected,)
        raise SyntaxError(message)

    def parsePattern(self):
        '''Parse the whole source and return a ``Pattern`` node.

        Raises ``SyntaxError`` when the disjunction stops before the end of
        the source (which happens on an unmatched ``)``).
        '''
        disjunction = self.parseDisjunction()
        if not self.isEOF():
            self.throwUnexpected()
        return {'type': 'Pattern',
                'contents': disjunction}

    def parseDisjunction(self):
        '''Parse ``alternative ('|' alternative)*`` and return a ``Disjunction`` node.

        Stops, without consuming it, at anything that is not ``|`` so that a
        caller parsing a group can consume the closing ``)`` itself.
        '''
        alternatives = [self.parseAlternative()]
        while self.follows('|'):
            self.index += 1
            alternatives.append(self.parseAlternative())
        return {'type': 'Disjunction',
                'contents': alternatives}

    def isEOF(self):
        '''Return True once the cursor has reached the end of the source.'''
        return self.index >= self.length

    def expect_character(self, character):
        '''Consume *character* or raise ``SyntaxError`` if it is not next.'''
        # The end-of-input check keeps the lookup below from raising IndexError.
        if self.isEOF() or self.source[self.index] != character:
            self.throwUnexpected(character)
        self.index += 1

    def parseAlternative(self):
        '''Parse consecutive terms up to ``|``, ``)`` or the end of the source.'''
        contents = []
        # ')' must end the alternative too, otherwise a group body never terminates.
        while not self.isEOF() and self.source[self.index] not in '|)':
            contents.append(self.parseTerm())
        return {'type': 'Alternative',
                'contents': contents}

    def follows(self, chars):
        '''Return True when the source continues with *chars* at the cursor.'''
        return self.source[self.index:self.index + len(chars)] == chars

    def parseTerm(self):
        '''Parse one assertion, or one atom wrapped in a ``Term`` node.'''
        assertion = self.parseAssertion()
        if assertion is not None:
            return assertion
        return {'type': 'Term',
                'contents': self.parseAtom()}  # the quantifier is stored inside the atom

    def parseAssertion(self):
        '''Parse ``$``, ``^``, ``\\b``, ``\\B``, ``(?=...)`` or ``(?!...)``.

        Returns an ``Assertion`` node, or None (consuming nothing) when the
        text at the cursor is not an assertion.
        '''
        for token in ('$', '^', '\\b', '\\B'):
            if self.follows(token):
                self.index += len(token)
                return {'type': 'Assertion',
                        'content': SpecialChar(token)}
        for opener, negated in (('(?=', False), ('(?!', True)):
            if self.follows(opener):
                self.index += len(opener)
                dis = self.parseDisjunction()
                self.expect_character(')')
                # NOTE(review): 'Lookached' looks like a typo for 'Lookahead'; kept
                # unchanged because code outside this file may match on it.
                return {'type': 'Assertion',
                        'content': {'type': 'Lookached',
                                    'contents': dis,
                                    'negated': negated}}
        return None

    def parseAtom(self):
        '''Parse one atom plus its optional quantifier and return an ``Atom`` node.

        ``content`` is a ``SpecialChar`` node (``.`` and escapes), a
        ``CharacterClass`` node, a ``Group`` node, or the literal character.
        Raises ``SyntaxError`` when no atom can start at the cursor, so the
        caller's loop can never spin without consuming input.
        '''
        if self.isEOF():
            self.throwUnexpected()
        if self.follows('.'):
            content = SpecialChar('.')
            self.index += 1
        elif self.follows('\\'):
            self.index += 1
            content = self.parseAtomEscape()
        elif self.follows('['):
            content = self.parseCharacterClass()
        elif self.follows('(?:'):
            self.index += 3
            dis = self.parseDisjunction()
            self.expect_character(')')
            content = {'type': 'Group',
                       'contents': dis,
                       'capturing': False}
        elif self.follows('('):
            self.index += 1
            dis = self.parseDisjunction()
            self.expect_character(')')
            content = {'type': 'Group',
                       'contents': dis,
                       'capturing': True}
        elif isPatternCharacter(self.source[self.index]):
            content = self.source[self.index]
            self.index += 1
        else:
            # e.g. a quantifier with nothing to repeat, or a stray ']'
            self.throwUnexpected()
        quantifier = self.parseQuantifier()
        return {'type': 'Atom',
                'content': content,
                'quantifier': quantifier}

    def parseQuantifier(self):
        '''Parse an optional quantifier; return a ``Quantifier`` node or None.

        ``contents`` is whatever ``parseQuantifierPrefix`` returned and
        ``greedy`` is False when the prefix is followed by ``?``.
        '''
        prefix = self.parseQuantifierPrefix()
        if prefix is None:
            return None
        greedy = not self.follows('?')
        if not greedy:
            self.index += 1
        return {'type': 'Quantifier',
                'contents': prefix,
                'greedy': greedy}

    def parseQuantifierPrefix(self):
        '''Parse ``+``, ``?``, ``*`` or a ``{...}`` range.

        Returns the symbol as a string, or a ``(minimum, maximum)`` tuple for
        ranges: ``{n}`` gives ``(n, n)``, ``{n,}`` gives ``(n, None)`` and
        ``{n,m}`` gives ``(n, m)``.  Returns None, with the cursor restored,
        when no well-formed quantifier starts here.
        '''
        for symbol in ('+', '?', '*'):
            if self.follows(symbol):
                self.index += 1
                return symbol
        if not self.follows('{'):
            return None
        start = self.index  # restore point if the braces turn out not to be a range
        self.index += 1
        lower = self.scanDecimalDigs()
        upper = lower  # '{n}' repeats exactly n times
        if lower and self.follows(','):
            self.index += 1
            upper = self.scanDecimalDigs()  # empty for the open-ended '{n,}'
        if not lower or not self.follows('}'):
            self.index = start
            return None
        self.index += 1
        return int(lower), (int(upper) if upper else None)

    def parseAtomEscape(self):
        '''Parse what follows a backslash (already consumed) inside an atom.

        Returns a ``SpecialChar`` node whose content is the backslash plus the
        escape text.  Raises ``SyntaxError`` for a trailing backslash.
        '''
        if self.isEOF():
            self.throwUnexpected()
        ch = self.source[self.index]
        if ch in self._DECIMAL_DIGITS:
            # decimal escape: back-reference number, or \0
            return SpecialChar('\\' + self.scanDecimalDigs())
        if ch in CHAR_CLASS_ESCAPE:
            self.index += 1
            return SpecialChar('\\' + ch)
        return self.parseCharacterEscape()

    def parseCharacterEscape(self):
        '''Parse a character escape; the backslash itself is already consumed.

        Recognises ``cX`` (X an ASCII letter), ``xHH`` and ``uHHHH``; every
        other character, including the control escapes ``f n r t v``, is
        consumed alone.  Returns ``SpecialChar`` with the backslash re-attached.
        '''
        if self.isEOF():
            self.throwUnexpected()
        ch = self.source[self.index]
        if ch == 'c':
            letter = self.source[self.index + 1:self.index + 2]
            if letter in CONTROL_LETTERS:
                self.index += 2
                return SpecialChar('\\c' + letter)
        elif ch in ('x', 'u'):
            width = 2 if ch == 'x' else 4
            digits = self.source[self.index + 1:self.index + 1 + width]
            if len(digits) == width and all(d in self._HEX_DIGITS for d in digits):
                self.index += 1 + width
                return SpecialChar('\\' + ch + digits)
        # Control escape or identity escape: exactly one character is consumed.
        self.index += 1
        return SpecialChar('\\' + ch)

    def parseCharacterClass(self):
        '''Parse a bracketed class such as ``[a-z]`` or ``[^\\]x]``.

        Returns ``{'type': 'CharacterClass', 'negated': bool, 'contents': str}``
        where ``contents`` is the raw, uninterpreted text between the brackets
        (without a leading ``^``).  Raises ``SyntaxError`` if ``]`` is missing.
        '''
        self.expect_character('[')
        negated = self.follows('^')
        if negated:
            self.index += 1
        start = self.index
        while not self.follows(']'):
            if self.follows('\\'):
                # skip the backslash so an escaped ']' does not close the class
                self.index += 1
            if self.isEOF():
                self.throwUnexpected(']')
            self.index += 1
        body = self.source[start:self.index]
        self.index += 1  # closing ']'
        return {'type': 'CharacterClass',
                'negated': negated,
                'contents': body}

    def scanDecimalDigs(self):
        '''Consume a run of ASCII digits and return it (possibly empty).'''
        start = self.index
        while not self.isEOF() and self.source[self.index] in self._DECIMAL_DIGITS:
            self.index += 1
        return self.source[start:self.index]


if __name__ == '__main__':
    # Manual smoke test. Guarded so that importing this module neither runs
    # the parser nor prints to stdout.
    a = JsRegExpParser('a(?=x)', '')
    print(a.parsePattern())
Remove Torrentshack. Change improve provider title processing. Change improve handling erroneous JSON responses. Change improve find show with unicode characters. Change improve result for providers Omgwtf, SpeedCD, Transmithenet, Zoogle. Change validate .torrent files that contain optional header data. Fix case where an episode status was not restored on failure. Add raise log error if no wanted qualities are found. Change add un/pw to Config/Media providers/Options for BTN API graceful fallback (can remove Api key for security). Change only download torrent once when using blackhole. Add Cloudflare module 1.6.8 (be0a536) to mitigate CloudFlare (IUAM) access validator. Add Js2Py 0.43 (c1442f1) Cloudflare dependency. Add pyjsparser 2.4.5 (cd5b829) Js2Py dependency. 2017-02-17 03:16:51 +00:00			`from pyjsparser.pyjsparserdata import *`

			`REGEXP_SPECIAL_SINGLE = {'\\', '^', '$', '*', '+', '?', '.'}`

			`NOT_PATTERN_CHARS = {'^', '$', '\\', '.', '*', '+', '?', '(', ')', '[', ']', '\|'} # what about '{', '}', ???`

			`CHAR_CLASS_ESCAPE = {'d', 'D', 's', 'S', 'w', 'W'}`
			`CONTROL_ESCAPE_CHARS = {'f', 'n', 'r', 't', 'v'}`
			`CONTROL_LETTERS = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',`
			`'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',`
			`'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}`

			`def SpecialChar(char):`
			`return {'type': 'SpecialChar',`
			`'content': char}`


			`def isPatternCharacter(char):`
			`return char not in NOT_PATTERN_CHARS`

			`class JsRegExpParser:`
			`def __init__(self, source, flags):`
			`self.source = source`
			`self.flags = flags`
			`self.index = 0`
			`self.length = len(source)`
			`self.lineNumber = 0`
			`self.lineStart = 0`


			`def parsePattern(self):`
			`'''Perform sctring escape - for regexp literals'''`
			`return {'type': 'Pattern',`
			`'contents': self.parseDisjunction()}`

			`def parseDisjunction(self):`
			`alternatives = []`
			`while True:`
			`alternatives.append(self.parseAlternative())`
			`if not self.isEOF():`
			`self.expect_character('\|')`
			`else:`
			`break`
			`return {'type': 'Disjunction',`
			`'contents': alternatives}`

			`def isEOF(self):`
			`if self.index>=self.length:`
			`return True`
			`return False`

			`def expect_character(self, character):`
			`if self.source[self.index]!=character:`
			`self.throwUnexpected(character)`
			`self.index += 1`

			`def parseAlternative(self):`
			`contents = []`
			`while not self.isEOF() and self.source[self.index]!='\|':`
			`contents.append(self.parseTerm())`
			`return {'type': 'Alternative',`
			`'contents': contents}`

			`def follows(self, chars):`
			`for i, c in enumerate(chars):`
			`if self.index+i>=self.length or self.source[self.index+i] != c:`
			`return False`
			`return True`

			`def parseTerm(self):`
			`assertion = self.parseAssertion()`
			`if assertion:`
			`return assertion`
			`else:`
			`return {'type': 'Term',`
			`'contents': self.parseAtom()} # quantifier will go inside atom!`


			`def parseAssertion(self):`
			`if self.follows('$'):`
			`content = SpecialChar('$')`
			`self.index += 1`
			`elif self.follows('^'):`
			`content = SpecialChar('^')`
			`self.index += 1`
			`elif self.follows('\\b'):`
			`content = SpecialChar('\\b')`
			`self.index += 2`
			`elif self.follows('\\B'):`
			`content = SpecialChar('\\B')`
			`self.index += 2`
			`elif self.follows('(?='):`
			`self.index += 3`
			`dis = self.parseDisjunction()`
			`self.expect_character(')')`
			`content = {'type': 'Lookached',`
			`'contents': dis,`
			`'negated': False}`
			`elif self.follows('(?!'):`
			`self.index += 3`
			`dis = self.parseDisjunction()`
			`self.expect_character(')')`
			`content = {'type': 'Lookached',`
			`'contents': dis,`
			`'negated': True}`
			`else:`
			`return None`
			`return {'type': 'Assertion',`
			`'content': content}`

			`def parseAtom(self):`
			`if self.follows('.'):`
			`content = SpecialChar('.')`
			`self.index += 1`
			`elif self.follows('\\'):`
			`self.index += 1`
			`content = self.parseAtomEscape()`
			`elif self.follows('['):`
			`content = self.parseCharacterClass()`
			`elif self.follows('(?:'):`
			`self.index += 3`
			`dis = self.parseDisjunction()`
			`self.expect_character(')')`
			`content = 'idk'`
			`elif self.follows('('):`
			`self.index += 1`
			`dis = self.parseDisjunction()`
			`self.expect_character(')')`
			`content = 'idk'`
			`elif isPatternCharacter(self.source[self.index]):`
			`content = self.source[self.index]`
			`self.index += 1`
			`else:`
			`return None`
			`quantifier = self.parseQuantifier()`
			`return {'type': 'Atom',`
			`'content': content,`
			`'quantifier': quantifier}`

			`def parseQuantifier(self):`
			`prefix = self.parseQuantifierPrefix()`
			`if not prefix:`
			`return None`
			`greedy = True`
			`if self.follows('?'):`
			`self.index += 1`
			`greedy = False`
			`return {'type': 'Quantifier',`
			`'contents': prefix,`
			`'greedy': greedy}`

			`def parseQuantifierPrefix(self):`
			`if self.isEOF():`
			`return None`
			`if self.follows('+'):`
			`content = '+'`
			`self.index += 1`
			`elif self.follows('?'):`
			`content = '?'`
			`self.index += 1`
			`elif self.follows('*'):`
			`content = '*'`
			`self.index += 1`
			`elif self.follows('{'): # try matching otherwise return None and restore the state`
			`i = self.index`
			`self.index += 1`
			`digs1 = self.scanDecimalDigs()`
			`# if no minimal number of digs provided then return no quantifier`
			`if not digs1:`
			`self.index = i`
			`return None`
			`# scan char limit if provided`
			`if self.follows(','):`
			`self.index += 1`
			`digs2 = self.scanDecimalDigs()`
			`else:`
			`digs2 = ''`
			`# must be valid!`
			`if not self.follows('}'):`
			`self.index = i`
			`return None`
			`else:`
			`self.expect_character('}')`
			`content = int(digs1), int(digs2) if digs2 else None`
			`else:`
			`return None`
			`return content`


			`def parseAtomEscape(self):`
			`ch = self.source[self.index]`
			`if isDecimalDigit(ch) and ch!=0:`
			`digs = self.scanDecimalDigs()`
			`elif ch in CHAR_CLASS_ESCAPE:`
			`self.index += 1`
			`return SpecialChar('\\' + ch)`
			`else:`
			`return self.parseCharacterEscape()`

			`def parseCharacterEscape(self):`
			`ch = self.source[self.index]`
			`if ch in CONTROL_ESCAPE_CHARS:`
			`return SpecialChar('\\' + ch)`
			`if ch=='c':`
			`'ok, fuck this shit.'`


			`def scanDecimalDigs(self):`
			`s = self.index`
			`while not self.isEOF() and isDecimalDigit(self.source[self.index]):`
			`self.index += 1`
			`return self.source[s:self.index]`





			`a = JsRegExpParser('a(?=x)', '')`
			`print(a.parsePattern())`