python/parser2.py

# walter's erster versuch mit Python
#
# starten mit python -i basics.py
def err(*e):
    print('error:', *e)
    exit()

from datetime import * ;
import re;

class PObj:
    n2o = []
    def __init__(self, nm = ''):
        self.no = len(self.n2o)
        self.name = f"{self.__class__.__name__ if nm == '' else nm}#{self.no}"
        self.n2o.append(self)
    def __str__ (self):
        return self.name
    def __repr__ (self, at=''):
        return self.name
    def sExp (self, at=''):
        return self.name if at == '' else f'{self.name}{{{at}}}'
    @staticmethod
    def test1 ():
        PObj('null')
        PObj()
        PObj('zwei')
        print(f"test1 n2o={PObj.n2o}")
        print(f"test1 str", [str(o) for o in PObj.n2o])
    @staticmethod
    def print(m):
        print(m)
        p = '['
        for o in PObj.n2o:
            print(f"{p} {o.sExp()}")
            p = ','
        print(']')

end = 'end'
dflt = 'dflt'
rStart = 'start'
lrK = 0
mLahC = 0
mLahO1 = False

class Rule (PObj):
    rules = None
    def __init__(self, src):
        spl = re.split(r'\s+', src)
        assert(spl[1] == '=')
        self.sym = spl[0]
        super().__init__(f"R#{spl[0]}")
        self.src = src
        self.body = spl[2:]
        self.pos0 = None
        self.lah = {()}
    def sExp (self, at=''):
        return super().sExp(f'sym={self.sym!s} src={self.src} body={self.body} pos0={self.pos0!s} lah={self.lah}')
    @staticmethod
    def test2 ():
        PObj.test1()
        Rule("drei  = b '+' c")
        print(f"test2 n2o={PObj.n2o}")
        print(f"test2 str", [str(o) for o in PObj.n2o])
    @staticmethod
    def make(*src):
        Rule(f"{rStart} = ?")
        for r in src:
            Rule(r);
        PObj.n2o[0].body[0] = PObj.n2o[1].sym
        Rule.rules = Rule.n2o.copy()
    @staticmethod
    def makeLah(toK):
        global mLahC, lrK
        tS = datetime.now()
        mLahC = 0
        stop = set()                                 
        for k in range(1, toK+1):
            for r in Rule.rules:
                assert len(stop) == 0
                r.lah = r.mLah(k, stop)
                print(f"mLahC={mLahC}, {(datetime.now() - tS).total_seconds()}sec, {len(r.lah)} {r}")
            lrK = k
    @staticmethod
    def makeLah2():
        global mLahC
        tS = datetime.now()
        mLahC = 0
        stop = {}                                 
        for r in Rule.rules:
            r.mLah2(lrK, stop)
            print(f"mLahC={mLahC}, {(datetime.now() - tS).total_seconds()}sec, {len(r.lah)} {r}")

    def mLah(self, k, stop):
        global mLahC            
        mLahC += 1
        # print(f"mLah2 {self} {k} {stop}")
        assert k >= 1
        assert k == lrK + 1
        if self in stop:
            return set()
        stop.add(self)
        lP = {()}
        lF = set()
        for y in self.body:
            if isinstance(y, str):
                lQ = set()
                for o in lP:
                    n = o + (y, )
                    if len(n) < k:
                        lQ.add(n)
                    else:
                        lF.add(n)
                lP = lQ
            else:
                lE = set()
                lD = set()
                hasEmpty = () in lP
                for r in y.rules:
                    if hasEmpty:
                        lE |= r.mLah(k, stop)
                    if len(lP) > 0:
                        lD |= r.lah
                lQ = set()
                if hasEmpty:
                    lP.remove(())
                for o in lP:
                    for p in lD:
                        if len(o) + len(p) < k:
                            lQ.add(o + p)
                        else:
                            lF.add((o + p)[0:k])
                lP = lQ
                if hasEmpty:
                    for n in lE:
                        if len(n) < k:
                            lP.add(n)
                        else:
                            lF.add(n)
            if len(lP) == 0:
                break    

        stop.remove(self)
        return lF | lP
        
    def mLah2(self, k, stop):
        global mLahC
        mLahC += 1
        # print(f"mLah2 {self} {k} {stop}")
        assert k >= 1
        if mLahO1:
            if self.lah != None:
                return self.lah
        r = {()}
        if self not in stop:
            stop[self] = 1
        elif stop[self] <= lrK:
            stop[self] += 1
        else:
            return None
        
        for p in range(len(self.body)):
            rMin = min(len(s) for s in r)
            if rMin >= k:
                break
            y = self.body[p]
            if isinstance(y, str):
                #print(f"y={y} {k} {r}")
                r = {e if len(e) >= k else e + (y,) for e in r}
                #print(f"y={y} ==> {r}")
            else:
                r3 = set()
                cnt = 0
                hasEmp = () in r
                for q in y.rules:
                    #print(f"q={q} {r}")
                    r2 = q.mLah2(k-rMin, stop)
                    if r2 != None:
                        cnt += 1
                        r3 |= {e if len(e) >= k else (e + f)[0 : k] for e in r for f in r2}
                    # print(f"q={q} ==> {r3} <== r={r} r2={r2} k={k}")
                # print(f"after {y} cnt={cnt} {r3} <== {r}")
                if cnt < 1:
                    stop[self] -= 1
                    return None
                r = r3

        # print(f"mLah2 {self} {k} ==> {r}")
        stop[self] -= 1
        if k == lrK and all(0 == v for v in stop.values()):
            #print(f"{self} {k} returning {r} lah={self.lah}")
            if self.lah == None:
                self.lah = frozenset(r)
            else:
                assert self.lah == r
        return r

class RuleSym (PObj):
    n2y = {}
    def __init__(self, sym):
        self.sym = sym
        super().__init__(f"Y#{sym}")
        self.rules = []
    def sExp (self):
        return super().sExp(f"sym={self.sym!s} {self.rules}")

    @staticmethod
    def make():
        for r in Rule.rules:
            if r.sym not in RuleSym.n2y:
                RuleSym.n2y[r.sym] = RuleSym(r.sym)
            RuleSym.n2y[r.sym].rules.append(r)
            r.sym = RuleSym.n2y[r.sym]
        for r in Rule.rules:
            b = r.body
            for ix in range(len(b)):
                v = b[ix]
                if v[0] == "'" and v[-1] == "'":
                    b[ix] = v[1: len(v)-1]
                    assert (b[ix] == end) == (r.sym.sym == rStart and ix > 0)
                elif v in RuleSym.n2y:
                    b[ix] = RuleSym.n2y[v]
                elif v != end:
                    err(f"rule {v} in {r} not in", ruleD)

class RulePos (PObj):
    def __init__(self, r, p):
        super().__init__('P')
        self.rule = r
        self.pos = p
        self.next = None
    def sExp (self):
        return super().sExp(f"{self.rule!s} {self.pos} next={self.next!s}")

    @staticmethod
    def make():
        for r in Rule.rules:
            b = r.body
            r.pos0 = RulePos(r, 0)
            for ix in range(1, len(b)+1):
                n = RulePos(r, ix)
                PObj.n2o[-2].next = n

class RuleState (PObj):
    p2s =  {}
    start = None
    states = None
    def __init__(self, ps):
        super().__init__('S')
        self.pos = frozenset(ps)
        self.to = {}
        # self.stack = None
        # self.red = {}
        self.lah = 0
        self.p2s[self.pos] = self        
        for p in self.pos:
            if p.next != None:
                e = p.rule.body[p.pos]
                if e in self.to:
                    self.to[e].add(p.next)
                else:
                    self.to[e] = {p.next}

    def sExp (self):
        return super().sExp(f"{self.pos} to={self.to} lah={self.lah}") 

    def makeStack(self):
        for k, v in self.to.items():
            n = self.stack + [v]
            if v.stack == None:
                v.stack = n
                v.makeStack()
            elif er:
                PObj.print('stack mix max')
                err(f"self={self} k={k} v={v} n={n}")
            
        for p in self.pos:
            if p.next != None:
                continue
            r = p.rule
            for x in range(p.no, r.pos0.no - 1, -1):
                print(f"reduce {r} {p} {x} {PObj.n2o[x]}")

    def addLah(self, len):
        assert len == self.lah + 1
        stop = set() 
        for k,v in self.to.items():
            if len == 1 and not isinstance(v, tuple):
                v = v, {}
                self.to[k] = v
            assert isinstance(v, tuple)

    @staticmethod
    def posExp(pp):
        work = pp.copy()
        done = set()
        while True:
            try:
                p = work.pop()
            except KeyError:
                break
            done.add(p)
            if p.next != None: 
                e = p.rule.body[p.pos]
                if isinstance(e, RuleSym):
                    for r in e.rules:
                        if r.pos0 not in done:
                            work.add(r.pos0)
        pp |= done

    @staticmethod
    def make():
        pp = {Rule.rules[0].pos0}
        RuleState.posExp(pp)
        RuleState.start = RuleState(frozenset(pp))
        work = {RuleState.start}
        while True:
            try:
                s = work.pop()
            except KeyError:
                break
            for k, v in s.to.items():        
                RuleState.posExp(v)
                w = frozenset(v)
                if w in RuleState.p2s:
                    s.to[k] = RuleState.p2s[w]
                else:
                    n = RuleState(w)
                    s.to[k] = n
                    work.add(n)
            for p in s.pos:
                if p.next == None:
                    s.to[p.rule] = None
        RuleState.states = PObj.n2o[RuleState.start.no:]

    @staticmethod
    def makeRed():
        for s in RuleState.states:
            for y, t in s.to.items():
                if isinstance(y, RuleSym):
                    print(f"red {s} y={y} t={t}")
                    for r in y.rules:
                        RuleState.addRed(r, s, t)

    @staticmethod
    def addRed(r, s, t):
        x = 0
        p = r.pos0
        for y in r.body:
            assert p.pos == x
            assert r.body[x] == y
            assert p in s.pos
            s = s.to[y]
            p = p.next
            x += 1
        assert x == len(r.body)
        assert p.pos == x
        assert p.next == None
        assert p in s.pos
        if r in s.red:
            s.red[r].add(t)
        else:
            s.red[r] = {t}

    @staticmethod
    def makeLahX(k):
        pEnd = RuleState.states[0]
        pEnd = pEnd.to[PObj.n2o[0].body[0]]
        pEnd = pEnd.to[end]
        assert len(pEnd.to) == 1
        assert end in pEnd.to
        if pEnd.lah < 1:
            pEnd.to[end] = (pEnd.to[end], {(end,)})
            for s in RuleState.states:
                s.addLah(1, set())

Rule.make("s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s           ')'", "i = '0'", "i = '1'", "i = '2'")
print('rules', PObj.n2o)
RuleSym.make()
print('RuleSym n2y', RuleSym.n2y)
print(f'RuleSym n2y {{}} {RuleSym.n2y}')
print(f'RuleSym n2y {{!s}} {RuleSym.n2y!s}')
PObj.print('RuleSym')
Rule.makeLah(4)
PObj.print('RuleSym lah')
[print(f) for f in sorted([ ''.join(e) for e in PObj.n2o[0].lah])]

err('end test')        
RulePos.make()
print('RulePos', PObj.n2o)
RuleState.make()
PObj.print('RuleState')
RuleState.addLah(1)
PObj.print('RuleState lah 1')
#RuleState.makeRed()
#RuleState.start.stack = [RuleState.start]
# print('RuleState start', repr(RuleState.start))
# RuleState.start.makeStack()
#PObj.print('RuleState red')
#Rule.test2()
import re;
if False:
    class wk:
        def __init__(self):
            if False:
                self.eins = 'null'
        def __repr__ ( self):
            return 'wk' + repr(self.__dict__)
    o = wk()
    print('o', o)
    o.eins = 'eins'
    print('o', o)
end = 'eof'
dflt = 'dflt'
ruleStr = ["s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s           ')'", "i = '0'", "i = '1'", "i = '2'"]
print("ruleStr", ruleStr);
tb = [re.split(r'\s+', r) for r in ruleStr]
tb.insert(0, ['star t ', '=', tb[0][0]])
rules = tb.copy()
accX = len(tb)
failX = accX+1
RuleSymX = failX + 1
tb += ['accept', 'fail']
print("rules", tb)
ruleset4name = {}
ruleset4rule = []
for i in range(accX):
    r = tb[i]
    if r[1] != '=':
        err("r[1] not =", r)
    if r[0] in ruleset4name:
        s = ruleset4name[r[0]]
        tb[s].append(i)
    else:
        s = len(tb)
        ruleset4name[r[0]] = s
        tb.append(['ruleset', r[0], s, i])
    r[0:2] = ['rule', r[0], i]
    assert len(ruleset4rule) == i
    ruleset4rule.append(s)
handleX = len(tb)
print("ruleset4name", ruleset4name, 'ruleset4rule', ruleset4rule)
print('tb', tb)
for r in rules:
    for i in range(3, len(r)):
        v = r[i]
        if v[0] == "'" and v[-1] == "'":
            r[i] = v[1: len(v)-1]
        elif v in ruleset4name:
            r[i] = ruleset4name[v]
        elif v != eof:
            err(f"rule {v} in {r} not in", ruleD)
print("tb after rulemap", tb)

class Handle:
    byStart = {}
    def __init__(self, s):
        self.start = frozenset(s)
        assert self.start not in Handle.byStart
        self.tbX = len(tb)
        tb.append(self)
        self.byStart[self.start] = self.tbX
        self.map = {}
        self.go = {}
        self.goDef = None
    def __repr__ ( self):
        return 'handle' + repr(self.__dict__)

    def mapAdd(self, l):
        rx, px = l[-1]
        r = tb[rx]
        if px > len(r):
            return
        nx = (rx, px+1)
        c = r[px] if px < len(r) else dflt
        #print(f'mapadd enter c {c}, l {l}, self {self}');
        if isinstance(c, str):
            if c in self.map:
                self.map[c].add(nx)
            else:
                self.map[c] = {nx}
        elif isinstance(c, int):
            assert c >= rulesetX and c < handleX
            rs = tb[c]
            
            # forward: search tree for next terminal avoid infinite recursion        
            for cx in rs[3:]:        
                if (cx, 3) not in l:
                    self.mapAdd(l + [(cx, 3)])
            # afterward next step
            if c in self.map:
                self.map[c].add(nx)
            else:
                self.map[c] = {nx}
        #print(f'handleadd exit  l {l}, handle {self}');

    @staticmethod
    def mapExp():
        hx = handleX
        print('mapExp0', handleX, 'tb', len(tb))
        while hx < len(tb):
            h = tb[hx]
            for k in h.map:
                strt = frozenset(h.map[k])
                h.map[k] = strt
                if strt not in Handle.byStart:
                    n = None 
                    for s in strt:
                        if s[1] <= len(tb[s[0]]):
                            if n == None:
                                n = Handle(strt)
                            n.mapAdd([s])
            hx += 1
        print('mapExp', handleX, 'tb', len(tb))

    @classmethod
    def genGo(cl):
        print('genGo 0')
        for hx in Handle.byStart.values():
            h = tb[hx]
            # h.go = {}
            for k in h.map:
                v = h.map[k]
                if len(v) != 1:
                    r = cl.byStart[v]    
                else:
                    for e in v: 
                        if e[1] > len(tb[e[0]]) :
                            r = e[0]
                        else:
                            r = cl.byStart[v]            
                if k == dflt: 
                    h.goDef = r
                else:
                    h.go[k] = r
        print('genGo 9')
                

h = Handle({(0, 3)})
print('handle', h, 'tb', tb);
h.mapAdd([(0, 3)])
print('Handle.byStart', Handle.byStart, tb);
Handle.mapExp()
print('Handle.mapExp', Handle.byStart, tb);
accRS = ruleset4rule[0]
print(f"accRS {accRS}, {handleX} {tb[handleX]} accX={accX}")
accH = 0, len(tb[0])
print('accH', accH)
cnt = 0
for h in [h for h in tb[handleX:] if accH in h.start]:
    cnt += 1
    print(f'accH {h}')
    assert eof not in h.map
    h.map[eof] = frozenset([(accX, 999)])
assert cnt >= 1
assert accRS not in tb[handleX].map
#tb[handleX].map[accRS] = frozenset([(accX,99)])
print(f"accRS2 {accRS}, {handleX} {tb[handleX]}")
Handle.genGo()
print('Handle.genGo', Handle.byStart);

def run(inp):
    inI = iter(inp + [eof])
    st = [[handleX, 'st 0']]
    lah = next(inI)
    while True:
        tos = st[-1]
        print(f'run1 lah={lah} tos={tos}, st={len(st)} {st}')
        m = tb[tos[0]]
        go = m.go[lah] if lah in m.go else m.goDef 
        if go == None or go == failX:
            err(f"bad lah={lah} at {[tb[s[0]][1] + '.' + str(s[1]) for s in m.start]}, "
                f"expected {[k for k in m.map.keys() if isinstance(k, str)]}, "
                f"go {go}, st={len(st)} {st}")
        elif go >= handleX: #shift
            st.append([go, lah])
            lah = next(inI)
        elif go < accX:    
            r = tb[go]
            n = [r[1], go]
            nx = len(st) - len(r) + 3
            for i in range(nx, len(st)):
                n.append(st[i][1])
            pa = tb[st[nx-1][0] ]
            #assert pa[0] == 'handle'
            rs = ruleset4rule[go]
            if rs not in pa.go:
                err(f'reduce {rs} missing in pa', pa, 'stack', st)
            st[nx] = [pa.go[rs], n]            
            del st[nx+1:] 
        elif go == accX:
            assert lah == eof
            assert len(st) == 2
            return st[1][1]
        else:
            err(f"bad go {go}, lah={lah}, st={st}")

def treeSplit(nd, pr):
    if isinstance(nd, str):
        return pr + nd
    r = pr + nd[0] + '.' + str(nd[1])
    for ch in nd[2:]:
        r += treeSplit(ch, pr + '  ')
    return r

def treeSpli2(nd, of):
    if isinstance(nd, str):
        return nd
    r = nd[0] + '.' + str(nd[1]) + ' '
    of += len(r)
    r += treeSpli2(nd[2], of)
    for ch in nd[3:]:
        r += "\n" + of * ' ' + treeSpli2(ch, of)
    return r

for s in ['2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )']:
    ast = run(re.split(r'\s+', s))
    print('run', s , ' ==>', treeSplit(ast, "\n  "))
    print('run', s , ' ==2\n  ' + treeSpli2(ast, 2))
print('end', __file__)