file browser

python/parserGoLah.py

# knuth lr(k) parser

import re

def strL(*ee):
    return     ', '.join(e if type(e) == str else e.strL() if hasattr(type(e), 'strL') else strC(e) for e in ee)

def strS(*ee):
    return     ', '.join(e if type(e) == str else strC(e) for e in ee)

strCW = re.compile('\w+')
def strC(e):
    """return a str represntig e. recursively expand collections, otherwise use str"""
    t = type(e)
    if t == str:
         return repr(e) if strCW.fullmatch(e) == None else e
    elif t == tuple:
         return '(' + ', '.join(strC(v) for v in e) + ')'
    elif isinstance(e, list): # t == list:
         return '[' + ', '.join(strC(v) for v in e) + ']'
    elif t == set or t == frozenset:
         return '{' + ', '. join(strC(v) for v in e) + '}'
    elif isinstance(e, dict):
         return '{' + ', '. join(strC(k) + ': ' + strC(v) for k, v in e.items()) + '}'
    else:
        return str(e)

def err(*e):
    raise Exception('error: ' + strL(*e))
    exit()

dbgLev = 3
dbgTime = 0
def dbg(l, *e):
    if dbgLev >= l:
        print('dbg: ' + strL(*e))
        
from datetime import * ;
import re;

def dsUpdate(le, ri):
    u = False
    for k,v in ri.items():
        if k not in le:
            le[k] = set()
        o = len(le[k])
        le[k] |= ri[k]
        if o!= len(le[k]):
            u = True
    return u

def dsAdd(g, e, p):
    if e in g:
        s = g[e]
        o = len(s)
        s.add(p)
        return o < len(s)
    else:
        g[e] = {p}
        return True

def dsAddS(g, e, s):
    if len(s) < 1:
        return False
    if e in g:
        o = len(g[e])
        g[e] |= s
        return len(g[e]) > o
    else:
        g[e] = s.copy()
        return True
class Leaf(dict):
    pass
    
def d2sTg(g, *kk):
    a = g
    for kx in range(len(kk)-2):
        k = kk[kx]
        if not k in a:
            a[k] = {}
        a = a[k]
    k = kk[-2]
    if not k in a:
        a[k] = Leaf()
    a = a[k] 
    k = kk[-1]
    if k not in a:
        a[k] = set()
    return a[k]

def d2sAdd(g, e, f, p):
    s = d2sTg(g, e, f)
    o = len(s)
    s.add(p)
    return o < len(s)

def d2sAddS(g, e, f, pp):
    s = d2sTg(g, e, f)
    o = len(s)
    s |= pp
    return o < len(s)
        
    
def    dsDbg(lv, g, *a):
    if dbgLev < lv:
        return
    if len(a) > 0:
        print(*a)
    for k,v in g.items():
        print("\t" + str(k) + ' => ' + ', '.join({str(p) for p in v}))

class PObj:
    "root and container for all our objects"
    end = '!'
    rStart = 'start'
    lahStr = True # lah is true ==> a string, False => tuple of strings
    lahEmpty = '' if lahStr else ()
    objs = []
    lrK = 0

    def setName(self, nm):
        self.name =  (self.__class__.__name__ + '#' + str(len(self.objs))) if nm == '' else nm

    def __init__(self, nm = ''):
        self.setName(nm)
        self.objs.append(self)

    def __str__ (self):
        return self.name

    def strA(self, *aa):
        return str(self) + '{' + ', '.join(a + '=' + strC(getattr(self, a)) if (ex := a.find('=')) < 0 else a[0:ex] + '=' + strC(getattr(self, a[ex+1:])) for a in aa) + '}'

    @staticmethod
    def dbg(l, m):
        err('wer braucht das')
        if dbgLev >= l:
            print(m)
            p = '['
            for o in PObj.objs:
                print(p, o)
                p = ','
            print(']')

class Rule (PObj):
    "a Rule has a name (a symbol or nonterminal) and a body of terminals and nonterminals"
    rules = []
    s2r = {}
    def __init__(self, sy, bd):
        super().__init__(sy)
        self.body = bd
        self.pos = None
        self.go = False
        self.brd = False
        self.redCh = {self}
        self.sub = {}
        self.subRi = {self}
        self.prd = set()
        self.lah = set()
        self.st = set()
        self.tst = False
        Rule.rules.append(self)
        if sy != '':
            if sy in Rule.s2r:
                Rule.s2r[sy].append(self)
            else:
                Rule.s2r[sy] = [self]

    def strL(self):
        return self.strA(type(self).__name__ + '=body', 'go')
        
    def dbg(self, l, *a):
        if dbgLev >= l:
            print(self.strL(), *a)
                
    @staticmethod
    def clear(): 
        Rule.rules.clear()
        Rule.s2r.clear()
        State.clear()    
        PObj.objs.clear()

    @staticmethod
    def add(*src):
        """add new rules: each rule is a string
                elements are words (double or sinble quoted strings, spechial char
            if an unquoted element is found as a rule name, it designates a rule, otherwise a terminal
        """
        for s in src:
            spl = re.findall('\w+|\'(?:[^\']|\'\')*\'?|"(?:[^"]|"")*"?|\S', s)     # words | single quoted | doubble quoted | single nonspace char
                                                                                # (?: non capturing group, otherwise findall will use group!
            if len(spl) > 0 and spl[-1][0] in "\"'" and (len(spl[-1]) < 2 or spl[-1][0] != spl[-1][-1]):
                err('missing endquote string:', spl[-1], 'in source', s, src)
            assert(spl[1] == '=')
            RuleSeq(spl[0], spl[2:])

    @staticmethod
    def finish():
        rr = Rule.rules
        Rule.rules = []
        r0 = RuleSeq('', [None, PObj.end])
        r0.setName('@@0')
        for r in rr:
            if type(Rule.s2r[r.name]) == list:
                Rule.s2r[r.name] = r if len(Rule.s2r[r.name]) == 1 else RuleOr(r.name + '@' + str(len(Rule.rules)), Rule.s2r[r.name])
            r.setName(r.name + '@' + str(len(Rule.rules)))
            Rule.rules.append(r)        
        r0.body[0] = Rule.rules[1]
        for r in Rule.rules:
            for i in range(len(r.body)):
                b = r.body[i]
                if type(b) == str:
                    if b[0] == "'" and b[-1] == "'" or b[0] == '"' and b[-1] == '"':
                        b = b[1: -1]
                    elif b in Rule.s2r:
                        b = Rule.s2r[b]
                    r.body[i] = b
                    assert b != PObj.end or r == Rule.rules[0]
            r.makePos()
            r.dbg(1, '+pos len', len(r.body) , len(r.pos), r.strA('body', 'pos'))

        for r in Rule.rules:
            assert r.go == False
            g = {}
            r.goGen(g)
            assert r.go == False
            for e,p in g.items():
                g[e] = frozenset(p)
            r.go = g
            dbg(1, 'goGen', r.strA('body', 'go'))
        return
            
        if False:
            Rule.prdLah(1)
            for r in Rule.rules:
                bd = r.body
                if isinstance(r, RuleOr):
                    r.redCh |= set(b for b in bd if isinstance(b, Rule))
                else:
                    ne = [b for b in bd if isinstance(b, str) or '' not in b.prd]
                    if len(ne) == 0:
                        r.redCh |= set(b for b in bd if isinstance(b, Rule))
                    elif len(ne) == 1 and isinstance(ne[0], Rule):
                        r.redCh.add(ne[0])
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    o = r.redCh.copy()
                    for s in o:
                        r.redCh |= s.redCh
                    if len(o) < len(r.redCh):
                        u = True
                        dbg(99, 'redCh', r.strA('redCh'))                    

            for r in Rule.rules:
                dbg(1, 'redCh', r.strA('body', 'redCh'))                    

            for r in Rule.rules:
                r.sub = {r for r in r.body if isinstance(r, Rule)}
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    oo = r.sub.copy()
                    for o in oo:
                        r.sub |= o.sub
                    if len(oo) < len(r.sub):
                        u = True
                        dbg(99, 'sub', r.strA('sub'))                    
            for r in Rule.rules:
                dbg(1, 'sub', r.strA('body', 'sub'))                    
                        
            for r in Rule.rules:
                bd = r.body
                if isinstance(r, RuleOr):
                    r.subRi |= set(b for b in bd if isinstance(b, Rule))
                else:
                    for b in reversed(bd):
                        if isinstance(b, str):
                            break
                        r.subRi.add(b)
                        if '' not in b.prd:
                            break
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    o = r.subRi.copy()
                    for s in o:
                        r.subRi |= s.subRi
                    if len(o) < len(r.subRi):
                        u = True
                        print('subRi', r.strA('subRi'))                    
                    
        if False:
            for r in Rule.rules:
                assert r.brd == False
                b = set()
                r.brdGen(b)
                assert r.brd == False
                r.brd = b
                dbg(1, 'brdGen', r.strA('body', 'brd'))
                
            for r in Rule.rules:
                if isinstance(r, RuleSeq):
                    bd = r.body
                    bL = len(bd)
                    for bx in range(bL-1):
                        b = bd[bx]
                        if isinstance(b, Rule) and r not in b.redCh:
                            for nx in range(bx+1, bL):
                                n = bd[nx]
                                if isinstance(n, str):
                                    if n in b.brd:
                                        err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', n, nx)
                                    break
                                else:
                                    if len(b.brd & (n.prd | n.lah)) != 0:
                                        err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', nx, n.stra('prd', 'lah'))
                                    if '' not in n.prd:
                                        break
                
        
#        for r in Rule.rules:
#            for p in r.pos:
#                g = {}
#                p.goAdd(g)
#                dsDbg(1, g, str(p))
            

        if False:
            for s in State.p2s.values():
                if '' in s.go and len(s.go['']) != 1:
                    err("reduce not unique go['']", s.go[''], 'in', s)
                dbg(1, s)

            u = True
            while u:
                u = False
                for s in State.p2s.values():
                    u |= s.auGen()
                dbg(1, 'auGen')
            for s in State.p2s.values():
                dbg(1, 'auGen', s.strA('pa', 'au'))

            stp = set()
            for s in State.p2s.values():
                stp.clear()
                s.check(stp)

            r2s = {}
            s2r = {}
            for s in State.p2s.values():
                if '' in s.go:
                    for r in s.go['']:
                        dsAdd(r2s, r, s)
                        dsAdd(s2r, s, r)
            dbg(1, 'r2s st',sum(len(s) for s in r2s.values()), r2s)
            for ix in range(4):
                for s in State.p2s.values():
                    for a, t in s.go.items():
                        if isinstance(a, Rule):
                            if t in s2r:
                                for w in s2r[t]:
                                    dsAdd(r2s, w, s)
                                    dsAdd(s2r, s, w)
                dbg(1, 'r2s', ix, sum(len(s) for s in r2s.values()), 'r2s', r2s)
                    
        #        for r in s.go['']:
        #            dsAdd(s2r, s,r.redP.add(r.pos[-1])
        #for r in Rule.rules:
        #    print(str(r), 'redP', strC(r.redP))

    def makePos(self, l):
        assert(self.pos == None)
        self.pos = [RulePos(self, ix) for ix in range(l)]

    def goGen(self, g):
        """ recursively add all start transactions for self to go-dict g
        """
        if self.go == True:
            return 
        elif self.go != False:
            dsUpdate(g, self.go)
        else:
            self.go = True
            self.goGenRec(g)
            self.go = False

    @staticmethod
    def goGenRecEP(e, p, g):
        """ add element e to position p to go-dict g
            if e was not yet in g and g is a Rule, recursively generate go for g
        """
        dbg(99, 'goGenRecEP', str(e), str(p))
        if e in g:
            g[e].add(p)
        else:
            g[e] = {p}
            if isinstance(e, Rule):
                e.goGen(g)

    def brdGen(self, b):
        """ recursively add all start transactions for self to go-dict g
        """
        if self.brd == True:
            return 
        elif self.brd != False:
            b |= self.brd
        else:
            self.brd = True
            self.brdGenRec(b)
            self.brd = False

    @staticmethod
    def makeSymPos():
        "make symbols and positions, after we got the source of all rules, convert body to symbols and bare strings"
        PObj.objs[0].body[0] = PObj.objs[1].sym
        Rule.rules = Rule.objs.copy()
        PObj.dbg(4, 'made rules')
        RuleSym.make()
        PObj.dbg(4, 'made sym')
        RulePos.make()
        PObj.dbg(3, 'made pos')

    @staticmethod
    def prdLah(k):
        """ compute for all rules 
             prd = productions of length <= k and
            lah = prefixes of length k of all productions with length > k
        """

        tS = datetime.now()
        assert not PObj.lahStr or (all([len(y) == 1 for r in Rule.rules for y in r.body if isinstance(y, str)]))
        cnt = 0
        upd = True
        while upd: # recompute all rules, until no change
            upd = False
            lC = 0
            pC = 0
            for r in Rule.rules:
                cnt += 1
                if r.prdLahComb(k):
                    upd = True
                    dbg(1, 'prdLah', str(r), r.prd, r.lah)
                lC += len(r.lah)
                pC += len(r.prd)
            if cnt > 1000:
                err('loop')
        elT = f"{(datetime.now() - tS).total_seconds()}sec, " if dbgTime else ''
        print(f"Rule.prdLah k={k} <- {PObj.lrK}: {cnt} {elT} #prd={pC} #lah={lC}")

        if dbgLev >= 3:
            for r in Rule.rules:
                dbg(3, 'prdLah', r.strA('body', 'prd', 'lah'))
        assert all([len(s) == k for s in r.lah for r in Rule.rules])
        assert all([len(s) <= k for s in r.prd for r in Rule.rules])
        PObj.lrK = k

    def prdLahAdd(self, k, pp, ll):
        upd = False
        assert self.prd <= pp
        if self.prd != pp:
            assert self.prd < pp
            upd = True
            self.prd = pp

        ll |= self.lah
        jj = {len(l) for l in ll if len(l) != k}
        if len(jj) != 0:
            for l in [l for l in ll if len(l) == k]:
                for j in jj:
                    ll.discard(l[0:j])
        if ll != self.lah:
            upd = True
            dbg(99, 'lahupd', self.lah, ll)
            self.lah = ll            
        return upd
        
    def hasCycle(self):
        if self.tst:
            return True
        self.tst = True
        r = any(b.hasCycle() for b in self.body if isinstance(b, Rule))
        self.tst = False
        return r
            
class RuleSeq(Rule):
    def __init__(self, sym, bd):
        super().__init__(sym, bd)

    def makePos(self):
        super().makePos(len(self.body)+1)
        
    def goGenRec(self, g):
        if 0 == len(self.body):
            dsAdd(g, '', self)
        else:
            Rule.goGenRecEP(self.body[0], self.pos[1], g)

    def prdLahComb(self, k):
        "compute lah and prd combining rules and already computed prd and lah"

        if len(self.body) == 0:
            return self.prdLahAdd(k, { PObj.lahEmpty }, set())
        b = self.body[0]
        pp = { b if PObj.lahStr else (b,) } if isinstance(b, str) else b.prd.copy()
        ll = set() if isinstance(b, str) else b.lah.copy()
        for b in (self.body[bx] for bx in range(1, len(self.body))):
            n = set()
            if isinstance(b, str):
                for p in pp:
                    if len(p) < k :
                        n.add(p+b)
                    else:
                        ll.add((p+b)[0:k])
            else:
                for p in pp:
                    for q in b.prd:
                        a = p + q
                        if len(a) <= k:
                            n.add(a)
                        else:
                            ll.add(a[0:k])
                    for q in b.lah:
                        a = p + q
                        if len(a) >= k:
                            ll.add(a[0:k])
            pp = n
            if len(pp) == 0:
                break
        return self.prdLahAdd(k, pp, ll)

    def brdGenRec(self, b):
        bd = self.body
        bL = len(bd)
        if len(bd) == 0:
            return
        e = bd[0]
        if isinstance(e, Rule) and self in e.redCh and bL > 1:
            for fx in range(0, bL):
                f = bd[fx]
                if isinstance(f, Rule) and self in f.sub:
                    g = bd[1]
                    if isinstance(g, str):
                        b.add(g)
                    else:
                        b |= {p for p in g.prd if p != ''} | g.lah 
                    break
        for ex in range(len(self.body)-1, -1, -1):
            e = self.body[ex]
            if isinstance(e, str):
                break
            e.brdGen(b)
            if '' not in e.prd:
                break
        return
        
        for ex in range(len(self.body)-1):
            e = self.body[ex]
            if isinstance(e, str):
                break
            if e in self.redCh or self in e.redCh:
                f = self.body[ex+1]
                if isinstance(f, str):
                    b.add(f)
                else:
                    b |= {x for x in f.prd if len(x) != 0} | f.lah
            assert all(len(x) == 1 for x in b)
            if '' not in e.prd:
                break
        
        assert not self.tst

        e = self.body[0]
        if isinstance(e, Rule) and '' in e.prd:
            for ex in range(len(self.body)-2, -1, -1):
                e = self.body[ex]
                if isinstance(e, str):
                    break
                if self in e.subRi:
                    dbg(1, 'hasCycle', str(self), str(e))
                    b |= {x for x in e.prd if len(x) > 0} | e.lah
                if '' not in e.prd:
                    break
        
         
class RuleOr(Rule):
    def __init__(self, sym, bd):
        super().__init__(sym, bd)

    def makePos(self):
        super().makePos(2)

    def goGenRec(self, g):
        for b in self.body:
            Rule.goGenRecEP(b, self.pos[1], g)

    def prdLahComb(self, k):
        "compute lah and prd combining rules and already computed prd and lah"

        pp = set()
        ll = set()
        for y in self.body:
            if isinstance(y, str):
                pp.add( y if PObj.lahStr else (y,))
            else:
                pp |= y.prd
                ll |= y.lah
        return self.prdLahAdd(k, pp, ll)

    def brdGenRec(self, b):
        for e in self.body:
            e.brdGen(b)

class RuleSymRemove (PObj):
    "the symbols are our nonterminals, which are a set of rules"
    n2y = {}
    syms = None
    def __init__(self, sym):
        self.sym = sym
        super().__init__(f"Y#{sym}")
        self.rules = []
        self.lah = set()
    def sExp (self):
        return super().sExp(f"sym={self.sym!s} {self.rules} lah={self.lah}")

    @staticmethod
    def make():
        "make the symbols from defined rules and convert Rule.body to symbols and bare strings"
        i0 = len(PObj.objs)
        for r in Rule.rules: #collect symbols
            if r.sym not in RuleSym.n2y:
                RuleSym.n2y[r.sym] = RuleSym(r.sym)
            RuleSym.n2y[r.sym].rules.append(r)
            r.sym = RuleSym.n2y[r.sym]
        RuleSym.syms = PObj.objs[i0:]
        assert Rule.objs[0].sym == RuleSym.syms[0]
        assert RuleSym.syms[0].sym == PObj.rStart
        assert RuleSym.n2y[PObj.rStart] == RuleSym.syms[0]
        assert len(RuleSym.n2y[PObj.rStart].rules) == 1
        for r in Rule.rules: # convert Rule.body to symbols and bare strings
            b = r.body
            if r.pos == 'a':
                for ix in range(len(b)):
                    v = b[ix]
                    if v[0] == "'" and v[-1] == "'":
                        b[ix] = v[1: len(v)-1]
                        assert b[ix] != PObj.end
                    elif v in RuleSym.n2y:
                        b[ix] = RuleSym.n2y[v]
                    else:
                        err(f"rule {v} in {r} not in {Rule.rules}")
            elif r.pos == 'n':
                for ix in range(len(b)):
                    v = b[ix]
                    if v in RuleSym.n2y:
                        b[ix] = RuleSym.n2y[v]
            else:
                err(f'bad pos {r.pos} in {r.sExp()}')

class RulePos (PObj):
    def __init__(self, r, p):
        super().__init__(r.name + '#' + str(p))
        self.rule = r
        self.pos = p
        
    def strL(self):
        return self.strA('rule', 'pos')

    def goAdd(self, g):
        if self.pos == 0:
            dsUpdate(g, self.rule.go)
        elif self.pos + 1 == len(self.rule.pos):
            dsAdd(g, '', self.rule)
        else:
            b = self.rule.body[self.pos]
            dsAdd(g, b, self.rule.pos[self.pos + 1])
            if isinstance(b, Rule):
                dsUpdate(g, b.go)
                
    @staticmethod                
    def makeLah(k):
        # compute lah for RuleSym and RulePos using lah from Rule
        #     lah = production of len <= k and prefix of len k of all production longer k
        Rule.prdLah(k)
        for y in RuleSym.syms:
            y.lah = set().union(*[r.lah | r.prd for r in y.rules])
            dbg(3, f"{y} {y.lah}")
        for r in Rule.rules:
            r.pos[-1].lah = {lahEmpty}
            for pX in range(len(r.body)-1, -1, -1): # backward through the positions
                p = r.pos[pX]
                e = r.body[pX]
                lS = e.lah if isinstance(e, RuleSym) else {e if PObj.lahStr else (e,)}
                n = set()
                for l in lS:
                    if len(l) == k:
                        n.add(l)
                    else:
                        for o in p.next.lah:
                            dbg(9, f"n.add(({l}+{o})[0:k])")
                            n.add((l+o)[0:k])
                p.lah = n
                dbg(3, f"{r} {pX} {p} {p.lah}")
            assert r.pos[0].lah == r.prd | r.lah, (f"rule {r.sExp()} fails \npos[0].lah={r.pos[0].lah}" 
                    f" \n== prd={r.prd} \n| lah={r.lah}"
                    f",\ndifference lah-..={r.pos[0].lah-r.prd-r.lah}, ..-lah={(r.prd|r.lah)-r.pos[0].lah}")

class State (PObj):
    p2s = {}
    state0 = None
    lrErr = None

    def __init__(self, pa):
        super().__init__('s' + str(len(State.p2s)))
        assert isinstance(pa, frozenset)
        assert all(isinstance(p, RulePos) for p in pa)
        assert pa not in State.p2s
        self.pa = pa
        self.go = None
        self.done = False
        self.au = {}
        self.lah = {}
        self.lahOne = {}
        State.p2s[pa] = self    

    @staticmethod
    def getOrMake(pa):
        pa = frozenset(pa)
        return State.p2s[pa] if pa in State.p2s else State(pa)

            
    @staticmethod
    def makeAll():
        State.state0 = State(frozenset([Rule.rules[0].pos[0]]))
        State.state0.goExp()
        State.err = ''
        
        for s in State.p2s.values():
            dbg(1, 'goExp', s.strA('pa', 'go'))
        for s in State.p2s.values():
            for r in s.go.keys():
                if isinstance(r, Rule):
                    r.st.add(s)
        for r in Rule.rules:
            dbg(1, 'stGen', r.strA('body', 'st'))
            
        State.lahGen()
        if State.err == '':
            for s in State.p2s.values():
                s.goFix()                
    
    def strL(self):
        return self.strA('pa', 'go')

    def dbg(self, lv):
        if lv > dbgLev:
            return
        print(str(self), ':', ', '.join(str(p) for p in self.pa))
        for e, s in self.go.items():
            print('\t', e, ('=> ' + str(s)) if isinstance(s, State) else '<= ' + ', '.join(str(r) for r in s))
            
    def goExp(self):
        if self.go != None:
            return
        self.go = {}
        for p in self.pa:
            p.goAdd(self.go)
        for e, pp in self.go.items():
            if e != '':
                s = State.getOrMake(pp)
                self.go[e] = s
                s.goExp()
    
    def goFix(self):
        if '' not in self.go:
            return
        dbg(1, 'goFix begin', self.strA('pa', 'go', 'lah'))
        self.goFix1(self.go[''], self.go, self.lah)
        dbg(1, 'goFix end  ', self.strA('pa', 'go'))
        
    def goFix1(self, rr, gg, ll):
        if len(rr) == 1:
            for r in rr: break
            gg[''] = r
        else: 
            assert len(rr) > 1
            cnt = {r: 0 for r in rr}
            for a,b in ll.items():
                if isinstance(b, Leaf):
                    assert len(b) == 1
                    for k in b.keys():
                        if isinstance(k, Rule):
                            cnt[k] += 1
            dbg(1, 'goFix1 cnt', cnt)
            r = max(cnt.items(), key = lambda i: i[1])
            dbg(1, 'goFix1 max', r)
            r = r[0]
            # if r[1] < 1:
            #    del gg['']
            # else:
        gg[''] = r

        for a,b in ll.items():
            if isinstance(b, Leaf):
                assert len(b) == 1
                if a != '':
                    for k in b.keys(): break
                    if k == '':
                        if sTo != None:
                            gg[a] = sTo
                        else:
                            assert a in gg
                    elif k != r:
                        gg[a] = k
                    elif a in gg:
                        del gg[a] 
                    dbg(1, 'goFix1 ed', a, gg)
        for a,b in ll.items():
            if type(b) ==  dict:
                dbg(1, 'goFix1 diving into', a, 'from', gg[a] if a in gg else '---', 'to', b)
                gg[a] = {}
                self.goFix1(rr, gg[a], b)
                                
    def addPosA(self, pa):
        "add (position, the string after the the end of the current rule) to self, recursively add subrules"
        p, a = pa
        if State.lrErr != None:
            return
        if p.next == None:    #after all positions of rule ==> reduce
            if a not in self.to:
                self.to[a] = p.rule
            elif self.to[a] != p.rule:
                State.lrErr = strL("lrK mismatch [", a, "]=", self.to[a])
        else:
            e = p.rule.body[p.pos]
            k = PObj.lrK
            t = (p.next, a)
            if isinstance(e, str): #shift character on stack with next state
                for l in p.lah:
                    assert l[0] == e
                    if len(l) != k:
                        l = (l+a)[0:k] 
                    if l not in self.to:
                        self.to[l] = {t}
                    elif not isinstance(self.to[l], set):
                        State.lrErr = f"lrK mismatch [{l}]->{self.to[l]} cannot add {t}"
                        return
                    else:
                        self.to[l].add(t)
            else: 
                assert isinstance(e, RuleSym)    
                if e not in self.to: # next state after this symbol
                    self.to[e] = {t}
                elif t in self.to[e]:
                    return
                else:
                    self.to[e].add(t)
                for n in p.next.lah: # enter pos[0] of subrules, string after is lah of next position of rule
                    if len(n) != k:
                        n = (n+a)[0:k] 
                    for r in e.rules:
                        self.addPosA((r.pos[0], n))
                        if State.lrErr != None:
                            return

    def addTrans(self):
        "modify to: find/create states for (pos, aft) sets in to.values"
        for k, v in self.to.items():
            if isinstance(v, Rule):
                continue
            pa = frozenset(v)
            if pa in self.pa2u:
                self.to[k] = self.pa2u[pa]
            elif State.lrErr != None:
                return
            else:
                n = type(self)(pa)
                self.to[k] = n
                n.addTrans()
    

    @staticmethod
    def clear():
        State.states = []
        State.p2s.clear()
        
    @staticmethod
    def lahGen():
        for s in State.p2s.values():
            s.lahGen1()
        u = True
        while u:
            u = False
            for s in State.p2s.values():
                u |= s.lahGen2()
            dbg(1, 'lahGen')
        for s in State.p2s.values():
            for a, l in s.lah.items():
                for q, r in l.items():
                    dsAddS(s.lahOne, a, r)
            dbg(1, 'lahGen', s.strA('pa', 'lah', 'lahOne'))

        for s in State.p2s.values():
            s.lahExp(9)
            
        
    def lahGen1(self):
        for a, s in self.go.items():
            if isinstance(a, str) and a != '':
                d2sAdd(self.lah, a, s, s)
                
    def lahGen2(self):
        u = False
        if '' in self.go:
            for r in self.go['']:
                for s in r.st:
                    for e, eD in s.go[r].lah.items():
                        for pp in eD.values():
                            if e != PObj.end or isinstance(r, Rule):
                                u |= d2sAddS(self.lah, e, r, pp)
        return u
        
    def lahExp(self, maxi):
        self.lahExp1(maxi, self.lah, [])
        
    def lahExp1(self, maxi, dct, kys):
        for a, e in dct.items():
            if a == PObj.end:
                dbg(1, 'exp1 end', e, kys)
            elif len(e) > 1:
                dbg(1, 'lahExp1 conflict ', 'expanding' if maxi > 0 else 'unresolved', ' in', self.strA('pa'), 'at lah', kys + [a], ':', e)
                if maxi > 0:
                    self.lahExp2(maxi-1, dct, kys + [a])
                elif State.err == '':
                    State.err = strS('lahExp1 conflict unresolved in', self.strA('pa'), 'at lah', kys + [a], ':', e)

    def lahExp2(self, maxi, dct, kys):
        oo = dct[kys[-1]]
        assert isinstance(oo, Leaf)
        nn = {}
        dct[kys[-1]] = nn
        for r, ss in oo.items():
            dbg(1, 'exp2a', str(r), ss)
            for s in ss:
                for u, v in s.lahOne.items():
                    if u != PObj.end or isinstance(r, Rule):
                        d2sAddS(nn, u, r, v) 
            dbg(1, 'exp2z', nn)
            if kys[-1] == PObj.end:
                dbg(1, 'exp2zend', nn)
        dbg(1, 'expanded', self.strA('pa'), 'from', oo, 'to', dct)
        self.lahExp1(maxi, nn, kys)                

    def auGen(self):
        if '' not in self.go:
            return False
        n = self.au.copy()
        for a,s in self.go.items():
            if isinstance(a, str) and a != '':
                dsAdd(n,a,s)
        u = False
        for r in self.go['']:
            for s in r.st:
                for a, b in n.items():
                    if dsAddS(s.au, a, b - {s}):
                        u = True
                        dbg(1, 'auGen0', s.strA('au'))
        return u
            
    
    hist = []
    done = []
    def check(self, stp):
        if self.done:
            #dbg(1, 'check0-', State.lv, self.strA('done', 'go'))
            return
        l = len(State.hist)
        if l >= len(State.done):
            if l > len(State.done):
                err('done smaller', State.done, State.hist)
            State.done.append({self})
        else:
            if self in State.done[l]:
                return
            else:
                State.done[l].add(self)
                l += 1
                if len(State.done) < l:
                    State.done[l].clear()
        assert self.done == False
        self.done = True
        State.hist.append(self)
        
        dbg(1, 'check1', len(State.hist), State.hist, self.strA('go'), stp)
        gg = {g for g in self.go.keys() if isinstance(g, str) and g != ''}
        if len(gg & stp) != 0:
            dbg(1, 'check dead end', gg, self.strA('pa', 'go'), State.hist)        
        if '' in self.go:
            if len(self.go['']) > 1:
                dbg(1, "check several go['']", self.strA('pa', 'go'))
            for g in self.go['']:
                #dbg(1, "check2", str(g), g.st)
                for s in g.st:
                    s.chec2(stp | gg, g)
        self.done = False
#        dbg(1, 'check9-', my, self.strA('done'))
        State.hist.pop()

    def chec2(self, stp, ru):
        assert ru in self.go
        dbg(1, 'chec2=', str(self), str(ru), str(self.go[ru]))
        self.go[ru].check(stp)

    @staticmethod
    def makeLookahead(k):
        "make state for LR(k) grammar, return errormessage if not LR(k)"
        assert k >= 1
        State.clear()
        RulePos.makeLah(k)
        State.st0 = len(PObj.objs)
        State.lrErr = None
        b = State(frozenset([(Rule.rules[0].pos[0], k * (PObj.end if PObj.lahStr else (PObj.end, )))]))
        e = State(frozenset(b.to[RuleSym.syms[1]]))
        b.addTrans()
        e.addTrans()
        State.states = PObj.objs[State.st0: ]
        Parser.states = State.states
        if State.lrErr != None:
            return f"grammar not LR({k}), because of {State.lrErr}"

    @staticmethod
    def checkLR0():
        """ check if the current LR(1) grammar is even LR(0)
            
            return None if it is, otherwsie a string describing the conflicting state

            LR(0) grammars are implemented using LR(1) grammars

            LR(0) means, that a reduce step does not depend on lookahead.
                a shift step however uses the char shifted (== lookahead) to determine the next state """
        assert PObj.lrK == 1
        for u in State.states:
            assert len(u.to) > 0
            fi = None
            for k,v in u.to.items():
                if not isinstance(k, RuleSym):
                    if fi == None:
                        fi = v
                    elif v != fi if isinstance(v, Rule) else not (isinstance(v, State) and isinstance(fi, State)):
                        return strL("grammar not LR(0) because of",  u)

def eStr(e):
    if type(e) != tuple and type(e) != list:
        return str(e)
    else:
        return str(e[0]) + '>' + str(len(e)-1) + '(' + ', '.join(x if type(x) != tuple and type(x) != list else str(x[0]) + '>' + str(len(x)-1) for x in e[1:]) + ')'
        
class LahIter():
    def __init__(self, l):
        self.list = l
        self.ix = 0

    def __iter__(self):
        return self
        
    def __next__(self):
        self.ix += 1
        if self.ix <= len(self.list):
            return self.list[self.ix - 1]
        raise StopIteration()
    def __getitem__(self, i):
        i += self.ix
        return self.list[i] if i < len(self.list) else PObj.end
    @staticmethod
    def test():
        print('LahIter test begin')
        iter = LahIter(['begin', 0, 1,2,3,4])
        for i in iter:
            print(i, iter[0], iter[1], iter[-1])
        print('LahIter test end')

class Parser:
    pCnt = -1

    @staticmethod
    def parse(inp):
        assert len(State.state0.pa) == 1
        for r0 in State.state0.pa: break
        assert r0.pos == 0 and r0.rule == Rule.rules[0]
        r0 = r0.rule
        dbg(1, 'parsing for', r0)
        
        st = [(None, State.state0)]
        inI = LahIter(inp)
        tNr = -1
        Parser.pCnt = -1
        cntL = 1000
        act = 'start'
        shiL = st[-1][1]
        try:
            while True:
                Parser.pCnt  += 1
                e,s = st[-1]
                lah = inI[0]
                print('parse', (str(Parser.pCnt) + ' ' + act).ljust(20), 'lah', lah, 'stck', len(st), '>', eStr(e), str(s))
                if Parser.pCnt  > cntL:
                    err("too many loops cnt={Parser.pCnt}, lah={lah} st={st}")
                # dbg(2, f'run{Parser.pCnt} {ls} --> lah={lah} tos={e}, {s} st={len(st)} {st}')

                to = s.go
                for fx in range(20):
                    to = to[inI[fx]] if inI[fx] in to else to[''] if '' in to else None
                    if not isinstance(to, dict): break
                else:
                    err('parser to deeply nested lah', lah, s.go)
                if isinstance(to, State):
                    st.append((next(inI), to))
                    assert st[-1][0] == lah
                    shiL = to
                    tNr += 1
                    act = 'shift ' + lah
                    lah = inI[0]
                elif isinstance(to, Rule):
                    sx = len(st) - len(to.pos) + 1
                    p = [to] + [x0 for x0, x1 in st[sx : ]]
                    act = 'reduce ' + str(to)
                    del st[sx:]
                    st.append((p, st[-1][1].go[to]))
                else:
                    break
        except StopIteration:
            assert lah == PObj.end and len(st) == 2 and st[1][0][0] == Rule.rules[1]
            dbg(1, 'stopIteration parse', st)
            print('parsed', str(Rule.rules[1]), 'from', len(inp), 'tokens, in', Parser.pCnt, 'steps,', len(State.p2s), 'states')
            return st[1][0]

            
        print('syntax...')
        print('last', inI[-1], 'tokennr', tNr, ', lah', inI[0], inI[1], inI[2])
        print(' preceeding', inp[0: tNr+1])
        print(' following ', iA := inp[tNr+1: ] +[ PObj.end] )
        print('stack', len(st))
        for sx in range(len(st)-1, -1, -1):
            print(' ', sx, ':', eStr(st[sx][0]), 'state', st[sx][1])
        r = f"syntax after {inI[-1]} tokenNr {tNr} expected {set(shiL.lahOne.keys())}, not lah {iA if len(iA) <= 5 else iA[0:5]}"
        print(r)
        return r

    @staticmethod
    def makeGr(*grammar):
        "make Rule, Sym and Pos for given grammer in form for function f"
        dbg(1, "grammar source", grammar)
        PObj.lrK = 0
        Rule.clear()
        Rule.add(*grammar)
        Rule.finish()
        State.makeAll()

    @staticmethod
    def makeStateRemove():
        "try to make lr(k) states for k=0,1, ..."
        tS = datetime.now()
        k = 0
        st = f"{len(RuleSym.syms)} syms, {len(Rule.rules)} rules, {len(RulePos.poss)} rulePos"
        while True:
            k += 1
            res = State.make(k)
            el = (datetime.now() - tS).total_seconds()
            elT = f"after {el}sec: " if dbgTime else ''
            if res == None:
                break
            print(f"{res}: {elT}{st} {len(State.states)} states")
            if el > 7:
                print("stop by timeout, do not check next LR(k)")
                return False
            if k > 20:
                print("stop by k >Y 20, do not check next LR(k)")
                return 

        if k == 1:
            res = State.checkLR0()
            if res == None:
                k=0
            else:
                print(f"{res}: {elT}")
        print(f'grammar is LR({k}): {elT}{st} {len(State.states)} states')
        Parser.states = State.states
        PObj.dbg(1, 'State 0')
        return True

    @staticmethod
    def testGr(nm, syn, *cs):
        """test named nm, with grammar syn and testcases cs"""
        print(f"{nm} --- begin test -----------")
        Parser.makeGr(*syn)
        for s in State.p2s.values():
            dbg(1, s.strA('pa', 'go'))
        if State.err != '':
            print('error generating grammar', State.err)
        else:
            Parser.test(nm, *cs)

    @staticmethod
    def test(nm, *tsts):
        "test for the current grammar, named nm the tests tsts"
        if tsts == None:
            tsts = ['2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (']
        trS = 0
        def treeSplit(nd, pr):
            nonlocal trS
            trS += 1
            if isinstance(nd, str):
                return pr + nd
            r = pr + str(nd[0]) + '/' + str(len(nd[0].body))
            for ch in nd[1:]:
                r += treeSplit(ch, pr + '  ')
            return r

        def treeSpli2(nd, of):
            nonlocal trS
            trS += 1
            if isinstance(nd, str):
                return nd
            r = str(nd[0]) +  ' ' #  + '/' + str(len(nd[0].body))
            of += len(r)
            if len(nd) > 1:
                r += treeSpli2(nd[1], of)
                for ch in nd[2:]:
                    r += "\n" + of * ' ' + treeSpli2(ch, of)
            return r

        for i in range(len(tsts)) :
            sT = [x for x in re.split(r'\s+', tsts[i]) if x != ''] # void empty strings, e.g. for ''
            print(f"test begin  {nm} {i} input {sT} ..........................")
            ast = Parser.parse(sT)
            if isinstance(ast, str):
                print(f'syntax test {nm} {i} input {tsts[i]}: {ast}')
            else:
                print(f"parsed {str(ast[0])} from {len(sT)} tokens, {Parser.pCnt} steps,  {len(State.p2s)} states, Rules {len(Rule.rules)}")
                print(' ', treeSpli2(ast, 2))
            print('test', 'syntax' if isinstance(ast, str) else 'parsed', f"{nm} {i} input {sT} ..........................")

    @staticmethod
    def testSyn(nm, syn, *cs):
        """test named nm, with grammar syn and testcases cs"""
        Parser.testGr(nm, syn, *cs)

    @staticmethod
    def test1():
        """my tests """
        g = ["s=s+p", "s = p", "p = p '*' i", "p = i", "i = '(' s           ')'", "i = '0'", "i = '1'", "i = '2'"]
        Parser.testSyn('arithExpr', g
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +')
        
        g2 = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = epm i d", "epm = pm", "epm ="
                , "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]
        g = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = i d", "epm = pm epm", "epm ="
                , "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]
        Parser.testSyn('arithExprPM' , ["s=s pm p", "s=p", "p=p*i", "p=p/i", "p=i", "i=(  s )", "i = j"
            , "j=epm d", "j=j d", "epm = pm epm", "epm =  ", "pm=+", "pm=-", "d=0", "d=1", "d =  2  "]
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
            , '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')

        Parser.testSyn('arithExprPE' , ["s=s pm p", "s=p", "p=p*e", "p=p/e", "p=e", "e=i", "e=i**e", "i=(  s )", " i=j"
            , "j=epm d", "j=j d", "epm = pm epm", "epm =  ", "pm=+", "pm=-", "d=0", "d=1", "d =  2  "]
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
            , '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')

        Parser.testSyn('LR(2)?' , ["S = AA 'a' 'a'", "S = AB 'a' 'b'", "AA =", "AB ="]
            ,"a a", "a b", "a c", "b a")

    @staticmethod
    def testKnuth():
        """some of the examples given by Donald E. Knuth. 1965. On the Translation of Languages from Left to Right """
        Parser.testSyn('knuth(1)'
            , ["S = A D", "A = 'a' C", "B = 'b' 'c' 'd'", "C = B E", "D =", "E = 'e'"]
            , "a b c d e", "a b x d e", "a b", "a b c d e f")
        Parser.testSyn('knuth(6)'
            , ["S = 'a' A 'c'", "A = 'b' A 'b'", "A = 'b'"]
            , "a b b b  c", "a b c", "a b b c", "a b b b b b b b c e")
        Parser.testSyn('knuth(7) lr(0)'
            , ["S = 'a' A 'c'", "A =  A 'b' 'b'", "A = 'b'"]
            , "a b b b  c", "a b c", "a b b c", "a b b b b b b b c e")
        Parser.testSyn('knuth(16x) lr(3)'
            , ["S=B C", "B=C e", "B = ", "C=D", "C=D c", "D=", "D=d"]
            , "", "c", "c e", "c e d")
        Parser.testSyn('knuth(24)'
            , ["S =",  "S = 'a' A 'b' S", "S =  'b' B 'a' S", "A =", "A = 'a' A 'b' A", "B =", "B = 'b' B 'a' B"]
            , "", "a b", "b a", "a b b", "a a b b", "a b a b", "b a a b")
# "IS = IP", "IS = IS + IP", "IS = IS - IP", "IP = IE", "IP = IP * IE", "IP = IP / IE", "IE = IA", "IE = IA * * IE", "IA = IB", "IA = + IA", "IA = - IA", "IB =( IS )", "IB = ID", "IB = IB ID", "ID = 0", "ID = 1", "ID = 2" ... 

def testRe():
    x = ' ab cd  e?f   *+??'
    print(f"x={x}")
    print("re: lookahead assertion and lookbehind assertions to find boundaries between words and or special character")
    for y in re.finditer(r'\s+|(?<=\S)(?=\S)((?<=\W)|(?=\W))', x):
        print([y.start(),y.end(), x[y.start(): y.end()+1]])
    print("easier and more stable against leading/trailing spaces: use findall for words or special chars")
    for y in re.finditer(r'\w+|\S', x):
        print([y.start(),y.end(), x[y.start(): y.end()]])
    print(re.findall(r'\w+|\S', x))
    x += "' '' b 'oder1''1.2und. 3 ' '.a.234 4.. single' !"
    print('mit strings and numbers:', x)
    print(re.findall(r"""'(?:''|[^'])*'    # single quoted strings, doubble apostrophs for a single
                                        # non capturing group (?:...) otherwise findall will return groups!
                        |\d+(?:\.\d*)    # number with leading digits and possible decimal point/digits
                        |\.\d+            # number with leading decimal points and digits
                        |\w+            # words
                        |\S"""            # special chars
                , x
                , re.X))                    # re.X = re.VERBOSE flag

if 0:
   testRe()
   exit()
if 0:
    LahIter.test()
    l = [0, 1, 2]
    s = '012'
    print([(i, l[0:i], s[0:i]) for i in range(6)])
    print([(x, y) for x in range(3) for y in range(2)])
    print([(x, y) for x in range(3) for y in range(3*x, 4*x+1)])
    print([(x, y) for x in range(3) if x > 0 for y in range(3*x, 4*x+1) if y > x])
    exit()


        

Parser.test1()
Parser.testKnuth()
print('end', __file__)