python/parser4.py

# knuth lr(k) parser

import re
"""
    state is set of positions, not (set of positions, lookAhead) as for knuth
    thus, we reduce the number of states
"""
strSep1 = '--------------------------------'
def strL(*ee):
    return     ', '.join(e if type(e) == str else e.strL() if hasattr(type(e), 'strL') else strC(e) for e in ee)

def strS(*ee):
    return     ', '.join(e if type(e) == str else strC(e) for e in ee)

strCW = re.compile('\w+')
strNSs = re.compile('\s')

def strNS(s):
    return strC(s) if not isinstance(s, str) else s if strNSs.search(s) == None else repr(s)

def strNSset(s):
    return ' '.join(sorted([strNS(a) for a in s]))    
def strC(e):
    """return a str represntig e. recursively expand collections, otherwise use str"""
    t = type(e)
    if t == str:
         return repr(e) if strCW.fullmatch(e) == None else e
    elif t == tuple:
         return '(' + ', '.join(strC(v) for v in e) + ')'
    elif isinstance(e, list): # t == list:
         return '[' + ', '.join(strC(v) for v in e) + ']'
    elif t == set or t == frozenset:
         return '{' + ', '. join(sorted([strC(v) for v in e])) + '}'
    elif isinstance(e, dict):
         return '{' + ', '. join(strC(k) + ': ' + strC(v) for k, v in e.items()) + '}'
    else:
        return str(e)

def out(*ee):
    print(strL(*ee))
    
def outS(*ee):
    print(strS(*ee))

def err(*e):
    raise Exception('error: ' + strL(*e))
    exit()

def errS(*e):
    raise Exception('error: ' + strS(*e))
    exit()

dbgLev = 1
dbgTime = 0
def dbg(l, *e):
    if dbgLev >= l:
        print('dbg: ' + strL(*e))

def dbgS(l, *e):
    if dbgLev >= l:
        print('dbg: ' + strS(*e))
        
from datetime import * ;
import re;

def dsUpdate(le, ri):
    u = False
    for k,v in ri.items():
        if k not in le:
            le[k] = set()
        o = len(le[k])
        le[k] |= ri[k]
        if o!= len(le[k]):
            u = True
    return u

def dsAdd(g, e, p):
    if e in g:
        s = g[e]
        o = len(s)
        s.add(p)
        return o < len(s)
    else:
        g[e] = {p}
        return True

def dsAddS(g, e, s):
    if len(s) < 1:
        return False
    if e in g:
        o = len(g[e])
        g[e] |= s
        return len(g[e]) > o
    else:
        g[e] = s.copy()
        return True
class Leaf(dict):
    pass
    
def d2sTg(g, *kk):
    a = g
    for kx in range(len(kk)-2):
        k = kk[kx]
        if not k in a:
            a[k] = {}
        a = a[k]
    k = kk[-2]
    if not k in a:
        a[k] = Leaf()
    a = a[k] 
    k = kk[-1]
    if k not in a:
        a[k] = set()
    return a[k]

def d2sAdd(g, e, f, p):
    s = d2sTg(g, e, f)
    o = len(s)
    s.add(p)
    return o < len(s)

def d2sAddS(g, e, f, pp):
    s = d2sTg(g, e, f)
    o = len(s)
    s |= pp
    return o < len(s)
        
    
def    dsDbg(lv, g, *a):
    if dbgLev < lv:
        return
    if len(a) > 0:
        print(*a)
    for k,v in g.items():
        print("\t" + str(k) + ' => ' + ', '.join({str(p) for p in v}))

class PObj:
    "root and container for all our objects"
    end = '!'
    rStart = 'start'
    lahStr = True # lah is true ==> a string, False => tuple of strings
    lahEmpty = '' if lahStr else ()
    lahEmptySet = {lahEmpty}
    objs = []
    lrK = 0
    lrB = 0

    def setName(self, nm):
        self.name =  (self.__class__.__name__ + '#' + str(len(self.objs))) if nm == '' else nm

    def __init__(self, nm = ''):
        self.setName(nm)
        self.objs.append(self)

    def __str__ (self):
        return self.name

    def strA(self, *aa):
        return str(self) + '{' + ', '.join(a + '=' + strC(getattr(self, a)) if (ex := a.find('=')) < 0 else a[0:ex] + '=' + strC(getattr(self, a[ex+1:])) for a in aa) + '}'

    @staticmethod
    def dbg(l, m):
        err('wer braucht das')
        if dbgLev >= l:
            print(m)
            p = '['
            for o in PObj.objs:
                print(p, o)
                p = ','
            print(']')

    
    @staticmethod
    def lahGenZero(e):
        return ({e if PObj.lahStr else (e,)}, set()) if isinstance(e, str) else (e.prd, e.lah)


    def lahGenOr(self, ee):
        pp = set()
        ll = set()
        for e in ee:
            if isinstance(e, str):
                pp.add(e if PObj.lahStr else (e,))
            else:
                ll |= e.lah
                pp |= e.prd
        if ll == self.lah and pp == self.prd:
            return False
        self.lah = ll
        self.prd = pp
        return True

class Rule (PObj):
    "a Rule has a name (a symbol or nonterminal) and a body of terminals and nonterminals"
    rules = []
    ruleAll = []
    ruleLoA = []
    s2r = {}
    def __init__(self, sy, bd):
        super().__init__(sy)
        self.body = bd
        self.pos = None
        self.go = False
        self.loA = None
        self.brd = False
        self.redCh = {self}
        self.sub = {self}
        self.subRi = {self}
        self.stp = {}
        self.st = set()
        self.lt1 = {}
        self.tst = False
        self.lock = False
        Rule.ruleAll.append(self)
        if sy != '':
            if sy in Rule.s2r:
                Rule.s2r[sy].append(self)
            else:
                Rule.s2r[sy] = [self]

    def strA(self, *aa):
        return super().strA(type(self).__name__[4:] + '=' + aa[0] if aa[0].find('=') < 0 else aa[0], *aa[1:])

    def strL(self):
        return self.strA('body', 'go')
        
    def dbg(self, l, *a):
        if dbgLev >= l:
            print(self.strL(), *a)
                
    @staticmethod
    def clear(): 
        Rule.rules.clear()
        Rule.ruleAll.clear()
        Rule.ruleLoA.clear()
        Rule.s2r.clear()
        State.clear()    
        PObj.objs.clear()

    @staticmethod
    def add(*src):
        """add new rules: each rule is a string
                elements are words (double or sinble quoted strings, spechial char
            if an unquoted element is found as a rule name, it designates a rule, otherwise a terminal
        """
        for s in src:
            spl = re.findall('\w+|\'(?:[^\']|\'\')*\'?|"(?:[^"]|"")*"?|\S', s)     # words | single quoted | doubble quoted | single nonspace char
                                                                                # (?: non capturing group, otherwise findall will use group!
            if len(spl) > 0 and spl[-1][0] in "\"'" and (len(spl[-1]) < 2 or spl[-1][0] != spl[-1][-1]):
                err('missing endquote string:', spl[-1], 'in source', s, src)
            assert(spl[1] == '=')
            RuleSeq(spl[0], spl[2:])

    @staticmethod
    def finish():
        rr = Rule.ruleAll.copy()
        assert len(Rule.rules) == 0
        for r in rr:
            l = Rule.s2r[r.name] 
            if type(l) != list:
                assert type(l) == RuleOr and r in l.body
            else:
                assert type(l) == list and r in l, r.strA('body') + ', l=' + strC(l)
                if len(l) == 1:
                    Rule.s2r[r.name] = r 
                else: 
                    Rule.s2r[r.name] = RuleOr(r.name + '@' + str(len(Rule.rules)), l)
                    Rule.rules.append(Rule.s2r[r.name])
            r.setName(r.name + '@' + str(len(Rule.rules)))
            Rule.rules.append(r)        
        for r in Rule.rules:
            for i in range(len(r.body)):
                b = r.body[i]
                if type(b) == str:
                    if b[0] == "'" and b[-1] == "'" or b[0] == '"' and b[-1] == '"':
                        b = b[1: -1]
                    elif b in Rule.s2r:
                        b = Rule.s2r[b]
                    r.body[i] = b
                    assert b != PObj.end or r == Rule.rules[0]
            r.makePos()
            dbg(5, 'makePos body/pos len', len(r.body) , len(r.pos), r.strA('body','pos'))
        #Rule.stpGen()
        
        for r in Rule.rules:
            assert r.go == False
            g = {}
            r.goGen(g)
            assert r.go == False
            for e,p in g.items():
                g[e] = frozenset(p)
            r.go = g
            dbg(10, 'goGen', r.strA('body', 'go'))
        if False:
            Rule.lt1GenAll()
            o = len(Rule.ruleAll)
            for r in Rule.rules:
                r.loA = RuleOr('', [])
            for r in Rule.rules:
                for a, t in r.go.items():
                    if isinstance(a, Rule):
                        for p in t:
                            a.loA.body.append(RuleSeq('', [p, p.rule.loA]))
            for r in Rule.rules:
                for a in r.loA.body:
                    assert len(a.body) == 2
                    if len(a.body[1].body) == 0:
                        a.body.pop()
                        dbg(1, 'loA removed [2] in', r, a)                    
            Rule.ruleLoA = Rule.ruleAll[o:]
            Rule.lahGenK(1)
            for r in Rule.rules:
                if isinstance(r, RuleSeq):
                    do = [b for b in r.body if isinstance(b, str) or PObj.lahEmpty not in b.prd] 
                    if len(do) <= 1:
                        if len(do) == 0:
                            do = r.body
                        r.sub |= {d for d in do if isinstance(d, Rule)}
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    n = r.sub.copy()
                    for s in r.sub if isinstance(r, RuleSeq) else r.body:
                            n |= s.sub
                    if n != r.sub:
                        u = True
                        r.sub = n
            for r in Rule.rules:
                dbg(1, 'subGen', r.strA('body', 'sub'))        
                        
            #Rule.lahGenK(5)
        return
            
        if False:
            Rule.prdLah(1)
            for r in Rule.rules:
                bd = r.body
                if isinstance(r, RuleOr):
                    r.redCh |= set(b for b in bd if isinstance(b, Rule))
                else:
                    ne = [b for b in bd if isinstance(b, str) or '' not in b.prd]
                    if len(ne) == 0:
                        r.redCh |= set(b for b in bd if isinstance(b, Rule))
                    elif len(ne) == 1 and isinstance(ne[0], Rule):
                        r.redCh.add(ne[0])
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    o = r.redCh.copy()
                    for s in o:
                        r.redCh |= s.redCh
                    if len(o) < len(r.redCh):
                        u = True
                        dbg(99, 'redCh', r.strA('redCh'))                    

            for r in Rule.rules:
                dbg(1, 'redCh', r.strA('body', 'redCh'))                    

            for r in Rule.rules:
                r.sub = {r for r in r.body if isinstance(r, Rule)}
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    oo = r.sub.copy()
                    for o in oo:
                        r.sub |= o.sub
                    if len(oo) < len(r.sub):
                        u = True
                        dbg(99, 'sub', r.strA('sub'))                    
            for r in Rule.rules:
                dbg(1, 'sub', r.strA('body', 'sub'))                    
                        
            for r in Rule.rules:
                bd = r.body
                if isinstance(r, RuleOr):
                    r.subRi |= set(b for b in bd if isinstance(b, Rule))
                else:
                    for b in reversed(bd):
                        if isinstance(b, str):
                            break
                        r.subRi.add(b)
                        if '' not in b.prd:
                            break
            u = True
            while u:
                u = False
                for r in Rule.rules:
                    o = r.subRi.copy()
                    for s in o:
                        r.subRi |= s.subRi
                    if len(o) < len(r.subRi):
                        u = True
                        print('subRi', r.strA('subRi'))                    
                    
        if False:
            for r in Rule.rules:
                assert r.brd == False
                b = set()
                r.brdGen(b)
                assert r.brd == False
                r.brd = b
                dbg(1, 'brdGen', r.strA('body', 'brd'))
                
            for r in Rule.rules:
                if isinstance(r, RuleSeq):
                    bd = r.body
                    bL = len(bd)
                    for bx in range(bL-1):
                        b = bd[bx]
                        if isinstance(b, Rule) and r not in b.redCh:
                            for nx in range(bx+1, bL):
                                n = bd[nx]
                                if isinstance(n, str):
                                    if n in b.brd:
                                        err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', n, nx)
                                    break
                                else:
                                    if len(b.brd & (n.prd | n.lah)) != 0:
                                        err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', nx, n.stra('prd', 'lah'))
                                    if '' not in n.prd:
                                        break
                
        
#        for r in Rule.rules:
#            for p in r.pos:
#                g = {}
#                p.goAdd(g)
#                dsDbg(1, g, str(p))
            

        if False:
            for s in State.p2s.values():
                if '' in s.go and len(s.go['']) != 1:
                    err("reduce not unique go['']", s.go[''], 'in', s)
                dbg(1, s)

            u = True
            while u:
                u = False
                for s in State.p2s.values():
                    u |= s.auGen()
                dbg(1, 'auGen')
            for s in State.p2s.values():
                dbg(1, 'auGen', s.strA('pa', 'au'))

            stp = set()
            for s in State.p2s.values():
                stp.clear()
                s.check(stp)

            r2s = {}
            s2r = {}
            for s in State.p2s.values():
                if '' in s.go:
                    for r in s.go['']:
                        dsAdd(r2s, r, s)
                        dsAdd(s2r, s, r)
            dbg(1, 'r2s st',sum(len(s) for s in r2s.values()), r2s)
            for ix in range(4):
                for s in State.p2s.values():
                    for a, t in s.go.items():
                        if isinstance(a, Rule):
                            if t in s2r:
                                for w in s2r[t]:
                                    dsAdd(r2s, w, s)
                                    dsAdd(s2r, s, w)
                dbg(1, 'r2s', ix, sum(len(s) for s in r2s.values()), 'r2s', r2s)
                    
        #        for r in s.go['']:
        #            dsAdd(s2r, s,r.redP.add(r.pos[-1])
        #for r in Rule.rules:
        #    print(str(r), 'redP', strC(r.redP))

    def makePos(self, l):
        assert(self.pos == None)
        self.pos = [RulePos(self, ix) for ix in range(l)]
        self.pos[-1].prd = PObj.lahEmptySet

    def goGen(self, g):
        """ recursively add all start transactions for self to go-dict g
        """
        if self.go == True:
            return 
        elif self.go != False:
            dsUpdate(g, self.go)
        else:
            self.go = True
            self.goGenRec(g)
            self.go = False

    @staticmethod
    def goGenRecEP(e, p, g):
        """ add element e to position p to go-dict g
            if e was not yet in g and g is a Rule, recursively generate go for g
        """
        dbg(99, 'goGenRecEP', str(e), str(p))
        if e in g:
            g[e].add(p)
        else:
            g[e] = {p}
            if isinstance(e, Rule):
                e.goGen(g)

    @staticmethod
    def lt1GenAll():
        for r in Rule.rules:
            r.lt1Gen((), r.lt1)
            dbg(1, 'lt1Gen', r.strA('body', 'lt1'))

    def lt1Gen(self, his, to):
        if self.lock:
            return
        if len(self.pos) <= 1:
            d2sAdd(to, '', self, his)
        else:
            self.lock = True
            self.lt1GenSub(his + (self.pos[1], ), to)
            self.lock = False

    @staticmethod
    def lahGenK(k):
        if k <= PObj.lrK:
            err(f"lahGenK({k} bur lrK already {PObj.lrK}")
        PObj.lrK = k
        u = True
        c = 0
        while u:
            c += 1
            if c > 40:
                err('too many')
            u = False
            for r in reversed(Rule.rules):
                u |= r.lahGen()
            dbg(15, 'lahGen', k, u)
        dbg(3, 'lahGen', c, 'rounds')
        for r in Rule.rules:
            dbg(10, 'lahGen', r.strA('body'), 'prd=', r.pos[0].prd, 'lah=', r.pos[0].lah)
        return
        #for s in State.p2s.values():
        #    s.lahGen()
        u = True
        c = 0
        while u:
            c += 1
            if c > 40:
                err('too many')
            u = False
            for r in Rule.ruleLoA:
                u |= r.lahGen()
            dbg(1, 'loAGen', k, u)
        dbg(1, 'loAgen', c, 'rounds')
        for r in Rule.rules:
            dbg(1, 'loA gen', r.strA('body'), 'loA.prd=', r.loA.prd, 'loA.lah=', r.loA.lah)
        if False:
            for r in Rule.ruleLoA:
                dbg(1, r.strA('body', 'prd', 'lah'))
            for r in Rule.rules:
                for p in r.pos:
                    dbg(1, p.strA('prd', 'lah'))
        
    def brdGen(self, b):
        """ recursively add all start transactions for self to go-dict g
        """
        if self.brd == True:
            return 
        elif self.brd != False:
            b |= self.brd
        else:
            self.brd = True
            self.brdGenRec(b)
            self.brd = False

    @staticmethod
    def makeSymPosRemove():
        "make symbols and positions, after we got the source of all rules, convert body to symbols and bare strings"
        PObj.objs[0].body[0] = PObj.objs[1].sym
        Rule.rules = Rule.objs.copy()
        PObj.dbg(4, 'made rules')
        RuleSym.make()
        PObj.dbg(4, 'made sym')
        RulePos.make()
        PObj.dbg(3, 'made pos')

    @staticmethod
    def prdLah(k):
        """ compute for all rules 
             prd = productions of length <= k and
            lah = prefixes of length k of all productions with length > k
        """

        tS = datetime.now()
        assert not PObj.lahStr or (all([len(y) == 1 for r in Rule.rules for y in r.body if isinstance(y, str)]))
        cnt = 0
        upd = True
        while upd: # recompute all rules, until no change
            upd = False
            lC = 0
            pC = 0
            for r in Rule.rules:
                cnt += 1
                if r.prdLahComb(k):
                    upd = True
                    dbg(1, 'prdLah', str(r), r.prd, r.lah)
                lC += len(r.lah)
                pC += len(r.prd)
            if cnt > 1000:
                err('loop')
        elT = f"{(datetime.now() - tS).total_seconds()}sec, " if dbgTime else ''
        print(f"Rule.prdLah k={k} <- {PObj.lrK}: {cnt} {elT} #prd={pC} #lah={lC}")

        if dbgLev >= 3:
            for r in Rule.rules:
                dbg(3, 'prdLah', r.strA('body', 'prd', 'lah'))
        assert all([len(s) == k for s in r.lah for r in Rule.rules])
        assert all([len(s) <= k for s in r.prd for r in Rule.rules])
        PObj.lrK = k

    def prdLahAdd(self, k, pp, ll):
        upd = False
        assert self.prd <= pp
        if self.prd != pp:
            assert self.prd < pp
            upd = True
            self.prd = pp

        ll |= self.lah
        jj = {len(l) for l in ll if len(l) != k}
        if len(jj) != 0:
            for l in [l for l in ll if len(l) == k]:
                for j in jj:
                    ll.discard(l[0:j])
        if ll != self.lah:
            upd = True
            dbg(99, 'lahupd', self.lah, ll)
            self.lah = ll            
        return upd
        
    def hasCycle(self):
        if self.tst:
            return True
        self.tst = True
        r = any(b.hasCycle() for b in self.body if isinstance(b, Rule))
        self.tst = False
        return r

    def lahOneAdd(self, his, to):
        p = his[-1]
        assert p.rule == self
        if self.tst:
            return
        self.tst = True
        nx = p.pos + 1
        if nx < len(self.pos):
            self.lahOneAddSub(his, to)
        else:
            if len(self.body) == 0:
                assert isinstance(self, RuleSeq)
                dsAdd(to, '', his + (self))
            if len(his) > 1:
                his[-2].rule.lahOneAdd(his[0:-1], to)
        self.tst = False    

    @staticmethod
    def lahOneGen():
        """ generate one char lookahead for all rules """
        for r in Rule.rules:
            r.lahOneAdd((r.pos[0], ), r.lahOne)
            dbg(1, 'lahOneGen', r.strA('body', 'lahOne', 'go'))
            dbg(1, 'lahLen', len(r.lahOne), len(r.go), len([r for r in r.go if isinstance(r, str)]))
            l = {a : {s[-1] for s in b} for  a, b in r.lahOne.items()}
            g = {a: {r.pos[0] if isinstance(r, Rule) else r for r in b} if isinstance(b, frozenset) else b.pa for a, b in r.go.items() }
            dbg(1, 'lahComp', l == g, l, g)
        for r in Rule.rules:
            if isinstance(r, RuleSeq):
                for ex in range(len(r.body)):
                    e = r.body[ex]
                    if isinstance(e, Rule):
                        e.aft.add(r.pos[ex+1])
            else:
                for e in r.body:
                    if isinstance(e, Rule):
                        e.aft.add(r.pos[1])
        for r in Rule.rules:
            dbg(1, 'aftGen1', r.strA('body', 'aft'))

    @staticmethod
    def aftGen2():
        for r in Rule.rules:
            pp = r.aft
            r.aft = {}
            for p in pp:
                for a, qq in p.lajOne.items():
                    if isinstance(a, str):
                        for q in qq.values:
                            dsAddS(r.aft, a, q)
            dbg(1, 'aftGen2', r.strA('body', 'aft'))
    
    @staticmethod
    def sucGen():
        for r in Rule.rules:
            for p in r.pos:
                p.suc.clear()
        Rule.rules[0].pos[-1].suc.add(PObj.end if PObj.lahStr else (PObj.end ,))
        u = True
        lC = 0
        uC = 1
        while uC > 0:
            lC += 1
            uC = 0
            for r in Rule.rules:
                uC += r.sucGen()
            dbg(5, f'sucGen loop {lC}, upd {uC}')
        for r in Rule.rules:
            dbg(5, 'sucGen', r.strA('body'), 'suc0=', r.pos[0].suc, 'suc-1=', r.pos [-1].suc)
            for p in r.pos:
                dbg(10, 'sucGen', p.strA('suc', 'prd', 'lah'))

    @staticmethod
    def stpGen():
        for r in Rule.rules:
            r.stpGen1(r.stp, ())
            dbg(1, 'stpGen', r.strA('body', 'stp', 'suc'))

    def stpGen1(self, to, his):
        if len(his) > 0:
            dsAdd(to, self, his)
        if len(self.body) == 0:
            assert isinstance(self, RuleSeq)
            dsAdd(to, '', his + (self, ))
        else:
            if not self.lock:
                self.lock = True
                self.stpGen2(to, his + (self.pos[1], ))
                self.lock = False
            
class RuleSeq(Rule):
    def __init__(self, sym, bd):
        super().__init__(sym, bd)

    def makePos(self):
        super().makePos(len(self.body)+1)
        
    def goGenRec(self, g):
        if 0 == len(self.body):
            dsAdd(g, '', self)
        else:
            Rule.goGenRecEP(self.body[0], self.pos[1], g)

    def lt1GenSub(self, his, to):
        b = self.body[0]    
        if isinstance(b, Rule):
            b.lt1Gen(his, to)
        else:
            d2sAdd(to, b, his[-1], his)

    def ltrGen2(self, pos, his, to):
        assert self == pos.rule
        nx = pos.pos + 1
        if nx >= (self.pos):
            return True
        b = self.body[pos.pos]
        if isinstance(b, str):
            d2sAdd(to, b, his[0, -1] + (self.pos[nx],))

    def lahGen(self):
        pp = self.pos[-1].prd
        ll = self.pos[-1].lah
        p0 = self.pos[0].prd
        l0 = self.pos[0].lah
        assert len(ll) == 0
        assert pp == PObj.lahEmptySet
        u = False
        for bx in range(len(self.body)-1, -1, -1):
            b = self.body[bx]
            o = self.pos[bx]
            if isinstance(b, str):
                a = b if PObj.lahStr else (b, )
                pN = {a + p for p in pp if len(p) < PObj.lrK}
                lN = {(a + p)[0:PObj.lrK] for p in pp if len(p) == PObj.lrK} | {(a + l)[0:PObj.lrK] for l in ll if len(l) == PObj.lrK}
            else:
                pN = {a + p for a in b.pos[0].prd for p in pp if len(a) + len(p) <= PObj.lrK}
                lN = {(a + p)[0:PObj.lrK] for a in b.pos[0].prd for p in pp if len(a) + len(p) > PObj.lrK} \
                        | {(a + l)[0:PObj.lrK] for a in b.pos[0].prd for l in ll if len(a) + len(l) >= PObj.lrK} \
                        | {a for a in b.pos[0].lah if len(a) == PObj.lrK}
            o.prd = pN
            o.lah = lN
            pp = pN
            ll = lN    
        return p0 != self.pos[0].prd or l0 != self.pos[0].lah

    def sucGen(self):
        u = False
        pp = self.pos[-1].suc
        nn = pp
        #dbg(1, 'sucGen0', self.strA('body'), pp)
        for qx in range(len(self.pos)-2, -1, -1):
            q = self.pos[qx]
            b = self.body[qx]
            if isinstance(b, Rule) and not nn <= b.pos[-1].suc:
                u = True
                b.pos[-1].suc |= nn
                # dbg(1, 'sucGen1', b.pos[-1].strA('rule', 'pos', 'suc'), 'nn=', nn)
            nn = {(a+p)[0: PObj.lrK] for a in q.prd for p in (pp if len(pp) > 0 else PObj.lahEmptySet) } | q.lah
            q.suc = nn
            #dbg(1, 'sucGen2', q.strA('suc', 'prd', 'lah'))
        return u

    def brdGenRec(self, b):
        bd = self.body
        bL = len(bd)
        if len(bd) == 0:
            return
        e = bd[0]
        if isinstance(e, Rule) and self in e.redCh and bL > 1:
            for fx in range(0, bL):
                f = bd[fx]
                if isinstance(f, Rule) and self in f.sub:
                    g = bd[1]
                    if isinstance(g, str):
                        b.add(g)
                    else:
                        b |= {p for p in g.prd if p != ''} | g.lah 
                    break
        for ex in range(len(self.body)-1, -1, -1):
            e = self.body[ex]
            if isinstance(e, str):
                break
            e.brdGen(b)
            if '' not in e.prd:
                break
        return
        
        for ex in range(len(self.body)-1):
            e = self.body[ex]
            if isinstance(e, str):
                break
            if e in self.redCh or self in e.redCh:
                f = self.body[ex+1]
                if isinstance(f, str):
                    b.add(f)
                else:
                    b |= {x for x in f.prd if len(x) != 0} | f.lah
            assert all(len(x) == 1 for x in b)
            if '' not in e.prd:
                break
        
        assert not self.tst

        e = self.body[0]
        if isinstance(e, Rule) and '' in e.prd:
            for ex in range(len(self.body)-2, -1, -1):
                e = self.body[ex]
                if isinstance(e, str):
                    break
                if self in e.subRi:
                    dbg(1, 'hasCycle', str(self), str(e))
                    b |= {x for x in e.prd if len(x) > 0} | e.lah
                if '' not in e.prd:
                    break

    def lahOneAddSub(self, his, to):
        p = his[-1]
        assert self == p.rule and p.pos < len(self.body)
        e = self.body[p.pos]
        dsAdd(to, e, his[0: -1] + (self.pos[p.pos+1],))
        if isinstance(e, Rule):
            e.lahOneAdd(his[0: -1] + (self.pos[p.pos+1], e.pos[0]), to)

    def stpGen2(self, to, his):
        b = self.body[0]
        if isinstance(b, str):
            dsAdd(to, b, his)
        else:
            b.stpGen1(to, his)
        
class RuleOr(Rule):
    def __init__(self, sym, bd):
        super().__init__(sym, bd)

    def makePos(self):
        super().makePos(2)

    def goGenRec(self, g):
        for b in self.body:
            Rule.goGenRecEP(b, self.pos[1], g)

    def lt1GenSub(self, his, to):
        for b in self.body:    
            if isinstance(b, Rule):
                b.lt1Gen(his, to)
            else:
                d2sAdd(to, b, his[-1], his)

    def lahGen(self):
        assert len(self.pos[-1].lah) == 0
        assert self.pos[-1].prd == PObj.lahEmptySet
        p0 = self.pos[0].prd
        l0 = self.pos[0].lah
        pp = set()
        ll = set()
        for b in self.body:
            if isinstance(b, str):
                pp.add(b if PObj.lahStr else(b,))
            else:
                pp |= b.pos[0].prd
                ll |= b.pos[0].lah
        if p0 == pp and l0 == ll:
            return False
        self.pos[0].prd = pp
        self.pos[0].lah = ll
        return True

    def sucGen(self):
        u = False
        pp = self.pos[-1].suc
        for b in self.body:
            if isinstance(b, Rule):
                if not pp <= b.pos[-1].suc:
                    u = True
                    b.pos[-1].suc |= pp
        self.pos[0].suc = {(a+p)[0:PObj.lrK] for a in self.pos[0].prd for p in pp} | self.pos[0].lah
        return u

    def lahGenPos(self):
        self.pos[1].lahGenSeq([])
        self.pos[0].prd = self.prd
        self.pos[0].lah = self.lah
        
    def prdLahComb(self, k):
        "compute lah and prd combining rules and already computed prd and lah"

        pp = set()
        ll = set()
        for y in self.body:
            if isinstance(y, str):
                pp.add( y if PObj.lahStr else (y,))
            else:
                pp |= y.prd
                ll |= y.lah
        return self.prdLahAdd(k, pp, ll)

    def brdGenRec(self, b):
        for e in self.body:
            e.brdGen(b)

    def lahOneAddSub(self, his, to):
        p = his[-1]
        assert p.rule == self and p.pos == 0
        hiN = his[0: -1] + (self.pos[1], )
        for e in self.body:
            dsAdd(to, e, hiN)
            if isinstance(e,Rule):
                e.lahOneAdd(hiN + (e.pos[0],), to)

    def stpGen2(self, to, his):
        for b in self.body:
            if isinstance(b, str):
                dsAdd(to, b, his)
            else:
                b.stpGen1(to, his)

class RuleSymRemove (PObj):
    "the symbols are our nonterminals, which are a set of rules"
    n2y = {}
    syms = None
    def __init__(self, sym):
        self.sym = sym
        super().__init__(f"Y#{sym}")
        self.rules = []
        self.lah = set()
    def sExp (self):
        return super().sExp(f"sym={self.sym!s} {self.rules} lah={self.lah}")

    @staticmethod
    def make():
        "make the symbols from defined rules and convert Rule.body to symbols and bare strings"
        i0 = len(PObj.objs)
        for r in Rule.rules: #collect symbols
            if r.sym not in RuleSym.n2y:
                RuleSym.n2y[r.sym] = RuleSym(r.sym)
            RuleSym.n2y[r.sym].rules.append(r)
            r.sym = RuleSym.n2y[r.sym]
        RuleSym.syms = PObj.objs[i0:]
        assert Rule.objs[0].sym == RuleSym.syms[0]
        assert RuleSym.syms[0].sym == PObj.rStart
        assert RuleSym.n2y[PObj.rStart] == RuleSym.syms[0]
        assert len(RuleSym.n2y[PObj.rStart].rules) == 1
        for r in Rule.rules: # convert Rule.body to symbols and bare strings
            b = r.body
            if r.pos == 'a':
                for ix in range(len(b)):
                    v = b[ix]
                    if v[0] == "'" and v[-1] == "'":
                        b[ix] = v[1: len(v)-1]
                        assert b[ix] != PObj.end
                    elif v in RuleSym.n2y:
                        b[ix] = RuleSym.n2y[v]
                    else:
                        err(f"rule {v} in {r} not in {Rule.rules}")
            elif r.pos == 'n':
                for ix in range(len(b)):
                    v = b[ix]
                    if v in RuleSym.n2y:
                        b[ix] = RuleSym.n2y[v]
            else:
                err(f'bad pos {r.pos} in {r.sExp()}')

class RulePos (PObj):
    def __init__(self, r, p):
        super().__init__(r.name + '#' + str(p))
        self.rule = r
        self.pos = p
        self.lah = set()
        self.prd = set()
        self.suc = set()
        
    def strL(self):
        return self.strA('rule', 'pos')

    def stpAdd(self, to):
        if self.pos == 0:
            dsUpdate(to, self.rule.stp)
        elif self.pos + 1 == len(self.rule.pos):
            dsAdd(to, '', (self.rule, ))
        else:
            b = self.rule.body[self.pos]
            pN = self.rule.pos[self.pos + 1]
            dsAdd(to, b, (pN, ))
            if isinstance(b, Rule):
                for t, uu in b.stp.items():
                    for u in uu:
                        dsAdd(to, t, (pN,) + u)
                    
    def goAdd(self, g):
        if self.pos == 0:
            dsUpdate(g, self.rule.go)
        elif self.pos + 1 == len(self.rule.pos):
            dsAdd(g, '', self.rule)
        else:
            b = self.rule.body[self.pos]
            dsAdd(g, b, self.rule.pos[self.pos + 1])
            if isinstance(b, Rule):
                dsUpdate(g, b.go)
                
    def stps(self):
        if self.pos == len(self.rule.pos) - 1:
            return {(self.rule, )}
        elif self.pos == 0:
            return self.rule.stp
        else:
            assert isinstance(self.rule, RuleSeq)
            n = self.rule.pos[self.pos+1]
            b = self.rule.body[self.pos]
            if isinstance(b, str):
                return {(n, b)}
            else:
                return {(n,) + x for x in b.stp}
                
    @staticmethod                
    def makeLah(k):
        # compute lah for RuleSym and RulePos using lah from Rule
        #     lah = production of len <= k and prefix of len k of all production longer k
        Rule.prdLah(k)
        for y in RuleSym.syms:
            y.lah = set().union(*[r.lah | r.prd for r in y.rules])
            dbg(3, f"{y} {y.lah}")
        for r in Rule.rules:
            r.pos[-1].lah = {lahEmpty}
            for pX in range(len(r.body)-1, -1, -1): # backward through the positions
                p = r.pos[pX]
                e = r.body[pX]
                lS = e.lah if isinstance(e, RuleSym) else {e if PObj.lahStr else (e,)}
                n = set()
                for l in lS:
                    if len(l) == k:
                        n.add(l)
                    else:
                        for o in p.next.lah:
                            dbg(9, f"n.add(({l}+{o})[0:k])")
                            n.add((l+o)[0:k])
                p.lah = n
                dbg(3, f"{r} {pX} {p} {p.lah}")
            assert r.pos[0].lah == r.prd | r.lah, (f"rule {r.sExp()} fails \npos[0].lah={r.pos[0].lah}" 
                    f" \n== prd={r.prd} \n| lah={r.lah}"
                    f",\ndifference lah-..={r.pos[0].lah-r.prd-r.lah}, ..-lah={(r.prd|r.lah)-r.pos[0].lah}")
def appK(ll, rr):
    a = set()
    for l in ll:
        if len(l) == PObj.lrK:
            a.add(l)
        else:
            assert len(l) < PObj.lrK
            for r in rr:
                a.add((l+r)[0:PObj.lrK])
    return a
    
class State (PObj):
    p2s = {}
    state0 = None
    lrErr = None

    def __init__(self, pa):
        super().__init__('s' + str(len(State.p2s)))
        assert isinstance(pa, frozenset)
        assert all(isinstance(p, RulePos) for p in pa)
        assert pa not in State.p2s
        self.pa = pa
        self.go = None
        State.p2s[pa] = self    

    @staticmethod
    def getOrMake(pa):
        pa = frozenset(pa)
        return State.p2s[pa] if pa in State.p2s else State(pa)

            
    @staticmethod
    def makeAll():
        State.state0 = State(frozenset([Rule.rules[0].pos[0]]))
        State.state0.goExp()
        if Rule.rules[0] not in State.state0.go:
            eP = frozenset()
            e = State(eP)
            e.go = {}
            del State.p2s[eP]
            State.state0.go[Rule.rules[0]] = e
            dbg(1, 'makeAll added endState', e, 'to', State.state0.strA('pa', 'go'))
            
        State.err = ''
    
        for s in State.p2s.values():
            dbg(5, 'goExp', s.strA('pa', 'go'))


        for lrK in range(1,5):
            dbg(1, 'gen lrK', lrK)
            Rule.lahGenK(lrK)
            Rule.sucGen()
            cnf = False
            for s in State.p2s.values():
                cnf |= s.goF2()
            PObj.lrK = lrK
            if not cnf:
                return
        State.err = f'makeAll: still conflicts at lrK {lrK}'
        return
        for s in State.p2s.values():
            s.lajOneGen()
            dbg(1, 'lajOneGen', s.strA('pa', 'lajOne'))
            for a, l in s.lajOne.items():
                 if len(l) > 1:
                     dbg(1, 'lajOneGem multi', a, l)
    #    Rule.aftGen2()

        for s in State.p2s.values():
            s.ltrGen((), s.ltr)
            dbg(1, 'ltr', s.strA('pa', 'ltr'))

        for s in State.p2s.values():
            for r in s.go.keys():
                if isinstance(r, Rule):
                    r.st.add(s)
        for r in Rule.rules:
            dbg(1, 'stGen', r.strA('body', 'st'))
            
        # State.lahGen()
        if State.err == '':
            for s in State.p2s.values():
                s.goFix()                
    
    def strL(self):
        return self.strA('pa', 'go')

    def dbg(self, lv):
        if lv > dbgLev:
            return
        print(str(self), ':', ', '.join(str(p) for p in self.pa))
        for e, s in self.go.items():
            print('\t', e, ('=> ' + str(s)) if isinstance(s, State) else '<= ' + ', '.join(str(r) for r in s))
            
    def goExp(self):
        if self.go != None:
            return
        self.go = {}
        for p in self.pa:
            p.goAdd(self.go)
        for e, pp in self.go.items():
            if e != '':
                s = State.getOrMake(pp)
                self.go[e] = s
                s.goExp()

    def stpExp(self):
        if self.stp != None:
            return
        self.stp = {}
        dbg(1, 'SstpExp1', self)
        for p in self.pa:
            p.stpAdd(self.stp)
        dbg(1, 'SstpExp2', self.strA('pa', 'stp'))
        for e, pp in self.stp.items():
            dbg(1, 'SstpExp3', e, pp)
            if e != '':
                s = State.getOrMake(frozenset(p[-1] for p in pp))
                self.go[e] = s
                s.stpExp()

    def ltrGen(self, his, to):
        for p in self.pa: 
            p.rule.ltrGen2(self, his, to)

    def lahGenOld(self):
        if len(self.pa) == 1:
            self.prd, self.lah = PObj.lahGenZero(list(self.pa)[0])
        else:
            self.lahGenOr(self.pa)
    
    def lajOneGenOld(self):
        for p in self.pa:
            if p.pos >= len(p.rule.pos) - 1:
                d2sAdd(self.lajOne, '', p.rule, None)
            else:
                e = p.rule.body[p.pos]
                f = p.rule.pos[p.pos+1]
                if p.pos == 0:
                    for a, h in p.rule.lahOne.items():
                        if isinstance(self.go[a], State):
                            dbg(1, 'lajOneGen d2sAdd(', self.lajOne, strC(a), self.go[a], h)
                            d2sAddS(self.lajOne, a, self.go[a], h)
                        else:
                            for r in self.go[a]:
                                dbg(1, 'lajOneGen 2sAdd(', self.lajOne, a, r, h)
                                d2sAddS(self.lajOne, a, r, h)
                else:
                    if isinstance(e, str):
                        d2sAdd(self.lajOne, e, self.go[e], (f,))
                    else:
                        for a, h in e.lahOne.items():
                            if isinstance(self.go[a], State):
                                dbg(1, 'lajOneGen d2sAdd(', self.lajOne, strC(a), self.go[a], {(f,)+j for j in h})
                                d2sAddS(self.lajOne, a, self.go[a], {(f,)+j for j in h})
                            else:
                                for r in self.go[a]:
                                    dbg(1, 'lajOneGen 2sAdd(', self.lajOne, a, r, h)
                                    d2sAddS(self.lajOne, a, r, {(f,)+j for j in h})

    def llGet(self, r):
        if r not in self.go:
            ll = None
        else:
            ll = set()
            for p in self.go[r].pa: 
                if len(p.rule.pos) > 2 or p.rule not in self.go:
                    ll |= p.suc
                else:
                    a = self.llGet(p.rule)
                    if isinstance(r, RuleOr):
                        if a != None:
                            ll |= p.suc
                    else:
                        ll |= p.suc if a == None else (a & p.suc)
        dbgS(15, 'llGet', self, r, 'returning', ll)
        return ll

    def llGe2(self, s, ll):
        nn = set()
        for p in s.pa:
            if p.pos == 1:
                r = p.rule
                assert r in self.go
                mm = {(l+q)[0:PObj.lrK] for l in ll for q in r.pos[1].prd | r.pos[1].lah}
                if any(len(m) < PObj.lrK for m in mm):
                    mm = self.llGe2(self.go[r], mm)
            else:
                mm = p.suc
            nn |= mm
        ss = {q for q in p.suc for p in s.pa}
        dbgS(15, 'llGe2', self, s, 'returning', nn, nn <= ss, 's.suc=', ss)
        return nn

    def sucInState(self, r, ll):
        if all(len(l) == PObj.lrK for l in ll):
            return ll
        if isinstance(r, Rule):
            aa = appK(ll, r.pos[-1].suc)
            dbgS(10, 'sucInState1 aa', self, r, 'aa=', aa, 'll=', ll)
            if r not in self.go:     
                return aa
        elif r not in self.go:     
            err(f'lahInPos but r={r} not in go', self.strA('pa', 'go'))
        else:
            aa = None
        s = self.go[r]
        if len(s.pa) == 0:
            return ll if aa == None else aa
        bb = set()
        for p in s.pa:
            cc = appK(ll, p.suc)
            dbgS(10, 'sucInState2 cc', self, r, p, 'cc=', cc, 'll=', ll)
            if p.pos == 1:
                assert p.rule in self.go
                dd = appK(ll, p.prd | p.lah)
                dbgS(10, 'sucInState3 dd', self, r, p, 'dd=', dd, 'll=', ll)
                ee = self.sucInState(p.rule, dd)
                dbgS(10, 'sucInState4 ee', self, r, p, 'ee=', dd, 'dd=', dd)
                cc &= ee
            bb |= cc
            dbgS(10, 'sucInState5 bb', self, r, p, 'bb=', bb, 'll=', ll)
        if aa != None:
            bb &= aa        
        return bb
                                
    def goF2(self):
        if '' not in self.go or not isinstance(self.go[''], set):
            return False
        dbg(15, 'goF2 before', self.strA('pa', 'go'))
        ge = self.go['']
        if False: # use .suc
            d = {}
            for ga, gp in self.go.items():
                if isinstance(ga, str) and ga != '':
                    a = ga if PObj.lahStr else (ga,)
                    for l in {(a + l)[0:PObj.lrK] for p in gp.pa for l in p.suc}:
                         ddPut(d, *l, gp)
            dbg(15, 'goF2a', d) 
            cnf = False
            for r in ge:
                for l in r.pos[-1].suc: 
                    o = ddGet(d, *l)
                    if o == None:
                        ddPut(d, *l, r)
                    else:
                        dbg(3, 'goF2a1 conflict', self.strA('pa'), l, r, 'old', o)
                        cnf = True
        if True: # cnf # use sucInState
            d = {}
            for ga, gp in self.go.items():
                if isinstance(ga, str) and ga != '':
                    a = ga if PObj.lahStr else (ga,)
                    for l in self.sucInState(ga, a):
                         ddPut(d, *l, gp)
            cnf = False
            dbg(3, 'retry with sucInState')
            for r in ge:
                si = self.sucInState(r, PObj.lahEmptySet)
                dbgS(5, 'goF2sSucIn', self, r, 'sucInState', si, 'r.suc', r.pos[-1].suc)
                for l in si: 
                    o = ddGet(d, *l)
                    if o == None:
                        ddPut(d, *l, r)
                    else:
                        dbg(3, 'goF2s1 conflict', self.strA('pa'), l, r, 'old', o)
                        cnf = True

            if False:
                if len(r.body) == 0:
                    ll = self.llGet(r)
                    dbgS(10, 'goF2', r, 'llGet', ll)
                    for l in ll: 
                        o = ddGet(d, *l)
                        if o == None:
                            ddPut(d, *l, r)
                        else:
                            dbgS(2, 'goF2p1 conflict', self.strA('pa'), l, 'should add', r, 'but old', o)
                            cnf = True
                    
                else:
                    for l in r.pos[-1].suc: 
                        o = ddGet(d, *l)
                        if o == None:
                            ddPut(d, *l, r)
                        else:
                            dbgS(2, 'goF2a1 conflict', self.strA('pa'), l, 'should add', r, 'but old', o)
                            cnf = True
        dbg(10, 'goF2a2 cnf', cnf, d) 
        if False:
            xx = ddRed(d.copy(), -9876)
            dbg(15, 'goF2a2 red', xx)
        dflt = anyOf(ge) if len(ge) == 1 else None 
        if PObj.end in d:
            eR = d[PObj.end]
            if isinstance(eR, Rule) and eR in ge:
                dflt = eR
                del d[PObj.end]
            else:
                err('goF2 d[end]', eR, 'ge', ge)        
        if cnf:
            return cnf
        ddRed(d, dflt)
        dbgS(10, 'goF2a3 red dflt', dflt, d) 
        if dflt == None:
            del self.go['']
        else:
            self.go[''] = dflt
        for a,t in d.items():
            assert a not in self.go or isinstance(t, dict) or t == self.go[a]
            self.go[a] = t
        dbg(5, 'goF2a9 go', self.strA('pa', 'go'))
        return False

        for r in ge:
        #    to = self.go[r]
        #    for tP in to.pa:
            for l in r.pos[-1].suc: 
                dsAdd(rd, l, r)
        cnf = False
        for a, rr in rd.items():
            if a in self.go:
                dbg(10, 'goF2 conflic1', self.strA('pa'), a, 'go', self.go[a], 'versus rd', rd[a])
                for r in rr:
                    if a not in r.pos[-1].suc:
                        dbg(10, 'goF2 c2 not in suc', r, r.pos[-1].suc)
                    else:
                        dbg(1, 'goF2 c2 conflict in suc', r, r.pos[-1].suc)
                        cnf = True
            elif len(rr) != 1:
                dbg(1, 'goF2 conflic1', self.strA('pa'), a, 'rd', rd[a])
                cc = {}
                for r in rr:
                    for l in r.pos[0].suc:
                        if l not in cc and l not in self.go:
                            cc[l] = r
                            dbg(1, 'goF2 c3 adding', l, r)
                        else:
                            dbg(1, 'goF2 c3 conflict', a, r)
                            cnf = True
            elif len(ge) > 1:
                self.go[a] = list(rr)[0]
        if not cnf:
            if len(ge) > 1:
                del self.go['']
            else:
                self.go[''] = list(ge)[0]
        dbg(1, 'goF2 after', 'conflict' if cnf else 'fixed', self.strA('pa', 'go'))
        return cnf
        
    def goFix(self):
        if '' not in self.go:
            return
        # dbg(1, 'goFix begin', self.strA('pa', 'go', 'lah'))
        self.goFix1(self.go[''], self.go, self.lah)
        #dbg(1, 'goFix end  ', self.strA('pa', 'go'))
        
    def goFix1(self, rr, gg, ll):
        if len(rr) == 1:
            for r in rr: break
            gg[''] = r
        else: 
            assert len(rr) > 1
            cnt = {r: 0 for r in rr}
            for a,b in ll.items():
                if isinstance(b, Leaf):
                    assert len(b) == 1
                    for k in b.keys():
                        if isinstance(k, Rule):
                            cnt[k] += 1
            # dbg(1, 'goFix1 cnt', cnt)
            r = max(cnt.items(), key = lambda i: i[1])
            # dbg(1, 'goFix1 max', r)
            r = r[0]
            # if r[1] < 1:
            #    del gg['']
            # else:
        gg[''] = r

        for a,b in ll.items():
            if isinstance(b, Leaf):
                assert len(b) == 1
                if a != '':
                    for k in b.keys(): break
                    if k == '':
                        if sTo != None:
                            gg[a] = sTo
                        else:
                            assert a in gg
                    elif k != r:
                        gg[a] = k
                    elif a in gg:
                        del gg[a] 
                    dbg(1, 'goFix1 ed', a, gg)
        for a,b in ll.items():
            if type(b) ==  dict:
                dbg(1, 'goFix1 diving into', a, 'from', gg[a] if a in gg else '---', 'to', b)
                gg[a] = {}
                self.goFix1(rr, gg[a], b)
                                
    def addPosA(self, pa):
        "add (position, the string after the the end of the current rule) to self, recursively add subrules"
        p, a = pa
        if State.lrErr != None:
            return
        if p.next == None:    #after all positions of rule ==> reduce
            if a not in self.to:
                self.to[a] = p.rule
            elif self.to[a] != p.rule:
                State.lrErr = strL("lrK mismatch [", a, "]=", self.to[a])
        else:
            e = p.rule.body[p.pos]
            k = PObj.lrK
            t = (p.next, a)
            if isinstance(e, str): #shift character on stack with next state
                for l in p.lah:
                    assert l[0] == e
                    if len(l) != k:
                        l = (l+a)[0:k] 
                    if l not in self.to:
                        self.to[l] = {t}
                    elif not isinstance(self.to[l], set):
                        State.lrErr = f"lrK mismatch [{l}]->{self.to[l]} cannot add {t}"
                        return
                    else:
                        self.to[l].add(t)
            else: 
                assert isinstance(e, RuleSym)    
                if e not in self.to: # next state after this symbol
                    self.to[e] = {t}
                elif t in self.to[e]:
                    return
                else:
                    self.to[e].add(t)
                for n in p.next.lah: # enter pos[0] of subrules, string after is lah of next position of rule
                    if len(n) != k:
                        n = (n+a)[0:k] 
                    for r in e.rules:
                        self.addPosA((r.pos[0], n))
                        if State.lrErr != None:
                            return

    def addTrans(self):
        "modify to: find/create states for (pos, aft) sets in to.values"
        for k, v in self.to.items():
            if isinstance(v, Rule):
                continue
            pa = frozenset(v)
            if pa in self.pa2u:
                self.to[k] = self.pa2u[pa]
            elif State.lrErr != None:
                return
            else:
                n = type(self)(pa)
                self.to[k] = n
                n.addTrans()
    

    @staticmethod
    def clear():
        State.states = []
        State.p2s.clear()
        
    @staticmethod
    def lahGenOld():
        for s in State.p2s.values():
            s.lahGen1()
        u = True
        while u:
            u = False
            for s in State.p2s.values():
                u |= s.lahGen2()
            dbg(1, 'lahGen')
        for s in State.p2s.values():
            for a, l in s.lah.items():
                for q, r in l.items():
                    dsAddS(s.lahOne, a, r)
            dbg(1, 'lahGen', s.strA('pa', 'lah', 'lahOne'))

        for s in State.p2s.values():
            s.lahExp(9)
            
        
    def lahGenOld1(self):
        for a, s in self.go.items():
            if isinstance(a, str) and a != '':
                d2sAdd(self.lah, a, s, s)
    def lahGenOld1Pos(self):
        for a, s in self.go.items():
            if isinstance(a, str) and a != '':
                d2sAdd(self.lah, a, s, s)
                
    def lahGenOld2(self):
        u = False
        if '' in self.go:
            for r in self.go['']:
                for s in r.st:
                    for e, eD in s.go[r].lah.items():
                        for pp in eD.values():
                            if e != PObj.end or isinstance(r, Rule):
                                u |= d2sAddS(self.lah, e, r, pp)
        return u
        
    def lahExp(self, maxi):
        self.lahExp1(maxi, self.lah, [])
        
    def lahExp1(self, maxi, dct, kys):
        for a, e in dct.items():
            if a == PObj.end:
                dbg(1, 'exp1 end', e, kys)
            elif len(e) > 1:
                dbg(1, 'lahExp1 conflict ', 'expanding' if maxi > 0 else 'unresolved', ' in', self.strA('pa'), 'at lah', kys + [a], ':', e)
                if maxi > 0:
                    self.lahExp2(maxi-1, dct, kys + [a])
                elif State.err == '':
                    State.err = strS('lahExp1 conflict unresolved in', self.strA('pa'), 'at lah', kys + [a], ':', e)

    def lahExp2(self, maxi, dct, kys):
        oo = dct[kys[-1]]
        assert isinstance(oo, Leaf)
        nn = {}
        dct[kys[-1]] = nn
        for r, ss in oo.items():
            dbg(99, 'exp2a', str(r), ss)
            for s in ss:
                for u, v in s.lahOne.items():
                    if u != PObj.end or isinstance(r, Rule):
                        d2sAddS(nn, u, r, v) 
            dbg(99, 'exp2z', nn)
            if kys[-1] == PObj.end:
                dbg(1, 'exp2zend', nn)
        dbg(1, 'expanded', self.strA('pa'), 'from', oo, 'to', dct)
        self.lahExp1(maxi, nn, kys)                

    def auGen(self):
        if '' not in self.go:
            return False
        n = self.au.copy()
        for a,s in self.go.items():
            if isinstance(a, str) and a != '':
                dsAdd(n,a,s)
        u = False
        for r in self.go['']:
            for s in r.st:
                for a, b in n.items():
                    if dsAddS(s.au, a, b - {s}):
                        u = True
                        dbg(1, 'auGen0', s.strA('au'))
        return u
            
    
    hist = []
    done = []
    def check(self, stp):
        if self.done:
            #dbg(1, 'check0-', State.lv, self.strA('done', 'go'))
            return
        l = len(State.hist)
        if l >= len(State.done):
            if l > len(State.done):
                err('done smaller', State.done, State.hist)
            State.done.append({self})
        else:
            if self in State.done[l]:
                return
            else:
                State.done[l].add(self)
                l += 1
                if len(State.done) < l:
                    State.done[l].clear()
        assert self.done == False
        self.done = True
        State.hist.append(self)
        
        dbg(1, 'check1', len(State.hist), State.hist, self.strA('go'), stp)
        gg = {g for g in self.go.keys() if isinstance(g, str) and g != ''}
        if len(gg & stp) != 0:
            dbg(1, 'check dead end', gg, self.strA('pa', 'go'), State.hist)        
        if '' in self.go:
            if len(self.go['']) > 1:
                dbg(1, "check several go['']", self.strA('pa', 'go'))
            for g in self.go['']:
                #dbg(1, "check2", str(g), g.st)
                for s in g.st:
                    s.chec2(stp | gg, g)
        self.done = False
#        dbg(1, 'check9-', my, self.strA('done'))
        State.hist.pop()

    def chec2(self, stp, ru):
        assert ru in self.go
        dbg(1, 'chec2=', str(self), str(ru), str(self.go[ru]))
        self.go[ru].check(stp)

    @staticmethod
    def makeLookahead(k):
        "make state for LR(k) grammar, return errormessage if not LR(k)"
        assert k >= 1
        State.clear()
        RulePos.makeLah(k)
        State.st0 = len(PObj.objs)
        State.lrErr = None
        b = State(frozenset([(Rule.rules[0].pos[0], k * (PObj.end if PObj.lahStr else (PObj.end, )))]))
        e = State(frozenset(b.to[RuleSym.syms[1]]))
        b.addTrans()
        e.addTrans()
        State.states = PObj.objs[State.st0: ]
        Parser.states = State.states
        if State.lrErr != None:
            return f"grammar not LR({k}), because of {State.lrErr}"

    @staticmethod
    def checkLR0():
        """ check if the current LR(1) grammar is even LR(0)
            
            return None if it is, otherwsie a string describing the conflicting state

            LR(0) grammars are implemented using LR(1) grammars

            LR(0) means, that a reduce step does not depend on lookahead.
                a shift step however uses the char shifted (== lookahead) to determine the next state """
        assert PObj.lrK == 1
        for u in State.states:
            assert len(u.to) > 0
            fi = None
            for k,v in u.to.items():
                if not isinstance(k, RuleSym):
                    if fi == None:
                        fi = v
                    elif v != fi if isinstance(v, Rule) else not (isinstance(v, State) and isinstance(fi, State)):
                        return strL("grammar not LR(0) because of",  u)

def eStr(e):
    if type(e) != tuple and type(e) != list:
        return str(e)
    else:
        return str(e[0]) + '>' + str(len(e)-1) + '(' + ', '.join(x if type(x) != tuple and type(x) != list else str(x[0]) + '>' + str(len(x)-1) for x in e[1:]) + ')'
def anyOf(c):
    for a in c:
        return a
    err('anyOf empty', c)
    
def ddPut(d, *key):
    e = d
    for k in key[0:-2]:
        if k not in e:
            e[k] = {}
        elif not isinstance(e[k], dict):
            e[k] = {'': e[k]}
        e = e[k]
    k, v = key[-2:]
    if k not in e or not isinstance(e[k], dict):
        e[k] = v
    else:
        e[k][''] = v

def ddGet(d, *key):
    e = d
    for k in key:
        if not isinstance(e, dict) or k not in e:
            return None
        e = e[k]
    return e if not isinstance(e, dict) else e[''] if '' in e else None

def ddRed(d, dflt):
    hasDi = False
    for k, v in d.items():
        if isinstance(v, dict):
            v = ddRed(v, dflt)
            d[k] = v
            hasDi |= isinstance(v, dict)
    dbg(20, 'ddRed di', hasDi)
    if not hasDi:
        vv = set(d.values())
        dbg(20, 'ddRed vv', vv)
        if len(vv) == 1:
             return vv.pop()
    for k in {k for k,v in d.items() if v == dflt}:
        del d[k]                
    return d
            
def ddTest():
    d = {}
    ddPut(d, 0, 'null')
    ddPut(d, 'a', 'b', 'ab')
    dbg(1, 'a, b', d)
    ddPut(d, 'a', 'c', 'ac')
    dbg(1, 'a, c', d)
    ddPut(d, 'a', 'c', 'd', 'acd')
    dbg(1, 'a, c, d', d)
    ddPut(d, 'a', 'c', 'ac++')
    dbg(1, 'a, c', d)
    dbg(1, 'ddGet a', ddGet(d, 'a'))
    dbg(1, 'ddGet y', ddGet(d, 'y'))
    dbg(1, 'ddGet a b', ddGet(d, 'a', 'b'))
    dbg(1, 'ddGet a y', ddGet(d, 'a', 'y'))
    dbg(1, 'ddGet a c d', ddGet(d, 'a', 'c', 'd'))
    dbg(1, 'ddGet a c d y', ddGet(d, 'a', 'c', 'd', 'y'))

#ddTest()
#exit()
            
class LahIter():
    def __init__(self, l):
        self.list = l
        self.ix = 0

    def __iter__(self):
        return self
        
    def __next__(self):
        self.ix += 1
        if self.ix <= len(self.list):
            return self.list[self.ix - 1]
        raise StopIteration()
    def __getitem__(self, i):
        i += self.ix
        return self.list[i] if i < len(self.list) else PObj.end
    @staticmethod
    def test():
        print('LahIter test begin')
        iter = LahIter(['begin', 0, 1,2,3,4])
        for i in iter:
            print(i, iter[0], iter[1], iter[-1])
        print('LahIter test end')

class Parser:
    pCnt = -1

    @staticmethod
    def parse(inp):
        assert len(State.state0.pa) == 1
        for r0 in State.state0.pa: break
        assert r0.pos == 0 and r0.rule == Rule.rules[0]
        r0 = r0.rule
        dbg(5, 'parsing for', r0)
        
        st = [(None, State.state0)]
        inI = LahIter(inp)
        tNr = -1
        Parser.pCnt = -1
        cntL = 1000
        act = 'start'
        shiL = st[-1][1]
        try:
            while True:
                Parser.pCnt  += 1
                e,s = st[-1]
                lah = inI[0]
                dbgS(10, 'parse', (str(Parser.pCnt) + ' ' + act).ljust(20), 'lah', lah, 'stck', len(st), '>', e, s)
                if Parser.pCnt  > cntL:
                    err("too many loops cnt={Parser.pCnt}, lah={lah} st={st}")
                # dbg(2, f'run{Parser.pCnt} {ls} --> lah={lah} tos={e}, {s} st={len(st)} {st}')

                to = s.go
                dbg(15, 'parse to0', to)
                for fx in range(20):
                    to = to[inI[fx]] if inI[fx] in to else to[''] if '' in to else None
                    dbg(20, 'parse to1', to)
                    if not isinstance(to, dict): break
                else:
                    err('parser to deeply nested lah', lah, s.go)
                if isinstance(to, State):
                    st.append((next(inI), to))
                    assert st[-1][0] == lah
                    shiL = to
                    tNr += 1
                    act = 'shift ' + lah
                    lah = inI[0]
                elif isinstance(to, Rule):
                    sx = len(st) - len(to.pos) + 1
                    p = [to] + [x0 for x0, x1 in st[sx : ]]
                    act = 'reduce ' + str(to)
                    del st[sx:]
                    st.append((p, st[-1][1].go[to]))
                else:
                    break
        except StopIteration:
            err('parse internal')            
        # ??? assert len(st) > 1
        
        # ??? dbg(1, 'parseEnd lah', lah, len(st), st[1][0], lah == PObj.end and len(st) == 2 and st[1][0][0] == Rule.rules[0])    
        if lah == PObj.end and len(st) == 2 and st[1][0][0] == Rule.rules[0]:
            dbgS(5, 'parsed', str(Rule.rules[0]), 'from', len(inp), 'tokens, in', Parser.pCnt, 'steps,', len(State.p2s), 'states')
            return st[1][0]
        else:            
            iA = inp[tNr+1: ] +[ PObj.end]
            r = f"syntax after {inI[-1]} tokenNr {tNr} expected: {strNSset(set().union(*[p.suc for p in shiL.pa]))}, not lah: {' '.join(strNS(l) for l in iA[-5:])}"
            dbg(1, r)
            dbg(5, 'last', inI[-1], 'tokennr', tNr, ', lah', inI[0], inI[1], inI[2]
                , '\n\tpreceeding', inp[0: tNr+1]
                , '\n\tfollowing ', iA
                , '\n\tstack', len(st)
                , '\n\t\t' + '\n\t\t'.join(strS(str(sx) + ': ' + eStr(st[sx][0]), 'state =', st[sx][1]) for sx in range(len(st)-1, -1, -1))
                )
            return r

    @staticmethod
    def makeGr(*grammar):
        "make Rule, Sym and Pos for given grammer in form for function f"
        out("grammar source", grammar)
        PObj.lrK = 0
        Rule.clear()
        Rule.add(*grammar)
        Rule.finish()
        State.makeAll()

    @staticmethod
    def makeStateRemove():
        "try to make lr(k) states for k=0,1, ..."
        tS = datetime.now()
        k = 0
        st = f"{len(RuleSym.syms)} syms, {len(Rule.rules)} rules, {len(RulePos.poss)} rulePos"
        while True:
            k += 1
            res = State.make(k)
            el = (datetime.now() - tS).total_seconds()
            elT = f"after {el}sec: " if dbgTime else ''
            if res == None:
                break
            print(f"{res}: {elT}{st} {len(State.states)} states")
            if el > 7:
                print("stop by timeout, do not check next LR(k)")
                return False
            if k > 20:
                print("stop by k >Y 20, do not check next LR(k)")
                return 

        if k == 1:
            res = State.checkLR0()
            if res == None:
                k=0
            else:
                print(f"{res}: {elT}")
        print(f'grammar is LR({k}): {elT}{st} {len(State.states)} states')
        Parser.states = State.states
        PObj.dbg(1, 'State 0')
        return True

    @staticmethod
    def testGr(nm, syn, *cs):
        """test named nm, with grammar syn and testcases cs"""
        print(f"{nm} --- begin test " + strSep1)
        Parser.makeGr(*syn)
        for s in State.p2s.values():
            dbg(5, s.strA('pa', 'go'))
        if State.err != '':
            print('error generating grammar', nm, State.err)
        else:
            Parser.test(nm, *cs)

    @staticmethod
    def test(nm, *tsts):
        "test for the current grammar, named nm the tests tsts"
        if tsts == None:
            tsts = ['2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (']
        ll = 5
        def treeSplit(nd, b):
            """split the tree nd into linkes, 
                each line contains the path to one (shifted) token or to one empty rule
                intendation shows the children of a rule
                the left ll characters, contain the shifted token or spaces for an empty rule
                b is the current line, filled with the path of entered rules with the correct intendation
            """
            nonlocal ll
            if isinstance(nd, str):
                return nd[0:ll-1].ljust(ll) + b[ll:] + '==' + nd
            c = b + str(nd[0]) +  ' ' 
            if len(nd) <= 1:
                return c + '==emptyRule'
            r = treeSplit(nd[1], c)
            c = len(c) * ' '
            for ch in nd[2:]:
                r += "\n" + treeSplit(ch, c)
            return r

        for i in range(len(tsts)) :
            sT = [x for x in re.split(r'\s+', tsts[i]) if x != ''] # void empty strings, e.g. for ''
            print(f"test begin  {nm} {i} input: {' '.join(strNS(t) for t in sT)} {strSep1}")
            ast = Parser.parse(sT)
            if isinstance(ast, str):
                print(f'syntax test {nm} {i} input {tsts[i]}: {ast}')
            else:
                print(f"parsed {str(ast[0])} from {len(sT)} tokens, {Parser.pCnt} steps,  {len(State.p2s)} states, Rules {len(Rule.rules)}")
                print(treeSplit(ast, ll * ' '))
            dbg(5, 'test', 'syntaxed' if isinstance(ast, str) else 'parsed', f"{nm} {i} input {sT} {strSep1}")

    @staticmethod
    def testSyn(nm, syn, *cs):
        """test named nm, with grammar syn and testcases cs"""
        Parser.testGr(nm, syn, *cs)

    @staticmethod
    def test1():
        """my tests """
        g = ["s=s+p", "s = p", "p = p '*' i", "p = i", "i = '(' s           ')'", "i = '0'", "i = '1'", "i = '2'"]
        Parser.testSyn('arithExpr', g
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +')
        
        g2 = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = epm i d", "epm = pm", "epm ="
                , "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]
        g = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = i d", "epm = pm epm", "epm ="
                , "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]

        Parser.testSyn('arithExprPM' , ["s=s pm p", "s=p", "p=p*i", "p=p/i", "p=i", "i=(  s )", "i = j"
            , "j=epm d", "j=j d", "epm = pm epm", "epm =  ", "pm=+", "pm=-", "d=0", "d=1", "d =  2  "]
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
            , '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')

        Parser.testSyn('arithExprPE' , ["s=s pm p", "s=p", "p=p*e", "p=p/e", "p=e", "e=i", "e=i**e", "i=(  s )", " i=j"
            , "j=epm d", "j=j d", "epm = pm epm", "epm =  ", "pm=+", "pm=-", "d=0", "d=1", "d =  2  "]
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
            , '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')

        Parser.testSyn('arithExprPE2' , ["s=s pm p", "s=p", "p=p*e", "p=p/e", "p=e" 
            , "e=i", "e=i**e", "e=pm e", "pm=+", "pm=-", "i=(  s )", "i = dd", "dd=d", "dd=d dd", "d=0", "d=1", "d =  2  "]
            ,'2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
            , '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')

        Parser.testSyn('LR(2)?' , ["S = AA 'a' 'a'", "S = AB 'a' 'b'", "AA =", "AB ="]
            ,"a a", "a b", "a c", "b a")

    @staticmethod
    def testKnuth():
        """some of the examples given by Donald E. Knuth. 1965. On the Translation of Languages from Left to Right """
        Parser.testSyn('knuth(1)'
            , ["S = A D", "A = 'a' C", "B = 'b' 'c' 'd'", "C = B E", "D =", "E = 'e'"]
            , "a b c d e", "a b x d e", "a b", "a b c d e f")
        Parser.testSyn('knuth(6)'
            , ["S = 'a' A 'c'", "A = 'b' A 'b'", "A = 'b'"]
            , "a b b b  c", "a b c", "a b b c", "a b b b b b b b c e")
        Parser.testSyn('knuth(7) lr(0)'
            , ["S = 'a' A 'c'", "A =  A 'b' 'b'", "A = 'b'"]
            , "a b b b  c", "a b c", "a b b c", "a b b b b b b b c e")
        Parser.testSyn('knuth(16x) lr(3)'
            , ["S=B C", "B=C e", "B = ", "C=D", "C=D c", "D=", "D=d"]
            , "", "c", "c e", "c e d")
        Parser.testSyn('knuth(24)'
            , ["S =",  "S = 'a' A 'b' S", "S =  'b' B 'a' S", "A =", "A = 'a' A 'b' A", "B =", "B = 'b' B 'a' B"]
            , "", "a b", "b a", "a b b", "a a b b", "a b a b", "b a a b")
# "IS = IP", "IS = IS + IP", "IS = IS - IP", "IP = IE", "IP = IP * IE", "IP = IP / IE", "IE = IA", "IE = IA * * IE", "IA = IB", "IA = + IA", "IA = - IA", "IB =( IS )", "IB = ID", "IB = IB ID", "ID = 0", "ID = 1", "ID = 2" ... 

def testRe():
    x = ' ab cd  e?f   *+??'
    print(f"x={x}")
    print("re: lookahead assertion and lookbehind assertions to find boundaries between words and or special character")
    for y in re.finditer(r'\s+|(?<=\S)(?=\S)((?<=\W)|(?=\W))', x):
        print([y.start(),y.end(), x[y.start(): y.end()+1]])
    print("easier and more stable against leading/trailing spaces: use findall for words or special chars")
    for y in re.finditer(r'\w+|\S', x):
        print([y.start(),y.end(), x[y.start(): y.end()]])
    print(re.findall(r'\w+|\S', x))
    x += "' '' b 'oder1''1.2und. 3 ' '.a.234 4.. single' !"
    print('mit strings and numbers:', x)
    print(re.findall(r"""'(?:''|[^'])*'    # single quoted strings, doubble apostrophs for a single
                                        # non capturing group (?:...) otherwise findall will return groups!
                        |\d+(?:\.\d*)    # number with leading digits and possible decimal point/digits
                        |\.\d+            # number with leading decimal points and digits
                        |\w+            # words
                        |\S"""            # special chars
                , x
                , re.X))                    # re.X = re.VERBOSE flag

if 0:
   testRe()
   exit()
if 0:
    LahIter.test()
    l = [0, 1, 2]
    s = '012'
    print([(i, l[0:i], s[0:i]) for i in range(6)])
    print([(x, y) for x in range(3) for y in range(2)])
    print([(x, y) for x in range(3) for y in range(3*x, 4*x+1)])
    print([(x, y) for x in range(3) if x > 0 for y in range(3*x, 4*x+1) if y > x])
    exit()


        

Parser.test1()
Parser.testKnuth()
print('end', __file__)