python/parser4.py
# knuth lr(k) parser
import re
"""
state is set of positions, not (set of positions, lookAhead) as for knuth
thus, we reduce the number of states
"""
strSep1 = '--------------------------------'
def strL(*ee):
return ', '.join(e if type(e) == str else e.strL() if hasattr(type(e), 'strL') else strC(e) for e in ee)
def strS(*ee):
return ', '.join(e if type(e) == str else strC(e) for e in ee)
strCW = re.compile('\w+')
strNSs = re.compile('\s')
def strNS(s):
return strC(s) if not isinstance(s, str) else s if strNSs.search(s) == None else repr(s)
def strNSset(s):
return ' '.join(sorted([strNS(a) for a in s]))
def strC(e):
"""return a str represntig e. recursively expand collections, otherwise use str"""
t = type(e)
if t == str:
return repr(e) if strCW.fullmatch(e) == None else e
elif t == tuple:
return '(' + ', '.join(strC(v) for v in e) + ')'
elif isinstance(e, list): # t == list:
return '[' + ', '.join(strC(v) for v in e) + ']'
elif t == set or t == frozenset:
return '{' + ', '. join(sorted([strC(v) for v in e])) + '}'
elif isinstance(e, dict):
return '{' + ', '. join(strC(k) + ': ' + strC(v) for k, v in e.items()) + '}'
else:
return str(e)
def out(*ee):
print(strL(*ee))
def outS(*ee):
print(strS(*ee))
def err(*e):
raise Exception('error: ' + strL(*e))
exit()
def errS(*e):
raise Exception('error: ' + strS(*e))
exit()
dbgLev = 1
dbgTime = 0
def dbg(l, *e):
if dbgLev >= l:
print('dbg: ' + strL(*e))
def dbgS(l, *e):
if dbgLev >= l:
print('dbg: ' + strS(*e))
from datetime import * ;
import re;
def dsUpdate(le, ri):
u = False
for k,v in ri.items():
if k not in le:
le[k] = set()
o = len(le[k])
le[k] |= ri[k]
if o!= len(le[k]):
u = True
return u
def dsAdd(g, e, p):
if e in g:
s = g[e]
o = len(s)
s.add(p)
return o < len(s)
else:
g[e] = {p}
return True
def dsAddS(g, e, s):
if len(s) < 1:
return False
if e in g:
o = len(g[e])
g[e] |= s
return len(g[e]) > o
else:
g[e] = s.copy()
return True
class Leaf(dict):
pass
def d2sTg(g, *kk):
a = g
for kx in range(len(kk)-2):
k = kk[kx]
if not k in a:
a[k] = {}
a = a[k]
k = kk[-2]
if not k in a:
a[k] = Leaf()
a = a[k]
k = kk[-1]
if k not in a:
a[k] = set()
return a[k]
def d2sAdd(g, e, f, p):
s = d2sTg(g, e, f)
o = len(s)
s.add(p)
return o < len(s)
def d2sAddS(g, e, f, pp):
s = d2sTg(g, e, f)
o = len(s)
s |= pp
return o < len(s)
def dsDbg(lv, g, *a):
if dbgLev < lv:
return
if len(a) > 0:
print(*a)
for k,v in g.items():
print("\t" + str(k) + ' => ' + ', '.join({str(p) for p in v}))
class PObj:
"root and container for all our objects"
end = '!'
rStart = 'start'
lahStr = True # lah is true ==> a string, False => tuple of strings
lahEmpty = '' if lahStr else ()
lahEmptySet = {lahEmpty}
objs = []
lrK = 0
lrB = 0
def setName(self, nm):
self.name = (self.__class__.__name__ + '#' + str(len(self.objs))) if nm == '' else nm
def __init__(self, nm = ''):
self.setName(nm)
self.objs.append(self)
def __str__ (self):
return self.name
def strA(self, *aa):
return str(self) + '{' + ', '.join(a + '=' + strC(getattr(self, a)) if (ex := a.find('=')) < 0 else a[0:ex] + '=' + strC(getattr(self, a[ex+1:])) for a in aa) + '}'
@staticmethod
def dbg(l, m):
err('wer braucht das')
if dbgLev >= l:
print(m)
p = '['
for o in PObj.objs:
print(p, o)
p = ','
print(']')
@staticmethod
def lahGenZero(e):
return ({e if PObj.lahStr else (e,)}, set()) if isinstance(e, str) else (e.prd, e.lah)
def lahGenOr(self, ee):
pp = set()
ll = set()
for e in ee:
if isinstance(e, str):
pp.add(e if PObj.lahStr else (e,))
else:
ll |= e.lah
pp |= e.prd
if ll == self.lah and pp == self.prd:
return False
self.lah = ll
self.prd = pp
return True
class Rule (PObj):
"a Rule has a name (a symbol or nonterminal) and a body of terminals and nonterminals"
rules = []
ruleAll = []
ruleLoA = []
s2r = {}
def __init__(self, sy, bd):
super().__init__(sy)
self.body = bd
self.pos = None
self.go = False
self.loA = None
self.brd = False
self.redCh = {self}
self.sub = {self}
self.subRi = {self}
self.stp = {}
self.st = set()
self.lt1 = {}
self.tst = False
self.lock = False
Rule.ruleAll.append(self)
if sy != '':
if sy in Rule.s2r:
Rule.s2r[sy].append(self)
else:
Rule.s2r[sy] = [self]
def strA(self, *aa):
return super().strA(type(self).__name__[4:] + '=' + aa[0] if aa[0].find('=') < 0 else aa[0], *aa[1:])
def strL(self):
return self.strA('body', 'go')
def dbg(self, l, *a):
if dbgLev >= l:
print(self.strL(), *a)
@staticmethod
def clear():
Rule.rules.clear()
Rule.ruleAll.clear()
Rule.ruleLoA.clear()
Rule.s2r.clear()
State.clear()
PObj.objs.clear()
@staticmethod
def add(*src):
"""add new rules: each rule is a string
elements are words (double or sinble quoted strings, spechial char
if an unquoted element is found as a rule name, it designates a rule, otherwise a terminal
"""
for s in src:
spl = re.findall('\w+|\'(?:[^\']|\'\')*\'?|"(?:[^"]|"")*"?|\S', s) # words | single quoted | doubble quoted | single nonspace char
# (?: non capturing group, otherwise findall will use group!
if len(spl) > 0 and spl[-1][0] in "\"'" and (len(spl[-1]) < 2 or spl[-1][0] != spl[-1][-1]):
err('missing endquote string:', spl[-1], 'in source', s, src)
assert(spl[1] == '=')
RuleSeq(spl[0], spl[2:])
@staticmethod
def finish():
rr = Rule.ruleAll.copy()
assert len(Rule.rules) == 0
for r in rr:
l = Rule.s2r[r.name]
if type(l) != list:
assert type(l) == RuleOr and r in l.body
else:
assert type(l) == list and r in l, r.strA('body') + ', l=' + strC(l)
if len(l) == 1:
Rule.s2r[r.name] = r
else:
Rule.s2r[r.name] = RuleOr(r.name + '@' + str(len(Rule.rules)), l)
Rule.rules.append(Rule.s2r[r.name])
r.setName(r.name + '@' + str(len(Rule.rules)))
Rule.rules.append(r)
for r in Rule.rules:
for i in range(len(r.body)):
b = r.body[i]
if type(b) == str:
if b[0] == "'" and b[-1] == "'" or b[0] == '"' and b[-1] == '"':
b = b[1: -1]
elif b in Rule.s2r:
b = Rule.s2r[b]
r.body[i] = b
assert b != PObj.end or r == Rule.rules[0]
r.makePos()
dbg(5, 'makePos body/pos len', len(r.body) , len(r.pos), r.strA('body','pos'))
#Rule.stpGen()
for r in Rule.rules:
assert r.go == False
g = {}
r.goGen(g)
assert r.go == False
for e,p in g.items():
g[e] = frozenset(p)
r.go = g
dbg(10, 'goGen', r.strA('body', 'go'))
if False:
Rule.lt1GenAll()
o = len(Rule.ruleAll)
for r in Rule.rules:
r.loA = RuleOr('', [])
for r in Rule.rules:
for a, t in r.go.items():
if isinstance(a, Rule):
for p in t:
a.loA.body.append(RuleSeq('', [p, p.rule.loA]))
for r in Rule.rules:
for a in r.loA.body:
assert len(a.body) == 2
if len(a.body[1].body) == 0:
a.body.pop()
dbg(1, 'loA removed [2] in', r, a)
Rule.ruleLoA = Rule.ruleAll[o:]
Rule.lahGenK(1)
for r in Rule.rules:
if isinstance(r, RuleSeq):
do = [b for b in r.body if isinstance(b, str) or PObj.lahEmpty not in b.prd]
if len(do) <= 1:
if len(do) == 0:
do = r.body
r.sub |= {d for d in do if isinstance(d, Rule)}
u = True
while u:
u = False
for r in Rule.rules:
n = r.sub.copy()
for s in r.sub if isinstance(r, RuleSeq) else r.body:
n |= s.sub
if n != r.sub:
u = True
r.sub = n
for r in Rule.rules:
dbg(1, 'subGen', r.strA('body', 'sub'))
#Rule.lahGenK(5)
return
if False:
Rule.prdLah(1)
for r in Rule.rules:
bd = r.body
if isinstance(r, RuleOr):
r.redCh |= set(b for b in bd if isinstance(b, Rule))
else:
ne = [b for b in bd if isinstance(b, str) or '' not in b.prd]
if len(ne) == 0:
r.redCh |= set(b for b in bd if isinstance(b, Rule))
elif len(ne) == 1 and isinstance(ne[0], Rule):
r.redCh.add(ne[0])
u = True
while u:
u = False
for r in Rule.rules:
o = r.redCh.copy()
for s in o:
r.redCh |= s.redCh
if len(o) < len(r.redCh):
u = True
dbg(99, 'redCh', r.strA('redCh'))
for r in Rule.rules:
dbg(1, 'redCh', r.strA('body', 'redCh'))
for r in Rule.rules:
r.sub = {r for r in r.body if isinstance(r, Rule)}
u = True
while u:
u = False
for r in Rule.rules:
oo = r.sub.copy()
for o in oo:
r.sub |= o.sub
if len(oo) < len(r.sub):
u = True
dbg(99, 'sub', r.strA('sub'))
for r in Rule.rules:
dbg(1, 'sub', r.strA('body', 'sub'))
for r in Rule.rules:
bd = r.body
if isinstance(r, RuleOr):
r.subRi |= set(b for b in bd if isinstance(b, Rule))
else:
for b in reversed(bd):
if isinstance(b, str):
break
r.subRi.add(b)
if '' not in b.prd:
break
u = True
while u:
u = False
for r in Rule.rules:
o = r.subRi.copy()
for s in o:
r.subRi |= s.subRi
if len(o) < len(r.subRi):
u = True
print('subRi', r.strA('subRi'))
if False:
for r in Rule.rules:
assert r.brd == False
b = set()
r.brdGen(b)
assert r.brd == False
r.brd = b
dbg(1, 'brdGen', r.strA('body', 'brd'))
for r in Rule.rules:
if isinstance(r, RuleSeq):
bd = r.body
bL = len(bd)
for bx in range(bL-1):
b = bd[bx]
if isinstance(b, Rule) and r not in b.redCh:
for nx in range(bx+1, bL):
n = bd[nx]
if isinstance(n, str):
if n in b.brd:
err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', n, nx)
break
else:
if len(b.brd & (n.prd | n.lah)) != 0:
err('in', r.strA('body'), 'subrule', b.strA('brd'), 'border contains follower', nx, n.stra('prd', 'lah'))
if '' not in n.prd:
break
# for r in Rule.rules:
# for p in r.pos:
# g = {}
# p.goAdd(g)
# dsDbg(1, g, str(p))
if False:
for s in State.p2s.values():
if '' in s.go and len(s.go['']) != 1:
err("reduce not unique go['']", s.go[''], 'in', s)
dbg(1, s)
u = True
while u:
u = False
for s in State.p2s.values():
u |= s.auGen()
dbg(1, 'auGen')
for s in State.p2s.values():
dbg(1, 'auGen', s.strA('pa', 'au'))
stp = set()
for s in State.p2s.values():
stp.clear()
s.check(stp)
r2s = {}
s2r = {}
for s in State.p2s.values():
if '' in s.go:
for r in s.go['']:
dsAdd(r2s, r, s)
dsAdd(s2r, s, r)
dbg(1, 'r2s st',sum(len(s) for s in r2s.values()), r2s)
for ix in range(4):
for s in State.p2s.values():
for a, t in s.go.items():
if isinstance(a, Rule):
if t in s2r:
for w in s2r[t]:
dsAdd(r2s, w, s)
dsAdd(s2r, s, w)
dbg(1, 'r2s', ix, sum(len(s) for s in r2s.values()), 'r2s', r2s)
# for r in s.go['']:
# dsAdd(s2r, s,r.redP.add(r.pos[-1])
#for r in Rule.rules:
# print(str(r), 'redP', strC(r.redP))
def makePos(self, l):
assert(self.pos == None)
self.pos = [RulePos(self, ix) for ix in range(l)]
self.pos[-1].prd = PObj.lahEmptySet
def goGen(self, g):
""" recursively add all start transactions for self to go-dict g
"""
if self.go == True:
return
elif self.go != False:
dsUpdate(g, self.go)
else:
self.go = True
self.goGenRec(g)
self.go = False
@staticmethod
def goGenRecEP(e, p, g):
""" add element e to position p to go-dict g
if e was not yet in g and g is a Rule, recursively generate go for g
"""
dbg(99, 'goGenRecEP', str(e), str(p))
if e in g:
g[e].add(p)
else:
g[e] = {p}
if isinstance(e, Rule):
e.goGen(g)
@staticmethod
def lt1GenAll():
for r in Rule.rules:
r.lt1Gen((), r.lt1)
dbg(1, 'lt1Gen', r.strA('body', 'lt1'))
def lt1Gen(self, his, to):
if self.lock:
return
if len(self.pos) <= 1:
d2sAdd(to, '', self, his)
else:
self.lock = True
self.lt1GenSub(his + (self.pos[1], ), to)
self.lock = False
@staticmethod
def lahGenK(k):
if k <= PObj.lrK:
err(f"lahGenK({k} bur lrK already {PObj.lrK}")
PObj.lrK = k
u = True
c = 0
while u:
c += 1
if c > 40:
err('too many')
u = False
for r in reversed(Rule.rules):
u |= r.lahGen()
dbg(15, 'lahGen', k, u)
dbg(3, 'lahGen', c, 'rounds')
for r in Rule.rules:
dbg(10, 'lahGen', r.strA('body'), 'prd=', r.pos[0].prd, 'lah=', r.pos[0].lah)
return
#for s in State.p2s.values():
# s.lahGen()
u = True
c = 0
while u:
c += 1
if c > 40:
err('too many')
u = False
for r in Rule.ruleLoA:
u |= r.lahGen()
dbg(1, 'loAGen', k, u)
dbg(1, 'loAgen', c, 'rounds')
for r in Rule.rules:
dbg(1, 'loA gen', r.strA('body'), 'loA.prd=', r.loA.prd, 'loA.lah=', r.loA.lah)
if False:
for r in Rule.ruleLoA:
dbg(1, r.strA('body', 'prd', 'lah'))
for r in Rule.rules:
for p in r.pos:
dbg(1, p.strA('prd', 'lah'))
def brdGen(self, b):
""" recursively add all start transactions for self to go-dict g
"""
if self.brd == True:
return
elif self.brd != False:
b |= self.brd
else:
self.brd = True
self.brdGenRec(b)
self.brd = False
@staticmethod
def makeSymPosRemove():
"make symbols and positions, after we got the source of all rules, convert body to symbols and bare strings"
PObj.objs[0].body[0] = PObj.objs[1].sym
Rule.rules = Rule.objs.copy()
PObj.dbg(4, 'made rules')
RuleSym.make()
PObj.dbg(4, 'made sym')
RulePos.make()
PObj.dbg(3, 'made pos')
@staticmethod
def prdLah(k):
""" compute for all rules
prd = productions of length <= k and
lah = prefixes of length k of all productions with length > k
"""
tS = datetime.now()
assert not PObj.lahStr or (all([len(y) == 1 for r in Rule.rules for y in r.body if isinstance(y, str)]))
cnt = 0
upd = True
while upd: # recompute all rules, until no change
upd = False
lC = 0
pC = 0
for r in Rule.rules:
cnt += 1
if r.prdLahComb(k):
upd = True
dbg(1, 'prdLah', str(r), r.prd, r.lah)
lC += len(r.lah)
pC += len(r.prd)
if cnt > 1000:
err('loop')
elT = f"{(datetime.now() - tS).total_seconds()}sec, " if dbgTime else ''
print(f"Rule.prdLah k={k} <- {PObj.lrK}: {cnt} {elT} #prd={pC} #lah={lC}")
if dbgLev >= 3:
for r in Rule.rules:
dbg(3, 'prdLah', r.strA('body', 'prd', 'lah'))
assert all([len(s) == k for s in r.lah for r in Rule.rules])
assert all([len(s) <= k for s in r.prd for r in Rule.rules])
PObj.lrK = k
def prdLahAdd(self, k, pp, ll):
upd = False
assert self.prd <= pp
if self.prd != pp:
assert self.prd < pp
upd = True
self.prd = pp
ll |= self.lah
jj = {len(l) for l in ll if len(l) != k}
if len(jj) != 0:
for l in [l for l in ll if len(l) == k]:
for j in jj:
ll.discard(l[0:j])
if ll != self.lah:
upd = True
dbg(99, 'lahupd', self.lah, ll)
self.lah = ll
return upd
def hasCycle(self):
if self.tst:
return True
self.tst = True
r = any(b.hasCycle() for b in self.body if isinstance(b, Rule))
self.tst = False
return r
def lahOneAdd(self, his, to):
p = his[-1]
assert p.rule == self
if self.tst:
return
self.tst = True
nx = p.pos + 1
if nx < len(self.pos):
self.lahOneAddSub(his, to)
else:
if len(self.body) == 0:
assert isinstance(self, RuleSeq)
dsAdd(to, '', his + (self))
if len(his) > 1:
his[-2].rule.lahOneAdd(his[0:-1], to)
self.tst = False
@staticmethod
def lahOneGen():
""" generate one char lookahead for all rules """
for r in Rule.rules:
r.lahOneAdd((r.pos[0], ), r.lahOne)
dbg(1, 'lahOneGen', r.strA('body', 'lahOne', 'go'))
dbg(1, 'lahLen', len(r.lahOne), len(r.go), len([r for r in r.go if isinstance(r, str)]))
l = {a : {s[-1] for s in b} for a, b in r.lahOne.items()}
g = {a: {r.pos[0] if isinstance(r, Rule) else r for r in b} if isinstance(b, frozenset) else b.pa for a, b in r.go.items() }
dbg(1, 'lahComp', l == g, l, g)
for r in Rule.rules:
if isinstance(r, RuleSeq):
for ex in range(len(r.body)):
e = r.body[ex]
if isinstance(e, Rule):
e.aft.add(r.pos[ex+1])
else:
for e in r.body:
if isinstance(e, Rule):
e.aft.add(r.pos[1])
for r in Rule.rules:
dbg(1, 'aftGen1', r.strA('body', 'aft'))
@staticmethod
def aftGen2():
for r in Rule.rules:
pp = r.aft
r.aft = {}
for p in pp:
for a, qq in p.lajOne.items():
if isinstance(a, str):
for q in qq.values:
dsAddS(r.aft, a, q)
dbg(1, 'aftGen2', r.strA('body', 'aft'))
@staticmethod
def sucGen():
for r in Rule.rules:
for p in r.pos:
p.suc.clear()
Rule.rules[0].pos[-1].suc.add(PObj.end if PObj.lahStr else (PObj.end ,))
u = True
lC = 0
uC = 1
while uC > 0:
lC += 1
uC = 0
for r in Rule.rules:
uC += r.sucGen()
dbg(5, f'sucGen loop {lC}, upd {uC}')
for r in Rule.rules:
dbg(5, 'sucGen', r.strA('body'), 'suc0=', r.pos[0].suc, 'suc-1=', r.pos [-1].suc)
for p in r.pos:
dbg(10, 'sucGen', p.strA('suc', 'prd', 'lah'))
@staticmethod
def stpGen():
for r in Rule.rules:
r.stpGen1(r.stp, ())
dbg(1, 'stpGen', r.strA('body', 'stp', 'suc'))
def stpGen1(self, to, his):
if len(his) > 0:
dsAdd(to, self, his)
if len(self.body) == 0:
assert isinstance(self, RuleSeq)
dsAdd(to, '', his + (self, ))
else:
if not self.lock:
self.lock = True
self.stpGen2(to, his + (self.pos[1], ))
self.lock = False
class RuleSeq(Rule):
def __init__(self, sym, bd):
super().__init__(sym, bd)
def makePos(self):
super().makePos(len(self.body)+1)
def goGenRec(self, g):
if 0 == len(self.body):
dsAdd(g, '', self)
else:
Rule.goGenRecEP(self.body[0], self.pos[1], g)
def lt1GenSub(self, his, to):
b = self.body[0]
if isinstance(b, Rule):
b.lt1Gen(his, to)
else:
d2sAdd(to, b, his[-1], his)
def ltrGen2(self, pos, his, to):
assert self == pos.rule
nx = pos.pos + 1
if nx >= (self.pos):
return True
b = self.body[pos.pos]
if isinstance(b, str):
d2sAdd(to, b, his[0, -1] + (self.pos[nx],))
def lahGen(self):
pp = self.pos[-1].prd
ll = self.pos[-1].lah
p0 = self.pos[0].prd
l0 = self.pos[0].lah
assert len(ll) == 0
assert pp == PObj.lahEmptySet
u = False
for bx in range(len(self.body)-1, -1, -1):
b = self.body[bx]
o = self.pos[bx]
if isinstance(b, str):
a = b if PObj.lahStr else (b, )
pN = {a + p for p in pp if len(p) < PObj.lrK}
lN = {(a + p)[0:PObj.lrK] for p in pp if len(p) == PObj.lrK} | {(a + l)[0:PObj.lrK] for l in ll if len(l) == PObj.lrK}
else:
pN = {a + p for a in b.pos[0].prd for p in pp if len(a) + len(p) <= PObj.lrK}
lN = {(a + p)[0:PObj.lrK] for a in b.pos[0].prd for p in pp if len(a) + len(p) > PObj.lrK} \
| {(a + l)[0:PObj.lrK] for a in b.pos[0].prd for l in ll if len(a) + len(l) >= PObj.lrK} \
| {a for a in b.pos[0].lah if len(a) == PObj.lrK}
o.prd = pN
o.lah = lN
pp = pN
ll = lN
return p0 != self.pos[0].prd or l0 != self.pos[0].lah
def sucGen(self):
u = False
pp = self.pos[-1].suc
nn = pp
#dbg(1, 'sucGen0', self.strA('body'), pp)
for qx in range(len(self.pos)-2, -1, -1):
q = self.pos[qx]
b = self.body[qx]
if isinstance(b, Rule) and not nn <= b.pos[-1].suc:
u = True
b.pos[-1].suc |= nn
# dbg(1, 'sucGen1', b.pos[-1].strA('rule', 'pos', 'suc'), 'nn=', nn)
nn = {(a+p)[0: PObj.lrK] for a in q.prd for p in (pp if len(pp) > 0 else PObj.lahEmptySet) } | q.lah
q.suc = nn
#dbg(1, 'sucGen2', q.strA('suc', 'prd', 'lah'))
return u
def brdGenRec(self, b):
bd = self.body
bL = len(bd)
if len(bd) == 0:
return
e = bd[0]
if isinstance(e, Rule) and self in e.redCh and bL > 1:
for fx in range(0, bL):
f = bd[fx]
if isinstance(f, Rule) and self in f.sub:
g = bd[1]
if isinstance(g, str):
b.add(g)
else:
b |= {p for p in g.prd if p != ''} | g.lah
break
for ex in range(len(self.body)-1, -1, -1):
e = self.body[ex]
if isinstance(e, str):
break
e.brdGen(b)
if '' not in e.prd:
break
return
for ex in range(len(self.body)-1):
e = self.body[ex]
if isinstance(e, str):
break
if e in self.redCh or self in e.redCh:
f = self.body[ex+1]
if isinstance(f, str):
b.add(f)
else:
b |= {x for x in f.prd if len(x) != 0} | f.lah
assert all(len(x) == 1 for x in b)
if '' not in e.prd:
break
assert not self.tst
e = self.body[0]
if isinstance(e, Rule) and '' in e.prd:
for ex in range(len(self.body)-2, -1, -1):
e = self.body[ex]
if isinstance(e, str):
break
if self in e.subRi:
dbg(1, 'hasCycle', str(self), str(e))
b |= {x for x in e.prd if len(x) > 0} | e.lah
if '' not in e.prd:
break
def lahOneAddSub(self, his, to):
p = his[-1]
assert self == p.rule and p.pos < len(self.body)
e = self.body[p.pos]
dsAdd(to, e, his[0: -1] + (self.pos[p.pos+1],))
if isinstance(e, Rule):
e.lahOneAdd(his[0: -1] + (self.pos[p.pos+1], e.pos[0]), to)
def stpGen2(self, to, his):
b = self.body[0]
if isinstance(b, str):
dsAdd(to, b, his)
else:
b.stpGen1(to, his)
class RuleOr(Rule):
def __init__(self, sym, bd):
super().__init__(sym, bd)
def makePos(self):
super().makePos(2)
def goGenRec(self, g):
for b in self.body:
Rule.goGenRecEP(b, self.pos[1], g)
def lt1GenSub(self, his, to):
for b in self.body:
if isinstance(b, Rule):
b.lt1Gen(his, to)
else:
d2sAdd(to, b, his[-1], his)
def lahGen(self):
assert len(self.pos[-1].lah) == 0
assert self.pos[-1].prd == PObj.lahEmptySet
p0 = self.pos[0].prd
l0 = self.pos[0].lah
pp = set()
ll = set()
for b in self.body:
if isinstance(b, str):
pp.add(b if PObj.lahStr else(b,))
else:
pp |= b.pos[0].prd
ll |= b.pos[0].lah
if p0 == pp and l0 == ll:
return False
self.pos[0].prd = pp
self.pos[0].lah = ll
return True
def sucGen(self):
u = False
pp = self.pos[-1].suc
for b in self.body:
if isinstance(b, Rule):
if not pp <= b.pos[-1].suc:
u = True
b.pos[-1].suc |= pp
self.pos[0].suc = {(a+p)[0:PObj.lrK] for a in self.pos[0].prd for p in pp} | self.pos[0].lah
return u
def lahGenPos(self):
self.pos[1].lahGenSeq([])
self.pos[0].prd = self.prd
self.pos[0].lah = self.lah
def prdLahComb(self, k):
"compute lah and prd combining rules and already computed prd and lah"
pp = set()
ll = set()
for y in self.body:
if isinstance(y, str):
pp.add( y if PObj.lahStr else (y,))
else:
pp |= y.prd
ll |= y.lah
return self.prdLahAdd(k, pp, ll)
def brdGenRec(self, b):
for e in self.body:
e.brdGen(b)
def lahOneAddSub(self, his, to):
p = his[-1]
assert p.rule == self and p.pos == 0
hiN = his[0: -1] + (self.pos[1], )
for e in self.body:
dsAdd(to, e, hiN)
if isinstance(e,Rule):
e.lahOneAdd(hiN + (e.pos[0],), to)
def stpGen2(self, to, his):
for b in self.body:
if isinstance(b, str):
dsAdd(to, b, his)
else:
b.stpGen1(to, his)
class RuleSymRemove (PObj):
"the symbols are our nonterminals, which are a set of rules"
n2y = {}
syms = None
def __init__(self, sym):
self.sym = sym
super().__init__(f"Y#{sym}")
self.rules = []
self.lah = set()
def sExp (self):
return super().sExp(f"sym={self.sym!s} {self.rules} lah={self.lah}")
@staticmethod
def make():
"make the symbols from defined rules and convert Rule.body to symbols and bare strings"
i0 = len(PObj.objs)
for r in Rule.rules: #collect symbols
if r.sym not in RuleSym.n2y:
RuleSym.n2y[r.sym] = RuleSym(r.sym)
RuleSym.n2y[r.sym].rules.append(r)
r.sym = RuleSym.n2y[r.sym]
RuleSym.syms = PObj.objs[i0:]
assert Rule.objs[0].sym == RuleSym.syms[0]
assert RuleSym.syms[0].sym == PObj.rStart
assert RuleSym.n2y[PObj.rStart] == RuleSym.syms[0]
assert len(RuleSym.n2y[PObj.rStart].rules) == 1
for r in Rule.rules: # convert Rule.body to symbols and bare strings
b = r.body
if r.pos == 'a':
for ix in range(len(b)):
v = b[ix]
if v[0] == "'" and v[-1] == "'":
b[ix] = v[1: len(v)-1]
assert b[ix] != PObj.end
elif v in RuleSym.n2y:
b[ix] = RuleSym.n2y[v]
else:
err(f"rule {v} in {r} not in {Rule.rules}")
elif r.pos == 'n':
for ix in range(len(b)):
v = b[ix]
if v in RuleSym.n2y:
b[ix] = RuleSym.n2y[v]
else:
err(f'bad pos {r.pos} in {r.sExp()}')
class RulePos (PObj):
def __init__(self, r, p):
super().__init__(r.name + '#' + str(p))
self.rule = r
self.pos = p
self.lah = set()
self.prd = set()
self.suc = set()
def strL(self):
return self.strA('rule', 'pos')
def stpAdd(self, to):
if self.pos == 0:
dsUpdate(to, self.rule.stp)
elif self.pos + 1 == len(self.rule.pos):
dsAdd(to, '', (self.rule, ))
else:
b = self.rule.body[self.pos]
pN = self.rule.pos[self.pos + 1]
dsAdd(to, b, (pN, ))
if isinstance(b, Rule):
for t, uu in b.stp.items():
for u in uu:
dsAdd(to, t, (pN,) + u)
def goAdd(self, g):
if self.pos == 0:
dsUpdate(g, self.rule.go)
elif self.pos + 1 == len(self.rule.pos):
dsAdd(g, '', self.rule)
else:
b = self.rule.body[self.pos]
dsAdd(g, b, self.rule.pos[self.pos + 1])
if isinstance(b, Rule):
dsUpdate(g, b.go)
def stps(self):
if self.pos == len(self.rule.pos) - 1:
return {(self.rule, )}
elif self.pos == 0:
return self.rule.stp
else:
assert isinstance(self.rule, RuleSeq)
n = self.rule.pos[self.pos+1]
b = self.rule.body[self.pos]
if isinstance(b, str):
return {(n, b)}
else:
return {(n,) + x for x in b.stp}
@staticmethod
def makeLah(k):
# compute lah for RuleSym and RulePos using lah from Rule
# lah = production of len <= k and prefix of len k of all production longer k
Rule.prdLah(k)
for y in RuleSym.syms:
y.lah = set().union(*[r.lah | r.prd for r in y.rules])
dbg(3, f"{y} {y.lah}")
for r in Rule.rules:
r.pos[-1].lah = {lahEmpty}
for pX in range(len(r.body)-1, -1, -1): # backward through the positions
p = r.pos[pX]
e = r.body[pX]
lS = e.lah if isinstance(e, RuleSym) else {e if PObj.lahStr else (e,)}
n = set()
for l in lS:
if len(l) == k:
n.add(l)
else:
for o in p.next.lah:
dbg(9, f"n.add(({l}+{o})[0:k])")
n.add((l+o)[0:k])
p.lah = n
dbg(3, f"{r} {pX} {p} {p.lah}")
assert r.pos[0].lah == r.prd | r.lah, (f"rule {r.sExp()} fails \npos[0].lah={r.pos[0].lah}"
f" \n== prd={r.prd} \n| lah={r.lah}"
f",\ndifference lah-..={r.pos[0].lah-r.prd-r.lah}, ..-lah={(r.prd|r.lah)-r.pos[0].lah}")
def appK(ll, rr):
a = set()
for l in ll:
if len(l) == PObj.lrK:
a.add(l)
else:
assert len(l) < PObj.lrK
for r in rr:
a.add((l+r)[0:PObj.lrK])
return a
class State (PObj):
p2s = {}
state0 = None
lrErr = None
def __init__(self, pa):
super().__init__('s' + str(len(State.p2s)))
assert isinstance(pa, frozenset)
assert all(isinstance(p, RulePos) for p in pa)
assert pa not in State.p2s
self.pa = pa
self.go = None
State.p2s[pa] = self
@staticmethod
def getOrMake(pa):
pa = frozenset(pa)
return State.p2s[pa] if pa in State.p2s else State(pa)
@staticmethod
def makeAll():
State.state0 = State(frozenset([Rule.rules[0].pos[0]]))
State.state0.goExp()
if Rule.rules[0] not in State.state0.go:
eP = frozenset()
e = State(eP)
e.go = {}
del State.p2s[eP]
State.state0.go[Rule.rules[0]] = e
dbg(1, 'makeAll added endState', e, 'to', State.state0.strA('pa', 'go'))
State.err = ''
for s in State.p2s.values():
dbg(5, 'goExp', s.strA('pa', 'go'))
for lrK in range(1,5):
dbg(1, 'gen lrK', lrK)
Rule.lahGenK(lrK)
Rule.sucGen()
cnf = False
for s in State.p2s.values():
cnf |= s.goF2()
PObj.lrK = lrK
if not cnf:
return
State.err = f'makeAll: still conflicts at lrK {lrK}'
return
for s in State.p2s.values():
s.lajOneGen()
dbg(1, 'lajOneGen', s.strA('pa', 'lajOne'))
for a, l in s.lajOne.items():
if len(l) > 1:
dbg(1, 'lajOneGem multi', a, l)
# Rule.aftGen2()
for s in State.p2s.values():
s.ltrGen((), s.ltr)
dbg(1, 'ltr', s.strA('pa', 'ltr'))
for s in State.p2s.values():
for r in s.go.keys():
if isinstance(r, Rule):
r.st.add(s)
for r in Rule.rules:
dbg(1, 'stGen', r.strA('body', 'st'))
# State.lahGen()
if State.err == '':
for s in State.p2s.values():
s.goFix()
def strL(self):
return self.strA('pa', 'go')
def dbg(self, lv):
if lv > dbgLev:
return
print(str(self), ':', ', '.join(str(p) for p in self.pa))
for e, s in self.go.items():
print('\t', e, ('=> ' + str(s)) if isinstance(s, State) else '<= ' + ', '.join(str(r) for r in s))
def goExp(self):
if self.go != None:
return
self.go = {}
for p in self.pa:
p.goAdd(self.go)
for e, pp in self.go.items():
if e != '':
s = State.getOrMake(pp)
self.go[e] = s
s.goExp()
def stpExp(self):
if self.stp != None:
return
self.stp = {}
dbg(1, 'SstpExp1', self)
for p in self.pa:
p.stpAdd(self.stp)
dbg(1, 'SstpExp2', self.strA('pa', 'stp'))
for e, pp in self.stp.items():
dbg(1, 'SstpExp3', e, pp)
if e != '':
s = State.getOrMake(frozenset(p[-1] for p in pp))
self.go[e] = s
s.stpExp()
def ltrGen(self, his, to):
for p in self.pa:
p.rule.ltrGen2(self, his, to)
def lahGenOld(self):
if len(self.pa) == 1:
self.prd, self.lah = PObj.lahGenZero(list(self.pa)[0])
else:
self.lahGenOr(self.pa)
def lajOneGenOld(self):
for p in self.pa:
if p.pos >= len(p.rule.pos) - 1:
d2sAdd(self.lajOne, '', p.rule, None)
else:
e = p.rule.body[p.pos]
f = p.rule.pos[p.pos+1]
if p.pos == 0:
for a, h in p.rule.lahOne.items():
if isinstance(self.go[a], State):
dbg(1, 'lajOneGen d2sAdd(', self.lajOne, strC(a), self.go[a], h)
d2sAddS(self.lajOne, a, self.go[a], h)
else:
for r in self.go[a]:
dbg(1, 'lajOneGen 2sAdd(', self.lajOne, a, r, h)
d2sAddS(self.lajOne, a, r, h)
else:
if isinstance(e, str):
d2sAdd(self.lajOne, e, self.go[e], (f,))
else:
for a, h in e.lahOne.items():
if isinstance(self.go[a], State):
dbg(1, 'lajOneGen d2sAdd(', self.lajOne, strC(a), self.go[a], {(f,)+j for j in h})
d2sAddS(self.lajOne, a, self.go[a], {(f,)+j for j in h})
else:
for r in self.go[a]:
dbg(1, 'lajOneGen 2sAdd(', self.lajOne, a, r, h)
d2sAddS(self.lajOne, a, r, {(f,)+j for j in h})
def llGet(self, r):
if r not in self.go:
ll = None
else:
ll = set()
for p in self.go[r].pa:
if len(p.rule.pos) > 2 or p.rule not in self.go:
ll |= p.suc
else:
a = self.llGet(p.rule)
if isinstance(r, RuleOr):
if a != None:
ll |= p.suc
else:
ll |= p.suc if a == None else (a & p.suc)
dbgS(15, 'llGet', self, r, 'returning', ll)
return ll
def llGe2(self, s, ll):
nn = set()
for p in s.pa:
if p.pos == 1:
r = p.rule
assert r in self.go
mm = {(l+q)[0:PObj.lrK] for l in ll for q in r.pos[1].prd | r.pos[1].lah}
if any(len(m) < PObj.lrK for m in mm):
mm = self.llGe2(self.go[r], mm)
else:
mm = p.suc
nn |= mm
ss = {q for q in p.suc for p in s.pa}
dbgS(15, 'llGe2', self, s, 'returning', nn, nn <= ss, 's.suc=', ss)
return nn
def sucInState(self, r, ll):
if all(len(l) == PObj.lrK for l in ll):
return ll
if isinstance(r, Rule):
aa = appK(ll, r.pos[-1].suc)
dbgS(10, 'sucInState1 aa', self, r, 'aa=', aa, 'll=', ll)
if r not in self.go:
return aa
elif r not in self.go:
err(f'lahInPos but r={r} not in go', self.strA('pa', 'go'))
else:
aa = None
s = self.go[r]
if len(s.pa) == 0:
return ll if aa == None else aa
bb = set()
for p in s.pa:
cc = appK(ll, p.suc)
dbgS(10, 'sucInState2 cc', self, r, p, 'cc=', cc, 'll=', ll)
if p.pos == 1:
assert p.rule in self.go
dd = appK(ll, p.prd | p.lah)
dbgS(10, 'sucInState3 dd', self, r, p, 'dd=', dd, 'll=', ll)
ee = self.sucInState(p.rule, dd)
dbgS(10, 'sucInState4 ee', self, r, p, 'ee=', dd, 'dd=', dd)
cc &= ee
bb |= cc
dbgS(10, 'sucInState5 bb', self, r, p, 'bb=', bb, 'll=', ll)
if aa != None:
bb &= aa
return bb
def goF2(self):
if '' not in self.go or not isinstance(self.go[''], set):
return False
dbg(15, 'goF2 before', self.strA('pa', 'go'))
ge = self.go['']
if False: # use .suc
d = {}
for ga, gp in self.go.items():
if isinstance(ga, str) and ga != '':
a = ga if PObj.lahStr else (ga,)
for l in {(a + l)[0:PObj.lrK] for p in gp.pa for l in p.suc}:
ddPut(d, *l, gp)
dbg(15, 'goF2a', d)
cnf = False
for r in ge:
for l in r.pos[-1].suc:
o = ddGet(d, *l)
if o == None:
ddPut(d, *l, r)
else:
dbg(3, 'goF2a1 conflict', self.strA('pa'), l, r, 'old', o)
cnf = True
if True: # cnf # use sucInState
d = {}
for ga, gp in self.go.items():
if isinstance(ga, str) and ga != '':
a = ga if PObj.lahStr else (ga,)
for l in self.sucInState(ga, a):
ddPut(d, *l, gp)
cnf = False
dbg(3, 'retry with sucInState')
for r in ge:
si = self.sucInState(r, PObj.lahEmptySet)
dbgS(5, 'goF2sSucIn', self, r, 'sucInState', si, 'r.suc', r.pos[-1].suc)
for l in si:
o = ddGet(d, *l)
if o == None:
ddPut(d, *l, r)
else:
dbg(3, 'goF2s1 conflict', self.strA('pa'), l, r, 'old', o)
cnf = True
if False:
if len(r.body) == 0:
ll = self.llGet(r)
dbgS(10, 'goF2', r, 'llGet', ll)
for l in ll:
o = ddGet(d, *l)
if o == None:
ddPut(d, *l, r)
else:
dbgS(2, 'goF2p1 conflict', self.strA('pa'), l, 'should add', r, 'but old', o)
cnf = True
else:
for l in r.pos[-1].suc:
o = ddGet(d, *l)
if o == None:
ddPut(d, *l, r)
else:
dbgS(2, 'goF2a1 conflict', self.strA('pa'), l, 'should add', r, 'but old', o)
cnf = True
dbg(10, 'goF2a2 cnf', cnf, d)
if False:
xx = ddRed(d.copy(), -9876)
dbg(15, 'goF2a2 red', xx)
dflt = anyOf(ge) if len(ge) == 1 else None
if PObj.end in d:
eR = d[PObj.end]
if isinstance(eR, Rule) and eR in ge:
dflt = eR
del d[PObj.end]
else:
err('goF2 d[end]', eR, 'ge', ge)
if cnf:
return cnf
ddRed(d, dflt)
dbgS(10, 'goF2a3 red dflt', dflt, d)
if dflt == None:
del self.go['']
else:
self.go[''] = dflt
for a,t in d.items():
assert a not in self.go or isinstance(t, dict) or t == self.go[a]
self.go[a] = t
dbg(5, 'goF2a9 go', self.strA('pa', 'go'))
return False
for r in ge:
# to = self.go[r]
# for tP in to.pa:
for l in r.pos[-1].suc:
dsAdd(rd, l, r)
cnf = False
for a, rr in rd.items():
if a in self.go:
dbg(10, 'goF2 conflic1', self.strA('pa'), a, 'go', self.go[a], 'versus rd', rd[a])
for r in rr:
if a not in r.pos[-1].suc:
dbg(10, 'goF2 c2 not in suc', r, r.pos[-1].suc)
else:
dbg(1, 'goF2 c2 conflict in suc', r, r.pos[-1].suc)
cnf = True
elif len(rr) != 1:
dbg(1, 'goF2 conflic1', self.strA('pa'), a, 'rd', rd[a])
cc = {}
for r in rr:
for l in r.pos[0].suc:
if l not in cc and l not in self.go:
cc[l] = r
dbg(1, 'goF2 c3 adding', l, r)
else:
dbg(1, 'goF2 c3 conflict', a, r)
cnf = True
elif len(ge) > 1:
self.go[a] = list(rr)[0]
if not cnf:
if len(ge) > 1:
del self.go['']
else:
self.go[''] = list(ge)[0]
dbg(1, 'goF2 after', 'conflict' if cnf else 'fixed', self.strA('pa', 'go'))
return cnf
def goFix(self):
if '' not in self.go:
return
# dbg(1, 'goFix begin', self.strA('pa', 'go', 'lah'))
self.goFix1(self.go[''], self.go, self.lah)
#dbg(1, 'goFix end ', self.strA('pa', 'go'))
def goFix1(self, rr, gg, ll):
if len(rr) == 1:
for r in rr: break
gg[''] = r
else:
assert len(rr) > 1
cnt = {r: 0 for r in rr}
for a,b in ll.items():
if isinstance(b, Leaf):
assert len(b) == 1
for k in b.keys():
if isinstance(k, Rule):
cnt[k] += 1
# dbg(1, 'goFix1 cnt', cnt)
r = max(cnt.items(), key = lambda i: i[1])
# dbg(1, 'goFix1 max', r)
r = r[0]
# if r[1] < 1:
# del gg['']
# else:
gg[''] = r
for a,b in ll.items():
if isinstance(b, Leaf):
assert len(b) == 1
if a != '':
for k in b.keys(): break
if k == '':
if sTo != None:
gg[a] = sTo
else:
assert a in gg
elif k != r:
gg[a] = k
elif a in gg:
del gg[a]
dbg(1, 'goFix1 ed', a, gg)
for a,b in ll.items():
if type(b) == dict:
dbg(1, 'goFix1 diving into', a, 'from', gg[a] if a in gg else '---', 'to', b)
gg[a] = {}
self.goFix1(rr, gg[a], b)
def addPosA(self, pa):
"add (position, the string after the the end of the current rule) to self, recursively add subrules"
p, a = pa
if State.lrErr != None:
return
if p.next == None: #after all positions of rule ==> reduce
if a not in self.to:
self.to[a] = p.rule
elif self.to[a] != p.rule:
State.lrErr = strL("lrK mismatch [", a, "]=", self.to[a])
else:
e = p.rule.body[p.pos]
k = PObj.lrK
t = (p.next, a)
if isinstance(e, str): #shift character on stack with next state
for l in p.lah:
assert l[0] == e
if len(l) != k:
l = (l+a)[0:k]
if l not in self.to:
self.to[l] = {t}
elif not isinstance(self.to[l], set):
State.lrErr = f"lrK mismatch [{l}]->{self.to[l]} cannot add {t}"
return
else:
self.to[l].add(t)
else:
assert isinstance(e, RuleSym)
if e not in self.to: # next state after this symbol
self.to[e] = {t}
elif t in self.to[e]:
return
else:
self.to[e].add(t)
for n in p.next.lah: # enter pos[0] of subrules, string after is lah of next position of rule
if len(n) != k:
n = (n+a)[0:k]
for r in e.rules:
self.addPosA((r.pos[0], n))
if State.lrErr != None:
return
def addTrans(self):
"modify to: find/create states for (pos, aft) sets in to.values"
for k, v in self.to.items():
if isinstance(v, Rule):
continue
pa = frozenset(v)
if pa in self.pa2u:
self.to[k] = self.pa2u[pa]
elif State.lrErr != None:
return
else:
n = type(self)(pa)
self.to[k] = n
n.addTrans()
@staticmethod
def clear():
State.states = []
State.p2s.clear()
@staticmethod
def lahGenOld():
for s in State.p2s.values():
s.lahGen1()
u = True
while u:
u = False
for s in State.p2s.values():
u |= s.lahGen2()
dbg(1, 'lahGen')
for s in State.p2s.values():
for a, l in s.lah.items():
for q, r in l.items():
dsAddS(s.lahOne, a, r)
dbg(1, 'lahGen', s.strA('pa', 'lah', 'lahOne'))
for s in State.p2s.values():
s.lahExp(9)
def lahGenOld1(self):
for a, s in self.go.items():
if isinstance(a, str) and a != '':
d2sAdd(self.lah, a, s, s)
def lahGenOld1Pos(self):
for a, s in self.go.items():
if isinstance(a, str) and a != '':
d2sAdd(self.lah, a, s, s)
def lahGenOld2(self):
u = False
if '' in self.go:
for r in self.go['']:
for s in r.st:
for e, eD in s.go[r].lah.items():
for pp in eD.values():
if e != PObj.end or isinstance(r, Rule):
u |= d2sAddS(self.lah, e, r, pp)
return u
def lahExp(self, maxi):
self.lahExp1(maxi, self.lah, [])
def lahExp1(self, maxi, dct, kys):
for a, e in dct.items():
if a == PObj.end:
dbg(1, 'exp1 end', e, kys)
elif len(e) > 1:
dbg(1, 'lahExp1 conflict ', 'expanding' if maxi > 0 else 'unresolved', ' in', self.strA('pa'), 'at lah', kys + [a], ':', e)
if maxi > 0:
self.lahExp2(maxi-1, dct, kys + [a])
elif State.err == '':
State.err = strS('lahExp1 conflict unresolved in', self.strA('pa'), 'at lah', kys + [a], ':', e)
def lahExp2(self, maxi, dct, kys):
oo = dct[kys[-1]]
assert isinstance(oo, Leaf)
nn = {}
dct[kys[-1]] = nn
for r, ss in oo.items():
dbg(99, 'exp2a', str(r), ss)
for s in ss:
for u, v in s.lahOne.items():
if u != PObj.end or isinstance(r, Rule):
d2sAddS(nn, u, r, v)
dbg(99, 'exp2z', nn)
if kys[-1] == PObj.end:
dbg(1, 'exp2zend', nn)
dbg(1, 'expanded', self.strA('pa'), 'from', oo, 'to', dct)
self.lahExp1(maxi, nn, kys)
def auGen(self):
if '' not in self.go:
return False
n = self.au.copy()
for a,s in self.go.items():
if isinstance(a, str) and a != '':
dsAdd(n,a,s)
u = False
for r in self.go['']:
for s in r.st:
for a, b in n.items():
if dsAddS(s.au, a, b - {s}):
u = True
dbg(1, 'auGen0', s.strA('au'))
return u
hist = []
done = []
def check(self, stp):
if self.done:
#dbg(1, 'check0-', State.lv, self.strA('done', 'go'))
return
l = len(State.hist)
if l >= len(State.done):
if l > len(State.done):
err('done smaller', State.done, State.hist)
State.done.append({self})
else:
if self in State.done[l]:
return
else:
State.done[l].add(self)
l += 1
if len(State.done) < l:
State.done[l].clear()
assert self.done == False
self.done = True
State.hist.append(self)
dbg(1, 'check1', len(State.hist), State.hist, self.strA('go'), stp)
gg = {g for g in self.go.keys() if isinstance(g, str) and g != ''}
if len(gg & stp) != 0:
dbg(1, 'check dead end', gg, self.strA('pa', 'go'), State.hist)
if '' in self.go:
if len(self.go['']) > 1:
dbg(1, "check several go['']", self.strA('pa', 'go'))
for g in self.go['']:
#dbg(1, "check2", str(g), g.st)
for s in g.st:
s.chec2(stp | gg, g)
self.done = False
# dbg(1, 'check9-', my, self.strA('done'))
State.hist.pop()
def chec2(self, stp, ru):
assert ru in self.go
dbg(1, 'chec2=', str(self), str(ru), str(self.go[ru]))
self.go[ru].check(stp)
@staticmethod
def makeLookahead(k):
"make state for LR(k) grammar, return errormessage if not LR(k)"
assert k >= 1
State.clear()
RulePos.makeLah(k)
State.st0 = len(PObj.objs)
State.lrErr = None
b = State(frozenset([(Rule.rules[0].pos[0], k * (PObj.end if PObj.lahStr else (PObj.end, )))]))
e = State(frozenset(b.to[RuleSym.syms[1]]))
b.addTrans()
e.addTrans()
State.states = PObj.objs[State.st0: ]
Parser.states = State.states
if State.lrErr != None:
return f"grammar not LR({k}), because of {State.lrErr}"
@staticmethod
def checkLR0():
""" check if the current LR(1) grammar is even LR(0)
return None if it is, otherwsie a string describing the conflicting state
LR(0) grammars are implemented using LR(1) grammars
LR(0) means, that a reduce step does not depend on lookahead.
a shift step however uses the char shifted (== lookahead) to determine the next state """
assert PObj.lrK == 1
for u in State.states:
assert len(u.to) > 0
fi = None
for k,v in u.to.items():
if not isinstance(k, RuleSym):
if fi == None:
fi = v
elif v != fi if isinstance(v, Rule) else not (isinstance(v, State) and isinstance(fi, State)):
return strL("grammar not LR(0) because of", u)
def eStr(e):
if type(e) != tuple and type(e) != list:
return str(e)
else:
return str(e[0]) + '>' + str(len(e)-1) + '(' + ', '.join(x if type(x) != tuple and type(x) != list else str(x[0]) + '>' + str(len(x)-1) for x in e[1:]) + ')'
def anyOf(c):
for a in c:
return a
err('anyOf empty', c)
def ddPut(d, *key):
e = d
for k in key[0:-2]:
if k not in e:
e[k] = {}
elif not isinstance(e[k], dict):
e[k] = {'': e[k]}
e = e[k]
k, v = key[-2:]
if k not in e or not isinstance(e[k], dict):
e[k] = v
else:
e[k][''] = v
def ddGet(d, *key):
e = d
for k in key:
if not isinstance(e, dict) or k not in e:
return None
e = e[k]
return e if not isinstance(e, dict) else e[''] if '' in e else None
def ddRed(d, dflt):
hasDi = False
for k, v in d.items():
if isinstance(v, dict):
v = ddRed(v, dflt)
d[k] = v
hasDi |= isinstance(v, dict)
dbg(20, 'ddRed di', hasDi)
if not hasDi:
vv = set(d.values())
dbg(20, 'ddRed vv', vv)
if len(vv) == 1:
return vv.pop()
for k in {k for k,v in d.items() if v == dflt}:
del d[k]
return d
def ddTest():
d = {}
ddPut(d, 0, 'null')
ddPut(d, 'a', 'b', 'ab')
dbg(1, 'a, b', d)
ddPut(d, 'a', 'c', 'ac')
dbg(1, 'a, c', d)
ddPut(d, 'a', 'c', 'd', 'acd')
dbg(1, 'a, c, d', d)
ddPut(d, 'a', 'c', 'ac++')
dbg(1, 'a, c', d)
dbg(1, 'ddGet a', ddGet(d, 'a'))
dbg(1, 'ddGet y', ddGet(d, 'y'))
dbg(1, 'ddGet a b', ddGet(d, 'a', 'b'))
dbg(1, 'ddGet a y', ddGet(d, 'a', 'y'))
dbg(1, 'ddGet a c d', ddGet(d, 'a', 'c', 'd'))
dbg(1, 'ddGet a c d y', ddGet(d, 'a', 'c', 'd', 'y'))
#ddTest()
#exit()
class LahIter():
def __init__(self, l):
self.list = l
self.ix = 0
def __iter__(self):
return self
def __next__(self):
self.ix += 1
if self.ix <= len(self.list):
return self.list[self.ix - 1]
raise StopIteration()
def __getitem__(self, i):
i += self.ix
return self.list[i] if i < len(self.list) else PObj.end
@staticmethod
def test():
print('LahIter test begin')
iter = LahIter(['begin', 0, 1,2,3,4])
for i in iter:
print(i, iter[0], iter[1], iter[-1])
print('LahIter test end')
class Parser:
pCnt = -1
@staticmethod
def parse(inp):
assert len(State.state0.pa) == 1
for r0 in State.state0.pa: break
assert r0.pos == 0 and r0.rule == Rule.rules[0]
r0 = r0.rule
dbg(5, 'parsing for', r0)
st = [(None, State.state0)]
inI = LahIter(inp)
tNr = -1
Parser.pCnt = -1
cntL = 1000
act = 'start'
shiL = st[-1][1]
try:
while True:
Parser.pCnt += 1
e,s = st[-1]
lah = inI[0]
dbgS(10, 'parse', (str(Parser.pCnt) + ' ' + act).ljust(20), 'lah', lah, 'stck', len(st), '>', e, s)
if Parser.pCnt > cntL:
err("too many loops cnt={Parser.pCnt}, lah={lah} st={st}")
# dbg(2, f'run{Parser.pCnt} {ls} --> lah={lah} tos={e}, {s} st={len(st)} {st}')
to = s.go
dbg(15, 'parse to0', to)
for fx in range(20):
to = to[inI[fx]] if inI[fx] in to else to[''] if '' in to else None
dbg(20, 'parse to1', to)
if not isinstance(to, dict): break
else:
err('parser to deeply nested lah', lah, s.go)
if isinstance(to, State):
st.append((next(inI), to))
assert st[-1][0] == lah
shiL = to
tNr += 1
act = 'shift ' + lah
lah = inI[0]
elif isinstance(to, Rule):
sx = len(st) - len(to.pos) + 1
p = [to] + [x0 for x0, x1 in st[sx : ]]
act = 'reduce ' + str(to)
del st[sx:]
st.append((p, st[-1][1].go[to]))
else:
break
except StopIteration:
err('parse internal')
# ??? assert len(st) > 1
# ??? dbg(1, 'parseEnd lah', lah, len(st), st[1][0], lah == PObj.end and len(st) == 2 and st[1][0][0] == Rule.rules[0])
if lah == PObj.end and len(st) == 2 and st[1][0][0] == Rule.rules[0]:
dbgS(5, 'parsed', str(Rule.rules[0]), 'from', len(inp), 'tokens, in', Parser.pCnt, 'steps,', len(State.p2s), 'states')
return st[1][0]
else:
iA = inp[tNr+1: ] +[ PObj.end]
r = f"syntax after {inI[-1]} tokenNr {tNr} expected: {strNSset(set().union(*[p.suc for p in shiL.pa]))}, not lah: {' '.join(strNS(l) for l in iA[-5:])}"
dbg(1, r)
dbg(5, 'last', inI[-1], 'tokennr', tNr, ', lah', inI[0], inI[1], inI[2]
, '\n\tpreceeding', inp[0: tNr+1]
, '\n\tfollowing ', iA
, '\n\tstack', len(st)
, '\n\t\t' + '\n\t\t'.join(strS(str(sx) + ': ' + eStr(st[sx][0]), 'state =', st[sx][1]) for sx in range(len(st)-1, -1, -1))
)
return r
@staticmethod
def makeGr(*grammar):
"make Rule, Sym and Pos for given grammer in form for function f"
out("grammar source", grammar)
PObj.lrK = 0
Rule.clear()
Rule.add(*grammar)
Rule.finish()
State.makeAll()
@staticmethod
def makeStateRemove():
"try to make lr(k) states for k=0,1, ..."
tS = datetime.now()
k = 0
st = f"{len(RuleSym.syms)} syms, {len(Rule.rules)} rules, {len(RulePos.poss)} rulePos"
while True:
k += 1
res = State.make(k)
el = (datetime.now() - tS).total_seconds()
elT = f"after {el}sec: " if dbgTime else ''
if res == None:
break
print(f"{res}: {elT}{st} {len(State.states)} states")
if el > 7:
print("stop by timeout, do not check next LR(k)")
return False
if k > 20:
print("stop by k >Y 20, do not check next LR(k)")
return
if k == 1:
res = State.checkLR0()
if res == None:
k=0
else:
print(f"{res}: {elT}")
print(f'grammar is LR({k}): {elT}{st} {len(State.states)} states')
Parser.states = State.states
PObj.dbg(1, 'State 0')
return True
@staticmethod
def testGr(nm, syn, *cs):
"""test named nm, with grammar syn and testcases cs"""
print(f"{nm} --- begin test " + strSep1)
Parser.makeGr(*syn)
for s in State.p2s.values():
dbg(5, s.strA('pa', 'go'))
if State.err != '':
print('error generating grammar', nm, State.err)
else:
Parser.test(nm, *cs)
@staticmethod
def test(nm, *tsts):
"test for the current grammar, named nm the tests tsts"
if tsts == None:
tsts = ['2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (']
ll = 5
def treeSplit(nd, b):
"""split the tree nd into linkes,
each line contains the path to one (shifted) token or to one empty rule
intendation shows the children of a rule
the left ll characters, contain the shifted token or spaces for an empty rule
b is the current line, filled with the path of entered rules with the correct intendation
"""
nonlocal ll
if isinstance(nd, str):
return nd[0:ll-1].ljust(ll) + b[ll:] + '==' + nd
c = b + str(nd[0]) + ' '
if len(nd) <= 1:
return c + '==emptyRule'
r = treeSplit(nd[1], c)
c = len(c) * ' '
for ch in nd[2:]:
r += "\n" + treeSplit(ch, c)
return r
for i in range(len(tsts)) :
sT = [x for x in re.split(r'\s+', tsts[i]) if x != ''] # void empty strings, e.g. for ''
print(f"test begin {nm} {i} input: {' '.join(strNS(t) for t in sT)} {strSep1}")
ast = Parser.parse(sT)
if isinstance(ast, str):
print(f'syntax test {nm} {i} input {tsts[i]}: {ast}')
else:
print(f"parsed {str(ast[0])} from {len(sT)} tokens, {Parser.pCnt} steps, {len(State.p2s)} states, Rules {len(Rule.rules)}")
print(treeSplit(ast, ll * ' '))
dbg(5, 'test', 'syntaxed' if isinstance(ast, str) else 'parsed', f"{nm} {i} input {sT} {strSep1}")
@staticmethod
def testSyn(nm, syn, *cs):
"""test named nm, with grammar syn and testcases cs"""
Parser.testGr(nm, syn, *cs)
@staticmethod
def test1():
"""my tests """
g = ["s=s+p", "s = p", "p = p '*' i", "p = i", "i = '(' s ')'", "i = '0'", "i = '1'", "i = '2'"]
Parser.testSyn('arithExpr', g
,'2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +')
g2 = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = epm i d", "epm = pm", "epm ="
, "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]
g = g[1:-3] + ["s = s pm p", "p = p '/' i", "i = epm d", "i = i d", "epm = pm epm", "epm ="
, "pm = '+'", "pm = '-'", "d = '0'", "d = '1'", "d = '2'"]
Parser.testSyn('arithExprPM' , ["s=s pm p", "s=p", "p=p*i", "p=p/i", "p=i", "i=( s )", "i = j"
, "j=epm d", "j=j d", "epm = pm epm", "epm = ", "pm=+", "pm=-", "d=0", "d=1", "d = 2 "]
,'2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
, '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')
Parser.testSyn('arithExprPE' , ["s=s pm p", "s=p", "p=p*e", "p=p/e", "p=e", "e=i", "e=i**e", "i=( s )", " i=j"
, "j=epm d", "j=j d", "epm = pm epm", "epm = ", "pm=+", "pm=-", "d=0", "d=1", "d = 2 "]
,'2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
, '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')
Parser.testSyn('arithExprPE2' , ["s=s pm p", "s=p", "p=p*e", "p=p/e", "p=e"
, "e=i", "e=i**e", "e=pm e", "pm=+", "pm=-", "i=( s )", "i = dd", "dd=d", "dd=d dd", "d=0", "d=1", "d = 2 "]
,'2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )', '1 + 2 (', '1 + 2 +'
, '1 0 / - - 1 1 + + - 1 2 / ( 1 - - 2 / + 0 )')
Parser.testSyn('LR(2)?' , ["S = AA 'a' 'a'", "S = AB 'a' 'b'", "AA =", "AB ="]
,"a a", "a b", "a c", "b a")
@staticmethod
def testKnuth():
"""some of the examples given by Donald E. Knuth. 1965. On the Translation of Languages from Left to Right """
Parser.testSyn('knuth(1)'
, ["S = A D", "A = 'a' C", "B = 'b' 'c' 'd'", "C = B E", "D =", "E = 'e'"]
, "a b c d e", "a b x d e", "a b", "a b c d e f")
Parser.testSyn('knuth(6)'
, ["S = 'a' A 'c'", "A = 'b' A 'b'", "A = 'b'"]
, "a b b b c", "a b c", "a b b c", "a b b b b b b b c e")
Parser.testSyn('knuth(7) lr(0)'
, ["S = 'a' A 'c'", "A = A 'b' 'b'", "A = 'b'"]
, "a b b b c", "a b c", "a b b c", "a b b b b b b b c e")
Parser.testSyn('knuth(16x) lr(3)'
, ["S=B C", "B=C e", "B = ", "C=D", "C=D c", "D=", "D=d"]
, "", "c", "c e", "c e d")
Parser.testSyn('knuth(24)'
, ["S =", "S = 'a' A 'b' S", "S = 'b' B 'a' S", "A =", "A = 'a' A 'b' A", "B =", "B = 'b' B 'a' B"]
, "", "a b", "b a", "a b b", "a a b b", "a b a b", "b a a b")
# "IS = IP", "IS = IS + IP", "IS = IS - IP", "IP = IE", "IP = IP * IE", "IP = IP / IE", "IE = IA", "IE = IA * * IE", "IA = IB", "IA = + IA", "IA = - IA", "IB =( IS )", "IB = ID", "IB = IB ID", "ID = 0", "ID = 1", "ID = 2" ...
def testRe():
x = ' ab cd e?f *+??'
print(f"x={x}")
print("re: lookahead assertion and lookbehind assertions to find boundaries between words and or special character")
for y in re.finditer(r'\s+|(?<=\S)(?=\S)((?<=\W)|(?=\W))', x):
print([y.start(),y.end(), x[y.start(): y.end()+1]])
print("easier and more stable against leading/trailing spaces: use findall for words or special chars")
for y in re.finditer(r'\w+|\S', x):
print([y.start(),y.end(), x[y.start(): y.end()]])
print(re.findall(r'\w+|\S', x))
x += "' '' b 'oder1''1.2und. 3 ' '.a.234 4.. single' !"
print('mit strings and numbers:', x)
print(re.findall(r"""'(?:''|[^'])*' # single quoted strings, doubble apostrophs for a single
# non capturing group (?:...) otherwise findall will return groups!
|\d+(?:\.\d*) # number with leading digits and possible decimal point/digits
|\.\d+ # number with leading decimal points and digits
|\w+ # words
|\S""" # special chars
, x
, re.X)) # re.X = re.VERBOSE flag
if 0:
testRe()
exit()
if 0:
LahIter.test()
l = [0, 1, 2]
s = '012'
print([(i, l[0:i], s[0:i]) for i in range(6)])
print([(x, y) for x in range(3) for y in range(2)])
print([(x, y) for x in range(3) for y in range(3*x, 4*x+1)])
print([(x, y) for x in range(3) if x > 0 for y in range(3*x, 4*x+1) if y > x])
exit()
Parser.test1()
Parser.testKnuth()
print('end', __file__)