python/parser2.py
# walter's erster versuch mit Python
#
# starten mit python -i basics.py
def err(*e):
print('error:', *e)
exit()
from datetime import * ;
import re;
class PObj:
n2o = []
def __init__(self, nm = ''):
self.no = len(self.n2o)
self.name = f"{self.__class__.__name__ if nm == '' else nm}#{self.no}"
self.n2o.append(self)
def __str__ (self):
return self.name
def __repr__ (self, at=''):
return self.name
def sExp (self, at=''):
return self.name if at == '' else f'{self.name}{{{at}}}'
@staticmethod
def test1 ():
PObj('null')
PObj()
PObj('zwei')
print(f"test1 n2o={PObj.n2o}")
print(f"test1 str", [str(o) for o in PObj.n2o])
@staticmethod
def print(m):
print(m)
p = '['
for o in PObj.n2o:
print(f"{p} {o.sExp()}")
p = ','
print(']')
end = 'end'
dflt = 'dflt'
rStart = 'start'
lrK = 0
mLahC = 0
mLahO1 = False
class Rule (PObj):
rules = None
def __init__(self, src):
spl = re.split(r'\s+', src)
assert(spl[1] == '=')
self.sym = spl[0]
super().__init__(f"R#{spl[0]}")
self.src = src
self.body = spl[2:]
self.pos0 = None
self.lah = {()}
def sExp (self, at=''):
return super().sExp(f'sym={self.sym!s} src={self.src} body={self.body} pos0={self.pos0!s} lah={self.lah}')
@staticmethod
def test2 ():
PObj.test1()
Rule("drei = b '+' c")
print(f"test2 n2o={PObj.n2o}")
print(f"test2 str", [str(o) for o in PObj.n2o])
@staticmethod
def make(*src):
Rule(f"{rStart} = ?")
for r in src:
Rule(r);
PObj.n2o[0].body[0] = PObj.n2o[1].sym
Rule.rules = Rule.n2o.copy()
@staticmethod
def makeLah(toK):
global mLahC, lrK
tS = datetime.now()
mLahC = 0
stop = set()
for k in range(1, toK+1):
for r in Rule.rules:
assert len(stop) == 0
r.lah = r.mLah(k, stop)
print(f"mLahC={mLahC}, {(datetime.now() - tS).total_seconds()}sec, {len(r.lah)} {r}")
lrK = k
@staticmethod
def makeLah2():
global mLahC
tS = datetime.now()
mLahC = 0
stop = {}
for r in Rule.rules:
r.mLah2(lrK, stop)
print(f"mLahC={mLahC}, {(datetime.now() - tS).total_seconds()}sec, {len(r.lah)} {r}")
def mLah(self, k, stop):
global mLahC
mLahC += 1
# print(f"mLah2 {self} {k} {stop}")
assert k >= 1
assert k == lrK + 1
if self in stop:
return set()
stop.add(self)
lP = {()}
lF = set()
for y in self.body:
if isinstance(y, str):
lQ = set()
for o in lP:
n = o + (y, )
if len(n) < k:
lQ.add(n)
else:
lF.add(n)
lP = lQ
else:
lE = set()
lD = set()
hasEmpty = () in lP
for r in y.rules:
if hasEmpty:
lE |= r.mLah(k, stop)
if len(lP) > 0:
lD |= r.lah
lQ = set()
if hasEmpty:
lP.remove(())
for o in lP:
for p in lD:
if len(o) + len(p) < k:
lQ.add(o + p)
else:
lF.add((o + p)[0:k])
lP = lQ
if hasEmpty:
for n in lE:
if len(n) < k:
lP.add(n)
else:
lF.add(n)
if len(lP) == 0:
break
stop.remove(self)
return lF | lP
def mLah2(self, k, stop):
global mLahC
mLahC += 1
# print(f"mLah2 {self} {k} {stop}")
assert k >= 1
if mLahO1:
if self.lah != None:
return self.lah
r = {()}
if self not in stop:
stop[self] = 1
elif stop[self] <= lrK:
stop[self] += 1
else:
return None
for p in range(len(self.body)):
rMin = min(len(s) for s in r)
if rMin >= k:
break
y = self.body[p]
if isinstance(y, str):
#print(f"y={y} {k} {r}")
r = {e if len(e) >= k else e + (y,) for e in r}
#print(f"y={y} ==> {r}")
else:
r3 = set()
cnt = 0
hasEmp = () in r
for q in y.rules:
#print(f"q={q} {r}")
r2 = q.mLah2(k-rMin, stop)
if r2 != None:
cnt += 1
r3 |= {e if len(e) >= k else (e + f)[0 : k] for e in r for f in r2}
# print(f"q={q} ==> {r3} <== r={r} r2={r2} k={k}")
# print(f"after {y} cnt={cnt} {r3} <== {r}")
if cnt < 1:
stop[self] -= 1
return None
r = r3
# print(f"mLah2 {self} {k} ==> {r}")
stop[self] -= 1
if k == lrK and all(0 == v for v in stop.values()):
#print(f"{self} {k} returning {r} lah={self.lah}")
if self.lah == None:
self.lah = frozenset(r)
else:
assert self.lah == r
return r
class RuleSym (PObj):
n2y = {}
def __init__(self, sym):
self.sym = sym
super().__init__(f"Y#{sym}")
self.rules = []
def sExp (self):
return super().sExp(f"sym={self.sym!s} {self.rules}")
@staticmethod
def make():
for r in Rule.rules:
if r.sym not in RuleSym.n2y:
RuleSym.n2y[r.sym] = RuleSym(r.sym)
RuleSym.n2y[r.sym].rules.append(r)
r.sym = RuleSym.n2y[r.sym]
for r in Rule.rules:
b = r.body
for ix in range(len(b)):
v = b[ix]
if v[0] == "'" and v[-1] == "'":
b[ix] = v[1: len(v)-1]
assert (b[ix] == end) == (r.sym.sym == rStart and ix > 0)
elif v in RuleSym.n2y:
b[ix] = RuleSym.n2y[v]
elif v != end:
err(f"rule {v} in {r} not in", ruleD)
class RulePos (PObj):
def __init__(self, r, p):
super().__init__('P')
self.rule = r
self.pos = p
self.next = None
def sExp (self):
return super().sExp(f"{self.rule!s} {self.pos} next={self.next!s}")
@staticmethod
def make():
for r in Rule.rules:
b = r.body
r.pos0 = RulePos(r, 0)
for ix in range(1, len(b)+1):
n = RulePos(r, ix)
PObj.n2o[-2].next = n
class RuleState (PObj):
p2s = {}
start = None
states = None
def __init__(self, ps):
super().__init__('S')
self.pos = frozenset(ps)
self.to = {}
# self.stack = None
# self.red = {}
self.lah = 0
self.p2s[self.pos] = self
for p in self.pos:
if p.next != None:
e = p.rule.body[p.pos]
if e in self.to:
self.to[e].add(p.next)
else:
self.to[e] = {p.next}
def sExp (self):
return super().sExp(f"{self.pos} to={self.to} lah={self.lah}")
def makeStack(self):
for k, v in self.to.items():
n = self.stack + [v]
if v.stack == None:
v.stack = n
v.makeStack()
elif er:
PObj.print('stack mix max')
err(f"self={self} k={k} v={v} n={n}")
for p in self.pos:
if p.next != None:
continue
r = p.rule
for x in range(p.no, r.pos0.no - 1, -1):
print(f"reduce {r} {p} {x} {PObj.n2o[x]}")
def addLah(self, len):
assert len == self.lah + 1
stop = set()
for k,v in self.to.items():
if len == 1 and not isinstance(v, tuple):
v = v, {}
self.to[k] = v
assert isinstance(v, tuple)
@staticmethod
def posExp(pp):
work = pp.copy()
done = set()
while True:
try:
p = work.pop()
except KeyError:
break
done.add(p)
if p.next != None:
e = p.rule.body[p.pos]
if isinstance(e, RuleSym):
for r in e.rules:
if r.pos0 not in done:
work.add(r.pos0)
pp |= done
@staticmethod
def make():
pp = {Rule.rules[0].pos0}
RuleState.posExp(pp)
RuleState.start = RuleState(frozenset(pp))
work = {RuleState.start}
while True:
try:
s = work.pop()
except KeyError:
break
for k, v in s.to.items():
RuleState.posExp(v)
w = frozenset(v)
if w in RuleState.p2s:
s.to[k] = RuleState.p2s[w]
else:
n = RuleState(w)
s.to[k] = n
work.add(n)
for p in s.pos:
if p.next == None:
s.to[p.rule] = None
RuleState.states = PObj.n2o[RuleState.start.no:]
@staticmethod
def makeRed():
for s in RuleState.states:
for y, t in s.to.items():
if isinstance(y, RuleSym):
print(f"red {s} y={y} t={t}")
for r in y.rules:
RuleState.addRed(r, s, t)
@staticmethod
def addRed(r, s, t):
x = 0
p = r.pos0
for y in r.body:
assert p.pos == x
assert r.body[x] == y
assert p in s.pos
s = s.to[y]
p = p.next
x += 1
assert x == len(r.body)
assert p.pos == x
assert p.next == None
assert p in s.pos
if r in s.red:
s.red[r].add(t)
else:
s.red[r] = {t}
@staticmethod
def makeLahX(k):
pEnd = RuleState.states[0]
pEnd = pEnd.to[PObj.n2o[0].body[0]]
pEnd = pEnd.to[end]
assert len(pEnd.to) == 1
assert end in pEnd.to
if pEnd.lah < 1:
pEnd.to[end] = (pEnd.to[end], {(end,)})
for s in RuleState.states:
s.addLah(1, set())
Rule.make("s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s ')'", "i = '0'", "i = '1'", "i = '2'")
print('rules', PObj.n2o)
RuleSym.make()
print('RuleSym n2y', RuleSym.n2y)
print(f'RuleSym n2y {{}} {RuleSym.n2y}')
print(f'RuleSym n2y {{!s}} {RuleSym.n2y!s}')
PObj.print('RuleSym')
Rule.makeLah(4)
PObj.print('RuleSym lah')
[print(f) for f in sorted([ ''.join(e) for e in PObj.n2o[0].lah])]
err('end test')
RulePos.make()
print('RulePos', PObj.n2o)
RuleState.make()
PObj.print('RuleState')
RuleState.addLah(1)
PObj.print('RuleState lah 1')
#RuleState.makeRed()
#RuleState.start.stack = [RuleState.start]
# print('RuleState start', repr(RuleState.start))
# RuleState.start.makeStack()
#PObj.print('RuleState red')
#Rule.test2()
import re;
if False:
class wk:
def __init__(self):
if False:
self.eins = 'null'
def __repr__ ( self):
return 'wk' + repr(self.__dict__)
o = wk()
print('o', o)
o.eins = 'eins'
print('o', o)
end = 'eof'
dflt = 'dflt'
ruleStr = ["s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s ')'", "i = '0'", "i = '1'", "i = '2'"]
print("ruleStr", ruleStr);
tb = [re.split(r'\s+', r) for r in ruleStr]
tb.insert(0, ['star t ', '=', tb[0][0]])
rules = tb.copy()
accX = len(tb)
failX = accX+1
RuleSymX = failX + 1
tb += ['accept', 'fail']
print("rules", tb)
ruleset4name = {}
ruleset4rule = []
for i in range(accX):
r = tb[i]
if r[1] != '=':
err("r[1] not =", r)
if r[0] in ruleset4name:
s = ruleset4name[r[0]]
tb[s].append(i)
else:
s = len(tb)
ruleset4name[r[0]] = s
tb.append(['ruleset', r[0], s, i])
r[0:2] = ['rule', r[0], i]
assert len(ruleset4rule) == i
ruleset4rule.append(s)
handleX = len(tb)
print("ruleset4name", ruleset4name, 'ruleset4rule', ruleset4rule)
print('tb', tb)
for r in rules:
for i in range(3, len(r)):
v = r[i]
if v[0] == "'" and v[-1] == "'":
r[i] = v[1: len(v)-1]
elif v in ruleset4name:
r[i] = ruleset4name[v]
elif v != eof:
err(f"rule {v} in {r} not in", ruleD)
print("tb after rulemap", tb)
class Handle:
byStart = {}
def __init__(self, s):
self.start = frozenset(s)
assert self.start not in Handle.byStart
self.tbX = len(tb)
tb.append(self)
self.byStart[self.start] = self.tbX
self.map = {}
self.go = {}
self.goDef = None
def __repr__ ( self):
return 'handle' + repr(self.__dict__)
def mapAdd(self, l):
rx, px = l[-1]
r = tb[rx]
if px > len(r):
return
nx = (rx, px+1)
c = r[px] if px < len(r) else dflt
#print(f'mapadd enter c {c}, l {l}, self {self}');
if isinstance(c, str):
if c in self.map:
self.map[c].add(nx)
else:
self.map[c] = {nx}
elif isinstance(c, int):
assert c >= rulesetX and c < handleX
rs = tb[c]
# forward: search tree for next terminal avoid infinite recursion
for cx in rs[3:]:
if (cx, 3) not in l:
self.mapAdd(l + [(cx, 3)])
# afterward next step
if c in self.map:
self.map[c].add(nx)
else:
self.map[c] = {nx}
#print(f'handleadd exit l {l}, handle {self}');
@staticmethod
def mapExp():
hx = handleX
print('mapExp0', handleX, 'tb', len(tb))
while hx < len(tb):
h = tb[hx]
for k in h.map:
strt = frozenset(h.map[k])
h.map[k] = strt
if strt not in Handle.byStart:
n = None
for s in strt:
if s[1] <= len(tb[s[0]]):
if n == None:
n = Handle(strt)
n.mapAdd([s])
hx += 1
print('mapExp', handleX, 'tb', len(tb))
@classmethod
def genGo(cl):
print('genGo 0')
for hx in Handle.byStart.values():
h = tb[hx]
# h.go = {}
for k in h.map:
v = h.map[k]
if len(v) != 1:
r = cl.byStart[v]
else:
for e in v:
if e[1] > len(tb[e[0]]) :
r = e[0]
else:
r = cl.byStart[v]
if k == dflt:
h.goDef = r
else:
h.go[k] = r
print('genGo 9')
h = Handle({(0, 3)})
print('handle', h, 'tb', tb);
h.mapAdd([(0, 3)])
print('Handle.byStart', Handle.byStart, tb);
Handle.mapExp()
print('Handle.mapExp', Handle.byStart, tb);
accRS = ruleset4rule[0]
print(f"accRS {accRS}, {handleX} {tb[handleX]} accX={accX}")
accH = 0, len(tb[0])
print('accH', accH)
cnt = 0
for h in [h for h in tb[handleX:] if accH in h.start]:
cnt += 1
print(f'accH {h}')
assert eof not in h.map
h.map[eof] = frozenset([(accX, 999)])
assert cnt >= 1
assert accRS not in tb[handleX].map
#tb[handleX].map[accRS] = frozenset([(accX,99)])
print(f"accRS2 {accRS}, {handleX} {tb[handleX]}")
Handle.genGo()
print('Handle.genGo', Handle.byStart);
def run(inp):
inI = iter(inp + [eof])
st = [[handleX, 'st 0']]
lah = next(inI)
while True:
tos = st[-1]
print(f'run1 lah={lah} tos={tos}, st={len(st)} {st}')
m = tb[tos[0]]
go = m.go[lah] if lah in m.go else m.goDef
if go == None or go == failX:
err(f"bad lah={lah} at {[tb[s[0]][1] + '.' + str(s[1]) for s in m.start]}, "
f"expected {[k for k in m.map.keys() if isinstance(k, str)]}, "
f"go {go}, st={len(st)} {st}")
elif go >= handleX: #shift
st.append([go, lah])
lah = next(inI)
elif go < accX:
r = tb[go]
n = [r[1], go]
nx = len(st) - len(r) + 3
for i in range(nx, len(st)):
n.append(st[i][1])
pa = tb[st[nx-1][0] ]
#assert pa[0] == 'handle'
rs = ruleset4rule[go]
if rs not in pa.go:
err(f'reduce {rs} missing in pa', pa, 'stack', st)
st[nx] = [pa.go[rs], n]
del st[nx+1:]
elif go == accX:
assert lah == eof
assert len(st) == 2
return st[1][1]
else:
err(f"bad go {go}, lah={lah}, st={st}")
def treeSplit(nd, pr):
if isinstance(nd, str):
return pr + nd
r = pr + nd[0] + '.' + str(nd[1])
for ch in nd[2:]:
r += treeSplit(ch, pr + ' ')
return r
def treeSpli2(nd, of):
if isinstance(nd, str):
return nd
r = nd[0] + '.' + str(nd[1]) + ' '
of += len(r)
r += treeSpli2(nd[2], of)
for ch in nd[3:]:
r += "\n" + of * ' ' + treeSpli2(ch, of)
return r
for s in ['2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )']:
ast = run(re.split(r'\s+', s))
print('run', s , ' ==>', treeSplit(ast, "\n "))
print('run', s , ' ==2\n ' + treeSpli2(ast, 2))
print('end', __file__)