# python/regEx.py
import re

# Demo 1: split an expression into tokens with re.split.
print('start split')
s = 'a + bcd * e* #fg*x?+-hij*234/= !'

# Both patterns split
#   * on every run of whitespace (\s+), and
#   * at every zero-width position that lies between two non-space
#     characters of which at least one is a non-word character (\W),
# so words stay together while each operator character becomes its own token.
# Splitting on zero-width matches requires Python 3.7 or later.
# Raw strings are used so that \s, \S and \W reach the regex engine untouched
# instead of being parsed as (invalid) string escape sequences.
SPLIT_VERBOSE = r'\s+|(?<=\S)(?<=\W)(?=\S)|(?=\S)(?=\W)(?<=\S)'
# Same condition with the shared look-arounds factored out. The alternation
# must stay non-capturing, (?:...): with a capturing group re.split would also
# insert the group's captures (empty strings or None) into the result list.
SPLIT_COMPACT = r'\s+|(?<=\S)(?=\S)(?:(?<=\W)|(?=\W))'

print(s, 'split=>', re.split(SPLIT_VERBOSE, s))
print(s, 'split=>', re.split(SPLIT_COMPACT, s))
# NOTE(review): the compact pattern is printed a second time; kept so the
# script's output stays the same - drop this line if the repeat is accidental.
print(s, 'split=>', re.split(SPLIT_COMPACT, s))
# Demo 2: tokenize text that contains quoted strings with re.findall.
print('start quote')
s = 'ab+"e ""+""f""" g*?#h\' k " l \' m "x?y a=)'

# Token grammar; the alternatives are tried left to right at each position:
#   \w+                a word
#   '(?:[^']|'')*'?    a single-quoted string; '' inside it stands for an
#                      embedded quote; the closing quote is optional, so a
#                      string without end quote runs to the end of the input
#   "(?:[^"]|"")*"?    the same for double-quoted strings, with "" embedded
#   \S                 any other single non-space character
# The groups are non-capturing, (?:...), on purpose: with a capturing group
# re.findall would return the group's text instead of the whole match.
TOKEN_PATTERN = r"""\w+|'(?:[^']|'')*'?|"(?:[^"]|"")*"?|\S"""


def tokenize(text):
    """Return the list of tokens that TOKEN_PATTERN finds in *text*."""
    return re.findall(TOKEN_PATTERN, text)


def unterminated_quote(tokens):
    """Return the last token if it is a quoted string without its end quote.

    Only the last token can be unterminated, because a string that lacks its
    end quote swallows everything up to the end of the input.

    A quoted token consists of the opening quote, any number of doubled
    (embedded) quotes and, if present, the closing quote.  It is therefore
    terminated exactly when it contains an even number of its quote
    character.  Comparing only the first and last character is not enough:
    a token such as 'a'' ends with an embedded quote pair and is still
    unterminated.

    Returns None when *tokens* is empty or the last token is terminated or
    is not a quoted string.
    """
    if not tokens:
        return None
    last = tokens[-1]
    quote = last[:1]
    if quote in ('"', "'") and last.count(quote) % 2 == 1:
        return last
    return None


f = tokenize(s)
print(s, 'findall=>', f)
if (unterminated := unterminated_quote(f)) is not None:
    print('missing endquote string:', unterminated)