This question has been asked and answered many times before. Some examples: [1], [2]. But there doesn't seem to be a somewhat more general solution. What I'm looking for is
using iterators and generators:
def tokenize(txt, delim=',', pairs=None):
    """Split *txt* on *delim*, ignoring delimiters nested inside bracket pairs.

    Parameters:
        txt:   iterable of characters (typically a str) to split.
        delim: separator character (default ',').
        pairs: mapping of opening -> closing bracket characters.  A pair whose
               opener equals its closer (e.g. '"') toggles in/out of a quoted
               region.  Defaults to {'"': '"', '<': '>', '(': ')'}.

    Yields each token as a string.  Empty input yields nothing, and a trailing
    delimiter does not produce a trailing empty token.

    >>> list(tokenize('a<1, 2>,"x, y",b'))
    ['a<1, 2>', '"x, y"', 'b']
    """
    from collections import defaultdict

    # Default resolved here rather than as a mutable default argument.
    if pairs is None:
        pairs = {'"': '"', '<': '>', '(': ')'}
    openers = set(pairs)
    closers = set(pairs.values())

    depth = defaultdict(int)  # closer char -> current nesting depth
    token = []                # characters of the token being built

    # A plain for-loop replaces the hand-rolled it.__next__() calls: under
    # PEP 479 (Python 3.7+) a StopIteration escaping a generator body becomes
    # a RuntimeError, so the original crashed at end of input.
    for ch in txt:
        if ch == delim and not any(depth.values()):
            # Unnested delimiter: emit the finished token, start a new one.
            yield ''.join(token)
            token = []
            continue
        # Test "closer" BEFORE "opener" so symmetric pairs such as '"' toggle
        # correctly.  The original tested `fst` first, counting every quote as
        # an opener, so the count never returned to zero and everything after
        # one quoted section stuck together in a single token.  Guarding with
        # `depth[ch]` also clamps at zero, so a stray unmatched closer cannot
        # drive the count negative and suppress all later splits.
        if ch in closers and depth[ch]:
            depth[ch] -= 1
        elif ch in openers:
            depth[pairs[ch]] += 1
        token.append(ch)
    if token:
        yield ''.join(token)
and,
>>> txt = 'obj<1, sub<6, 7>, 3>,x(4, y(8, 9), 5),"msg, with comma"'
>>> [x for x in tokenize(txt)]
['obj<1, sub<6, 7>, 3>', 'x(4, y(8, 9), 5)', '"msg, with comma"']