I have a text file with the following structure:
start
id=1
date=21.05.2018
summ=500
end
start
id=7
date=23.05.2018
summ=500
owner=guest
end
You can build a simple parser that walks the lines of the file and collects the data found between each `start` and `end` marker:
import re
class Parser:
    """Parse ``key=value`` records delimited by ``start`` and ``end`` lines.

    Every ``start`` ... ``end`` block in the input becomes one ``dict`` in
    ``self.results``; keys and values are kept as strings.  Lines that appear
    outside of a block are ignored.  Parsing happens eagerly on construction.

    Raises:
        Exception: if a ``start`` line has no matching ``end`` line.
        ValueError: if a line inside a block contains no ``=``.
    """

    def __init__(self, source: str):
        # Strip each line so indentation and the '\r' left behind by Windows
        # line endings cannot hide the 'start' / 'end' markers; blank lines
        # are dropped entirely.
        stripped = (line.strip() for line in source.split('\n'))
        self.source = iter(filter(None, stripped))
        self.results = []
        self.parse()

    @staticmethod
    def to_dict(between_blocks):
        """Turn an iterable of ``key=value`` lines into a dict of strings.

        Only the first ``=`` separates key from value, so values may contain
        ``=`` themselves.  Whitespace around key and value is removed.
        """
        record = {}
        for entry in between_blocks:
            if not entry.strip():
                continue
            key, separator, value = entry.partition('=')
            if not separator:
                raise ValueError(f"Expected 'key=value', got {entry!r}")
            record[key.strip()] = value.strip()
        return record

    def parse(self):
        """Consume ``self.source`` and append one dict per block to ``self.results``."""
        # Iterative on purpose: recursing once per record overflows the
        # interpreter's recursion limit on files with many blocks.
        for line in self.source:
            if line != 'start':
                continue
            scope = []
            # The inner loop shares the iterator with the outer one, so the
            # outer loop resumes right after the 'end' line.
            for inner in self.source:
                if inner == 'end':
                    break
                scope.append(inner)
            else:
                # Input ran out before the block was closed.
                raise Exception("Missing 'end' tag")
            self.results.append(Parser.to_dict(scope))

    def __repr__(self):
        return f'{type(self).__name__}({self.results})'
# Read the whole file, parse it, and show the list of parsed records.
# The context manager closes the file handle even if parsing raises.
with open('filename.txt') as handle:
    print(Parser(handle.read()).results)
Output:
[{'id': '1', 'date': '21.05.2018', 'summ': '500'}, {'id': '7', 'date': '23.05.2018', 'summ': '500', 'owner': 'guest'}]
Tests:
# Each case pairs an input text with the outcome expected from Parser:
# the Exception class when parsing must fail, or None when it must succeed.
tests = [[
"""
start
id=1
date=21.05.2018
summ=500
""", Exception],
[
"""
start
name = someone
age = 18
id = 23
end
start
name = someoneelse
age = 45
id = 55
end
start
name = lastname
age = 34
id = 5
end
""", None]
]
for text, expected_error in tests:
    try:
        Parser(text)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not mistaken for a parse failure.
        assert expected_error is Exception
    else:
        assert expected_error is None
print('all tests passed')
Output:
all tests passed