Let's say I have a text file with the content below:
fdsjhgjhg
fdshkjhk
Start
Good Morning
Hello World
End
dashjkhjk
dsfjkhk
Now I need to wr
Using itertools.dropwhile, itertools.takewhile, itertools.islice:
import itertools
# Copy the first Start...End block from data.txt into result.txt, leaving out
# the marker lines themselves. If no 'Start' line exists the output is empty;
# if no 'End' line follows, everything after 'Start' is copied.
with open('data.txt') as f, open('result.txt', 'w') as fout:
    # Discard every line that precedes the 'Start' marker.
    from_marker = itertools.dropwhile(lambda ln: ln.strip() != 'Start', f)
    # Skip the 'Start' line itself, then stop at the first 'End' line.
    block_lines = itertools.takewhile(
        lambda ln: ln.strip() != 'End',
        itertools.islice(from_marker, 1, None),
    )
    fout.writelines(block_lines)
UPDATE: As inspectorG4dget commented, the above code copies only the first block. To copy multiple blocks, use the following:
import itertools
# Copy every Start...End block from data.txt into result.txt, one after the
# other, leaving out the marker lines.
with open('data.txt', 'r') as f, open('result.txt', 'w') as fout:
    while True:
        # Advance the shared file iterator to the next 'Start' line.
        remaining = itertools.dropwhile(lambda ln: ln.strip() != 'Start', f)
        # next() consumes the 'Start' line; None means the file ran out
        # before another marker was found.
        start_line = next(remaining, None)
        if start_line is None:
            break
        # takewhile also consumes the terminating 'End' line, so the next
        # pass resumes right after it.
        block_lines = itertools.takewhile(
            lambda ln: ln.strip() != 'End', remaining
        )
        fout.writelines(block_lines)
Just in case you have multiple "Start"s and "End"s in your text file, this will copy all the data together, excluding all the "Start"s and "End"s.
# Stream the input once, writing only the lines that sit between a "Start"
# line and the following "End" line. Marker lines are never written.
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
    inside_block = False
    for line in infile:
        marker = line.strip()
        if marker in ("Start", "End"):
            # A marker only switches the copy state: on for "Start",
            # off for "End".
            inside_block = marker == "Start"
        elif inside_block:
            outfile.write(line)
If the text files aren't very large, you can read the whole content of the file and then use a regular expression:
import re
# Read data.txt in full and write the first "Start\n ... End" span to
# result.txt. The written text includes the Start and End markers themselves,
# because the whole match is used.
with open('data.txt') as myfile:
    content = myfile.read()

# DOTALL lets '.' cross line breaks; the non-greedy '.*?' stops at the first
# 'End' that follows the 'Start' line.
match = re.search(r'Start\n.*?End', content, re.DOTALL)

# re.search returns None when nothing matches; calling .group() on it would
# raise AttributeError, so fall back to an empty result instead.
text = match.group() if match is not None else ''

with open("result.txt", "w") as myfile2:
    myfile2.write(text)
I'm not a Python expert, but this code should do the job.
# Copy the lines found between a line beginning with "Start" and the next line
# beginning with "End" from data.txt into result.txt.
#
# Both files are managed by a single `with` statement so they are closed even
# if reading or writing raises part-way through.
with open("data.txt") as inFile, open("result.txt", "w") as outFile:
    keepCurrentSet = False
    for line in inFile:
        # The order of the three checks matters: "End" is tested before the
        # write so the End line is not copied, and "Start" is tested after
        # the write so the Start line that opens a block is not copied.
        if line.startswith("End"):
            keepCurrentSet = False
        if keepCurrentSet:
            outFile.write(line)
        if line.startswith("Start"):
            keepCurrentSet = True
import re
# Read data.txt in full and write the contents of every Start...End block
# (without the marker lines) to result.txt.
#
# Both files are managed by `with` so they are closed even on error.
with open("data.txt") as inFile, open("result.txt", "w") as outFile:
    # Read the file in one call rather than concatenating line by line.
    buffer1 = inFile.read()
    # The markers sit on their own lines, so the pattern is anchored to line
    # starts (MULTILINE) and '.' must be allowed to cross newlines (DOTALL).
    # The previous pattern required literal spaces around the captured text
    # and could not span lines, so it never matched newline-delimited input.
    blocks = re.findall(
        r"^Start\n(.*?)^End$", buffer1, re.DOTALL | re.MULTILINE
    )
    outFile.write("".join(blocks))
Move the outFile.write call into the 2nd if:
# Copy each complete Start...End block from data.txt into result.txt.
# Lines are collected in a buffer and only written once the closing "End"
# line is reached, so a block that is never closed is not written.
#
# Both files are managed by `with` so they are closed even on error.
with open("data.txt") as inFile, open("result.txt", "w") as outFile:
    # None means "not inside a block"; a list (possibly empty) means lines
    # are currently being collected.
    buffer = None
    for line in inFile:
        if line.startswith("Start"):
            # Begin a fresh block; the marker line itself is not kept.
            buffer = []
        elif line.startswith("End"):
            if buffer is not None:
                outFile.write("".join(buffer))
            buffer = None
        elif buffer is not None:
            buffer.append(line)