I am trying to read a large csv file (aprox. 6 GB) in pandas and i am getting a memory error:
MemoryError Traceback (most recen
If you use pandas read large file into chunk and then yield row by row, here is what I have done
import pandas as pd
def chunck_generator(filename, header=False,chunk_size = 10 ** 5):
for chunk in pd.read_csv(filename,delimiter=',', iterator=True, chunksize=chunk_size, parse_dates=[1] ):
yield (chunk)
def _generator( filename, header=False,chunk_size = 10 ** 5):
chunk = chunck_generator(filename, header=False,chunk_size = 10 ** 5)
for row in chunk:
yield row
if __name__ == "__main__":
filename = r'file.csv'
generator = generator(filename=filename)
while True:
print(next(generator))