I am trying to import data from a CSV file into AWS DynamoDB.
Here's what my CSV file looks like:
first_name last_name
sri ram
Rahul Dravid
JetPay Underw
You can try using batch writes and multiprocessing to speed up your bulk import.
import csv
import time
import boto3
from multiprocessing.dummy import Pool as ThreadPool
# Pool of 4 workers shared by the migration for concurrent batch writes.
pool = ThreadPool(4)


def current_milli_time():
    """Return the current wall-clock time rounded to whole milliseconds."""
    seconds = time.time()
    return int(round(seconds * 1000))


# DynamoDB resource handle and the table that receives the imported items.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('table_name')
def add_users_in_batch(data):
    """Put every item in *data* into the module-level ``table``.

    Args:
        data: Iterable of item dicts; each one is passed as ``Item`` to the
            batch writer's ``put_item``.
    """
    # All puts go through a single batch writer obtained from the table; the
    # ``with`` block scopes the writer to this one batch of items.
    with table.batch_writer() as writer:
        for record in data:
            writer.put_item(Item=record)
def run_batch_migration():
    """Load the CSV at ``CSV_PATH`` into DynamoDB using parallel batch writes.

    The file is parsed as tab-delimited text with ``|`` as the quote
    character. Every non-blank row becomes an item whose ``email`` is the
    first column and whose ``country`` is the second. Items are grouped into
    batches of 25, and the batches are handed to ``add_users_in_batch``
    through the module-level ``pool``. The number of rows processed and the
    elapsed time are printed once the writes have been dispatched and
    ``pool.map`` has returned.

    Raises:
        IndexError: If a non-blank row has fewer than two columns.
    """
    # NOTE(review): 25 presumably mirrors DynamoDB's per-request
    # BatchWriteItem item limit -- confirm before changing.
    batch_size = 25

    start = current_milli_time()
    row_count = 0
    batch = []
    batches = []
    with open(CSV_PATH, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
        for row in reader:
            # csv.reader yields an empty list for blank lines (e.g. a
            # trailing empty line); indexing it would raise IndexError.
            if not row:
                continue
            row_count += 1
            batch.append({
                'email': row[0],
                'country': row[1],
            })
            if len(batch) == batch_size:
                batches.append(batch)
                batch = []
    # Only submit the remainder when it holds items; otherwise an empty batch
    # would be dispatched whenever the row count is a multiple of the batch
    # size (including an empty file).
    if batch:
        batches.append(batch)
    pool.map(add_users_in_batch, batches)
    print('Number of rows processed - ', str(row_count))
    end = current_milli_time()
    print('Total time taken for migration : ', str((end - start) / 1000), ' secs')
# Start the migration only when this file is executed as a script, so that
# importing the module does not trigger a bulk write.
if __name__ == "__main__":
    run_batch_migration()