I am trying to import a CSV file data into AWS DynamoDB.
Here\'s what my CSV file looks like:
first_name last_name
sri ram
Rahul Dravid
JetPay Underw
Here's my solution. I relied on the fact that there was some type of header indicating what column did what. Simple and straight forward. No pipeline nonsense for a quick upload..
import os, json, csv, yaml, time
from tqdm import tqdm
# For Database
import boto3
# Variable store
environment = {}
# Environment variables
with open("../env.yml", 'r') as stream:
try:
environment = yaml.load(stream)
except yaml.YAMLError as exc:
print(exc)
# Get the service resource.
dynamodb = boto3.resource('dynamodb',
aws_access_key_id=environment['AWS_ACCESS_KEY'],
aws_secret_access_key=environment['AWS_SECRET_KEY'],
region_name=environment['AWS_REGION_NAME'])
# Instantiate a table resource object without actually
# creating a DynamoDB table. Note that the attributes of this table
# are lazy-loaded: a request is not made nor are the attribute
# values populated until the attributes
# on the table resource are accessed or its load() method is called.
table = dynamodb.Table('data')
# Header
header = []
# Open CSV
with open('export.csv') as csvfile:
reader = csv.reader(csvfile,delimiter=',')
# Parse Each Line
with table.batch_writer() as batch:
for index,row in enumerate(tqdm(reader)):
if index == 0:
#save the header to be used as the keys
header = row
else:
if row == "":
continue
# Create JSON Object
# Push to DynamoDB
data = {}
# Iterate over each column
for index,entry in enumerate(header):
data[entry.lower()] = row[index]
response = batch.put_item(
Item=data
)
# Repeat
One way of importing/exporting stuff:
"""
Batch-writes data from a file to a dynamo-db database.
"""
import json
import boto3
# Get items from DynamoDB table like this:
# aws dynamodb scan --table-name <table-name>
# Create dynamodb client.
client = boto3.client(
'dynamodb',
aws_access_key_id='',
aws_secret_access_key=''
)
with open('', 'r') as file:
data = json.loads(file.read())['Items']
# Execute write-data request for each item.
for item in data:
client.put_item(
TableName='',
Item=item
)