I am very new to Python. I want to parse a CSV file so that quoted values are recognized as single fields, for example:
1997,Ford,E350,"Super, luxurious truck"
The csv.py module is probably fine - but if you want to see and/or control how it works, here is a small pure-Python solution based on a coroutine:
def csv_parser(delimiter=','):
    """Coroutine that splits one CSV record into fields, character by character.

    Protocol:
      * Prime the coroutine with ``next()``; that first call returns ``''``.
      * ``send()`` the characters of the record one at a time, then send
        ``None`` to mark the end of the record.
      * Each ``send()`` returns ``None`` while a field is still being read,
        or the text of the field that was just completed (possibly ``''``)
        when the character sent was the delimiter or the ``None`` end marker.
      * After the end marker has been answered the coroutine is finished;
        sending anything further raises ``StopIteration``.

    Parsing rules:
      * A field whose first non-space character is ``"`` or ``'`` is quoted.
        The delimiter loses its meaning inside the quotes, and a doubled
        quote character stands for one literal quote.  Spaces before the
        opening quote and anything between the closing quote and the next
        delimiter are discarded.
      * For unquoted fields, leading spaces are kept as part of the field.
      * If the record ends inside a quoted field, the text collected so far
        is emitted instead of being lost.

    Args:
        delimiter: the single character that separates fields.
    """
    field = []
    while True:
        # Hand the completed field back to the sender and wait for the first
        # character of the next field.
        char = (yield ''.join(field))
        field = []

        # Collect leading spaces; whether they are kept depends on whether
        # the field turns out to be quoted.
        leading_whitespace = []
        while char == ' ':
            leading_whitespace.append(char)
            char = (yield)

        if char == '"' or char == "'":
            quote = char
            char = (yield)
            while char is not None:
                if char == quote:
                    # Either the closing quote or the first half of an
                    # escaped (doubled) quote: look at the next character.
                    char = (yield)
                    if char != quote:
                        break
                field.append(char)
                char = (yield)
            # Ignore whatever follows the closing quote up to the delimiter.
            while char is not None and char != delimiter:
                char = (yield)
        else:
            field = leading_whitespace
            while char is not None and char != delimiter:
                field.append(char)
                char = (yield)

        if char is None:
            # End of record: emit the last field and stop.
            yield ''.join(field)
            return
def parse_csv(csv_text):
    """Split one line of CSV text into a list of field strings.

    Feeds ``csv_text`` to the ``csv_parser`` coroutine one character at a
    time, followed by ``None`` as the end-of-record marker, and collects
    every field the coroutine hands back.

    Empty fields are kept (``'a,,b'`` gives ``['a', '', 'b']``) so that
    columns stay aligned.  An empty ``csv_text`` gives an empty list.

    Args:
        csv_text: a single CSV record as a string.

    Returns:
        The list of fields, in order.
    """
    if not csv_text:
        return []
    processor = csv_parser()
    next(processor)  # start the processor coroutine (works on Python 2.6+ and 3)
    split_result = []
    for c in list(csv_text) + [None]:
        emit = processor.send(c)
        # The coroutine answers None while it is in the middle of a field and
        # a string, possibly empty, when a field has just been completed.
        if emit is not None:
            split_result.append(emit)
    return split_result
# Example call; print(...) with one argument behaves the same on Python 2 and 3.
# Expected output: ['1997', 'Ford', 'E350', 'Super, luxurious truck']
print(parse_csv('1997,Ford,E350,"Super, luxurious truck"'))
Tested on python 2.7
The following method worked perfectly
import csv

column_names = ['column1name', 'column2name', 'column3name']

# One list of values per column, keyed by the column name.
d = {name: [] for name in column_names}

# NOTE: 'rb' is the mode the csv module expects on Python 2; on Python 3 open
# the file in text mode with newline='' instead.
# The with-block closes the file even if reading fails part-way through.
with open('filename.csv', 'rb') as csv_file:
    dictReader = csv.DictReader(csv_file, fieldnames=column_names, delimiter=',', quotechar='"')
    for row in dictReader:
        # Iterate over the known columns rather than the row's keys: a row
        # with extra fields carries them under the key None, which has no
        # entry in d.
        for name in column_names:
            d[name].append(row[name])
The columns are stored in a dictionary with the column names as the keys.
You should use the csv module:
import csv
# skipinitialspace=True drops spaces that directly follow a delimiter.
reader = csv.reader(['1997,Ford,E350,"Super, luxurious truck"'], skipinitialspace=True)
for r in reader:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(r)
output:
['1997', 'Ford', 'E350', 'Super, luxurious truck']
You have to define the double quote as the quotechar within the csv.reader() call:
>>> with open(r'<path_to_csv_test_file>') as csv_file:
... reader = csv.reader(csv_file, delimiter=',', quotechar='"')
... print(reader.next())
...
['1997', 'Ford', 'E350', 'Super, luxurious truck']
>>>
If you don't want to use the CSV module you need to use a regular expression. Try this:
import re
# Sample record whose last field contains a comma inside double quotes.
string = '1997,Ford,E350,"Super, luxurious truck"'

# Match only commas that are followed by an even number of double quotes up
# to the end of the string, i.e. commas that are not inside a quoted field.
regex = ",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)"
splitter = re.compile(regex)

array = splitter.split(string)
# The surrounding quotes remain part of the field text.
print(array[3])
"Super, luxurious truck"