I am trying to convert a CSV file into JSON.
Here are two sample lines:
-21.3214077;55.4851413;Ruizia cordata
-21.3213078;55.4849803;Cossinia pinnata
If you want to go crazy, you can write a parser using jq. Here's my implementation, which can be thought of as the inverse of the @csv
filter. Put these definitions in your ~/.jq file so they are available to every jq invocation.
# Conditional update: when `pred` holds for the input, emit the result of
# `update`; otherwise pass the input through unchanged.
def do_if(pred; update):
  pred as $matched
  | if $matched then update else . end;
# Parse the input string as delimiter-separated text and emit one array of
# field strings per row.
#
#   $_delim  field separator, e.g. "," or ";"
#   $_quot   quote character; inside a quoted field a doubled quote stands for
#            one literal quote
#   $_nl     row terminator
#   $_skip   character that is dropped wherever it occurs in the input,
#            including inside quoted fields
#
# Only the first character of each argument is used; passing an empty string
# disables that role (its codepoint binds to null, which never matches).
def _parse_delimited($_delim; $_quot; $_nl; $_skip):
  # explode[0] (rather than explode[]) keeps the four bindings aligned even
  # when an argument is empty or longer than one character.
  [($_delim, $_quot, $_nl, $_skip) | explode[0]] as [$delim, $quot, $nl, $skip] |
  [0,1,2,3,4,5] as [$s_start,$s_next_value,$s_read_value,$s_read_quoted,$s_escape,$s_final] |
  # The reduce state is a 4-tuple:
  #   .[0] current state id
  #   .[1] codepoints of the field being read
  #   .[2] fields already completed in the current row
  #   .[3] rows already completed
  def _append($arr; $value):
    $arr + [$value];
  # At the beginning of a row.
  def _do_start($c):
    if $c == $nl then
      [$s_start, null, null, _append(.[3]; [""])]
    elif $c == $delim then
      [$s_next_value, null, [""], .[3]]
    elif $c == $quot then
      [$s_read_quoted, [], [], .[3]]
    else
      [$s_read_value, [$c], [], .[3]]
    end;
  # Just after a delimiter: the next field has not started yet.
  def _do_next_value($c):
    if $c == $nl then
      [$s_start, null, null, _append(.[3]; _append(.[2]; ""))]
    elif $c == $delim then
      [$s_next_value, null, _append(.[2]; ""), .[3]]
    elif $c == $quot then
      [$s_read_quoted, [], .[2], .[3]]
    else
      [$s_read_value, [$c], .[2], .[3]]
    end;
  # Inside an unquoted field.
  def _do_read_value($c):
    if $c == $nl then
      [$s_start, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
    elif $c == $delim then
      [$s_next_value, null, _append(.[2]; .[1]|implode), .[3]]
    else
      [$s_read_value, _append(.[1]; $c), .[2], .[3]]
    end;
  # Inside a quoted field: everything is literal until the next quote.
  def _do_read_quoted($c):
    if $c == $quot then
      [$s_escape, .[1], .[2], .[3]]
    else
      [$s_read_quoted, _append(.[1]; $c), .[2], .[3]]
    end;
  # Just after a quote seen inside a quoted field. A delimiter or newline
  # means the quote closed the field; any other character (including a second
  # quote) is appended and reading of the quoted field continues.
  def _do_escape($c):
    if $c == $nl then
      [$s_start, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
    elif $c == $delim then
      [$s_next_value, null, _append(.[2]; .[1]|implode), .[3]]
    else
      [$s_read_quoted, _append(.[1]; $c), .[2], .[3]]
    end;
  def _do_final($c):
    .;
  # Flush whatever is pending once the input is exhausted.
  def _do_finalize:
    if .[0] == $s_start then
      [$s_final, null, null, .[3]]
    elif .[0] == $s_next_value then
      # Input ended right after a delimiter: the row ends with an empty field.
      # Keep the fields collected so far in .[2] instead of replacing the
      # whole row with [""].
      [$s_final, null, null, _append(.[3]; _append(.[2]; ""))]
    elif .[0] == $s_read_value then
      [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
    elif .[0] == $s_read_quoted then
      [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
    elif .[0] == $s_escape then
      [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
    else # .[0] == $s_final
      .
    end;
  reduce explode[] as $c (
    [$s_start, null, null, []];
    if $c == $skip then
      # Skipped characters never reach the state machine.
      .
    elif .[0] == $s_start then
      _do_start($c)
    elif .[0] == $s_next_value then
      _do_next_value($c)
    elif .[0] == $s_read_value then
      _do_read_value($c)
    elif .[0] == $s_read_quoted then
      _do_read_quoted($c)
    elif .[0] == $s_escape then
      _do_escape($c)
    else # .[0] == $s_final
      _do_final($c)
    end
  )
  | _do_finalize
  | .[3][];
# Four-argument entry point: hands the delimiter, quote, newline and skip
# strings to _parse_delimited unchanged.
def parse_delimited($delim; $quot; $nl; $skip):
  _parse_delimited(
    $delim;
    $quot;
    $nl;
    $skip
  );
# Three-argument form: calls the four-argument parse_delimited with "\r" as
# the skip string.
def parse_delimited($delim; $quot; $nl):
  "\r" as $skip
  | parse_delimited($delim; $quot; $nl; $skip);
# Two-argument form: calls the three-argument parse_delimited with "\n" as
# the newline string.
def parse_delimited($delim; $quot):
  "\n" as $nl
  | parse_delimited($delim; $quot; $nl);
# One-argument form: calls the two-argument parse_delimited with a double
# quote as the quote string.
def parse_delimited($delim):
  "\"" as $quot
  | parse_delimited($delim; $quot);
# Comma-separated convenience wrapper: calls the one-argument parse_delimited
# with "," as the delimiter.
def parse_csv:
  "," as $comma
  | parse_delimited($comma);
For your data, you would want to change the delimiter to semicolons.
$ cat se.csv
-21.3214077;55.4851413;Ruizia cordata
-21.3213078;55.4849803;Cossinia pinnata
$ jq -R 'parse_delimited(";")' se.csv
[
"-21.3214077",
"55.4851413",
"Ruizia cordata"
]
[
"-21.3213078",
"55.4849803",
"Cossinia pinnata"
]
Parsing one line at a time (with -R) works fine for most inputs, but if your data has literal newlines inside quoted fields, you will want to read the entire file as a single string (with -Rs) instead.
$ cat input.csv
Year,Make,Model,Description,Price
1997,Ford,E350,"ac, abs, moon",3000.00
1999,Chevy,"Venture ""Extended Edition""","",4900.00
1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
1996,Jeep,Grand Cherokee,"MUST SELL!
air, moon roof, loaded",4799.00
$ jq -Rs 'parse_csv' input.csv
[
"Year",
"Make",
"Model",
"Description",
"Price"
]
[
"1997",
"Ford",
"E350",
"ac, abs, moon",
"3000.00"
]
[
"1999",
"Chevy",
"Venture \"Extended Edition\"",
"",
"4900.00"
]
[
"1999",
"Chevy",
"Venture \"Extended Edition, Very Large\"",
"",
"5000.00"
]
[
"1996",
"Jeep",
"Grand Cherokee",
"MUST SELL!\nair, moon roof, loaded",
"4799.00"
]