Converting CSV to JSON in bash

前端 未结 9 1906
梦毁少年i
梦毁少年i 2021-02-04 03:12

I'm trying to convert a CSV file into JSON.

Here are two sample lines:

-21.3214077;55.4851413;Ruizia cordata
-21.3213078;55.4849803;Cossinia pinnata
         


        
9条回答
  •  借酒劲吻你
    2021-02-04 03:36

    If you want to go crazy, you can write a parser using jq. Here's my implementation which can be thought of as the inverse of the @csv filter. Throw this into your .jq file.

    # Conditionally transform the input: when `pred` is truthy the result is
    # `update` applied to the input, otherwise the input passes through as-is.
    def do_if(pred; update):
        if pred | not then . else update end;
    # Parse the input string as delimiter-separated text and emit one array of
    # field strings per record (roughly the inverse of the @csv filter).
    #
    # Arguments (each is expected to be a single-character string, because the
    # four values are exploded together and destructured into four codepoints):
    #   $_delim  field separator
    #   $_quot   quote character; a doubled quote inside a quoted field is a
    #            literal quote
    #   $_nl     record terminator
    #   $_skip   character dropped from the input before any other processing
    #            (note: it is dropped inside quoted fields as well)
    #
    # The parser is a state machine folded over the input codepoints. The
    # accumulator is a 4-element array:
    #   .[0]  current state
    #   .[1]  codepoints of the field being read (null between fields)
    #   .[2]  fields collected so far for the current record
    #   .[3]  completed records
    def _parse_delimited($_delim; $_quot; $_nl; $_skip):
        [($_delim, $_quot, $_nl, $_skip)|explode[]] as [$delim, $quot, $nl, $skip] |
        [0,1,2,3,4,5] as [$s_start,$s_next_value,$s_read_value,$s_read_quoted,$s_escape,$s_final] |
        def _append($arr; $value):
            $arr + [$value];
        # At the beginning of a record.
        def _do_start($c):
            if $c == $nl then
                # An empty line is a record holding one empty field.
                [$s_start, null, null, _append(.[3]; [""])]
            elif $c == $delim then
                # A leading delimiter means the first field is empty.
                [$s_next_value, null, [""], .[3]]
            elif $c == $quot then
                [$s_read_quoted, [], [], .[3]]
            else
                [$s_read_value, [$c], [], .[3]]
            end;
        # Just after a delimiter: the next field has not started yet.
        def _do_next_value($c):
            if $c == $nl then
                [$s_start, null, null, _append(.[3]; _append(.[2]; ""))]
            elif $c == $delim then
                [$s_next_value, null, _append(.[2]; ""), .[3]]
            elif $c == $quot then
                [$s_read_quoted, [], .[2], .[3]]
            else
                [$s_read_value, [$c], .[2], .[3]]
            end;
        # Inside an unquoted field.
        def _do_read_value($c):
            if $c == $nl then
                [$s_start, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
            elif $c == $delim then
                [$s_next_value, null, _append(.[2]; .[1]|implode), .[3]]
            else
                [$s_read_value, _append(.[1]; $c), .[2], .[3]]
            end;
        # Inside a quoted field: delimiters and record terminators are literal.
        def _do_read_quoted($c):
            if $c == $quot then
                [$s_escape, .[1], .[2], .[3]]
            else
                [$s_read_quoted, _append(.[1]; $c), .[2], .[3]]
            end;
        # Just after a quote seen inside a quoted field: the quote either closed
        # the field (terminator or delimiter follows) or introduces a literal
        # character (e.g. a doubled quote), which is appended to the field.
        def _do_escape($c):
            if $c == $nl then
                [$s_start, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
            elif $c == $delim then
                [$s_next_value, null, _append(.[2]; .[1]|implode), .[3]]
            else
                [$s_read_quoted, _append(.[1]; $c), .[2], .[3]]
            end;
        def _do_final($c):
            .;
        # Flush whatever is pending when the input ends without a terminator.
        def _do_finalize:
            if .[0] == $s_start then
                [$s_final, null, null, .[3]]
            elif .[0] == $s_next_value then
                # Input ended right after a delimiter: the record gains a
                # trailing empty field. The fields already collected in .[2]
                # must be kept (previously they were replaced by a lone [""]).
                [$s_final, null, null, _append(.[3]; _append(.[2]; ""))]
            elif .[0] == $s_read_value then
                [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
            elif .[0] == $s_read_quoted then
                # Unterminated quoted field: keep what was read.
                [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
            elif .[0] == $s_escape then
                [$s_final, null, null, _append(.[3]; _append(.[2]; .[1]|implode))]
            else # .[0] == $s_final
                .
            end;
        reduce explode[] as $c (
            [$s_start,null,null,[]];
            # Skipped characters leave the accumulator untouched.
            if $c == $skip then
                .
            elif .[0] == $s_start then
                _do_start($c)
            elif .[0] == $s_next_value then
                _do_next_value($c)
            elif .[0] == $s_read_value then
                _do_read_value($c)
            elif .[0] == $s_read_quoted then
                _do_read_quoted($c)
            elif .[0] == $s_escape then
                _do_escape($c)
            else # .[0] == $s_final
                _do_final($c)
            end
        )
        # Emit each completed record as a separate output.
        | _do_finalize[3][];
    # Public entry points for the delimited-text parser. Arguments omitted by
    # the shorter arities take these defaults:
    #   $quot = "\""   $nl = "\n"   $skip = "\r"
    def parse_delimited($delim; $quot; $nl; $skip):
        _parse_delimited($delim; $quot; $nl; $skip);
    def parse_delimited($delim; $quot; $nl):
        _parse_delimited($delim; $quot; $nl; "\r");
    def parse_delimited($delim; $quot):
        _parse_delimited($delim; $quot; "\n"; "\r");
    def parse_delimited($delim):
        _parse_delimited($delim; "\""; "\n"; "\r");
    # Parse the input string as comma-separated values, using the default
    # quote, record-terminator and skip characters of parse_delimited/1.
    def parse_csv:
        "," as $comma | parse_delimited($comma);
    

    For your data, you would want to change the delimiter to semicolons.

    $ cat se.csv
    -21.3214077;55.4851413;Ruizia cordata
    -21.3213078;55.4849803;Cossinia pinnata
    $ jq -R 'parse_delimited(";")' se.csv
    [
      "-21.3214077",
      "55.4851413",
      "Ruizia cordata"
    ]
    [
      "-21.3213078",
      "55.4849803",
      "Cossinia pinnata"
    ]
    

    Parsing one line at a time (`-R`) works fine for most inputs, but if your data has literal newlines inside quoted fields, you will want to read the entire file as a single string (`-Rs`).

    $ cat input.csv
    Year,Make,Model,Description,Price
    1997,Ford,E350,"ac, abs, moon",3000.00
    1999,Chevy,"Venture ""Extended Edition""","",4900.00
    1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
    1996,Jeep,Grand Cherokee,"MUST SELL!
    air, moon roof, loaded",4799.00
    $ jq -Rs 'parse_csv' input.csv
    [
      "Year",
      "Make",
      "Model",
      "Description",
      "Price"
    ]
    [
      "1997",
      "Ford",
      "E350",
      "ac, abs, moon",
      "3000.00"
    ]
    [
      "1999",
      "Chevy",
      "Venture \"Extended Edition\"",
      "",
      "4900.00"
    ]
    [
      "1999",
      "Chevy",
      "Venture \"Extended Edition, Very Large\"",
      "",
      "5000.00"
    ]
    [
      "1996",
      "Jeep",
      "Grand Cherokee",
      "MUST SELL!\nair, moon roof, loaded",
      "4799.00"
    ]
    

提交回复
热议问题