How can I convert JSON to CSV?

前端 未结 26 1640
余生分开走
余生分开走 2020-11-21 22:32

I have a JSON file I want to convert to a CSV file. How can I do this with Python?

I tried:

import json
import c         


        
相关标签:
26条回答
  • 2020-11-21 22:49

    Try this

    import csv, json, sys
    
    # Load the JSON document named by the first command-line argument.
    # The context manager closes the file even if parsing fails, and avoids
    # shadowing the built-in ``input``.
    with open(sys.argv[1]) as source:
        data = json.load(source)

    output = csv.writer(sys.stdout)

    # An empty array has no first record to take a header from, so nothing
    # is written in that case.
    if data:
        header = list(data[0].keys())
        output.writerow(header)  # header row

        for item in data:
            # Look values up by header name so every cell lands under its own
            # column even when records differ in key order; keys missing from
            # a record become empty cells.
            output.writerow([item.get(column, "") for column in header])
    
    0 讨论(0)
  • 2020-11-21 22:49

    Modified Alec McGail's answer to support JSON with lists inside

        def flattenjson(self, mp, delim="|"):
            """Flatten a decoded JSON value into a flat list of entries.

            Parameters
            ----------
            mp : dict, list or scalar
                The value to flatten.
            delim : str, optional
                Separator placed between a dict key and each entry produced
                by flattening that key's value. Defaults to "|".

            Returns
            -------
            list
                * dict: one ``key + delim + entry`` string for every entry
                  obtained by flattening each value, in key order.
                * list: the entries of every element, concatenated in order
                  (list positions are not recorded in the result).
                * anything else: a one-element list holding the value itself,
                  unconverted.
            """
            if isinstance(mp, dict):
                # str() lets non-string leaves (numbers, booleans, null) be
                # joined to the key; plain concatenation raised TypeError.
                return [
                    k + delim + str(entry)
                    for k in mp
                    for entry in self.flattenjson(mp[k], delim)
                ]
            if isinstance(mp, list):
                return [
                    entry
                    for element in mp
                    for entry in self.flattenjson(element, delim)
                ]
            return [mp]
    

    Thanks!

    0 讨论(0)
  • 2020-11-21 22:49

    I might be late to the party, but I think I have dealt with a similar problem. I had a JSON file which looked like this:

    I only wanted to extract a few keys/values from these JSON files, so I wrote the following code to extract them.

    """json_to_csv.py
    This script reads any number of JSON files present in a folder, extracts certain data from each file, and writes it to a CSV file.
    The folder contains the Python script (json_to_csv.py), output.csv, and a sub-folder named descriptions containing all the JSON files.
    """
    
    import os
    import json
    import csv
    
    
    def get_list_of_json_files(directory='descriptions'):
        """Return the names of all entries in the folder holding the JSON files.

        Parameters
        ----------
        directory : str, optional
            Folder to list. Defaults to 'descriptions', the folder this
            script has always read from.

        Returns
        -------
        list_of_files : list
            Bare entry names (without the directory prefix) exactly as
            reported by os.listdir. Entries are not filtered by extension,
            so the folder is expected to contain only JSON files.
        """
        return os.listdir(directory)
    
    
    def create_list_from_json(jsonfile):
        """Build one CSV row from a single JSON description file.

        Parameter
        ---------
        jsonfile : str
            Path of the JSON file to read.

        Returns
        -------
        row : list
            The extracted values, in the column order of the final csv.

        Raises
        ------
        KeyError
            If any field other than meta/unstructured/race is missing.
        """
        with open(jsonfile) as handle:
            record = json.load(handle)

        def lookup(path):
            # Walk the nested dictionaries one key at a time.
            node = record
            for key in path:
                node = node[key]
            return node

        # Mandatory fields, listed in output-column order.
        required_paths = (
            ('_id',),
            ('_modelType',),
            ('creator', '_id'),
            ('creator', 'name'),
            ('dataset', '_accessLevel'),
            ('dataset', '_id'),
            ('dataset', 'description'),
            ('dataset', 'name'),
            ('meta', 'acquisition', 'image_type'),
            ('meta', 'acquisition', 'pixelsX'),
            ('meta', 'acquisition', 'pixelsY'),
            ('meta', 'clinical', 'age_approx'),
            ('meta', 'clinical', 'benign_malignant'),
            ('meta', 'clinical', 'diagnosis'),
            ('meta', 'clinical', 'diagnosis_confirm_type'),
            ('meta', 'clinical', 'melanocytic'),
            ('meta', 'clinical', 'sex'),
            ('meta', 'unstructured', 'diagnosis'),
        )
        row = [lookup(path) for path in required_paths]

        # Some files carry no race entry; keep the column with an empty string.
        try:
            race = lookup(('meta', 'unstructured', 'race'))
        except KeyError:
            race = ""
        row.append(race)

        row.append(record['name'])
        return row
    
    
    def write_csv():
        """Append one row per JSON file in 'descriptions' to output.csv.

        The file names come from get_list_of_json_files() and each row is
        built by create_list_from_json(). Rows are appended, so an existing
        output.csv (for example one holding only the header line) keeps its
        current content.

        Returns
        -------
        None
        """
        list_of_files = get_list_of_json_files()

        # Open the output once for the whole run instead of once per row.
        # newline='' is what the csv module expects of files it writes to;
        # without it, platforms that translate '\n' get blank lines between rows.
        with open('output.csv', 'a', newline='') as c:
            writer = csv.writer(c)
            for file in list_of_files:
                # create the row to be added to csv for each file (json-file)
                writer.writerow(create_list_from_json(f'descriptions/{file}'))
    
    
    # Run the conversion only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        write_csv()
    

    I hope this helps. For details on how this code works, you can check here.

    0 讨论(0)
  • 2020-11-21 22:52

    Surprisingly, I found that none of the answers posted here so far correctly deal with all possible scenarios (e.g., nested dicts, nested lists, None values, etc).

    This solution should work across all scenarios:

    def flatten_json(json):
        """Collapse a nested JSON object into a single-level dict.

        Each leaf value is stored under the path that leads to it: the dict
        keys and list indices along the way, joined with '__'. Empty dicts
        and empty lists contain no leaves and therefore add no entries.
        """
        def walk(path, node):
            # Yield (flat_key, leaf) pairs for everything beneath ``node``.
            if isinstance(node, dict):
                for name in node.keys():
                    yield from walk(path + [name], node[name])
            elif isinstance(node, list):
                for position, element in enumerate(node):
                    yield from walk(path + [str(position)], element)
            else:
                yield '__'.join(path), node

        result = {}
        for top_key in json.keys():
            for flat_key, leaf in walk([top_key], json[top_key]):
                result[flat_key] = leaf
        return result
    
    0 讨论(0)
  • 2020-11-21 22:53

    JSON can represent a wide variety of data structures -- a JS "object" is roughly like a Python dict (with string keys), a JS "array" roughly like a Python list, and you can nest them as long as the final "leaf" elements are numbers or strings.

    CSV can essentially represent only a 2-D table -- optionally with a first row of "headers", i.e., "column names", which can make the table interpretable as a list of dicts, instead of the normal interpretation, a list of lists (again, "leaf" elements can be numbers or strings).

    So, in the general case, you can't translate an arbitrary JSON structure to a CSV. In a few special cases you can (array of arrays with no further nesting; arrays of objects which all have exactly the same keys). Which special case, if any, applies to your problem? The details of the solution depend on which special case you do have. Given the astonishing fact that you don't even mention which one applies, I suspect you may not have considered the constraint, neither usable case in fact applies, and your problem is impossible to solve. But please do clarify!

    0 讨论(0)
  • 2020-11-21 22:53

    As mentioned in the previous answers, the difficulty in converting JSON to CSV is that a JSON file can contain nested dictionaries and therefore be a multidimensional data structure, versus a CSV, which is a 2D data structure. However, a good way to turn a multidimensional structure into CSV is to have multiple CSVs that tie together with primary keys.

    In your example, the first CSV output has the columns "pk", "model", and "fields". Values for "pk" and "model" are easy to get, but because the "fields" column contains a dictionary, it should be its own CSV; and because "codename" appears to be the primary key, you can use it as the value for "fields" to complete the first CSV. The second CSV contains the dictionary from the "fields" column, with codename as the primary key that can be used to tie the two CSVs together.

    Here is a solution for your JSON file which converts the nested dictionaries to 2 CSVs.

    import csv
    import json
    
    def _to_cell(value):
        """Return the CSV text for a non-dict JSON value (null becomes '')."""
        return "" if value is None else str(value)


    def readAndWrite(inputFileName, primaryKey=""):
        """Convert ``<inputFileName>.json`` (a list of objects) to CSV.

        Parameters
        ----------
        inputFileName : str
            Path of the JSON file without its ".json" extension. The nested
            "fields" handling is only enabled when this is exactly "data".
        primaryKey : str, optional
            "" (default) writes ``<inputFileName>.csv`` with one column per
            top-level key. For "data", a dict-valued column is replaced by
            that dict's "codename", and the secondary CSV is generated too.
            "codename" writes ``<inputFileName>-codename.csv`` with one
            column per key found in each record's "fields" dict; dict values
            inside "fields" are skipped.

        Notes
        -----
        Files are opened in text mode with ``newline=''`` and UTF-8, as the
        Python 3 csv module requires; values are written as text rather than
        encoded bytes. Columns appear in first-seen key order.
        """
        with open(inputFileName + ".json", encoding="utf-8") as json_file:
            data = json.load(json_file)

        if primaryKey != "":
            outputFileName = inputFileName + "-" + primaryKey
            # Only the "data" file is known to carry a "fields" dict.
            if inputFileName == "data":
                key_sources = [record["fields"] for record in data]
            else:
                key_sources = []
        else:
            outputFileName = inputFileName
            key_sources = data

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # column order is stable from run to run (a set's order is not).
        fieldnames = list(
            dict.fromkeys(key for source in key_sources for key in source)
        )

        needs_secondary = False
        with open(outputFileName + ".csv", "w", newline="",
                  encoding="utf-8") as output_file:
            writer = csv.DictWriter(output_file, fieldnames,
                                    delimiter=',', quotechar='"')
            writer.writeheader()
            for x in data:
                row_value = {}
                if primaryKey == "":
                    for y, yValue in x.items():
                        if not isinstance(yValue, dict):
                            row_value[y] = _to_cell(yValue)
                        elif inputFileName == "data":
                            # Reference the nested record by its primary key.
                            row_value[y] = _to_cell(yValue["codename"])
                            needs_secondary = True
                    writer.writerow(row_value)
                elif primaryKey == "codename":
                    for y, yValue in x["fields"].items():
                        if not isinstance(yValue, dict):
                            row_value[y] = _to_cell(yValue)
                    writer.writerow(row_value)

        # Build the secondary CSV once, rather than once per row.
        if needs_secondary:
            readAndWrite(inputFileName, primaryKey="codename")
    
    # Convert data.json (looked up relative to the current directory) to CSV.
    readAndWrite("data")
    
    0 讨论(0)
提交回复
热议问题