SimpleJSON and NumPy array

后端 未结 9 1350
挽巷
挽巷 2020-12-04 10:56

What is the most efficient way of serializing a numpy array using simplejson?

相关标签:
9条回答
  • 2020-12-04 11:28

    I'd use simplejson.dumps(somearray.tolist()) as the most convenient approach (if I was still using simplejson at all, which implies being stuck with Python 2.5 or earlier; 2.6 and later have a standard library module json which works the same way, so of course I'd use that if the Python release in use supported it;-).

    In a quest for greater efficiency, you could subclass json.JSONEncoder (in json; I don't know if the older simplejson already offered such customization possibilities) and, in the default method, special-case instances of numpy.array by turning them into list or tuples "just in time". I kind of doubt you'd gain enough by such an approach, in terms of performance, to justify the effort, though.

    0 讨论(0)
  • 2020-12-04 11:31

    This shows how to convert from a 1D NumPy array to JSON and back to an array:

    try:
        import json
    except ImportError:
        import simplejson as json
    import numpy as np
    
    def arr2json(arr):
        """Return the JSON text for *arr*, going through its nested-list form."""
        as_list = arr.tolist()
        return json.dumps(as_list)
    def json2arr(astr, dtype):
        """Rebuild a NumPy array of the given *dtype* from JSON text.

        ``np.array`` is used instead of ``np.fromiter`` so that nested JSON
        lists (arrays with more than one dimension) are restored with their
        shape, while flat lists still yield a 1-D array as before.

        Parameters:
            astr (str): JSON text holding a (possibly nested) list of numbers.
            dtype: any dtype specifier accepted by ``np.array``.

        Returns:
            numpy.ndarray: the decoded array.
        """
        return np.array(json.loads(astr), dtype=dtype)
    
    # Round-trip demo: array -> JSON text -> array.
    arr = np.arange(10)
    astr = arr2json(arr)
    print(repr(astr))
    # '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]'

    # The dtype is not stored in the JSON text, so it is supplied on decode.
    dt = np.int32
    arr = json2arr(astr, dt)
    print(repr(arr))
    # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    

    Building on tlausch's answer, here is a way to JSON-encode a NumPy array while preserving shape and dtype of any NumPy array -- including those with complex dtype.

    class NDArrayEncoder(json.JSONEncoder):
        """JSON encoder that stores ``numpy.ndarray`` objects as base64 ``.npz`` text.

        Each array is written with ``np.savez_compressed`` into an in-memory
        buffer and emitted as ``{'b64npz': <base64 text>}``.
        """

        def default(self, obj):
            """Return a JSON-serializable stand-in for *obj*.

            Parameters:
                obj: the object json could not serialize natively.

            Returns:
                dict: ``{'b64npz': str}`` when *obj* is an ndarray.

            Raises:
                TypeError: from ``json.JSONEncoder.default`` for any other type.
            """
            if isinstance(obj, np.ndarray):
                buffer = io.BytesIO()
                np.savez_compressed(buffer, obj=obj)
                # b64encode returns bytes; decode to text so the json module
                # can emit it (bytes are not JSON serializable on Python 3).
                encoded = base64.b64encode(buffer.getvalue()).decode('ascii')
                return {'b64npz': encoded}
            return json.JSONEncoder.default(self, obj)
    
    
    def ndarray_decoder(dct):
        """``object_hook`` for ``json.loads`` that restores ``b64npz`` arrays.

        Parameters:
            dct: a decoded JSON object (normally a dict).

        Returns:
            numpy.ndarray: when *dct* is a dict containing the ``'b64npz'`` key.
            Otherwise *dct* is returned unchanged.
        """
        if isinstance(dct, dict) and 'b64npz' in dct:
            buffer = io.BytesIO(base64.b64decode(dct['b64npz']))
            # JSON input may be untrusted: refuse pickled object arrays
            # explicitly, and close the NpzFile once the array has been read.
            with np.load(buffer, allow_pickle=False) as archive:
                return archive['obj']
        return dct
    
    # Build the reference input: a strided slice, reinterpreted through a
    # two-field ('<i4', '<f4') structured dtype.
    expected = np.arange(10)[::2].view('<i4,<f4')

    # Encode to JSON text and decode it again.
    dumped = json.dumps(expected, cls=NDArrayEncoder)
    result = json.loads(dumped, object_hook=ndarray_decoder)

    # The round trip must keep dtype, shape and contents.
    assert result.dtype == expected.dtype, "Wrong Type"
    assert result.shape == expected.shape, "Wrong Shape"
    assert np.array_equal(expected, result), "Wrong Values"
    
    0 讨论(0)
  • 2020-12-04 11:31

    You can also answer this with just a function passed into json.dumps in this way:

    # json.dumps calls the `default=` function for objects it cannot serialize
    # itself. The name passed here must match the helper's `def` name exactly
    # (watch the spelling of "serializer").
    json.dumps(np.array([1, 2, 3]), default=json_numpy_serializer)
    

    With

    import numpy as np
    
    def json_numpy_serializer(o):
        """Serialize numpy types for json.

        Intended to be passed as the ``default=`` callback of ``json.dumps``.

        Parameters:
            o (object): any python object which fails to be serialized by json

        Returns:
            A python-native equivalent of ``o``: a (nested) list for an
            ndarray, or the result of ``.item()`` for a supported scalar.

        Raises:
            TypeError: if ``o`` is not one of the handled numpy types.

        Example:

            >>> import json
            >>> a = np.array([1, 2, 3])
            >>> json.dumps(a, default=json_numpy_serializer)
            '[1, 2, 3]'

        """
        numpy_types = (
            np.bool_,
            # np.bytes_, -- python `bytes` class is not json serializable
            # np.complex64,  -- python `complex` class is not json serializable
            # np.complex128,  -- python `complex` class is not json serializable
            # np.complex256,  -- no python native, and `complex` is not json serializable
            # np.datetime64,  -- python `datetime.datetime` class is not json serializable
            np.float16,
            np.float32,
            np.float64,
            # np.float128 / np.longdouble,  -- special handling below
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            # np.object_  -- should already be evaluated as python native
            np.str_,
            np.timedelta64,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
            np.void,
        )

        if isinstance(o, np.ndarray):
            return o.tolist()
        if isinstance(o, numpy_types):
            return o.item()
        # np.float128 is only defined on some platforms; np.longdouble always
        # exists and is the same type wherever np.float128 is available.
        if isinstance(o, np.longdouble):
            return o.astype(np.float64).item()
        raise TypeError("{} of type {} is not JSON serializable".format(repr(o), type(o)))


    # Backward-compatible alias: the function was originally published under
    # this misspelled name, and existing callers may still use it.
    json_numpy_serialzer = json_numpy_serializer
    

    Validated with:

    # Scalar types that are skipped in the loop below because their
    # python-native counterpart cannot be serialized by json.
    need_addition_json_handeling = (
        np.bytes_,
        np.complex64,
        np.complex128,
        np.clongdouble,  # same type as np.complex256 where that name exists
        np.datetime64,
        np.longdouble,  # same type as np.float128 where that name exists
    )


    # np.typeDict was removed from NumPy; np.sctypeDict is the supported
    # mapping of type names to scalar types.
    numpy_types = tuple(set(np.sctypeDict.values()))

    for numpy_type in numpy_types:
        print(numpy_type)

        if numpy_type == np.void:
            # complex dtypes evaluate as np.void, e.g.
            numpy_type = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
        elif numpy_type in need_addition_json_handeling:
            print('python native can not be json serialized')
            continue

        # Serialize a one-element array of this type; an exception here means
        # the serializer does not cover the type.
        a = np.ones(1, dtype=numpy_type)
        json.dumps(a, default=json_numpy_serialzer)
    
    0 讨论(0)
  • 2020-12-04 11:37

    I found this json subclass code for serializing one-dimensional numpy arrays within a dictionary. I tried it and it works for me.

    class NumpyAwareJSONEncoder(json.JSONEncoder):
        """JSON encoder that writes ``numpy.ndarray`` values as (nested) lists."""

        def default(self, obj):
            """Return a JSON-serializable stand-in for *obj*.

            Arrays of any dimensionality are converted with ``tolist()``,
            which produces nested lists for multi-dimensional input. The
            original version accepted 1-D arrays only and raised
            ``TypeError`` for every other rank.

            Raises:
                TypeError: from ``json.JSONEncoder.default`` for non-array
                    objects json cannot serialize.
            """
            if isinstance(obj, numpy.ndarray):
                return obj.tolist()
            return json.JSONEncoder.default(self, obj)
    

    My dictionary is 'results'. Here's how I write to the file "data.json":

    # Serialize the results dictionary, converting numpy arrays via the encoder.
    j = json.dumps(results, cls=NumpyAwareJSONEncoder)
    # The context manager closes the file even if the write raises.
    with open("data.json", "w") as f:
        f.write(j)
    
    0 讨论(0)
  • 2020-12-04 11:39

    One fast, though not truly optimal way is using Pandas:

    import pandas as pd
    # Wrap the array in a pandas Series and let pandas produce the JSON text.
    # NOTE(review): orient='values' is expected to emit only the values, without
    # the index -- confirm against the pandas documentation.
    pd.Series(your_array).to_json(orient='values')
    
    0 讨论(0)
  • 2020-12-04 11:41

    I just came across tlausch's answer to this question and realized that it almost solves my problem. At least for me, however, it does not work in Python 3.5 because of two errors: (1) infinite recursion, and (2) the data was saved as None.

    Since I cannot comment directly on the original answer yet, here is my version:

    import base64
    import json
    import numpy as np
    
        class NumpyEncoder(json.JSONEncoder):
            """JSON encoder that stores ndarrays as base64 data plus dtype and shape."""

            def default(self, obj):
                """If input object is an ndarray it will be converted into a dict
                holding dtype, shape and the data, base64 encoded.

                Any other object is passed on to ``json.JSONEncoder.default``,
                which raises ``TypeError`` for types json cannot serialize.
                """
                if isinstance(obj, np.ndarray):
                    # base64 needs a C-contiguous buffer; ascontiguousarray
                    # provides one (strided inputs are copied).
                    obj_data = np.ascontiguousarray(obj).data
                    data_b64 = base64.b64encode(obj_data)
                    return dict(__ndarray__=data_b64.decode('utf-8'),
                                dtype=str(obj.dtype),
                                shape=obj.shape)
                # Without this fallback the method returns None, and json then
                # silently writes unsupported objects as null.
                return json.JSONEncoder.default(self, obj)
    
    
        def json_numpy_obj_hook(dct):
            """Turn an encoded ndarray dict back into an array with its dtype and shape.

            :param dct: (dict) object decoded by json, possibly an encoded ndarray
            :return: (ndarray) when ``dct`` carries the ``'__ndarray__'`` key,
                otherwise ``dct`` itself, untouched
            """
            is_encoded_array = isinstance(dct, dict) and '__ndarray__' in dct
            if not is_encoded_array:
                return dct

            raw_bytes = base64.b64decode(dct['__ndarray__'])
            flat = np.frombuffer(raw_bytes, dct['dtype'])
            return flat.reshape(dct['shape'])
    
    # np.float was only an alias for the builtin float (float64) and has been
    # removed from NumPy; name the dtype explicitly.
    expected = np.arange(100, dtype=np.float64)
    dumped = json.dumps(expected, cls=NumpyEncoder)
    result = json.loads(dumped, object_hook=json_numpy_obj_hook)


    # None of the following assertions will be broken.
    assert result.dtype == expected.dtype, "Wrong Type"
    assert result.shape == expected.shape, "Wrong Shape"
    assert np.allclose(expected, result), "Wrong Values"
    
    0 讨论(0)
提交回复
热议问题