How to recover original indices for a flattened Numpy array?

后端 未结 6 415
逝去的感伤
逝去的感伤 2021-01-02 19:49

I've got a multidimensional numpy array that I'm trying to stick into a pandas data frame. I'd like to flatten the array, and create a pandas index that reflects the pre-flattening array indices.

相关标签:
6条回答
  • 2021-01-02 20:27

    As hpaulj pointed out in a comment, I could add indexing='ij' to the meshgrid call:

    # Build a Series of A's values indexed by their original (z, y, x) positions.
    A = np.random.rand(2,3,4)
    dimnames = ['z', 'y', 'x']
    ranges   = [ np.arange(x) for x in A.shape ]
    # indexing='ij' keeps the grids in matrix (axis) order, so that flattening
    # them lines up with the C-order flattening of A.
    ix       = [ x.flatten()  for x in np.meshgrid(*ranges, indexing='ij') ]
    # The frame must exist before the index columns can be attached to it.
    df = pd.DataFrame(A.flatten(), columns=['A'])
    for name, col in zip(dimnames, ix):
        df[name] = col
    # With a single remaining column, squeeze() turns the frame into a Series.
    df = df.set_index(dimnames).squeeze()
    
    # Compare the results
    # (Series.items() replaces iteritems(), which pandas 2.0 removed; a separate
    # loop name keeps the `ix` list above intact.)
    for idx, val in df.items():
        print(idx, val == A[idx])
    (0, 0, 0) True
    (0, 0, 1) True
    (0, 0, 2) True
    (0, 0, 3) True
    (0, 1, 0) True
    (0, 1, 1) True
    (0, 1, 2) True
    (0, 1, 3) True
    (0, 2, 0) True
    (0, 2, 1) True
    (0, 2, 2) True
    (0, 2, 3) True
    (1, 0, 0) True
    (1, 0, 1) True
    (1, 0, 2) True
    (1, 0, 3) True
    (1, 1, 0) True
    (1, 1, 1) True
    (1, 1, 2) True
    (1, 1, 3) True
    (1, 2, 0) True
    (1, 2, 1) True
    (1, 2, 2) True
    (1, 2, 3) True
    
    0 讨论(0)
  • 2021-01-02 20:27

    Another possibility, although others may be faster...

    # np.indices returns one integer grid per axis of A (A is 3-D here).
    x, y, z = np.indices(A.shape)
    
    # Flatten each grid and A itself, and place them side by side as columns;
    # the value column is labelled 0.
    df = pd.DataFrame(
        np.column_stack([grid.ravel() for grid in (x, y, z, A)]),
        columns=['x', 'y', 'z', 0],
    )
    
    0 讨论(0)
  • 2021-01-02 20:28

    My solution is based on this answer by Divakar involving np.ogrid. This function should work for any array of any dimension.

    def indices_merged_arr(arr):
        """Pair every element of ``arr`` with its index along each axis.

        Returns a 2-D array with one row per element of ``arr`` (C order).
        Column 0 holds the element value and columns 1..ndim hold the
        element's position along axes 0..ndim-1.  The result has the same
        dtype as ``arr``, so the indices are stored in that dtype too.
        """
        ndim = arr.ndim
        # One open (broadcastable) index grid per axis.
        axes = np.ogrid[tuple(slice(extent) for extent in arr.shape)]
        merged = np.empty(arr.shape + (ndim + 1,), dtype=arr.dtype)
        merged[..., 0] = arr
        # Each open grid broadcasts across the full shape of arr.
        for slot, coords in zip(range(1, ndim + 1), axes):
            merged[..., slot] = coords
        return merged.reshape(-1, ndim + 1)
    
    A = np.array([[[ 0.43793885,  0.40078139,  0.48078691,  0.05334248],
                   [ 0.76331509,  0.82514441,  0.86169078,  0.86496111],
                   [ 0.75572665,  0.80860943,  0.79995337,  0.63123724]],
    
                  [[ 0.20648946,  0.57042315,  0.71777265,  0.34155005],
                   [ 0.30843717,  0.39381407,  0.12623462,  0.93481552],
                   [ 0.3267771 ,  0.64097038,  0.30405215,  0.57726629]]])
    
    df = pd.DataFrame(indices_merged_arr(A), columns=list('Axyz'))
    df
    
               A    x    y    z
    0   0.437939  0.0  0.0  0.0
    1   0.400781  0.0  0.0  1.0
    2   0.480787  0.0  0.0  2.0
    3   0.053342  0.0  0.0  3.0
    4   0.763315  0.0  1.0  0.0
    5   0.825144  0.0  1.0  1.0
    6   0.861691  0.0  1.0  2.0
    7   0.864961  0.0  1.0  3.0
    8   0.755727  0.0  2.0  0.0
    9   0.808609  0.0  2.0  1.0
    10  0.799953  0.0  2.0  2.0
    11  0.631237  0.0  2.0  3.0
    12  0.206489  1.0  0.0  0.0
    13  0.570423  1.0  0.0  1.0
    14  0.717773  1.0  0.0  2.0
    15  0.341550  1.0  0.0  3.0
    16  0.308437  1.0  1.0  0.0
    17  0.393814  1.0  1.0  1.0
    18  0.126235  1.0  1.0  2.0
    19  0.934816  1.0  1.0  3.0
    20  0.326777  1.0  2.0  0.0
    21  0.640970  1.0  2.0  1.0
    22  0.304052  1.0  2.0  2.0
    23  0.577266  1.0  2.0  3.0
    
    0 讨论(0)
  • 2021-01-02 20:30
    from itertools import product
    
    # Seed the generator so the random values in A are reproducible.
    np.random.seed(0)
    A = np.random.rand(2, 3, 4)
    # Extent of each of the three axes.
    x, y, z = A.shape
    # product() enumerates every (i, j, k) index triple; zip(*...) transposes
    # those triples into one tuple of indices per axis.
    x_, y_, z_ = zip(*product(range(x), range(y), range(z)))
    # Column 0 holds the flattened values; x, y and z are added as extra columns.
    df = pd.DataFrame(A.flatten()).assign(x=x_, y=y_, z=z_)
    >>> df
    
               0  x  y  z
    0   0.548814  0  0  0
    1   0.715189  0  0  1
    2   0.602763  0  0  2
    3   0.544883  0  0  3
    4   0.423655  0  1  0
    5   0.645894  0  1  1
    6   0.437587  0  1  2
    7   0.891773  0  1  3
    8   0.963663  0  2  0
    9   0.383442  0  2  1
    10  0.791725  0  2  2
    11  0.528895  0  2  3
    12  0.568045  1  0  0
    13  0.925597  1  0  1
    14  0.071036  1  0  2
    15  0.087129  1  0  3
    16  0.020218  1  1  0
    17  0.832620  1  1  1
    18  0.778157  1  1  2
    19  0.870012  1  1  3
    20  0.978618  1  2  0
    21  0.799159  1  2  1
    22  0.461479  1  2  2
    23  0.780529  1  2  3
    
    0 讨论(0)
  • 2021-01-02 20:44

    You could use pd.MultiIndex.from_product:

    import numpy as np
    import pandas as pd
    import string
    
    def using_multiindex(A, columns):
        """Flatten ``A`` into a DataFrame that also lists each value's indices.

        ``columns`` supplies one name per axis of ``A``.  The result has one
        column per axis (named from ``columns``) followed by a column ``'A'``
        holding the flattened values.
        """
        # One level per axis; from_product enumerates their cartesian product.
        levels = [range(extent) for extent in A.shape]
        coords = pd.MultiIndex.from_product(levels, names=columns)
        frame = pd.DataFrame({'A': A.flatten()}, index=coords)
        # Turn the index levels into ordinary columns.
        return frame.reset_index()
    
    A = np.array([[[ 0.43793885,  0.40078139,  0.48078691,  0.05334248],
        [ 0.76331509,  0.82514441,  0.86169078,  0.86496111],
        [ 0.75572665,  0.80860943,  0.79995337,  0.63123724]],
    
       [[ 0.20648946,  0.57042315,  0.71777265,  0.34155005],
        [ 0.30843717,  0.39381407,  0.12623462,  0.93481552],
        [ 0.3267771 ,  0.64097038,  0.30405215,  0.57726629]]])
    
    df = using_multiindex(A, list('ZYX'))
    

    yields

        Z  Y  X         A
    0   0  0  0  0.437939
    1   0  0  1  0.400781
    2   0  0  2  0.480787
    3   0  0  3  0.053342
    ...
    21  1  2  1  0.640970
    22  1  2  2  0.304052
    23  1  2  3  0.577266
    

    Or if performance is a top priority, consider using senderle's cartesian_product. (See the code, below.)


    Here is a benchmark for A with shape (100, 100, 100):

    In [321]: %timeit  using_cartesian_product(A, columns)
    100 loops, best of 3: 13.8 ms per loop
    
    In [318]: %timeit using_multiindex(A, columns)
    10 loops, best of 3: 35.6 ms per loop
    
    In [320]: %timeit indices_merged_arr_generic(A, columns)
    10 loops, best of 3: 29.1 ms per loop
    
    In [319]: %timeit using_product(A)
    1 loop, best of 3: 461 ms per loop
    

    This is the setup I used for the benchmark:

    import numpy as np
    import pandas as pd
    import functools
    import itertools as IT
    import string
    product = IT.product
    
    def cartesian_product_broadcasted(*arrays):
        """
        Return the cartesian product of the given 1-D arrays as a 2-D array.

        Each row is one combination and column ``k`` takes its values from
        ``arrays[k]``; the last array varies fastest.  The result dtype is
        ``np.result_type(*arrays)``.

        http://stackoverflow.com/a/11146645/190597 (senderle)
        """
        open_grids = np.ix_(*arrays)
        full_grids = np.broadcast_arrays(*open_grids)
        common_dtype = np.result_type(*arrays)
        n_rows, n_cols = full_grids[0].size, len(full_grids)
        # Fill one contiguous row per input array, then transpose so that
        # every input ends up as a column.
        table = np.empty((n_cols, n_rows), dtype=common_dtype)
        for row, full_grid in zip(table, full_grids):
            row[:] = full_grid.reshape(-1)
        return table.T
    
    def using_cartesian_product(A, columns):
        """Flatten ``A`` into a DataFrame using ``cartesian_product_broadcasted``.

        The index combinations for all axes of ``A`` become the leading
        columns (named by ``columns``); the flattened values are appended as
        column ``'A'``.
        """
        axis_ranges = [np.arange(extent, dtype='int') for extent in A.shape]
        frame = pd.DataFrame(
            cartesian_product_broadcasted(*axis_ranges), columns=columns
        )
        frame['A'] = A.flatten()
        return frame
    
    def using_multiindex(A, columns):
        """Flatten ``A`` into a DataFrame via ``pd.MultiIndex.from_product``.

        ``columns`` names the axes of ``A``.  The returned frame has those
        index columns first and the flattened values in column ``'A'``.
        """
        axis_levels = [range(extent) for extent in A.shape]
        multi = pd.MultiIndex.from_product(axis_levels, names=columns)
        values = pd.DataFrame({'A': A.flatten()}, index=multi)
        # reset_index moves the index levels out into regular columns.
        return values.reset_index()
    
    def indices_merged_arr_generic(arr, columns):
        """Flatten ``arr`` into a DataFrame of index columns plus a value column.

        ``columns`` gives one name per axis of ``arr``.  The frame has those
        index columns first, followed by column ``'A'`` with the flattened
        values, which is the same layout the other benchmark functions
        produce.  All columns share ``arr.dtype`` because they come from a
        single array.

        The original labelled the frame ``['A'] + columns`` although the
        values are stored in the last column, so ``'A'`` ended up naming the
        first index column; the labels now follow the data layout.
        """
        n = arr.ndim
        # One open (broadcastable) index grid per axis.
        grid = np.ogrid[tuple(map(slice, arr.shape))]
        out = np.empty(arr.shape + (n+1,), dtype=arr.dtype)
        for i in range(n):
            out[...,i] = grid[i]
        out[...,-1] = arr
        out = out.reshape(-1, n+1)
        # list(...) lets callers pass any sequence of names, not only a list.
        df = pd.DataFrame(out, columns=list(columns)+['A'])
        return df
    
    def using_product(A, columns=('x', 'y', 'z')):
        """Flatten ``A`` into a DataFrame using ``itertools.product`` for indices.

        Column ``0`` holds the flattened values; one extra column per axis
        (named from ``columns``) holds each value's index along that axis.

        Parameters
        ----------
        A : numpy.ndarray
            Array of any rank.
        columns : sequence of str, optional
            One name per axis of ``A``.  The default ``('x', 'y', 'z')``
            keeps the original 3-D behaviour.

        Raises
        ------
        ValueError
            If ``len(columns)`` does not match ``A.ndim``.
        """
        if len(columns) != A.ndim:
            raise ValueError(
                "expected %d column names, got %d" % (A.ndim, len(columns))
            )
        # product() yields every index tuple with the last axis varying
        # fastest; zip(*...) transposes them into one tuple per axis.
        coords = list(zip(*product(*map(range, A.shape))))
        if not coords:
            # An empty array has no index tuples to transpose; still emit
            # one (empty) column per axis.
            coords = [()] * A.ndim
        return pd.DataFrame(A.flatten()).assign(**dict(zip(columns, coords)))
    
    # Benchmark input: a 100 x 100 x 100 array of random floats.
    A = np.random.random((100,100,100))
    shape = A.shape
    # Take the last len(shape) uppercase letters and reverse them, which
    # gives ['Z', 'Y', 'X'] for this 3-D array.
    columns = list(string.ascii_uppercase[-len(shape):][::-1])
    
    0 讨论(0)
  • 2021-01-02 20:50
    def ndarray_to_indexed_2d(data):
        """Unroll the leading axes of ``data`` into rows prefixed by their indices.

        For ``data`` of shape ``(d0, ..., dk, m)`` the result has
        ``d0 * ... * dk`` rows.  Each row starts with the ``k + 1`` indices of
        the leading axes, followed by the ``m`` values along the last axis.
        A 1-D input has no leading axes and comes back as a single row with
        no index columns.  The result dtype is whatever ``np.hstack`` picks
        for the integer indices combined with ``data``.
        """
        lead_shape = data.shape[:-1]
        # np.prod replaces np.product, which NumPy 2.0 removed.  The integer
        # dtype matters because the product of an empty shape would otherwise
        # be the float 1.0.
        n_rows = int(np.prod(lead_shape, dtype=np.intp))
        if lead_shape:
            idx = np.column_stack(np.unravel_index(np.arange(n_rows), lead_shape))
        else:
            # No leading axes: there is nothing to stack, so use zero index columns.
            idx = np.empty((n_rows, 0), dtype=np.intp)
        data2d = np.hstack((idx, data.reshape(n_rows, data.shape[-1])))
        return data2d
    
    0 讨论(0)
提交回复
热议问题