Python: reading 12-bit binary files

后端 未结 4 2055
情话喂你
情话喂你 2020-12-03 04:05

I am trying to read 12-bit binary files containing images (a video) using Python 3.

To read a similar file but encoded in 16 bits, the following works very well:

4条回答
  •  有刺的猬
    2020-12-03 04:40

    One way to speed up the numpy-vectorized methods is to avoid costly memory allocations for temporary data, use the cache more efficiently, and make use of parallelization. This can quite easily be done using Numba, Cython or C. Please note that parallelization is not always beneficial: if the array you want to convert is too small, use the single-threaded version (parallel=False).

    Numba version of Cyril Gaudefroy answer with temporary memory allocation

    import numba as nb
    import numpy as np
    @nb.njit(nb.uint16[::1](nb.uint8[::1]),fastmath=True,parallel=True)
    def nb_read_uint12(data_chunk):
        """Unpack packed 12-bit values from a byte array into a new uint16 array.

        Every group of three input bytes (b0, b1, b2) yields two output values:
        ``(b0 << 4) | (b1 >> 4)`` and ``((b1 & 0x0F) << 8) | b2``.

        Args:
            data_chunk: contiguous 1D uint8 array whose length is a multiple
                of 3, e.g. ``np.frombuffer(raw_bytes, dtype=np.uint8)``.

        Returns:
            A freshly allocated uint16 array holding two values for every
            three input bytes.

        Raises:
            AssertionError: if the input length is not a multiple of 3.
        """
        n_bytes = data_chunk.shape[0]

        # The packing only makes sense on whole 3-byte groups.
        assert n_bytes % 3 == 0

        n_triplets = n_bytes // 3
        out = np.empty(n_triplets * 2, dtype=np.uint16)

        # Each iteration touches its own input/output slots, so the loop
        # is written with prange.
        for k in nb.prange(n_triplets):
            src = k * 3
            dst = k * 2

            # Widen to 16 bits before shifting so no bits are lost.
            first = np.uint16(data_chunk[src])
            middle = np.uint16(data_chunk[src + 1])
            last = np.uint16(data_chunk[src + 2])

            # The bit fields never overlap, so OR gives the same result as
            # adding them.
            out[dst] = (first << 4) | (middle >> 4)
            out[dst + 1] = ((middle & 0x0F) << 8) | last

        return out
    

    Numba version of Cyril Gaudefroy answer with memory preallocation

    If you apply this function multiple times to data chunks of similar size, you can preallocate the output array once and reuse it.

    @nb.njit(nb.uint16[::1](nb.uint8[::1],nb.uint16[::1]),fastmath=True,parallel=True,cache=True)
    def nb_read_uint12_prealloc(data_chunk,out):
        """Unpack packed 12-bit values from a byte array into a caller-supplied array.

        Every group of three input bytes (b0, b1, b2) yields two output values:
        ``(b0 << 4) | (b1 >> 4)`` and ``((b1 & 0x0F) << 8) | b2``.

        Args:
            data_chunk: contiguous 1D uint8 array whose length is a multiple
                of 3, e.g. ``np.frombuffer(raw_bytes, dtype=np.uint8)``.
            out: contiguous 1D uint16 array of length
                ``data_chunk.shape[0] // 3 * 2``; it is overwritten in place.

        Returns:
            The same ``out`` array, filled with the unpacked values.

        Raises:
            AssertionError: if the input length is not a multiple of 3 or
                ``out`` does not have the expected length.
        """
        n_bytes = data_chunk.shape[0]
        n_triplets = n_bytes // 3

        # The packing only makes sense on whole 3-byte groups, and the
        # output buffer must hold exactly two values per group.
        assert n_bytes % 3 == 0
        assert out.shape[0] == n_triplets * 2

        # Each iteration touches its own input/output slots, so the loop
        # is written with prange.
        for k in nb.prange(n_triplets):
            src = k * 3
            dst = k * 2

            # Widen to 16 bits before shifting so no bits are lost.
            first = np.uint16(data_chunk[src])
            middle = np.uint16(data_chunk[src + 1])
            last = np.uint16(data_chunk[src + 2])

            # The bit fields never overlap, so OR gives the same result as
            # adding them.
            out[dst] = (first << 4) | (middle >> 4)
            out[dst + 1] = ((middle & 0x0F) << 8) | last

        return out
    

    Numba version of DGrifffith answer with temporary memory allocation

    @nb.njit(nb.uint16[::1](nb.uint8[::1]),fastmath=True,parallel=True,cache=True)
    def read_uint12_var_2(data_chunk):
        """Unpack packed 12-bit values (second byte layout) into a new uint16 array.

        Every group of three input bytes (b0, b1, b2) yields two output values:
        ``(b0 << 4) | (b1 >> 4)`` and ``(b2 << 4) | (b1 & 0x0F)``.

        Args:
            data_chunk: contiguous 1D uint8 array whose length is a multiple
                of 3, e.g. ``np.frombuffer(raw_bytes, dtype=np.uint8)``.

        Returns:
            A freshly allocated uint16 array holding two values for every
            three input bytes.

        Raises:
            AssertionError: if the input length is not a multiple of 3.
        """
        n_bytes = data_chunk.shape[0]

        # The packing only makes sense on whole 3-byte groups.
        assert n_bytes % 3 == 0

        n_triplets = n_bytes // 3
        out = np.empty(n_triplets * 2, dtype=np.uint16)

        # Each iteration touches its own input/output slots, so the loop
        # is written with prange.
        for k in nb.prange(n_triplets):
            src = k * 3
            dst = k * 2

            # Widen to 16 bits before shifting so no bits are lost.
            first = np.uint16(data_chunk[src])
            middle = np.uint16(data_chunk[src + 1])
            last = np.uint16(data_chunk[src + 2])

            # In this layout the low nibble of the middle byte supplies the
            # low 4 bits of the second value. The bit fields never overlap,
            # so OR gives the same result as adding them.
            out[dst] = (first << 4) | (middle >> 4)
            out[dst + 1] = (last << 4) | (middle & 15)

        return out
    

    Numba version of DGrifffith answer with memory preallocation

    @nb.njit(nb.uint16[::1](nb.uint8[::1],nb.uint16[::1]),fastmath=True,parallel=True,cache=True)
    def read_uint12_var_2_prealloc(data_chunk,out):
        """Unpack packed 12-bit values (second byte layout) into a caller-supplied array.

        Every group of three input bytes (b0, b1, b2) yields two output values:
        ``(b0 << 4) | (b1 >> 4)`` and ``(b2 << 4) | (b1 & 0x0F)``.

        Args:
            data_chunk: contiguous 1D uint8 array whose length is a multiple
                of 3, e.g. ``np.frombuffer(raw_bytes, dtype=np.uint8)``.
            out: contiguous 1D uint16 array of length
                ``data_chunk.shape[0] // 3 * 2``; it is overwritten in place.

        Returns:
            The same ``out`` array, filled with the unpacked values.

        Raises:
            AssertionError: if the input length is not a multiple of 3 or
                ``out`` does not have the expected length.
        """
        n_bytes = data_chunk.shape[0]
        n_triplets = n_bytes // 3

        # The packing only makes sense on whole 3-byte groups, and the
        # output buffer must hold exactly two values per group.
        assert n_bytes % 3 == 0
        assert out.shape[0] == n_triplets * 2

        # Each iteration touches its own input/output slots, so the loop
        # is written with prange.
        for k in nb.prange(n_triplets):
            src = k * 3
            dst = k * 2

            # Widen to 16 bits before shifting so no bits are lost.
            first = np.uint16(data_chunk[src])
            middle = np.uint16(data_chunk[src + 1])
            last = np.uint16(data_chunk[src + 2])

            # In this layout the low nibble of the middle byte supplies the
            # low 4 bits of the second value. The bit fields never overlap,
            # so OR gives the same result as adding them.
            out[dst] = (first << 4) | (middle >> 4)
            out[dst + 1] = (last << 4) | (middle & 15)

        return out
    

    Timings

    num_Frames=10
    data_chunk=np.random.randint(low=0,high=255,size=np.int(640*256*1.5*num_Frames),dtype=np.uint8)
    
    %timeit read_uint12_gaud(data_chunk)
    #11.3 ms ± 53.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    #435 MB/s
    
    %timeit nb_read_uint12(data_chunk)
    #939 µs ± 24.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    #5235 MB/s
    
    out=np.empty(data_chunk.shape[0]//3*2,dtype=np.uint16)
    %timeit nb_read_uint12_prealloc(data_chunk,out)
    #407 µs ± 5.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    #11759 MB/s
    
    %timeit read_uint12_griff(data_chunk)
    #10.2 ms ± 55.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    #491 MB/s
    
    %timeit read_uint12_var_2(data_chunk)
    #928 µs ± 16.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    #5297 MB/s
    %timeit read_uint12_var_2_prealloc(data_chunk,out)
    #403 µs ± 13.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    #12227 MB/s
    

提交回复
热议问题