Assembling a cython memoryview from numpy arrays

前端 未结 1 912
别那么骄傲
别那么骄傲 2020-12-10 20:21

I have a bunch of NumPy arrays stored as attributes of an array of Python objects. In Cython, in preparation for prange processing (which requires nogil), I wanted to create a memoryview …

相关标签:
1条回答
  • 2020-12-10 20:45

    With the below code, you would use this assignment:

    cimport stackoverflow_contrib

    # The right-hand side is parenthesised so the assignment may legally span
    # two lines. Each element's `.vector` attribute becomes one row of the
    # indirect memoryview.
    # NOTE(review): `cython.view` must already be cimported where this is used.
    cdef double[::cython.view.indirect, ::1] vectors = (
        stackoverflow_contrib.OnceIndirect([obj.vector for obj in objects]))
    

    where stackoverflow_contrib.pyx is as follows:

    from libc.stdlib cimport malloc, free
    from libc.string cimport strcmp
    
    from cython.view cimport memoryview
    from cpython cimport buffer
    
    cdef class OnceIndirect:
        """
        Export a sequence of equally laid-out buffers as a single buffer with
        one extra leading dimension that is indirect (an array of pointers).

        The row data is not copied: ``buf`` stores each row's data pointer and
        ``_objects`` keeps the per-row memoryviews referenced for as long as
        this object lives.
        """
        cdef object _objects          # list of cython memoryviews, one per row
        cdef void** buf               # malloc'ed table of row data pointers
        cdef int ndim                 # row ndim + 1
        cdef int n_rows
        cdef int buf_len              # size in bytes of the pointer table
        cdef Py_ssize_t* shape
        cdef Py_ssize_t* strides
        cdef Py_ssize_t* suboffsets
        cdef Py_ssize_t itemsize
        cdef bytes format             # None when no format string is available
        cdef int is_readonly

        def __cinit__(self, object rows, want_writable=True, want_format=True, allow_indirect=False):
            """
            Set want_writable to False if you don't want writable data. (This may
            prevent copies.)
            Set want_format to False if your input doesn't support PyBUF_FORMAT (unlikely)
            Set allow_indirect to True if you are ok with the memoryview being indirect
            in dimensions other than the first. (This may prevent copies.)

            Raises ValueError when ``rows`` is empty, MemoryError when an
            allocation fails, and AssertionError (from assert_similar) when the
            rows do not share the same layout.
            """
            cdef memoryview example_obj
            cdef memoryview obj
            cdef int dim
            cdef int i

            demand = buffer.PyBUF_INDIRECT if allow_indirect else buffer.PyBUF_STRIDES
            if want_writable:
                demand |= buffer.PyBUF_WRITABLE
            if want_format:
                demand |= buffer.PyBUF_FORMAT
            self._objects = [memoryview(row, demand) for row in rows]
            if not self._objects:
                # The layout is taken from the first row, so there must be one.
                raise ValueError("OnceIndirect needs at least one row")

            self.n_rows = len(self._objects)
            example_obj = self._objects[0]
            self.ndim = 1 + example_obj.view.ndim

            self.buf_len = sizeof(void*) * self.n_rows
            self.buf = <void**>malloc(self.buf_len)
            self.shape = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
            self.strides = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
            self.suboffsets = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
            if (self.buf == NULL or self.shape == NULL
                    or self.strides == NULL or self.suboffsets == NULL):
                # Anything that was allocated is released by __dealloc__
                # (free(NULL) is a no-op).
                raise MemoryError()

            self.itemsize = example_obj.itemsize

            # Converting a NULL char* to bytes would dereference NULL, so only
            # keep a format string when the exporter actually supplied one.
            if want_format and example_obj.view.format != NULL:
                self.format = example_obj.view.format
            else:
                self.format = None

            # Dimension 0 walks the pointer table: stride of one pointer and a
            # suboffset of 0 (follow the pointer, then add nothing).
            self.shape[0] = self.n_rows
            self.strides[0] = sizeof(void*)
            self.suboffsets[0] = 0
            # The remaining dimensions mirror the layout of the first row.
            for dim in range(1, self.ndim):
                self.shape[dim] = example_obj.view.shape[dim - 1]
                self.strides[dim] = example_obj.view.strides[dim - 1]
                if example_obj.view.suboffsets == NULL:
                    self.suboffsets[dim] = -1
                else:
                    self.suboffsets[dim] = example_obj.view.suboffsets[dim - 1]

            for i, obj in enumerate(self._objects):
                assert_similar(example_obj, obj)
                # A single read-only row makes the whole export read-only.
                self.is_readonly |= obj.view.readonly
                self.buf[i] = obj.view.buf

        def __getbuffer__(self, Py_buffer* buff, int flags):
            """
            Fill ``buff`` with the indirect view over the rows.

            Raises BufferError when the consumer did not request
            PyBUF_INDIRECT, requests a writable buffer over read-only rows, or
            requests a format string that was not captured at construction.
            """
            cdef Py_ssize_t nbytes = self.itemsize
            cdef int dim

            if (flags & buffer.PyBUF_INDIRECT) != buffer.PyBUF_INDIRECT:
                raise BufferError("don't want to copy data")
            if flags & buffer.PyBUF_WRITABLE and self.is_readonly:
                raise BufferError("couldn't provide writable, you should have demanded it earlier")
            if flags & buffer.PyBUF_FORMAT:
                if self.format is None:
                    raise BufferError("couldn't provide format, you should have demanded it earlier")
                buff.format = self.format
            else:
                buff.format = NULL

            # Per the buffer protocol, len is product(shape) * itemsize, i.e.
            # the size of the logical data, not of the pointer table.
            for dim in range(self.ndim):
                nbytes *= self.shape[dim]

            buff.buf = <void*>self.buf
            buff.obj = self
            buff.len = nbytes
            buff.readonly = self.is_readonly
            buff.ndim = self.ndim
            buff.shape = self.shape
            buff.strides = self.strides
            buff.suboffsets = self.suboffsets
            buff.itemsize = self.itemsize
            buff.internal = NULL

        def __dealloc__(self):
            # Pointers that were never allocated are still NULL; free(NULL) is safe.
            free(self.buf)
            free(self.shape)
            free(self.strides)
            free(self.suboffsets)
    
    cdef int assert_similar(memoryview left_, memoryview right_) except -1:
        """
        Assert that two memoryviews describe the same buffer layout.

        Compares ndim, then shape and strides per dimension, then suboffsets,
        then the format string. Returns 0 on success; a failed check raises
        AssertionError (reported through the ``except -1`` return).
        """
        cdef Py_buffer left = left_.view
        cdef Py_buffer right = right_.view
        cdef int i

        assert left.ndim == right.ndim, (left.ndim, right.ndim)
        for i in range(left.ndim):
            assert left.shape[i] == right.shape[i], (left_.shape, right_.shape)
            assert left.strides[i] == right.strides[i], (left_.strides, right_.strides)

        if left.suboffsets == NULL or right.suboffsets == NULL:
            # Never index a NULL suboffsets array: the two only match when
            # both are NULL.
            assert left.suboffsets == right.suboffsets, (left_.suboffsets, right_.suboffsets)
        else:
            for i in range(left.ndim):
                assert left.suboffsets[i] == right.suboffsets[i], (left_.suboffsets, right_.suboffsets)

        if left.format == NULL or right.format == NULL:
            # A NULL char* can be neither passed to strcmp nor converted to
            # bytes, so the failure message reports only which side has one.
            assert left.format == right.format, (left.format != NULL, right.format != NULL)
        else:
            assert strcmp(left.format, right.format) == 0, (bytes(left.format), bytes(right.format))
        return 0
    
    from cython cimport view
    
    cimport numpy as np
    import numpy as np
    
    def show_memoryview(object x):
        """Print the shape, strides, suboffsets and itemsize attributes of x."""
        # Single-argument call form: prints the same under Python 2 and
        # Python 3 print semantics.
        print(dict(shape=x.shape, strides=x.strides, suboffsets=x.suboffsets, itemsize=x.itemsize))
    
    def go():
        """
        Demonstrate OnceIndirect on three 2x10 float64 arrays.

        Builds an indirect view over the arrays, writes through it, checks that
        the write is visible in a slice of the view, and then nests two
        indirect views inside a further OnceIndirect.

        Returns the tuple (rows, big_view, big_view2).
        """
        row0 = np.array(range(20), dtype=np.float64).reshape(2, 10)
        row1 = np.array(range(20, 40), dtype=np.float64).reshape(2, 10)
        row2 = np.array(range(40, 60), dtype=np.float64).reshape(2, 10)
        small_view = memoryview(row0, buffer.PyBUF_STRIDES)
        show_memoryview(small_view)
        rows = [row0, row1, row2]
        big_view = OnceIndirect(rows)
        cdef double[::view.indirect, :, :] big_view2 = big_view
        show_memoryview(big_view2)
        # Print row1 before and after writing through the indirect view.
        print(row1)
        big_view2[1, 0, 1] += 200
        print(row1)
        cdef double[:, :] row1_view = big_view2[1]
        assert row1_view[0, 1] >= 200
        cdef double[::view.indirect, :, :] big_view3 = OnceIndirect([row0, row1, row0])
        # Rows that are themselves indirect require allow_indirect=True.
        cdef double[::view.indirect, ::view.indirect, :, :] dub = OnceIndirect([big_view2, big_view3], allow_indirect=True)
        show_memoryview(dub)
        # big_view2 can be indexed and sliced in Cython and Python code
        # note big_view2 is a cython memoryview object not a OnceIndirect object because it was implicitly cast to one
        # rows, big_view, big_view2 all refer to the same data!
        return (rows, big_view, big_view2)
    
    0 讨论(0)
提交回复
热议问题