Fast string array - Cython

后端 未结 2 689
轮回少年
轮回少年 2020-12-03 03:42

Given the following hypothetical code:

cdef extern from \"string.h\":
    int strcmp(char* str1, char* str2)

def foo(list_str1, list_str2):
    cdef unsigned i         


        
相关标签:
2条回答
  • 2020-12-03 04:30

    Try the following code. The to_cstring_array function in it is what you want.

    from libc.stdlib cimport malloc, free
    from libc.string cimport strcmp
    from cpython.string cimport PyString_AsString
    
    cdef char ** to_cstring_array(list_str) except NULL:
        """Return a malloc'ed array of ``char *``, one per item of *list_str*.

        Each entry is the pointer returned by ``PyString_AsString`` for the
        corresponding item, so the entries are borrowed from the string
        objects; only the array itself is allocated here.  The caller must
        release the array with ``free()``.

        Raises MemoryError if the array cannot be allocated, and propagates
        any exception raised while indexing or converting an item.
        """
        cdef Py_ssize_t i
        cdef Py_ssize_t n = len(list_str)
        # malloc(0) may legitimately return NULL; always request at least one
        # slot so that a NULL result unambiguously means "out of memory".
        cdef Py_ssize_t slots = n if n > 0 else 1
        cdef char **ret = <char **>malloc(slots * sizeof(char *))
        if ret is NULL:
            raise MemoryError()
        try:
            for i in range(n):
                ret[i] = PyString_AsString(list_str[i])
        except BaseException:
            # Do not leak the partially filled array when an item is rejected.
            free(ret)
            raise
        return ret
    
    def foo(list_str1, list_str2):
        """Print every (i, j) with i != j whose strings compare equal via strcmp.

        For each match, prints ``i``, ``j`` and ``list_str1[i]``.  The two
        temporary C pointer arrays are always released, even if a conversion
        or the printing raises.
        """
        cdef unsigned int i, j
        cdef unsigned int n1 = len(list_str1)
        cdef unsigned int n2 = len(list_str2)
        # Start as NULL so the cleanup below is safe even when the first
        # conversion fails (free(NULL) is a no-op).
        cdef char **c_arr1 = NULL
        cdef char **c_arr2 = NULL

        try:
            c_arr1 = to_cstring_array(list_str1)
            c_arr2 = to_cstring_array(list_str2)
            for i in xrange(n1):
                for j in xrange(n2):
                    if i != j and strcmp(c_arr1[i], c_arr2[j]) == 0:
                        print i, j, list_str1[i]
        finally:
            free(c_arr1)
            free(c_arr2)
    
    # Demo call: the only equal pair with i != j is list_str1[1] / list_str2[0],
    # so this prints "1 0 python".
    foo(['hello', 'python', 'world'], ['python', 'rules'])
    
    0 讨论(0)
  • 2020-12-03 04:38

    If you're on Python 3, here's an update to @falsetru's answer (untested on Python 2).

    cdef extern from "Python.h":
        # PyUnicode_AsUTF8 returns NULL with a Python exception set on failure
        # (e.g. when the argument is not a str).  Declaring `except NULL` makes
        # Cython check the result and propagate that exception instead of
        # handing a NULL pointer to the caller.
        char* PyUnicode_AsUTF8(object unicode) except NULL
    
    from libc.stdlib cimport malloc, free
    from libc.string cimport strcmp
    
    cdef char ** to_cstring_array(list_str) except NULL:
        """Return a malloc'ed array of ``char *``, one per item of *list_str*.

        Each entry is the pointer returned by ``PyUnicode_AsUTF8`` for the
        corresponding item, so the entries are borrowed from the unicode
        objects and stay valid only while those objects are alive.  Only the
        array itself is allocated here; the caller must release it with
        ``free()``.

        Raises MemoryError if the array cannot be allocated, and propagates
        any exception raised while indexing or converting an item.
        """
        cdef Py_ssize_t i
        cdef Py_ssize_t n = len(list_str)
        # malloc(0) may legitimately return NULL; always request at least one
        # slot so that a NULL result unambiguously means "out of memory".
        cdef Py_ssize_t slots = n if n > 0 else 1
        cdef char **ret = <char **>malloc(slots * sizeof(char *))
        if ret is NULL:
            raise MemoryError()
        try:
            for i in range(n):
                ret[i] = PyUnicode_AsUTF8(list_str[i])
                if ret[i] is NULL:
                    # Conversion failed and CPython has already set the
                    # exception; returning NULL from an `except NULL`
                    # function lets the caller propagate it unchanged.
                    free(ret)
                    return NULL
        except BaseException:
            # Do not leak the partially filled array when an item is rejected.
            free(ret)
            raise
        return ret
    
    def foo(list_str1, list_str2):
        """Print every (i, j) with i != j whose strings compare equal via strcmp.

        For each match, prints ``i``, ``j`` and ``list_str1[i]``.  The two
        temporary C pointer arrays are always released, even if a conversion
        or the printing raises.
        """
        cdef unsigned int i, j
        cdef unsigned int n1 = len(list_str1)
        cdef unsigned int n2 = len(list_str2)
        # Start as NULL so the cleanup below is safe even when the first
        # conversion fails (free(NULL) is a no-op).
        cdef char **c_arr1 = NULL
        cdef char **c_arr2 = NULL

        try:
            c_arr1 = to_cstring_array(list_str1)
            c_arr2 = to_cstring_array(list_str2)
            for i in range(n1):
                for j in range(n2):
                    if i != j and strcmp(c_arr1[i], c_arr2[j]) == 0:
                        print(i, j, list_str1[i])
        finally:
            free(c_arr1)
            free(c_arr2)
    
    # Demo call: the only equal pair with i != j is list_str1[1] / list_str2[0],
    # so this prints "1 0 python".
    foo(['hello', 'python', 'world'], ['python', 'rules'])
    

    Warning: The pointer returned by PyUnicode_AsUTF8 is cached in the parent unicode object, which has two consequences:

    1. this pointer is only valid as long as the parent unicode-object is alive. Accessing it afterwards leads to undefined behavior (e.g. possible segmentation fault).
    2. The caller of PyUnicode_AsUTF8 isn't responsible for freeing the memory.
    0 讨论(0)
提交回复
热议问题