问题
I am working on integrating CUSP solvers into an existing FORTRAN code. As a first step, I am simply trying to pass in a pair of integer arrays and a float (real*4 in FORTRAN) from FORTRAN which will be used to construct and then print a COO format CUSP matrix.
So far, I have been able to follow this thread and got everything to compile and link: Unresolved references using IFORT with nvcc and CUSP
Unfortunately, the program is apparently sending garbage into the CUSP matrix and ends up crashing with the following error:
$./fort_cusp_test
testing 1 2 3
sparse matrix <1339222572, 1339222572> with 1339222568 entries
libc++abi.dylib: terminating with uncaught exception of type thrust::system::system_error: invalid argument
Program received signal SIGABRT: Process abort signal.
Backtrace for this error:
#0 0x10ff86ff6
#1 0x10ff86593
#2 0x7fff8593ff19
Abort trap: 6
The code for the cuda and fortran sources are as follows:
cusp_runner.cu
#include <stdio.h>
#include <cusp/coo_matrix.h>
#include <iostream>
#include <cusp/krylov/cg.h>
#include <cusp/print.h>
#if defined(__cplusplus)
extern "C" {
#endif
void test_coo_mat_print_(int * row_i, int * col_j, float * val_v, int n, int nnz ) {
//wrap raw input pointers with thrust::device_ptr
thrust::device_ptr<int> wrapped_device_I(row_i);
thrust::device_ptr<int> wrapped_device_J(col_j);
thrust::device_ptr<float> wrapped_device_V(val_v);
//use array1d_view to wrap individual arrays
typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceIndexArrayView;
typedef typename cusp::array1d_view< thrust::device_ptr<float> > DeviceValueArrayView;
DeviceIndexArrayView row_indices(wrapped_device_I, wrapped_device_I + n);
DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + nnz);
DeviceValueArrayView values(wrapped_device_V, wrapped_device_V + nnz);
//combine array1d_views into coo_matrix_view
typedef cusp::coo_matrix_view<DeviceIndexArrayView,DeviceIndexArrayView,DeviceValueArrayView> DeviceView;
//construct coo_matrix_view from array1d_views
DeviceView A(n,n,nnz,row_indices,column_indices,values);
cusp::print(A);
}
#if defined(__cplusplus)
}
#endif
fort_cusp_test.f90
program fort_cuda_test
implicit none
interface
subroutine test_coo_mat_print_(row_i,col_j,val_v,n,nnz) bind(C)
use, intrinsic :: ISO_C_BINDING, ONLY: C_INT,C_FLOAT
implicit none
integer(C_INT) :: n, nnz, row_i(:), col_j(:)
real(C_FLOAT) :: val_v(:)
end subroutine test_coo_mat_print_
end interface
integer*4 n
integer*4 nnz
integer*4, target :: rowI(9),colJ(9)
real*4, target :: valV(9)
integer*4, pointer :: row_i(:)
integer*4, pointer :: col_j(:)
real*4, pointer :: val_v(:)
n = 3
nnz = 9
rowI = (/ 1, 1, 1, 2, 2, 2, 3, 3, 3/)
colJ = (/ 1, 2, 3, 1, 2, 3, 1, 2, 3/)
valV = (/ 1, 2, 3, 4, 5, 6, 7, 8, 9/)
row_i => rowI
col_j => colJ
val_v => valV
write(*,*) "testing 1 2 3"
call test_coo_mat_print_(row_i,col_j,val_v,n,nnz)
end program fort_cuda_test
If you want to try it yourself, here is my (rather inelegant) makefile:
Test:
nvcc -Xcompiler="-fPIC" -shared cusp_runner.cu -o cusp_runner.so -I/Developer/NVIDIA/CUDA-6.5/include/cusp
gfortran -c fort_cusp_test.f90
gfortran fort_cusp_test.o cusp_runner.so -L/Developer/NVIDIA/CUDA-6.5/lib -lcudart -o fort_cusp_test
clean:
rm *.o *.so
The library paths will need to be changed as appropriate of course.
Can anyone point me in the right direction for how to properly pass in the needed arrays from the fortran code?
with the interface block removed and addition of a print statement at the start of the C function I can see that the arrays are being passed correctly, but n and nnz are causing problems. I get the following output:
$ ./fort_cusp_test
testing 1 2 3
n: 1509677596, nnz: 1509677592
i, row_i, col_j, val_v
0, 1, 1, 1.0000e+00
1, 1, 2, 2.0000e+00
2, 1, 3, 3.0000e+00
3, 2, 1, 4.0000e+00
4, 2, 2, 5.0000e+00
5, 2, 3, 6.0000e+00
6, 3, 1, 7.0000e+00
7, 3, 2, 8.0000e+00
8, 3, 3, 9.0000e+00
9, 0, 32727, 0.0000e+00
...
etc
...
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
#0 0x105ce7ff6
#1 0x105ce7593
#2 0x7fff8593ff19
#3 0x105c780a2
#4 0x105c42dbc
#5 0x105c42df4
Segmentation fault: 11
fort_cusp_test
interface
subroutine test_coo_mat_print_(row_i,col_j,val_v,n,nnz) bind(C)
use, intrinsic :: ISO_C_BINDING, ONLY: C_INT,C_FLOAT
implicit none
integer(C_INT),value :: n, nnz
integer(C_INT) :: row_i(:), col_j(:)
real(C_FLOAT) :: val_v(:)
end subroutine test_coo_mat_print_
end interface
integer*4 n
integer*4 nnz
integer*4, target :: rowI(9),colJ(9)
real*4, target :: valV(9)
integer*4, pointer :: row_i(:)
integer*4, pointer :: col_j(:)
real*4, pointer :: val_v(:)
n = 3
nnz = 9
rowI = (/ 1, 1, 1, 2, 2, 2, 3, 3, 3/)
colJ = (/ 1, 2, 3, 1, 2, 3, 1, 2, 3/)
valV = (/ 1, 2, 3, 4, 5, 6, 7, 8, 9/)
row_i => rowI
col_j => colJ
val_v => valV
write(*,*) "testing 1 2 3"
call test_coo_mat_print_(rowI,colJ,valV,n,nnz)
end program fort_cuda_test
cusp_runner.cu
#include <stdio.h>
#include <cusp/coo_matrix.h>
#include <iostream>
// #include <cusp/krylov/cg.h>
#include <cusp/print.h>
#if defined(__cplusplus)
extern "C" {
#endif
void test_coo_mat_print_(int * row_i, int * col_j, float * val_v, int n, int nnz ) {
printf("n: %d, nnz: %d\n",n,nnz);
printf("%6s, %6s, %6s, %12s \n","i","row_i","col_j","val_v");
for(int i=0;i<n;i++) {
printf("%6d, %6d, %6d, %12.4e\n",i,row_i[i],col_j[i],val_v[i]);
}
if ( false ) {
//wrap raw input pointers with thrust::device_ptr
thrust::device_ptr<int> wrapped_device_I(row_i);
thrust::device_ptr<int> wrapped_device_J(col_j);
thrust::device_ptr<float> wrapped_device_V(val_v);
//use array1d_view to wrap individual arrays
typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceIndexArrayView;
typedef typename cusp::array1d_view< thrust::device_ptr<float> > DeviceValueArrayView;
DeviceIndexArrayView row_indices(wrapped_device_I, wrapped_device_I + n);
DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + nnz);
DeviceValueArrayView values(wrapped_device_V, wrapped_device_V + nnz);
//combine array1d_views into coo_matrix_view
typedef cusp::coo_matrix_view<DeviceIndexArrayView,DeviceIndexArrayView,DeviceValueArrayView> DeviceView;
//construct coo_matrix_view from array1d_views
DeviceView A(n,n,nnz,row_indices,column_indices,values);
cusp::print(A); }
}
#if defined(__cplusplus)
}
#endif
回答1:
There are two methods for passing arguments from Fortran to C routines: The first is to use the interface block (a new approach in modern Fortran), and the second is not to use the interface block (an old approach valid even for Fortran77).
First, the following is about the first method for using the interface block. Because the C routine expects to receive C pointers (row_i, col_j, and val_v), we need to pass the address for those variables from the Fortran side. To do so, we have to use asterisk (*) rather than colon(:) in the interface block, as shown below. (If we use colon, then this tells the Fortran compiler to send the address of Fortran pointer objects [1], which is not the desired behavior.) Also, since n and nnz in the C routine are declared as values (not pointers), the interface block needs to have the VALUE attribute for these variables, so that the Fortran compiler sends the value of n and nnz rather than their addresses. To summarize, in this first approach, the C and Fortran routines look like this:
Fortran routine:
...
interface
subroutine test_coo_mat_print_(row_i,col_j,val_v,n,nnz) bind(C)
use, intrinsic :: ISO_C_BINDING, ONLY: C_INT,C_FLOAT
implicit none
integer(C_INT) :: row_i(*), col_j(*)
real(C_FLOAT) :: val_v(*)
integer(C_INT), value :: n, nnz !! see note [2] below also
end subroutine test_coo_mat_print_
end interface
...
call test_coo_mat_print_( rowI, colJ, valV, n, nnz )
C routine:
void test_coo_mat_print_ (int * row_i, int * col_j, float * val_v, int n, int nnz )
The following is about the second approach without the interface block. In this approach, first delete the interface block and array pointers entirely, and change the Fortran code as follows
Fortran routine:
integer rowI( 9 ), colJ( 9 ), n, nnz !! no TARGET attribute necessary
real valV( 9 )
! ...set rowI etc as above...
call test_coo_mat_print ( rowI, colJ, valV, n, nnz ) !! "_" is dropped
and the C routine as follows
void test_coo_mat_print_ ( int* row_i, int* col_j, float* val_v, int* n_, int* nnz_ )
{
int n = *n_, nnz = *nnz_;
printf( "%d %d \n", n, nnz );
for( int k = 0; k < 9; k++ ) {
printf( "%d %d %10.6f \n", row_i[ k ], col_j[ k ], val_v[ k ] );
}
// now go to thrust...
}
Note that n_ and nnz_ are declared as pointers in the C routine, because without the interface block, the Fortran compiler always sends the address of actual arguments to the C routine. Also note that in the above C routine, the contents of row_i etc are printed to make sure that the arguments are passed correctly. If the printed values are correct, then I guess the problem will be more likely in the call of thrust routines (including how to pass the size info like n and nnz).
[1] Fortran pointers declared as "real, pointer :: a(:)" actually represents something like an array view class (in C++ jargon), which is different from the actual data pointed. What is needed here is to send the address of actual data, not the address of this array-view object. Also, the asterisk in the interface block (a(*)) represents an assumed-size array, which is an old method for passing an array in Fortran. In this case the address of the first element of the array is passed, as expected.
[2] If n and nnz are declared as pointers in the C routine (as in the second approach), then this VALUE attribute should not be attached, because the C routine wants the address of actual arguments rather than their values.
来源:https://stackoverflow.com/questions/32020853/generating-cusp-coo-matrix-from-passed-fortran-arrays