I'm new to CUDA C and am trying to pass a typedef'd struct into a kernel. My method worked fine when I tried it with a struct containing only ints, but when I switch to floats, the values printed back from the kernel are wrong.
Since there doesn't appear to be any decent documentation on how to do this, I thought I'd post the final, revised code here. It turns out that the __align__ part was unnecessary as well; the actual problem was the use of %d in the printf when trying to print floats.
#include <stdlib.h>
#include <stdio.h>
/* Plain aggregate holding two single-precision values, a and b. */
typedef struct
{
    float a;
    float b;
} point;
/*
 * Writes fixed test values into one element of p per thread.
 *
 * Each thread derives a flat index i from blockIdx.x, blockDim.x and
 * threadIdx.x (1D launch) and stores 1.1f into p[i].a and 2.2f into p[i].b.
 *
 * Preconditions:
 *   - There is no bounds check: the launch must not supply more threads
 *     (gridDim.x * blockDim.x) than p has elements.
 *   - p must point to memory the device can access.
 */
__global__ void testKernel(point *p)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // The 'f' suffix keeps the constants single-precision; a bare 1.1 is a
    // double literal that is only narrowed to float on assignment.
    p[i].a = 1.1f;
    p[i].b = 2.2f;
}
/* Terminates the program with a message on stderr if a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Allocates an array of points on the host and on the device, runs testKernel
 * over the device array, copies the result back and prints every element.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
 * CUDA runtime call fails.
 */
int main(void)
{
    // set number of points and the launch configuration
    const int numPoints = 16;
    const int gpuBlockSize = 4;
    // testKernel has no bounds check, so the grid must cover the array exactly:
    // numPoints has to stay a multiple of gpuBlockSize.
    const int gpuGridSize = numPoints / gpuBlockSize;
    const size_t numBytes = (size_t)numPoints * sizeof(point);

    // allocate memory
    point *cpuPointArray = (point*)malloc(numBytes);
    if (cpuPointArray == NULL)
    {
        fprintf(stderr, "failed to allocate %lu bytes of host memory\n",
                (unsigned long)numBytes);
        return EXIT_FAILURE;
    }
    point *gpuPointArray = NULL;
    checkCuda(cudaMalloc((void**)&gpuPointArray, numBytes), "cudaMalloc");

    // launch kernel
    testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);
    // a launch returns no status itself; bad configurations show up here
    checkCuda(cudaGetLastError(), "testKernel launch");

    // retrieve the results (this blocking copy also reports errors raised
    // while the kernel was executing)
    checkCuda(cudaMemcpy(cpuPointArray, gpuPointArray, numBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");
    printf("testKernel results:\n");
    for(int i = 0; i < numPoints; ++i)
    {
        printf("point.a: %f, point.b: %f\n",cpuPointArray[i].a,cpuPointArray[i].b);
    }

    // deallocate memory
    free(cpuPointArray);
    checkCuda(cudaFree(gpuPointArray), "cudaFree");
    return 0;
}
Have a look at how it's done in the vector_types.h header that comes in your CUDA include directory. That should already give you some pointers.
However, the main problem here is the %d in your printf calls. You're trying to print floats now, not integers, so those really should use %f instead.