For the record, this is homework, so please help as little or as much as you see fit with that in mind. We are using constant memory to store a "mask matrix" that will be used to perform a convolution.
Below is a "minimum-sized" example showing the use of __constant__
symbols. You do not need to pass a pointer to the constant symbol to the
__global__ function; the kernel refers to the symbol directly by name.
#include
#include
#include
// Scalar stored in constant memory. Device code reads it by name; the host
// fills it in with cudaMemcpyToSymbol before the kernel is launched.
__constant__ float test_const;

// Copies the current value of test_const into d_test_array, one element per
// thread.
//
// d_test_array: output buffer; the thread with index threadIdx.x writes
//               element threadIdx.x.
//
// Only threadIdx.x is used for indexing and no bounds check is performed, so
// the buffer must hold at least blockDim.x floats.
__global__ void test_kernel(float* d_test_array) {
    const unsigned int slot = threadIdx.x;
    const float value = test_const;
    d_test_array[slot] = value;
}
#include
int main(int argc, char **argv) {
float test = 3.f;
int N = 16;
float* test_array = (float*)malloc(N*sizeof(float));
float* d_test_array;
cudaMalloc((void**)&d_test_array,N*sizeof(float));
cudaMemcpyToSymbol(test_const, &test, sizeof(float));
test_kernel<<<1,N>>>(d_test_array);
cudaMemcpy(test_array,d_test_array,N*sizeof(float),cudaMemcpyDeviceToHost);
for (int i=0; i