CUDA has the API call
cudaError_t cudaMemset (void *devPtr, int value, size_t count)
which fills a buffer with a single-byte value. I want to fill a buffer with a multi-byte value instead (for example a 16-bit or 32-bit integer, or a float).
Based on talonmies' answer, it seems a reasonable (though ugly) approach would be:
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <cuda.h>
#include <cuda_runtime.h>
// Generic declaration of the typed memset. It is declared but not defined
// here; only the non-template overloads for specific element types provide
// an implementation.
//   devPtr - device pointer to the buffer to fill
//   value  - element value to write
//   count  - number of elements to set
template <typename T>
inline cudaError_t cudaMemsetTyped(void *devPtr, T value, size_t count);
// Defines the signed and unsigned integer overloads of cudaMemsetTyped for one
// element width (8, 16 or 32 bits). Each overload forwards to the driver-API
// call cuMemsetD<width>.
//   devPtr - device pointer to the start of the buffer
//   value  - element value to write
//   count  - number of ELEMENTS (not bytes) to set, as cuMemsetD<width> expects
// The signed overload converts the value to the same-width unsigned type,
// because that is what the driver call takes; the bit pattern is preserved.
// Returns the driver's CUresult converted to cudaError_t.
// NOTE(review): the conversion is a plain cast and assumes both enums use the
// same numeric values for the same error - confirm for the toolkit in use.
// NOTE(review): the driver call presumably needs a current CUDA context on the
// calling thread - confirm one exists before the first call.
#define INSTANTIATE_CUDA_MEMSET_TYPED(_nbits) \
inline cudaError_t cudaMemsetTyped(void *devPtr, int ## _nbits ## _t value, size_t count) { \
    return static_cast<cudaError_t>( \
        cuMemsetD ## _nbits(reinterpret_cast<CUdeviceptr>(devPtr), \
                            static_cast<uint ## _nbits ## _t>(value), count)); \
} \
inline cudaError_t cudaMemsetTyped(void *devPtr, uint ## _nbits ## _t value, size_t count) { \
    return static_cast<cudaError_t>( \
        cuMemsetD ## _nbits(reinterpret_cast<CUdeviceptr>(devPtr), value, count)); \
}
INSTANTIATE_CUDA_MEMSET_TYPED(8)
INSTANTIATE_CUDA_MEMSET_TYPED(16)
INSTANTIATE_CUDA_MEMSET_TYPED(32)
#undef INSTANTIATE_CUDA_MEMSET_TYPED
// Fills a device buffer with a 32-bit float value via the driver-API call
// cuMemsetD32.
//   devPtr - device pointer to the start of the buffer
//   value  - float value to write into every element
//   count  - number of float ELEMENTS (not bytes) to set
// Returns the driver's CUresult converted to cudaError_t.
// NOTE(review): the conversion is a plain cast and assumes both enums use the
// same numeric values for the same error - confirm for the toolkit in use.
inline cudaError_t cudaMemsetTyped(void *devPtr, float value, size_t count) {
    static_assert(sizeof(float) == sizeof(unsigned int),
                  "cuMemsetD32 requires a 32-bit float representation");
    // cuMemsetD32 takes the fill pattern as an unsigned int. Copy the float's
    // bytes rather than converting the value, so the exact bit pattern is
    // written (a reinterpret_cast on a float value is ill-formed).
    unsigned int bits = 0;
    std::memcpy(&bits, &value, sizeof(bits));
    return static_cast<cudaError_t>(
        cuMemsetD32(reinterpret_cast<CUdeviceptr>(devPtr), bits, count));
}
(There is no cuMemsetD64, it seems, so there is no double overload either.)