This is the naive prefix sum (scan) implementation:
__global__ void scan(float *g_odata, float *g_idata, int n) { extern __shared__ float temp[]; // allocate