If I use this code, will it be executed on device 0 or device 1?
cudaSetDevice(0); // switch to device 0
cudaStream_t stream1;
cudaStreamCreate(&stre
If I'm correctly reading the following example from the CUDA webinar on using multiple GPUs, it is an error to launch a kernel into a stream that does not belong to the currently selected device.
Example 2
cudaStream_t streamA, streamB;
cudaEvent_t eventA, eventB;
cudaSetDevice(0);
cudaStreamCreate(&streamA); // streamA and eventA belong to device-0
cudaEventCreate(&eventA);
cudaSetDevice(1);
cudaStreamCreate(&streamB); // streamB and eventB belong to device-1
cudaEventCreate (&eventB);
kernel<<<..., streamA>>>(...);
cudaEventRecord(eventB, streamB);
cudaEventSynchronize( eventB);
ERROR: