CUDA thrust zip_iterator tuple transform_reduce

匿名 (未验证) 提交于 2019-12-03 08:48:34

问题:

I want to compute $\lVert \mathbf{a} - \mathbf{b} \rVert$ for vectors $\mathbf{a}$ and $\mathbf{b}$, where $\lVert \mathbf{x} \rVert$ denotes the magnitude of the vector $\mathbf{x}$. Since this involves taking the square root of the sum of the squares of the differences between each corresponding component of the two vectors, it should be a highly parallelizable task. I am using CUDA and Thrust, through Cygwin, on Windows 10. Both CUDA and Thrust are in general working.

The below code compiles and runs (with nvcc), but only because I have commented out three lines toward the bottom of main, each of which I think should work but does not. func::operator()(tup t) thinks that the arguments I'm passing it are not in fact of type tup.

I have also commented out the actual body of the operator, in the interest of making it more likely to at least compile. The operator is supposed to find the squared difference between the elements of the input tup. transform_reduce would apply this unary_op (which in this case is func()) to every element, and the reduction binary_op (thrust::plus<float>()) would then add the results, giving me the norm squared of the difference of the vectors.

#include <iostream> #include <stdlib.h> #include <thrust/device_vector.h> #include <thrust/transform.h> #include <thrust/tuple.h> #include <thrust/transform_reduce.h> #include <thrust/iterator/zip_iterator.h>  typedef thrust::device_vector<float> dvec; typedef dvec::iterator iter; typedef thrust::tuple<iter, iter> tup;  struct func: public thrust::unary_function<tup, float> {   __device__ float operator()(tup t) //difsq   {     // I've commented out these two lines for testing purposes:     // float f = thrust::get<0>(t) - thrust::get<1>(t);     // return f*f;     return 3.14;   } };  int main() {   dvec a(40, 4.f);   dvec b(40, 3.f);   auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));   auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));    //these two lines work   thrust::get<0>(begin[0]);   std::cout << thrust::get<0>(begin[0]) - thrust::get<1>(begin[0]);     //these three lines do not   //thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>());   //func()(begin[0]);   //thrust::transform(begin, end, begin, func());     std::cout << "done" << std::endl;   return 0; } 

I get this error: (my program is called sandbox.cu)

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(310): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list         argument types are: (const thrust::device_reference<float>)       detected during:         instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here         instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" sandbox.cu(37): here  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(411): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list             argument types are: (const thrust::device_reference<float>)           detected during:             instantiation of "thrust::detail::cons<HT, thrust::null_type>::cons(const thrust::detail::cons<HT2, thrust::null_type> &) [with HT=iter, HT2=thrust::device_reference<float>]" (310): here             instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, 
TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here             instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" sandbox.cu(37): here  2 errors detected in the compilation of "C:/cygwin64/tmp/tmpxft_00001a90_00000000-10_sandbox.cpp1.ii". 

回答1:

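Solved! tup should have been thrust::tuple<float, float>, not thrust::tuple<iter, iter>: dereferencing the zip iterator yields a tuple of the vectors' elements (which convert to float), not a tuple of their iterators. Full solution: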

#include <iostream> #include <stdlib.h> #include <thrust/device_vector.h> #include <thrust/transform.h> #include <thrust/tuple.h> #include <thrust/transform_reduce.h> #include <thrust/iterator/zip_iterator.h>  typedef thrust::device_vector<float> dvec; typedef thrust::tuple<float, float> tup;  struct func {   __device__ float operator()(tup t) //difsq   {      float f = thrust::get<0>(t) - thrust::get<1>(t);      return f*f;   } };  int main() {   dvec a(4, 3.f);   dvec b(4, 2.f);   auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));   auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));   std::cout << thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>()) << std::endl;   std::cout << "done" << std::endl;   return 0; } 


易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!