ViennaCL - The Vienna Computing Library  1.5.1
viennacl/ocl/kernel.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_OCL_KERNEL_HPP_
00002 #define VIENNACL_OCL_KERNEL_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #ifdef __APPLE__
00026 #include <OpenCL/cl.h>
00027 #else
00028 #include <CL/cl.h>
00029 #endif
00030 
00031 #include "viennacl/ocl/forwards.h"
00032 #include "viennacl/ocl/handle.hpp"
00033 #include "viennacl/ocl/program.hpp"
00034 #include "viennacl/ocl/device.hpp"
00035 #include "viennacl/ocl/local_mem.hpp"
00036 #include "viennacl/ocl/infos.hpp"
00037 
00038 namespace viennacl
00039 {
00040   namespace ocl
00041   {
00046     struct packed_cl_uint
00047     {
00049       cl_uint start;
00051       cl_uint stride;
00053       cl_uint size;
00055       cl_uint internal_size;
00056     };
00057 
00059     class kernel
00060     {
00061       template <typename KernelType>
00062       friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue);
00063 
00064       template<cl_kernel_info param>
00065       friend typename detail::return_type<cl_kernel, param>::Result info(viennacl::ocl::kernel & k);
00066 
00067       template<cl_kernel_info param>
00068       friend typename detail::return_type<cl_kernel, param>::Result info(viennacl::ocl::kernel & k, viennacl::ocl::device const & d);
00069 
00070 
00071     public:
00072       typedef vcl_size_t            size_type;
00073 
00074       kernel() : handle_(), p_program_(NULL), p_context_(NULL), name_()
00075       {
00076         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00077         std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl;
00078         #endif
00079       }
00080 
00081       kernel(cl_kernel kernel_handle, viennacl::ocl::program const & kernel_program, viennacl::ocl::context const & kernel_context, std::string const & name)
00082         : handle_(kernel_handle, kernel_context), p_program_(&kernel_program), p_context_(&kernel_context), name_(name)
00083       {
00084         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00085         std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl;
00086         #endif
00087         set_work_size_defaults();
00088       }
00089 
00090       kernel(kernel const & other)
00091         : handle_(other.handle_), p_program_(other.p_program_), p_context_(other.p_context_), name_(other.name_)
00092       {
00093         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00094         std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
00095         #endif
00096         local_work_size_[0] = other.local_work_size_[0];
00097         local_work_size_[1] = other.local_work_size_[1];
00098         local_work_size_[2] = other.local_work_size_[2];
00099 
00100         global_work_size_[0] = other.global_work_size_[0];
00101         global_work_size_[1] = other.global_work_size_[1];
00102         global_work_size_[2] = other.global_work_size_[2];
00103       }
00104 
00105       viennacl::ocl::kernel & operator=(const kernel & other)
00106       {
00107         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00108         std::cout << "ViennaCL: Assigning kernel object" << std::endl;
00109         #endif
00110         handle_ = other.handle_;
00111         p_program_ = other.p_program_;
00112         p_context_ = other.p_context_;
00113         name_ = other.name_;
00114         local_work_size_[0] = other.local_work_size_[0];
00115         local_work_size_[1] = other.local_work_size_[1];
00116         local_work_size_[2] = other.local_work_size_[2];
00117         global_work_size_[0] = other.global_work_size_[0];
00118         global_work_size_[1] = other.global_work_size_[1];
00119         global_work_size_[2] = other.global_work_size_[2];
00120         return *this;
00121       }
00122 
00124       void arg(unsigned int pos, cl_char val)
00125       {
00126         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00127         std::cout << "ViennaCL: Setting char kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00128         #endif
00129         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_char), (void*)&val);
00130         VIENNACL_ERR_CHECK(err);
00131       }
00132 
00134       void arg(unsigned int pos, cl_uchar val)
00135       {
00136         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00137         std::cout << "ViennaCL: Setting unsigned char kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00138         #endif
00139         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_uchar), (void*)&val);
00140         VIENNACL_ERR_CHECK(err);
00141       }
00142 
00144       void arg(unsigned int pos, cl_short val)
00145       {
00146         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00147         std::cout << "ViennaCL: Setting short kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00148         #endif
00149         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_short), (void*)&val);
00150         VIENNACL_ERR_CHECK(err);
00151       }
00152 
00154       void arg(unsigned int pos, cl_ushort val)
00155       {
00156         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00157         std::cout << "ViennaCL: Setting unsigned short kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00158         #endif
00159         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_ushort), (void*)&val);
00160         VIENNACL_ERR_CHECK(err);
00161       }
00162 
00163 
00165       void arg(unsigned int pos, cl_uint val)
00166       {
00167         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00168         std::cout << "ViennaCL: Setting unsigned int kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00169         #endif
00170         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_uint), (void*)&val);
00171         VIENNACL_ERR_CHECK(err);
00172       }
00173 
00175       void arg(unsigned int pos, packed_cl_uint val)
00176       {
00177         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00178         std::cout << "ViennaCL: Setting packed_cl_uint kernel argument (" << val.start << ", " << val.stride << ", " << val.size << ", " << val.internal_size << ") at pos " << pos << " for kernel " << name_ << std::endl;
00179         #endif
00180         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(packed_cl_uint), (void*)&val);
00181         VIENNACL_ERR_CHECK(err);
00182       }
00183 
00185       void arg(unsigned int pos, float val)
00186       {
00187         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00188         std::cout << "ViennaCL: Setting floating point kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00189         #endif
00190         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(float), (void*)&val);
00191         VIENNACL_ERR_CHECK(err);
00192       }
00193 
00195       void arg(unsigned int pos, double val)
00196       {
00197         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00198         std::cout << "ViennaCL: Setting double precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00199         #endif
00200         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(double), (void*)&val);
00201         VIENNACL_ERR_CHECK(err);
00202       }
00203 
00205       void arg(unsigned int pos, cl_int val)
00206       {
00207         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00208         std::cout << "ViennaCL: Setting int precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00209         #endif
00210         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_int), (void*)&val);
00211         VIENNACL_ERR_CHECK(err);
00212       }
00213 
00215       void arg(unsigned int pos, cl_ulong val)
00216       {
00217         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00218         std::cout << "ViennaCL: Setting ulong precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00219         #endif
00220         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_ulong), (void*)&val);
00221         VIENNACL_ERR_CHECK(err);
00222       }
00223 
00225       void arg(unsigned int pos, cl_long val)
00226       {
00227         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00228         std::cout << "ViennaCL: Setting long precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl;
00229         #endif
00230         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_long), (void*)&val);
00231         VIENNACL_ERR_CHECK(err);
00232       }
00233 
00234       //generic handling: call .handle() member
00236       template<class VCL_TYPE>
00237       void arg(unsigned int pos, VCL_TYPE const & val)
00238       {
00239         assert(&val.handle().opencl_handle().context() == &handle_.context() && bool("Kernel and memory object not in the same context!"));
00240 
00241         cl_mem temp = val.handle().opencl_handle().get();
00242         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00243         std::cout << "ViennaCL: Setting generic kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl;
00244         #endif
00245         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_mem), (void*)&temp);
00246         VIENNACL_ERR_CHECK(err);
00247       }
00248 
00249       //forward handles directly:
00251       template<class CL_TYPE>
00252       void arg(unsigned int pos, viennacl::ocl::handle<CL_TYPE> const & h)
00253       {
00254         CL_TYPE temp = h.get();
00255         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00256         std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl;
00257         #endif
00258         cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp);
00259         VIENNACL_ERR_CHECK(err);
00260       }
00261 
00262 
00263       //local buffer argument:
00265       void arg(unsigned int pos, const local_mem & mem)
00266       {
00267         cl_uint size = static_cast<cl_uint>(mem.size());
00268         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00269         std::cout << "ViennaCL: Setting local memory kernel argument of size " << size << " bytes at pos " << pos << " for kernel " << name_ << std::endl;
00270         #endif
00271         cl_int err = clSetKernelArg(handle_.get(), pos, size, 0);
00272         VIENNACL_ERR_CHECK(err);
00273       }
00274 
00275 
00276 
00278       template <typename T0>
00279       kernel & operator()(T0 const & t0)
00280       {
00281          arg(0, t0);
00282          return *this;
00283       }
00284 
00286       template <typename T0, typename T1>
00287       kernel & operator()(T0 const & t0, T1 const & t1)
00288       {
00289          arg(0, t0); arg(1, t1);
00290          return *this;
00291       }
00292 
00294       template <typename T0, typename T1, typename T2>
00295       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2)
00296       {
00297          arg(0, t0); arg(1, t1); arg(2, t2);
00298          return *this;
00299       }
00300 
00302       template <typename T0, typename T1, typename T2, typename T3>
00303       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3)
00304       {
00305          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3);
00306          return *this;
00307       }
00308 
00310       template <typename T0, typename T1, typename T2, typename T3, typename T4>
00311       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4)
00312       {
00313          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4);
00314          return *this;
00315       }
00316 
00318       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
00319       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5)
00320       {
00321          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00322          return *this;
00323       }
00324 
00326       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
00327       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6)
00328       {
00329          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6);
00330          return *this;
00331       }
00332 
00334       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
00335       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7)
00336       {
00337          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00338          return *this;
00339       }
00340 
00342       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
00343       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8)
00344       {
00345          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8);
00346          return *this;
00347       }
00348 
00350       template <typename T0, typename T1, typename T2, typename T3, typename T4,
00351                 typename T5, typename T6, typename T7, typename T8, typename T9>
00352       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00353                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9)
00354       {
00355          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9);
00356          return *this;
00357       }
00358 
00360       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00361                 typename T6, typename T7, typename T8, typename T9, typename T10>
00362       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00363                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10)
00364       {
00365          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10);
00366          return *this;
00367       }
00368 
00370       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00371                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11>
00372       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00373                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11)
00374       {
00375          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00376          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00377          return *this;
00378       }
00379 
00381       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00382                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11, typename T12>
00383       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00384                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12)
00385       {
00386          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00387          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12);
00388          return *this;
00389       }
00390 
00392       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00393                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00394                 typename T12, typename T13>
00395       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00396                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00397                           T12 const & t12, T13 const & t13)
00398       {
00399          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00400          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00401          arg(12, t12); arg(13, t13);
00402          return *this;
00403       }
00404 
00406       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00407                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00408                 typename T12, typename T13, typename T14>
00409       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00410                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00411                           T12 const & t12, T13 const & t13, T14 const & t14)
00412       {
00413          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00414          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00415          arg(12, t12); arg(13, t13); arg(14, t14);
00416          return *this;
00417       }
00418 
00420       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00421                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00422                 typename T12, typename T13, typename T14, typename T15>
00423       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00424                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00425                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15)
00426       {
00427          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00428          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00429          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00430          return *this;
00431       }
00432 
00434       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00435                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00436                 typename T12, typename T13, typename T14, typename T15, typename T16>
00437       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00438                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00439                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16)
00440       {
00441          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00442          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00443          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16);
00444          return *this;
00445       }
00446 
00448       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00449                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00450                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17>
00451       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00452                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00453                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17)
00454       {
00455          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00456          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00457          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00458          return *this;
00459       }
00460 
00462       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00463                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00464                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00465                 typename T18>
00466       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00467                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00468                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00469                           T18 const & t18
00470                          )
00471       {
00472          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00473          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00474          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00475          arg(18, t18);
00476          return *this;
00477       }
00478 
00480       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00481                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00482                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00483                 typename T18, typename T19>
00484       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00485                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00486                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00487                           T18 const & t18, T19 const & t19
00488                          )
00489       {
00490          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00491          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00492          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00493          arg(18, t18); arg(19, t19);
00494          return *this;
00495       }
00496 
00498       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00499                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00500                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00501                 typename T18, typename T19, typename T20>
00502       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00503                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00504                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00505                           T18 const & t18, T19 const & t19, T20 const & t20
00506                          )
00507       {
00508          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00509          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00510          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00511          arg(18, t18); arg(19, t19); arg(20, t20);
00512          return *this;
00513       }
00514 
00516       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00517                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00518                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00519                 typename T18, typename T19, typename T20, typename T21>
00520       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00521                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00522                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00523                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21
00524                          )
00525       {
00526          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00527          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00528          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00529          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);
00530          return *this;
00531       }
00532 
00534       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00535                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00536                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00537                 typename T18, typename T19, typename T20, typename T21, typename T22>
00538       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00539                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00540                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00541                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22
00542                          )
00543       {
00544          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00545          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00546          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00547          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);  arg(22, t22);
00548          return *this;
00549       }
00550 
00552       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00553                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00554                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00555                 typename T18, typename T19, typename T20, typename T21, typename T22, typename T23>
00556       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00557                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00558                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00559                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23
00560                          )
00561       {
00562          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00563          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00564          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00565          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00566          return *this;
00567       }
00568 
00570       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00571                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00572                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00573                 typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00574                 typename T24>
00575       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00576                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00577                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00578                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23,
00579                           T24 const & t24
00580                          )
00581       {
00582          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00583          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00584          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00585          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00586          arg(24, t24);
00587          return *this;
00588       }
00589 
00590       /** @brief Sets 26 kernel arguments at once: t0..t25 are passed to arg() with the indices 0..25, then *this is returned. */
00591       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00592                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00593                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00594                 typename T24, typename T25>
00595       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00596                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00597                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00598                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00599                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00600                           T25 const & t25)
00601       {
00602         // One arg() call per parameter, issued in ascending index order.
00603         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00604         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00605         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00606         arg(24, t24); arg(25, t25);
00607         return *this;
00608       }
00610 
00611       /** @brief Sets 27 kernel arguments at once: t0..t26 are passed to arg() with the indices 0..26, then *this is returned. */
00612       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00613                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00614                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00615                 typename T24, typename T25, typename T26>
00616       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00617                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00618                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00619                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00620                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00621                           T25 const & t25, T26 const & t26)
00622       {
00623         // One arg() call per parameter, issued in ascending index order.
00624         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00625         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00626         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00627         arg(24, t24); arg(25, t25); arg(26, t26);
00628         return *this;
00629       }
00631 
00632       /** @brief Sets 28 kernel arguments at once: t0..t27 are passed to arg() with the indices 0..27, then *this is returned. */
00633       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00634                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00635                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00636                 typename T24, typename T25, typename T26, typename T27>
00637       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00638                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00639                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00640                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00641                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00642                           T25 const & t25, T26 const & t26, T27 const & t27)
00643       {
00644         // One arg() call per parameter, issued in ascending index order.
00645         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00646         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00647         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00648         arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27);
00649         return *this;
00650       }
00652 
00653       /** @brief Sets 29 kernel arguments at once: t0..t28 are passed to arg() with the indices 0..28, then *this is returned. */
00654       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00655                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00656                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00657                 typename T24, typename T25, typename T26, typename T27, typename T28>
00658       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00659                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00660                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00661                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00662                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00663                           T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28)
00664       {
00665         // One arg() call per parameter, issued in ascending index order.
00666         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00667         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00668         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00669         arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28);
00670         return *this;
00671       }
00673 
00674       /** @brief Sets 30 kernel arguments at once: t0..t29 are passed to arg() with the indices 0..29, then *this is returned. */
00675       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00676                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00677                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00678                 typename T24, typename T25, typename T26, typename T27, typename T28, typename T29>
00679       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00680                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00681                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00682                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00683                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00684                           T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29)
00685       {
00686         // One arg() call per parameter, issued in ascending index order.
00687         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00688         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00689         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00690         arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29);
00691         return *this;
00692       }
00694 
00695       /** @brief Sets 31 kernel arguments at once: t0..t30 are passed to arg() with the indices 0..30, then *this is returned. */
00696       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00697                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00698                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00699                 typename T24, typename T25, typename T26, typename T27, typename T28, typename T29, typename T30>
00700       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00701                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00702                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00703                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00704                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00705                           T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29,
00706                           T30 const & t30)
00707       {
00708         // One arg() call per parameter, issued in ascending index order.
00709         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00710         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00711         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00712         arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29); arg(30, t30);
00713         return *this;
00714       }
00718 
00719       /** @brief Sets 32 kernel arguments at once: t0..t31 are passed to arg() with the indices 0..31, then *this is returned. */
00720       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
00721                 typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
00722                 typename T16, typename T17, typename T18, typename T19, typename T20, typename T21, typename T22, typename T23,
00723                 typename T24, typename T25, typename T26, typename T27, typename T28, typename T29, typename T30, typename T31>
00724       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00725                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9,
00726                           T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14,
00727                           T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19,
00728                           T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24,
00729                           T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29,
00730                           T30 const & t30, T31 const & t31)
00731       {
00732         // One arg() call per parameter, issued in ascending index order.
00733         arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00734         arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00735         arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23);
00736         arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29); arg(30, t30); arg(31, t31);
00737         return *this;
00738       }
00742 
00743 
00744 
00745 
00746       /** @brief Returns the local work size stored for the given index.
00747       *
00748       * @param index   Entry to read; must be 0, 1, or 2 (local_work_size_ holds three entries). Defaults to 0.
00749       * @return        The value of local_work_size_[index] */
00750       size_type local_work_size(int index = 0) const
00751       {
00752         assert(index >= 0 && index < 3 && bool("Work size index out of bounds"));   // 'index' is signed, so negative values must be rejected as well
00753         return local_work_size_[index];
00754       }
00755       /** @brief Returns the global work size stored for the given index.
00756       *
00757       * @param index   Entry to read; must be 0, 1, or 2 (global_work_size_ holds three entries). Defaults to 0.
00758       * @return        The value of global_work_size_[index] */
00759       size_type global_work_size(int index = 0) const
00760       {
00761         assert(index >= 0 && index < 3 && bool("Work size index out of bounds"));   // 'index' is signed, so negative values must be rejected as well
00762         return global_work_size_[index];
00763       }
00764 
00765       /** @brief Stores a new local work size for the given index (logged to std::cout if VIENNACL_DEBUG_ALL or VIENNACL_DEBUG_KERNEL is defined).
00766       *
00767       * @param index   Entry to overwrite; must be 0, 1, or 2 (local_work_size_ holds three entries)
00768       * @param s       The new local work size for that entry
00769       */
00770       void local_work_size(int index, size_type s)
00771       {
00772         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00773         std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00774         #endif
00775         assert(index >= 0 && index < 3 && bool("Work size index out of bounds"));   // 'index' is signed, so negative values must be rejected as well
00776         local_work_size_[index] = s;
00777       }
00778       /** @brief Stores a new global work size for the given index (logged to std::cout if VIENNACL_DEBUG_ALL or VIENNACL_DEBUG_KERNEL is defined).
00779       *
00780       * @param index   Entry to overwrite; must be 0, 1, or 2 (global_work_size_ holds three entries)
00781       * @param s       The new global work size for that entry
00782       */
00783       void global_work_size(int index, size_type s)
00784       {
00785         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00786         std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00787         #endif
00788         assert(index >= 0 && index < 3 && bool("Work size index out of bounds"));   // 'index' is signed, so negative values must be rejected as well
00789         global_work_size_[index] = s;
00790       }
00791       /** @brief Returns a const reference to the name stored in this kernel object */
00792       const std::string & name() const { return this->name_; }
00793       /** @brief Returns a const reference to the cl_kernel handle wrapper held by this object */
00794       const viennacl::ocl::handle<cl_kernel> & handle() const { return this->handle_; }
00795       /** @brief Returns the context that p_context_ points to (the pointer is dereferenced without a null check) */
00796       const viennacl::ocl::context & context() const { return *(this->p_context_); }
00797 
00798     private:
00799 
00800       inline void set_work_size_defaults();    // private helper, only declared here; see context.hpp for implementation
00801 
00802       viennacl::ocl::handle<cl_kernel> handle_;     // wrapped cl_kernel; returned by handle() and read by the info() functions
00803       viennacl::ocl::program const * p_program_;    // presumably the program this kernel was created from -- not used in this part of the file, TODO confirm
00804       viennacl::ocl::context const * p_context_;    // dereferenced by context() without a null check
00805       std::string name_;                            // returned by name() and printed in the debug output of the work size setters
00806       size_type local_work_size_[3];                // read and written through local_work_size(); valid indices are 0..2
00807       size_type global_work_size_[3];               // read and written through global_work_size(); valid indices are 0..2
00808     };
00809 
00810     /** @brief Queries the property 'param' of the cl_kernel held by k through detail::get_info_impl.
00811     *
00812     * @param k   Kernel whose handle_ is passed to the query
00813     * @return    The query result, of type detail::return_type<cl_kernel, param>::Result */
00814     template<cl_kernel_info param>
00815     typename detail::return_type<cl_kernel, param>::Result info(viennacl::ocl::kernel & k)
00816     {
00817       // The functor is instantiated with the result type that detail::return_type associates with 'param'.
00818       return detail::get_info_impl<typename detail::return_type<cl_kernel, param>::Result>()(k.handle_.get(), param);
00819     }
00820 
00821     /** @brief Queries the property 'param' of the cl_kernel held by k for the device d through detail::get_info_impl.
00822     *
00823     * @param k   Kernel whose handle_ is passed to the query
00824     * @param d   Device whose id() is passed to the query
00825     * @return    The query result, of type detail::return_type<cl_kernel, param>::Result */
00826     template<cl_kernel_info param>
00827     typename detail::return_type<cl_kernel, param>::Result info(viennacl::ocl::kernel & k, viennacl::ocl::device const & d)
00828     {
00829       // The functor is instantiated with the result type that detail::return_type associates with 'param'.
00830       return detail::get_info_impl<typename detail::return_type<cl_kernel, param>::Result>()(k.handle_.get(), d.id(), param);
00831     }
00832 
00833   } //namespace ocl
00834 } //namespace viennacl
00835 
00836 #endif