ViennaCL - The Vienna Computing Library  1.5.1
viennacl/ocl/context.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_OCL_CONTEXT_HPP_
00002 #define VIENNACL_OCL_CONTEXT_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #ifdef __APPLE__
00026 #include <OpenCL/cl.h>
00027 #else
00028 #include <CL/cl.h>
00029 #endif
00030 
00031 #include <algorithm>
00032 #include <vector>
00033 #include <map>
00034 #include "viennacl/ocl/forwards.h"
00035 #include "viennacl/ocl/handle.hpp"
00036 #include "viennacl/ocl/kernel.hpp"
00037 #include "viennacl/ocl/program.hpp"
00038 #include "viennacl/ocl/device.hpp"
00039 #include "viennacl/ocl/platform.hpp"
00040 #include "viennacl/ocl/command_queue.hpp"
00041 
00042 namespace viennacl
00043 {
00044   namespace ocl
00045   {
00051     class context
00052     {
00053       typedef std::vector< viennacl::ocl::program >   ProgramContainer;
00054 
00055       public:
00056         context() : initialized_(false),
00057                     device_type_(CL_DEVICE_TYPE_DEFAULT),
00058                     current_device_id_(0),
00059                     default_device_num_(1),
00060                     pf_index_(0),
00061                     current_queue_id_(0) {}
00062 
00064 
00065         vcl_size_t default_device_num() const { return default_device_num_; }
00066 
00068         void default_device_num(vcl_size_t new_num) { default_device_num_ = new_num; }
00069 
00071 
00072         cl_device_type default_device_type()
00073         {
00074           return device_type_;
00075         }
00076 
00078         void default_device_type(cl_device_type dtype)
00079         {
00080           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00081           std::cout << "ViennaCL: Setting new device type for context " << h_ << std::endl;
00082           #endif
00083           if (!initialized_)
00084             device_type_ = dtype; //assume that the user provided a correct value
00085         }
00086 
00088 
00089         std::vector<viennacl::ocl::device> const & devices() const
00090         {
00091           return devices_;
00092         }
00093 
00095         viennacl::ocl::device const & current_device() const
00096         {
00097           //std::cout << "Current device id in context: " << current_device_id_ << std::endl;
00098           return devices_[current_device_id_];
00099         }
00100 
00102         void switch_device(vcl_size_t i)
00103         {
00104           assert(i < devices_.size() && bool("Provided device index out of range!"));
00105           current_device_id_ = i;
00106         }
00107 
00109         void switch_device(viennacl::ocl::device const & d)
00110         {
00111           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00112           std::cout << "ViennaCL: Setting new current device for context " << h_ << std::endl;
00113           #endif
00114           bool found = false;
00115           for (vcl_size_t i=0; i<devices_.size(); ++i)
00116           {
00117             if (devices_[i] == d)
00118             {
00119               found = true;
00120               current_device_id_ = i;
00121               break;
00122             }
00123           }
00124           if (found == false)
00125             std::cerr << "ViennaCL: Warning: Could not set device " << d.name() << " for context." << std::endl;
00126         }
00127 
00129         void add_device(viennacl::ocl::device const & d)
00130         {
00131           assert(!initialized_ && bool("Device must be added to context before it is initialized!"));
00132           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00133           std::cout << "ViennaCL: Adding new device to context " << h_ << std::endl;
00134           #endif
00135           if (std::find(devices_.begin(), devices_.end(), d) == devices_.end())
00136             devices_.push_back(d);
00137         }
00138 
00140         void add_device(cl_device_id d)
00141         {
00142           assert(!initialized_ && bool("Device must be added to context before it is initialized!"));
00143           add_device(viennacl::ocl::device(d));
00144         }
00145 
00146 
00148 
00150         void init()
00151         {
00152           init_new();
00153         }
00154 
00156         void init(cl_context c)
00157         {
00158           init_existing(c);
00159         }
00160 
00161 /*        void existing_context(cl_context context_id)
00162         {
00163           assert(!initialized_ && bool("ViennaCL: FATAL error: Provided a new context for an already initialized context."));
00164           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00165           std::cout << "ViennaCL: Reusing existing context " << h_ << std::endl;
00166           #endif
00167           h_ = context_id;
00168         }*/
00169 
00171 
00179         cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const
00180         {
00181           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00182           std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl;
00183           #endif
00184           if (ptr)
00185             flags |= CL_MEM_COPY_HOST_PTR;
00186           cl_int err;
00187           cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err);
00188           VIENNACL_ERR_CHECK(err);
00189           return mem;
00190         }
00191 
00192 
00199         viennacl::ocl::handle<cl_mem> create_memory(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const
00200         {
00201           return viennacl::ocl::handle<cl_mem>(create_memory_without_smart_handle(flags, size, ptr), *this);
00202         }
00203 
00209         template < typename SCALARTYPE, typename A, template <typename, typename> class VectorType >
00210         viennacl::ocl::handle<cl_mem> create_memory(cl_mem_flags flags, const VectorType<SCALARTYPE, A> & buffer) const
00211         {
00212           return viennacl::ocl::handle<cl_mem>(create_memory_without_smart_handle(flags, static_cast<cl_uint>(sizeof(SCALARTYPE) * buffer.size()), (void*)&buffer[0]), *this);
00213         }
00214 
00216 
00218         void add_queue(cl_device_id dev, cl_command_queue q)
00219         {
00220           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00221           std::cout << "ViennaCL: Adding existing queue " << q << " for device " << dev << " to context " << h_ << std::endl;
00222           #endif
00223           viennacl::ocl::handle<cl_command_queue> queue_handle(q, *this);
00224           queues_[dev].push_back(viennacl::ocl::command_queue(queue_handle));
00225           queues_[dev].back().handle().inc();
00226         }
00227 
00229         void add_queue(cl_device_id dev)
00230         {
00231           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00232           std::cout << "ViennaCL: Adding new queue for device " << dev << " to context " << h_ << std::endl;
00233           #endif
00234           cl_int err;
00235 #ifdef VIENNACL_PROFILING_ENABLED
00236           viennacl::ocl::handle<cl_command_queue> temp(clCreateCommandQueue(h_.get(), dev, CL_QUEUE_PROFILING_ENABLE, &err), *this);
00237 #else
00238           viennacl::ocl::handle<cl_command_queue> temp(clCreateCommandQueue(h_.get(), dev, 0, &err), *this);
00239 #endif
00240           VIENNACL_ERR_CHECK(err);
00241 
00242           queues_[dev].push_back(viennacl::ocl::command_queue(temp));
00243         }
00244 
00246         void add_queue(viennacl::ocl::device d) { add_queue(d.id()); }
00247 
00248         //get queue for default device:
00249         viennacl::ocl::command_queue & get_queue()
00250         {
00251           return queues_[devices_[current_device_id_].id()][current_queue_id_];
00252         }
00253 
00254         viennacl::ocl::command_queue const & get_queue() const
00255         {
00256           typedef std::map< cl_device_id, std::vector<viennacl::ocl::command_queue> >    QueueContainer;
00257 
00258           // find queue:
00259           QueueContainer::const_iterator it = queues_.find(devices_[current_device_id_].id());
00260           if (it != queues_.end())
00261             return (it->second)[current_queue_id_];
00262 
00263           std::cerr << "ViennaCL: FATAL ERROR: Could not obtain current command queue!" << std::endl;
00264           std::cout << "Number of queues in context: " << queues_.size() << std::endl;
00265           std::cout << "Number of devices in context: " << devices_.size() << std::endl;
00266           throw "queue not found!";
00267 
00268           //return (it->second)[current_queue_id_];
00269         }
00270 
00271         //get a particular queue:
00273         viennacl::ocl::command_queue & get_queue(cl_device_id dev, vcl_size_t i = 0)
00274         {
00275           assert(i < queues_.size() && bool("In class 'context': id invalid in get_queue()"));
00276           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00277           std::cout << "ViennaCL: Getting queue " << i << " for device " << dev << " in context " << h_ << std::endl;
00278           #endif
00279           unsigned int device_index;
00280           for (device_index = 0; device_index < devices_.size(); ++device_index)
00281           {
00282             if (devices_[device_index] == dev)
00283               break;
00284           }
00285 
00286           assert(device_index < devices_.size() && bool("Device not within context"));
00287 
00288           return queues_[devices_[device_index].id()][i];
00289         }
00290 
00292         // TODO: work out the const issues
00293         viennacl::ocl::command_queue const & current_queue() //const
00294         {
00295           return queues_[devices_[current_device_id_].id()][current_queue_id_];
00296         }
00297 
00299         void switch_queue(vcl_size_t i)
00300         {
00301           assert(i < queues_[devices_[current_device_id_].id()].size() && bool("In class 'context': Provided queue index out of range for device!"));
00302           current_queue_id_ = i;
00303         }
00304 
00305 #if 1
00306 
00307         void switch_queue(viennacl::ocl::command_queue const & q)
00308         {
00309           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00310           std::cout << "ViennaCL: Setting new current queue for context " << h_ << std::endl;
00311           #endif
00312           bool found = false;
00313           typedef std::map< cl_device_id, std::vector<viennacl::ocl::command_queue> >    QueueContainer;
00314 
00315           // For each device:
00316           vcl_size_t j = 0;
00317           for (QueueContainer::const_iterator it=queues_.begin(); it != queues_.end(); it++,j++)
00318           {
00319               const std::vector<viennacl::ocl::command_queue> & qv = (it->second);
00320               // For each queue candidate
00321               for (vcl_size_t i=0; i<qv.size(); ++i)
00322               {
00323                   if (qv[i] == q)
00324                   {
00325                       found = true;
00326                       current_device_id_ = j;
00327                       current_queue_id_ = i;
00328                       break;
00329                   }
00330               }
00331           }
00332           if (found == false)
00333             std::cerr << "ViennaCL: Warning: Could not set queue " << q.handle().get() << " for context." << std::endl;
00334         }
00335 #endif
00336 
00338 
00340         viennacl::ocl::program & add_program(cl_program p, std::string const & prog_name)
00341         {
00342           programs_.push_back(viennacl::ocl::program(p, *this, prog_name));
00343           return programs_.back();
00344         }
00345 
00348         viennacl::ocl::program & add_program(std::string const & source, std::string const & prog_name)
00349         {
00350           const char * source_text = source.c_str();
00351           vcl_size_t source_size = source.size();
00352           cl_int err;
00353 
00354           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00355           std::cout << "ViennaCL: Adding program '" << prog_name << "' to context " << h_ << std::endl;
00356           #endif
00357 
00358           //
00359           // Build program
00360           //
00361           cl_program temp = clCreateProgramWithSource(h_.get(), 1, (const char **)&source_text, &source_size, &err);
00362           VIENNACL_ERR_CHECK(err);
00363 
00364           const char * options = build_options_.c_str();
00365           err = clBuildProgram(temp, 0, NULL, options, NULL, NULL);
00366           if (err != CL_SUCCESS)
00367           {
00368             char buffer[8192];
00369             cl_build_status status;
00370             clGetProgramBuildInfo(temp, devices_[0].id(), CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
00371             clGetProgramBuildInfo(temp, devices_[0].id(), CL_PROGRAM_BUILD_LOG, sizeof(char)*8192, &buffer, NULL);
00372             std::cout << "Build Scalar: Err = " << err << " Status = " << status << std::endl;
00373             std::cout << "Log: " << buffer << std::endl;
00374             std::cout << "Sources: " << source << std::endl;
00375           }
00376           VIENNACL_ERR_CHECK(err);
00377 
00378           programs_.push_back(viennacl::ocl::program(temp, *this, prog_name));
00379 
00380           viennacl::ocl::program & prog = programs_.back();
00381 
00382           //
00383           // Extract kernels
00384           //
00385           cl_kernel kernels[1024];
00386           cl_uint   num_kernels_in_prog;
00387           err = clCreateKernelsInProgram(prog.handle().get(), 1024, kernels, &num_kernels_in_prog);
00388           VIENNACL_ERR_CHECK(err);
00389 
00390           for (cl_uint i=0; i<num_kernels_in_prog; ++i)
00391           {
00392             char kernel_name[128];
00393             err = clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, 128, kernel_name, NULL);
00394             prog.add_kernel(kernels[i], std::string(kernel_name));
00395           }
00396 
00397           return prog;
00398         }
00399 
00401         void delete_program(std::string const & name){
00402           for (ProgramContainer::iterator it = programs_.begin();
00403                 it != programs_.end();
00404                 ++it)
00405           {
00406             if (it->name() == name){
00407               programs_.erase(it);
00408               return;
00409             }
00410           }
00411         }
00412 
00414         viennacl::ocl::program & get_program(std::string const & name)
00415         {
00416           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00417           std::cout << "ViennaCL: Getting program '" << name << "' from context " << h_ << std::endl;
00418           #endif
00419           for (ProgramContainer::iterator it = programs_.begin();
00420                 it != programs_.end();
00421                 ++it)
00422           {
00423             if (it->name() == name)
00424               return *it;
00425           }
00426           std::cerr << "Could not find program '" << name << "'" << std::endl;
00427           throw "In class 'context': name invalid in get_program()";
00428           //return programs_[0];  //return a defined object
00429         }
00430 
00431         viennacl::ocl::program const & get_program(std::string const & name) const
00432         {
00433           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00434           std::cout << "ViennaCL: Getting program '" << name << "' from context " << h_ << std::endl;
00435           #endif
00436           for (ProgramContainer::const_iterator it = programs_.begin();
00437                 it != programs_.end();
00438                 ++it)
00439           {
00440             if (it->name() == name)
00441               return *it;
00442           }
00443           std::cerr << "Could not find program '" << name << "'" << std::endl;
00444           throw "In class 'context': name invalid in get_program()";
00445           //return programs_[0];  //return a defined object
00446         }
00447 
00449         bool has_program(std::string const & name){
00450             for (ProgramContainer::iterator it = programs_.begin();
00451                   it != programs_.end();
00452                   ++it)
00453             {
00454               if (it->name() == name) return true;
00455             }
00456             return false;
00457         }
00458 
00460         viennacl::ocl::program & get_program(vcl_size_t id)
00461         {
00462           assert(id < programs_.size() && bool("In class 'context': id invalid in get_program()"));
00463           return programs_[id];
00464         }
00465 
00467         vcl_size_t program_num() { return programs_.size(); }
00468 
00470         viennacl::ocl::kernel & get_kernel(std::string const & program_name, std::string const & kernel_name) { return get_program(program_name).get_kernel(kernel_name); }
00471 
00473         vcl_size_t device_num() { return devices_.size(); }
00474 
00476         const viennacl::ocl::handle<cl_context> & handle() const { return h_; }
00477 
00479         std::string build_options() const { return build_options_; }
00480 
00482         void build_options(std::string op) { build_options_ = op; }
00483 
00485         vcl_size_t platform_index() const  { return pf_index_; }
00486 
00488         void platform_index(vcl_size_t new_index)
00489         {
00490           assert(!initialized_ && bool("Platform ID must be set before context is initialized!"));
00491           pf_index_ = new_index;
00492         }
00493 
00495         bool operator<(context const & other) const
00496         {
00497           return h_.get() < other.h_.get();
00498         }
00499 
00500         bool operator==(context const & other) const
00501         {
00502           return h_.get() == other.h_.get();
00503         }
00504 
00505       private:
00507         void init_new()
00508         {
00509           assert(!initialized_ && bool("ViennaCL FATAL error: Context already created!"));
00510 
00511           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00512           std::cout << "ViennaCL: Initializing new ViennaCL context." << std::endl;
00513           #endif
00514 
00515           cl_int err;
00516           std::vector<cl_device_id> device_id_array;
00517           if (devices_.empty()) //get the default device if user has not yet specified a list of devices
00518           {
00519             //create an OpenCL context for the provided devices:
00520             #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00521             std::cout << "ViennaCL: Setting all devices for context..." << std::endl;
00522             #endif
00523 
00524             platform pf(pf_index_);
00525             std::vector<device> devices = pf.devices(device_type_);
00526             #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00527             std::cout << "ViennaCL: Number of devices for context: " << devices.size() << std::endl;
00528             #endif
00529             vcl_size_t device_num = std::min<vcl_size_t>(default_device_num_, devices.size());
00530             for (vcl_size_t i=0; i<device_num; ++i)
00531               devices_.push_back(devices[i]);
00532 
00533             if (devices.size() == 0)
00534             {
00535               std::cerr << "ViennaCL: FATAL ERROR: No devices of type '";
00536               switch (device_type_)
00537               {
00538                 case CL_DEVICE_TYPE_CPU:          std::cout << "CPU"; break;
00539                 case CL_DEVICE_TYPE_GPU:          std::cout << "GPU"; break;
00540                 case CL_DEVICE_TYPE_ACCELERATOR:  std::cout << "ACCELERATOR"; break;
00541                 case CL_DEVICE_TYPE_DEFAULT:      std::cout << "DEFAULT"; break;
00542                 default:
00543                   std::cout << "UNKNOWN" << std::endl;
00544               }
00545               std::cout << "' found!" << std::endl;
00546             }
00547           }
00548 
00549           //extract list of device ids:
00550           for (std::vector< viennacl::ocl::device >::const_iterator iter = devices_.begin();
00551                                                                     iter != devices_.end();
00552                                                                   ++iter)
00553             device_id_array.push_back(iter->id());
00554 
00555           h_ = clCreateContext(0,
00556                                static_cast<cl_uint>(devices_.size()),
00557                                &(device_id_array[0]),
00558                                NULL, NULL, &err);
00559           VIENNACL_ERR_CHECK(err);
00560 
00561           initialized_ = true;
00562           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00563           std::cout << "ViennaCL: Initialization of new ViennaCL context done." << std::endl;
00564           #endif
00565         }
00566 
00568         void init_existing(cl_context c)
00569         {
00570           assert(!initialized_ && bool("ViennaCL FATAL error: Context already created!"));
00571           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00572           std::cout << "ViennaCL: Initialization of ViennaCL context from existing context." << std::endl;
00573           #endif
00574 
00575           //set context handle:
00576           h_ = c;
00577           h_.inc(); // if the user provides the context, then the user will also call release() on the context. Without inc(), we would get a seg-fault due to double-free at program termination.
00578 
00579           if (devices_.empty())
00580           {
00581             //get devices for context:
00582             cl_int err;
00583             cl_uint num_devices;
00584             vcl_size_t temp;
00585             //Note: The obvious
00586             //  err = clGetContextInfo(h_, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
00587             //does not work with NVIDIA OpenCL stack!
00588             err = clGetContextInfo(h_.get(), CL_CONTEXT_DEVICES, VIENNACL_OCL_MAX_DEVICE_NUM * sizeof(cl_device_id), NULL, &temp);
00589             VIENNACL_ERR_CHECK(err);
00590             assert(temp > 0 && bool("ViennaCL: FATAL error: Provided context does not contain any devices!"));
00591             num_devices = static_cast<cl_uint>(temp / sizeof(cl_device_id));
00592 
00593             #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00594             std::cout << "ViennaCL: Reusing context with " << num_devices << " devices." << std::endl;
00595             #endif
00596 
00597             std::vector<cl_device_id> device_ids(num_devices);
00598             err = clGetContextInfo(h_.get(), CL_CONTEXT_DEVICES, num_devices * sizeof(cl_device_id), &(device_ids[0]), NULL);
00599             VIENNACL_ERR_CHECK(err);
00600 
00601             for (vcl_size_t i=0; i<num_devices; ++i)
00602               devices_.push_back(viennacl::ocl::device(device_ids[i]));
00603           }
00604           current_device_id_ = 0;
00605 
00606           initialized_ = true;
00607           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT)
00608           std::cout << "ViennaCL: Initialization of ViennaCL context from existing context done." << std::endl;
00609           #endif
00610         }
00611 
00612 
00613         bool initialized_;
00614         cl_device_type device_type_;
00615         viennacl::ocl::handle<cl_context> h_;
00616         std::vector< viennacl::ocl::device > devices_;
00617         vcl_size_t current_device_id_;
00618         vcl_size_t default_device_num_;
00619         ProgramContainer programs_;
00620         std::map< cl_device_id, std::vector< viennacl::ocl::command_queue> > queues_;
00621         std::string build_options_;
00622         vcl_size_t pf_index_;
00623         vcl_size_t current_queue_id_;
00624     }; //context
00625 
00626 
00627 
00629     inline viennacl::ocl::kernel & viennacl::ocl::program::add_kernel(cl_kernel kernel_handle, std::string const & kernel_name)
00630     {
00631       assert(p_context_ != NULL && bool("Pointer to context invalid in viennacl::ocl::program object"));
00632       viennacl::ocl::kernel temp(kernel_handle, *this, *p_context_, kernel_name);
00633       kernels_.push_back(temp);
00634       return kernels_.back();
00635     }
00636 
00638     inline viennacl::ocl::kernel & viennacl::ocl::program::get_kernel(std::string const & name)
00639     {
00640       //std::cout << "Requiring kernel " << name << " from program " << name_ << std::endl;
00641       for (KernelContainer::iterator it = kernels_.begin();
00642             it != kernels_.end();
00643            ++it)
00644       {
00645         if (it->name() == name)
00646           return *it;
00647       }
00648       std::cerr << "ViennaCL: FATAL ERROR: Could not find kernel '" << name << "' from program '" << name_ << "'" << std::endl;
00649       std::cout << "Number of kernels in program: " << kernels_.size() << std::endl;
00650       throw "Kernel not found";
00651       //return kernels_[0];  //return a defined object
00652     }
00653 
00654 
00655     inline void viennacl::ocl::kernel::set_work_size_defaults()
00656     {
00657       assert( p_program_ != NULL && bool("Kernel not initialized, program pointer invalid."));
00658       assert( p_context_ != NULL && bool("Kernel not initialized, context pointer invalid."));
00659 
00660       if (   (p_context_->current_device().type() == CL_DEVICE_TYPE_GPU)
00661           || (p_context_->current_device().type() == CL_DEVICE_TYPE_ACCELERATOR) // Xeon Phi
00662          )
00663       {
00664         local_work_size_[0] = 128;      local_work_size_[1] = 0;  local_work_size_[2] = 0;
00665         global_work_size_[0] = 128*128; global_work_size_[1] = 0; global_work_size_[2] = 0;
00666       }
00667       else //assume CPU type:
00668       {
00669         //conservative assumption: one thread per CPU core:
00670         local_work_size_[0] = 1; local_work_size_[1] = 0; local_work_size_[2] = 0;
00671 
00672         size_type units = p_context_->current_device().max_compute_units();
00673         size_type s = 1;
00674 
00675         while (s < units) // find next power of 2. Important to make reductions work on e.g. six-core CPUs.
00676           s *= 2;
00677 
00678         global_work_size_[0] = s; global_work_size_[1] = 0; global_work_size_[2] = 0;
00679       }
00680     }
00681 
00682   }
00683 }
00684 
00685 #endif