ViennaCL - The Vienna Computing Library  1.5.1
viennacl/generator/saxpy.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_GENERATOR_GENERATE_SAXPY_HPP
00002 #define VIENNACL_GENERATOR_GENERATE_SAXPY_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00021 
00027 #include <vector>
00028 
00029 #include "viennacl/scheduler/forwards.h"
00030 
00031 #include "viennacl/generator/mapped_objects.hpp"
00032 #include "viennacl/generator/helpers.hpp"
00033 #include "viennacl/generator/utils.hpp"
00034 
00035 #include "viennacl/generator/profile_base.hpp"
00036 
00037 #include "viennacl/tools/tools.hpp"
00038 
00039 namespace viennacl{
00040 
00041   namespace generator{
00042 
00044     class vector_saxpy : public profile_base{
00045       public:
00046         static std::string csv_format() {
00047           return "Vec,LSize1,NumGroups1,GlobalDecomposition";
00048         }
00049 
00050         std::string csv_representation() const{
00051           std::ostringstream oss;
00052           oss << vector_size_
00053               << "," << local_size_1_
00054               << "," << num_groups_
00055               << "," << decomposition_;
00056           return oss.str();
00057         }
00058 
00059         vector_saxpy(unsigned int v, vcl_size_t gs, vcl_size_t ng, unsigned int d) : profile_base(v, gs, 1, 1), num_groups_(ng), decomposition_(d){ }
00060 
00061         void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type  const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg)  const{
00062           configure_local_sizes(k, kernel_id);
00063 
00064           k.global_work_size(0,local_size_1_*num_groups_);
00065           k.global_work_size(1,1);
00066 
00067           scheduler::statement_node const & first_node = statements.front().second;
00068           viennacl::vcl_size_t N = utils::call_on_vector(first_node.lhs, utils::internal_size_fun());
00069           k.arg(n_arg++, cl_uint(N/vector_size_));
00070         }
00071         void kernel_arguments(statements_type  const & /*statements*/, std::string & arguments_string) const{
00072           arguments_string += detail::generate_value_kernel_argument("unsigned int", "N");
00073         }
00074 
00075       private:
00076 
00077         void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {
00078           stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl;
00079           stream << "{" << std::endl;
00080           stream.inc_tab();
00081 
00082           //Fetches entries to registers
00083           std::set<std::string>  fetched;
00084           for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
00085             for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit)
00086               //Useless to fetch cpu scalars into registers
00087               if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get()))
00088                 p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream);
00089 
00090           //Generates all the expression, in order
00091           vcl_size_t i = 0;
00092           for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
00093             std::string str;
00094             detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++]));
00095             stream << str << ";" << std::endl;
00096           }
00097 
00098           //Writes back
00099           for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
00100              //Gets the mapped object at the LHS of each expression
00101             if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get()))
00102               p->write_back( std::make_pair("i", "0"), fetched, stream);
00103 
00104           stream.dec_tab();
00105           stream << "}" << std::endl;
00106         }
00107 
00108       private:
00109         vcl_size_t num_groups_;
00110         unsigned int decomposition_;
00111 
00112     };
00113 
00114 
00115 
00117     class matrix_saxpy : public profile_base{
00118 
00119         bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const{ return false; }
00120         bool is_slow_impl(viennacl::ocl::device const &) const { return false; }
00121 
00122       public:
00123         matrix_saxpy(unsigned int v, vcl_size_t gs1, vcl_size_t gs2, vcl_size_t ng1, vcl_size_t ng2, unsigned int d) : profile_base(v, gs1, gs2, 1), num_groups_row_(ng1), num_groups_col_(ng2), decomposition_(d){ }
00124 
00125         static std::string csv_format() {
00126           return "Vec,LSize1,LSize2,NumGroups1,NumGroups2,GlobalDecomposition";
00127         }
00128 
00129         std::string csv_representation() const{
00130           std::ostringstream oss;
00131           oss << vector_size_
00132                  << "," << local_size_1_
00133                  << "," << local_size_2_
00134                  << "," << num_groups_row_
00135                  << "," << num_groups_col_
00136                  << "," << decomposition_;
00137           return oss.str();
00138         }
00139 
00140         void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type  const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg)  const{
00141           configure_local_sizes(k, kernel_id);
00142 
00143           k.global_work_size(0,local_size_1_*num_groups_row_);
00144           k.global_work_size(1,local_size_2_*num_groups_col_);
00145 
00146           scheduler::statement_node const & first_node = statements.front().second;
00147           k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun())));
00148           k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun())));
00149         }
00150 
00151         void kernel_arguments(statements_type  const & /*statements*/, std::string & arguments_string) const{
00152           arguments_string += detail::generate_value_kernel_argument("unsigned int", "M");
00153           arguments_string += detail::generate_value_kernel_argument("unsigned int", "N");
00154         }
00155 
00156       private:
00157         void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {
00158 
00159           for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){
00160             for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){
00161               if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get()))
00162                 p->bind_sizes("M","N");
00163             }
00164           }
00165 
00166           stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl;
00167           stream << "{" << std::endl;
00168           stream.inc_tab();
00169           stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl;
00170           stream << "{" << std::endl;
00171           stream.inc_tab();
00172 
00173           //Fetches entries to registers
00174           std::set<std::string>  fetched;
00175           for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
00176             for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2)
00177               if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get()))
00178                 p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream);
00179 
00180 
00181           vcl_size_t i = 0;
00182           for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
00183             std::string str;
00184             detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++]));
00185             stream << str << ";" << std::endl;
00186           }
00187 
00188           //Writes back
00189           for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
00190             if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get()))
00191               p->write_back(std::make_pair("i", "j"), fetched, stream);
00192           }
00193 
00194           stream.dec_tab();
00195           stream << "}" << std::endl;
00196           stream.dec_tab();
00197           stream << "}" << std::endl;
00198         }
00199 
00200       private:
00201         vcl_size_t num_groups_row_;
00202         vcl_size_t num_groups_col_;
00203 
00204         unsigned int decomposition_;
00205     };
00206   }
00207 
00208 }
00209 
00210 #endif