ViennaCL - The Vienna Computing Library 1.5.1
viennacl/coordinate_matrix.hpp
#ifndef VIENNACL_COORDINATE_MATRIX_HPP_
#define VIENNACL_COORDINATE_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/coordinate_matrix.hpp
    @brief Implementation of the coordinate_matrix class
*/

#include <map>
#include <vector>
#include <list>

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"     // viennacl::tools::align_to_multiple(), used by internal_nnz() and reserve()
#include "viennacl/tools/adapter.hpp"   // tools::sparse_matrix_adapter for the STL-based copy() overloads

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{


    //provide copy-operation:
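    /** @brief Copies a sparse host matrix to the device in coordinate format.
      *
      * The host type only needs to provide row-wise (iterator1) and column-wise (iterator2) traversal, as e.g. the Boost.uBLAS sparse matrix types do.
      *
      * @param cpu_matrix   The sparse matrix on the host
      * @param gpu_matrix   The coordinate_matrix to be filled; either empty or with matching dimensions
      */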
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const CPU_MATRIX & cpu_matrix,
              coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
    {
      assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
      assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

      vcl_size_t group_num = 64;

      // Step 1: Determine nonzeros:
      if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
      {
        vcl_size_t num_entries = 0;
        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
              row_it != cpu_matrix.end1();
              ++row_it)
        {
          for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
                col_it != row_it.end();
                ++col_it)
          {
            ++num_entries;
          }
        }

        // Step 2: Set up matrix data:
        gpu_matrix.nonzeros_ = num_entries;
        gpu_matrix.rows_ = cpu_matrix.size1();
        gpu_matrix.cols_ = cpu_matrix.size2();

        viennacl::backend::typesafe_host_array<unsigned int> group_boundaries(gpu_matrix.handle3(), group_num + 1);
        viennacl::backend::typesafe_host_array<unsigned int> coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.internal_nnz());
        std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());

        vcl_size_t data_index = 0;
        vcl_size_t current_fraction = 0;

        group_boundaries.set(0, 0);
        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
              row_it != cpu_matrix.end1();
              ++row_it)
        {
          for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
                col_it != row_it.end();
                ++col_it)
          {
            coord_buffer.set(2*data_index, col_it.index1());
            coord_buffer.set(2*data_index + 1, col_it.index2());
            elements[data_index] = *col_it;
            ++data_index;
          }

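          // Advance the group boundary markers: the nonzeros are split roughly evenly
          // over the 'group_num' groups, with boundaries snapped to row ends because
          // this check runs only after a full row has been processed. Illustrative
          // numbers: with num_entries = 640 and group_num = 64, a boundary is written
          // whenever data_index passes the next multiple of 10, so each group receives
          // about ten entries.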
          while (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)    //split data equally over 64 groups
            group_boundaries.set(++current_fraction, data_index);
        }

        //write end of last group:
        group_boundaries.set(group_num, data_index);
        //group_boundaries[1] = data_index; //for one compute unit

        //std::cout << "Group boundaries: " << std::endl;
        //for (vcl_size_t i=0; i<group_boundaries.size(); ++i)
        //  std::cout << group_boundaries[i] << std::endl;

        viennacl::backend::memory_create(gpu_matrix.group_boundaries_, group_boundaries.raw_size(), traits::context(gpu_matrix.group_boundaries_), group_boundaries.get());
        viennacl::backend::memory_create(gpu_matrix.coord_buffer_,         coord_buffer.raw_size(), traits::context(gpu_matrix.coord_buffer_),     coord_buffer.get());
        viennacl::backend::memory_create(gpu_matrix.elements_,  sizeof(SCALARTYPE)*elements.size(), traits::context(gpu_matrix.elements_),         &(elements[0]));
      }
    }

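    /** @brief Copies a sparse square matrix in the STL format std::vector< std::map<unsigned int, SCALARTYPE> > to the device.
      *
      * @param cpu_matrix   The sparse matrix on the host (outer index: row, map key: column); assumed square, since the STL container carries only one size
      * @param gpu_matrix   The coordinate_matrix to be filled
      */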
    template <typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
              coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
    {
      copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()), gpu_matrix);
    }
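
    // Usage sketch for the host-to-device copy (illustrative only, not part of this
    // header; 'stl_A' is a hypothetical host matrix):
    //
    //   std::vector< std::map<unsigned int, float> > stl_A(1000);
    //   stl_A[42][137] = 3.14f;                 // ... fill further entries ...
    //   viennacl::coordinate_matrix<float> vcl_A;
    //   viennacl::copy(stl_A, vcl_A);           // host -> device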

    //gpu to cpu:
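    /** @brief Copies a sparse matrix from the device back to the host.
      *
      * @param gpu_matrix   The coordinate_matrix on the device
      * @param cpu_matrix   The host matrix to be filled; must offer operator()(i, j) write access and already have matching dimensions
      */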
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
              CPU_MATRIX & cpu_matrix )
    {
      assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
      assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

      if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
      {
        //get raw data from memory:
        viennacl::backend::typesafe_host_array<unsigned int> coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.nnz());
        std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

        //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl;

        viennacl::backend::memory_read(gpu_matrix.handle12(), 0, coord_buffer.raw_size(), coord_buffer.get());
        viennacl::backend::memory_read(gpu_matrix.handle(),   0, sizeof(SCALARTYPE) * elements.size(), &(elements[0]));

        //fill the cpu_matrix:
        for (vcl_size_t index = 0; index < gpu_matrix.nnz(); ++index)
          cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
      }
    }

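    /** @brief Copies a sparse matrix from the device to the STL format std::vector< std::map<unsigned int, SCALARTYPE> > on the host.
      *
      * @param gpu_matrix   The coordinate_matrix on the device
      * @param cpu_matrix   The host matrix to be filled (one std::map per row); must hold gpu_matrix.size1() rows
      */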
    template <typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
              std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
    {
      tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2());
      copy(gpu_matrix, temp);
    }
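
    // Usage sketch for the device-to-host copy (illustrative only, continuing the
    // example above):
    //
    //   std::vector< std::map<unsigned int, float> > stl_B(vcl_A.size1());
    //   viennacl::copy(vcl_A, stl_B);           // device -> host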


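    /** @brief A sparse matrix class in coordinate (COO) format: every entry is stored as a triplet (row index, column index, value).
      *
      * @tparam SCALARTYPE   The underlying scalar type (either 'float' or 'double')
      * @tparam ALIGNMENT    The internal number of nonzeros is padded to a multiple of ALIGNMENT (cf. internal_nnz() below)
      */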
    template<class SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h */ >
    class coordinate_matrix
    {
      public:
        typedef viennacl::backend::mem_handle                                                              handle_type;
        typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
        typedef vcl_size_t                                                                                 size_type;

        /** @brief Default construction of a coordinate matrix. No memory is allocated */
        coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0), group_num_(64) {}

        /** @brief Construction of an empty coordinate matrix bound to the given context. No memory is allocated */
        explicit coordinate_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), group_num_(64)
        {
          group_boundaries_.switch_active_handle_id(ctx.memory_type());
              coord_buffer_.switch_active_handle_id(ctx.memory_type());
                  elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
          if (ctx.memory_type() == OPENCL_MEMORY)
          {
            group_boundaries_.opencl_handle().context(ctx.opencl_context());
                coord_buffer_.opencl_handle().context(ctx.opencl_context());
                    elements_.opencl_handle().context(ctx.opencl_context());
          }
#endif
        }

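        /** @brief Construction of a coordinate matrix with the supplied number of rows and columns; memory is allocated if a nonzero count is given.
          *
          * @param rows       Number of rows
          * @param cols       Number of columns
          * @param nonzeros   Optional number of nonzeros for memory preallocation
          * @param ctx        Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
          */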
        coordinate_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) :
          rows_(rows), cols_(cols), nonzeros_(nonzeros), group_num_(64)
        {
          if (nonzeros > 0)
          {
            viennacl::backend::memory_create(group_boundaries_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (group_num_ + 1), ctx);
            viennacl::backend::memory_create(coord_buffer_,     viennacl::backend::typesafe_host_array<unsigned int>().element_size() * 2 * internal_nnz(), ctx);
            viennacl::backend::memory_create(elements_,         sizeof(SCALARTYPE) * internal_nnz(), ctx);
          }
          else
          {
            group_boundaries_.switch_active_handle_id(ctx.memory_type());
                coord_buffer_.switch_active_handle_id(ctx.memory_type());
                    elements_.switch_active_handle_id(ctx.memory_type());

  #ifdef VIENNACL_WITH_OPENCL
            if (ctx.memory_type() == OPENCL_MEMORY)
            {
              group_boundaries_.opencl_handle().context(ctx.opencl_context());
                  coord_buffer_.opencl_handle().context(ctx.opencl_context());
                      elements_.opencl_handle().context(ctx.opencl_context());
            }
  #endif
          }
        }

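        /** @brief Construction of an empty coordinate matrix with the supplied number of rows and columns, bound to the given context; no buffers are allocated yet.
          *
          * @param rows   Number of rows
          * @param cols   Number of columns
          * @param ctx    Context in which to create the matrix
          */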
        explicit coordinate_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx)
          : rows_(rows), cols_(cols), nonzeros_(0), group_num_(64)
        {
          group_boundaries_.switch_active_handle_id(ctx.memory_type());
              coord_buffer_.switch_active_handle_id(ctx.memory_type());
                  elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
          if (ctx.memory_type() == OPENCL_MEMORY)
          {
            group_boundaries_.opencl_handle().context(ctx.opencl_context());
                coord_buffer_.opencl_handle().context(ctx.opencl_context());
                    elements_.opencl_handle().context(ctx.opencl_context());
          }
#endif
        }

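        /** @brief Allocates device memory for at least the given number of nonzeros; existing entries are preserved. */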
        void reserve(vcl_size_t new_nonzeros)
        {
          if (new_nonzeros > nonzeros_)  //TODO: Do we need to initialize new memory with zero?
          {
            handle_type coord_buffer_old;
            handle_type elements_old;
            viennacl::backend::memory_shallow_copy(coord_buffer_, coord_buffer_old);
            viennacl::backend::memory_shallow_copy(elements_, elements_old);

            vcl_size_t internal_new_nnz = viennacl::tools::align_to_multiple<vcl_size_t>(new_nonzeros, ALIGNMENT);
            viennacl::backend::typesafe_host_array<unsigned int> size_deducer(coord_buffer_);
            viennacl::backend::memory_create(coord_buffer_, size_deducer.element_size() * 2 * internal_new_nnz, viennacl::traits::context(coord_buffer_));
            viennacl::backend::memory_create(elements_,     sizeof(SCALARTYPE)  * internal_new_nnz,             viennacl::traits::context(elements_));

            viennacl::backend::memory_copy(coord_buffer_old, coord_buffer_, 0, 0, size_deducer.element_size() * 2 * nonzeros_);
            viennacl::backend::memory_copy(elements_old,     elements_,     0, 0, sizeof(SCALARTYPE)  * nonzeros_);

            nonzeros_ = new_nonzeros;
          }
        }

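        /** @brief Resizes the matrix.
          *
          * @param new_size1   New number of rows
          * @param new_size2   New number of columns
          * @param preserve    If true, existing entries within the new bounds are preserved (entries outside the new bounds are always dropped)
          */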
        void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true)
        {
          assert (new_size1 > 0 && new_size2 > 0);

          if (new_size1 < rows_ || new_size2 < cols_) //shrinking: entries outside the new bounds must be removed via a host round trip
          {
            std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
            if (rows_ > 0)
              stl_sparse_matrix.resize(rows_);

            if (preserve && rows_ > 0)
              viennacl::copy(*this, stl_sparse_matrix);

            stl_sparse_matrix.resize(new_size1);

            //std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
            if (new_size2 < cols_ && rows_ > 0)
            {
              for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i)
              {
                std::list<unsigned int> to_delete;
                for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                    it != stl_sparse_matrix[i].end();
                    ++it)
                {
                  if (it->first >= new_size2)
                    to_delete.push_back(it->first);
                }

                for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
                  stl_sparse_matrix[i].erase(*it);
              }
              //std::cout << "Cropping done..." << std::endl;
            }

            rows_ = new_size1;
            cols_ = new_size2;
            viennacl::copy(stl_sparse_matrix, *this);
          }

          rows_ = new_size1;
          cols_ = new_size2;
        }


        /** @brief Returns the number of rows */
        vcl_size_t size1() const { return rows_; }
        /** @brief Returns the number of columns */
        vcl_size_t size2() const { return cols_; }
        /** @brief Returns the number of nonzero entries */
        vcl_size_t nnz() const { return nonzeros_; }
        /** @brief Returns the internal number of nonzero entries, i.e. nnz() padded to a multiple of ALIGNMENT */
        vcl_size_t internal_nnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(nonzeros_, ALIGNMENT); }

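        // Buffer layout: handle12() refers to the interleaved (row, column) index pairs,
        // handle() to the nonzero values, and handle3() to the group boundaries computed
        // in copy() above, which distribute the nonzeros over the work groups.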
        /** @brief Returns the handle to the buffer of (row index, column index) pairs */
        const handle_type & handle12() const { return coord_buffer_; }
        /** @brief Returns the handle to the buffer of matrix entries */
        const handle_type & handle() const { return elements_; }
        /** @brief Returns the handle to the buffer of group boundaries */
        const handle_type & handle3() const { return group_boundaries_; }

        vcl_size_t groups() const { return group_num_; }

        #if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
        template <typename CPU_MATRIX>
        friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
        #else
        template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
        friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
        #endif

      private:
        /** @brief Copy constructor is private and not implemented: coordinate_matrix is not copyable */
        coordinate_matrix(coordinate_matrix const &);

        /** @brief Assignment operator is private and not implemented: coordinate_matrix is not copyable */
        coordinate_matrix & operator=(coordinate_matrix const &);


        vcl_size_t rows_;
        vcl_size_t cols_;
        vcl_size_t nonzeros_;
        vcl_size_t group_num_;
        handle_type coord_buffer_;
        handle_type elements_;
        handle_type group_boundaries_;
    };
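
    // Usage sketch (illustrative only; 'stl_A' is the hypothetical host matrix from
    // the copy() example above, and viennacl::linalg::prod() additionally requires
    // including viennacl/linalg/prod.hpp):
    //
    //   viennacl::coordinate_matrix<float> vcl_A(1000, 1000);
    //   viennacl::copy(stl_A, vcl_A);                  // fill from the host
    //   viennacl::vector<float> x(1000), y(1000);
    //   y = viennacl::linalg::prod(vcl_A, x);          // sparse matrix-vector product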


    //
    // Specify available operations:
    //

    namespace linalg
    {
      namespace detail
      {
        // x = A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_assign, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              // check for the special case x = A * x
              if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
              {
                viennacl::vector<T> temp(lhs);
                viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
                lhs = temp;
              }
              else
                viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
            }
        };

        // x += A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(lhs);   // copy of lhs with matching size and context; prod_impl() overwrites it
              viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
              lhs += temp;
            }
        };

        // x -= A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(lhs);   // copy of lhs with matching size and context; prod_impl() overwrites it
              viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
              lhs -= temp;
            }
        };


        // x = A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_assign, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
            }
        };

        // x += A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::vector<T> temp_result(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
              lhs += temp_result;
            }
        };

        // x -= A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::vector<T> temp_result(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
              lhs -= temp_result;
            }
        };

      } // namespace detail
    } // namespace linalg
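
    // The op_executor specializations above route vector expressions involving a
    // coordinate_matrix to prod_impl(), adding a temporary whenever the result
    // aliases an operand. An illustrative sketch (x, y, and vcl_A as in the
    // earlier examples):
    //
    //   x = viennacl::linalg::prod(vcl_A, y);    // op_assign specialization
    //   x += viennacl::linalg::prod(vcl_A, y);   // op_inplace_add
    //   x -= viennacl::linalg::prod(vcl_A, y);   // op_inplace_sub
    //   x = viennacl::linalg::prod(vcl_A, x);    // aliasing case: computed via a temporary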

} // namespace viennacl

#endif