ViennaCL - The Vienna Computing Library
1.5.1
#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
#define VIENNACL_COMPRESSED_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

#include "viennacl/tools/tools.hpp"
#include "viennacl/tools/entry_proxy.hpp"

namespace viennacl
{
  namespace detail
  {
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy_impl(const CPU_MATRIX & cpu_matrix,
                   compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
                   vcl_size_t nonzeros)
    {
      assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
      assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

      viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros);
      std::vector<SCALARTYPE> elements(nonzeros);

      vcl_size_t row_index = 0;
      vcl_size_t data_index = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        row_buffer.set(row_index, data_index);
        ++row_index;

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          col_buffer.set(data_index, col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }
        data_index = viennacl::tools::align_to_multiple<vcl_size_t>(data_index, ALIGNMENT); //take care of alignment
      }
      row_buffer.set(row_index, data_index);

      gpu_matrix.set(row_buffer.get(),
                     col_buffer.get(),
                     &elements[0],
                     cpu_matrix.size1(),
                     cpu_matrix.size2(),
                     nonzeros);
    }
  }

  //provide copy-operation:
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      //determine nonzeros:
      vcl_size_t num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        vcl_size_t entries_per_row = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++entries_per_row;
        }
        num_entries += viennacl::tools::align_to_multiple<vcl_size_t>(entries_per_row, ALIGNMENT);
      }

      if (num_entries == 0) //we copy an empty matrix
        num_entries = 1;

      //set up matrix entries:
      viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, num_entries);
    }
  }


  //adapted for std::vector< std::map < > > argument:
  template <typename SizeType, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<SizeType, SCALARTYPE> > & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    vcl_size_t nonzeros = 0;
    vcl_size_t max_col = 0;
    for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
    {
      if (cpu_matrix[i].size() > 0)
        nonzeros += ((cpu_matrix[i].size() - 1) / ALIGNMENT + 1) * ALIGNMENT;
      if (cpu_matrix[i].size() > 0)
        max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
    }

    viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<SCALARTYPE, SizeType>(cpu_matrix, cpu_matrix.size(), max_col + 1),
                                gpu_matrix,
                                nonzeros);
  }

#ifdef VIENNACL_WITH_UBLAS
  template <typename ScalarType, typename F, vcl_size_t IB, typename IA, typename TA>
  void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix,
            viennacl::compressed_matrix<ScalarType, 1> & gpu_matrix)
  {
    assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    //we just need to copy the CSR arrays:
    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1);
    for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i)
      row_buffer.set(i, ublas_matrix.index1_data()[i]);

    viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz());
    for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
      col_buffer.set(i, ublas_matrix.index2_data()[i]);

    gpu_matrix.set(row_buffer.get(),
                   col_buffer.get(),
                   &(ublas_matrix.value_data()[0]),
                   ublas_matrix.size1(),
                   ublas_matrix.size2(),
                   ublas_matrix.nnz());

  }
#endif

#ifdef VIENNACL_WITH_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

    for (int k=0; k < eigen_matrix.outerSize(); ++k)
      for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
        stl_matrix[it.row()][it.col()] = it.value();

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix, eigen_matrix.rows(), eigen_matrix.cols()), gpu_matrix);
  }
#endif


#ifdef VIENNACL_WITH_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

    typedef mtl::compressed2D<SCALARTYPE>  MatrixType;

    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

    using mtl::traits::range_generator;
    using mtl::traits::range::min;

    // Choose between row and column traversal
    typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                         range_generator<mtl::tag::col, MatrixType> >::type  range_type;
    range_type my_range;

    // Type of outer cursor
    typedef typename range_type::type  c_type;
    // Type of inner cursor
    typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type  ic_type;

    // Define the property maps
    typename mtl::traits::row<MatrixType>::type     row(cpu_matrix);
    typename mtl::traits::col<MatrixType>::type     col(cpu_matrix);
    typename mtl::traits::const_value<MatrixType>::type   value(cpu_matrix);

    // Now iterate over the matrix
    for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
      for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
        stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix);
  }
#endif


  //
  // gpu to cpu:
  //
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      //get raw data from memory:
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl;

      viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0]));

      //fill the cpu_matrix:
      vcl_size_t data_index = 0;
      for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          if (col_buffer[data_index] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
            return;
          }

          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            cpu_matrix(row-1, static_cast<vcl_size_t>(col_buffer[data_index])) = elements[data_index];
          ++data_index;
        }
      }
    }
  }


  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size());
    copy(gpu_matrix, temp);
  }

#ifdef VIENNACL_WITH_UBLAS
  template <typename ScalarType, unsigned int ALIGNMENT, typename F, vcl_size_t IB, typename IA, typename TA>
  void copy(viennacl::compressed_matrix<ScalarType, ALIGNMENT> const & gpu_matrix,
            boost::numeric::ublas::compressed_matrix<ScalarType> & ublas_matrix)
  {
    assert( (viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
    viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());

    viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());

    ublas_matrix.clear();
    ublas_matrix.reserve(gpu_matrix.nnz());

    ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz());

    for (vcl_size_t i=0; i<ublas_matrix.size1() + 1; ++i)
      ublas_matrix.index1_data()[i] = row_buffer[i];

    for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
      ublas_matrix.index2_data()[i] = col_buffer[i];

    viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(ScalarType) * gpu_matrix.nnz(), &(ublas_matrix.value_data()[0]));

  }
#endif

#ifdef VIENNACL_WITH_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
  {
    assert( (static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      //get raw data from memory:
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0]));

      eigen_matrix.setZero();
      vcl_size_t data_index = 0;
      for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
    }
  }
#endif


#ifdef VIENNACL_WITH_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
  {
    assert( (static_cast<vcl_size_t>(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (static_cast<vcl_size_t>(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {

      //get raw data from memory:
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0]));

      //set_to_zero(mtl4_matrix);
      //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2());

      mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
      vcl_size_t data_index = 0;
      for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
          ++data_index;
        }
      }
    }
  }
#endif



  template<class SCALARTYPE, unsigned int ALIGNMENT /* see VCLForwards.h */>
  class compressed_matrix
  {
  public:
    typedef viennacl::backend::mem_handle                                                              handle_type;
    typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
    typedef vcl_size_t                                                                                 size_type;

    compressed_matrix() : rows_(0), cols_(0), nonzeros_(0) {}

    explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context())
      : rows_(rows), cols_(cols), nonzeros_(nonzeros)
    {
      row_buffer_.switch_active_handle_id(ctx.memory_type());
      col_buffer_.switch_active_handle_id(ctx.memory_type());
      elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        row_buffer_.opencl_handle().context(ctx.opencl_context());
        col_buffer_.opencl_handle().context(ctx.opencl_context());
        elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
      if (rows > 0)
      {
        viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx);
      }
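      // allocate the CSR column-index and value arrays only if an initial nonzero count was provided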
      if (nonzeros > 0)
      {
        viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx);
        viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, ctx);
      }
    }

    explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx)
      : rows_(rows), cols_(cols), nonzeros_(0)
    {
      row_buffer_.switch_active_handle_id(ctx.memory_type());
      col_buffer_.switch_active_handle_id(ctx.memory_type());
      elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        row_buffer_.opencl_handle().context(ctx.opencl_context());
        col_buffer_.opencl_handle().context(ctx.opencl_context());
        elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
      if (rows > 0)
      {
        viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx);
      }
    }

    explicit compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0)
    {
      row_buffer_.switch_active_handle_id(ctx.memory_type());
      col_buffer_.switch_active_handle_id(ctx.memory_type());
      elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        row_buffer_.opencl_handle().context(ctx.opencl_context());
        col_buffer_.opencl_handle().context(ctx.opencl_context());
        elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
    }


#ifdef VIENNACL_WITH_OPENCL
    explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
                               vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) :
      rows_(rows), cols_(cols), nonzeros_(nonzeros)
    {
      row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
      row_buffer_.opencl_handle() = mem_row_buffer;
      row_buffer_.opencl_handle().inc();             //prevents that the user-provided memory is deleted once the matrix object is destroyed.
      row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1));

      col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
      col_buffer_.opencl_handle() = mem_col_buffer;
      col_buffer_.opencl_handle().inc();             //prevents that the user-provided memory is deleted once the matrix object is destroyed.
      col_buffer_.raw_size(sizeof(cl_uint) * nonzeros);

      elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
      elements_.opencl_handle() = mem_elements;
      elements_.opencl_handle().inc();               //prevents that the user-provided memory is deleted once the matrix object is destroyed.
      elements_.raw_size(sizeof(SCALARTYPE) * nonzeros);
    }
#endif


    compressed_matrix & operator=(compressed_matrix const & other)
    {
      assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") );
      assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") );

      rows_ = other.size1();
      cols_ = other.size2();
      nonzeros_ = other.nnz();

      viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_);
      viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_);
      viennacl::backend::typesafe_memory_copy<SCALARTYPE>(other.elements_, elements_);

      return *this;
    }


    void set(const void * row_jumper,
             const void * col_buffer,
             const SCALARTYPE * elements,
             vcl_size_t rows,
             vcl_size_t cols,
             vcl_size_t nonzeros)
    {
      assert( (rows > 0)     && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!"));
      assert( (cols > 0)     && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!"));
      assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!"));
      //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl;

      //row_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
      viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper);

      //col_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
      viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer);

      //elements_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
      viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, viennacl::traits::context(elements_), elements);

      nonzeros_ = nonzeros;
      rows_ = rows;
      cols_ = cols;
    }

    void reserve(vcl_size_t new_nonzeros)
    {
      if (new_nonzeros > nonzeros_)
      {
        handle_type col_buffer_old;
        handle_type elements_old;
        viennacl::backend::memory_shallow_copy(col_buffer_, col_buffer_old);
        viennacl::backend::memory_shallow_copy(elements_, elements_old);

        viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_);
        viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_));
        viennacl::backend::memory_create(elements_,   sizeof(SCALARTYPE)          * new_nonzeros, viennacl::traits::context(elements_));

        viennacl::backend::memory_copy(col_buffer_old, col_buffer_, 0, 0, size_deducer.element_size() * nonzeros_);
        viennacl::backend::memory_copy(elements_old,   elements_,   0, 0, sizeof(SCALARTYPE)* nonzeros_);

        nonzeros_ = new_nonzeros;
      }
    }

    void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true)
    {
      assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!"));

      if (new_size1 != rows_ || new_size2 != cols_)
      {
        std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
        if (rows_ > 0)
        {
          if (preserve)
          {
            stl_sparse_matrix.resize(rows_);
            viennacl::copy(*this, stl_sparse_matrix);
          }
          else
            stl_sparse_matrix[0][0] = 0;
        }
        else
        {
          stl_sparse_matrix.resize(new_size1);
          stl_sparse_matrix[0][0] = 0;    //enforces nonzero array sizes if matrix was initially empty
        }

        stl_sparse_matrix.resize(new_size1);

        //discard entries with column index larger than new_size2
        if (new_size2 < cols_ && rows_ > 0)
        {
          for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i)
          {
            std::list<unsigned int> to_delete;
            for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                 it != stl_sparse_matrix[i].end();
                 ++it)
            {
              if (it->first >= new_size2)
                to_delete.push_back(it->first);
            }

            for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
              stl_sparse_matrix[i].erase(*it);
          }
        }

        viennacl::copy(stl_sparse_matrix, *this);

        rows_ = new_size1;
        cols_ = new_size2;
      }
    }

    entry_proxy<SCALARTYPE> operator()(vcl_size_t i, vcl_size_t j)
    {
      assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!"));

      vcl_size_t index = element_index(i, j);

      // check for element in sparsity pattern
      if (index < nonzeros_)
        return entry_proxy<SCALARTYPE>(index, elements_);

      // Element not found. Copying required. Very slow, but direct entry manipulation is painful anyway...
      std::vector< std::map<unsigned int, SCALARTYPE> > cpu_backup(rows_);
      tools::sparse_matrix_adapter<SCALARTYPE> adapted_cpu_backup(cpu_backup, rows_, cols_);
      viennacl::copy(*this, adapted_cpu_backup);
      cpu_backup[i][static_cast<unsigned int>(j)] = 0.0;
      viennacl::copy(adapted_cpu_backup, *this);

      index = element_index(i, j);

      assert(index < nonzeros_);

      return entry_proxy<SCALARTYPE>(index, elements_);
    }

    const vcl_size_t & size1() const { return rows_; }
    const vcl_size_t & size2() const { return cols_; }
    const vcl_size_t & nnz() const { return nonzeros_; }

    const handle_type & handle1() const { return row_buffer_; }
    const handle_type & handle2() const { return col_buffer_; }
    const handle_type & handle() const { return elements_; }

    handle_type & handle1() { return row_buffer_; }
    handle_type & handle2() { return col_buffer_; }
    handle_type & handle() { return elements_; }

    void switch_memory_context(viennacl::context new_ctx)
    {
      viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx);
      viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx);
      viennacl::backend::switch_memory_context<SCALARTYPE>(elements_, new_ctx);
    }

    viennacl::memory_types memory_context() const
    {
      return row_buffer_.get_active_handle_id();
    }

  private:

    vcl_size_t element_index(vcl_size_t i, vcl_size_t j)
    {
      //read row indices
      viennacl::backend::typesafe_host_array<unsigned int> row_indices(row_buffer_, 2);
      viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.element_size()*2, row_indices.get());

      //get column indices for row i:
      viennacl::backend::typesafe_host_array<unsigned int> col_indices(col_buffer_, row_indices[1] -
                                                                                            row_indices[0]);
      viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], row_indices.element_size()*col_indices.size(), col_indices.get());

      //get entries for row i:
      viennacl::backend::typesafe_host_array<SCALARTYPE> row_entries(elements_, row_indices[1] - row_indices[0]);
      viennacl::backend::memory_read(elements_, sizeof(SCALARTYPE)*row_indices[0], sizeof(SCALARTYPE)*row_entries.size(), row_entries.get());

      for (vcl_size_t k=0; k<col_indices.size(); ++k)
      {
        if (col_indices[k] == j)
          return row_indices[0] + k;
      }

      // if not found, return index past the end of the matrix (cf. matrix.end() in the spirit of the STL)
      return nonzeros_;
    }

    // /** @brief Copy constructor is by now not available. */
    //compressed_matrix(compressed_matrix const &);


    vcl_size_t rows_;
    vcl_size_t cols_;
    vcl_size_t nonzeros_;
    handle_type row_buffer_;
    handle_type col_buffer_;
    handle_type elements_;
  };


  //
  // Specify available operations:
  //
  namespace linalg
  {
    namespace detail
    {
      // x = A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          // check for the special case x = A * x
          if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
          {
            viennacl::vector<T> temp(lhs);
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
            lhs = temp;
          }
          else
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
        }
      };

      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs += temp;
        }
      };

      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs -= temp;
        }
      };


      // x = A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
        }
      };

      // x += A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs += temp_result;
        }
      };

      // x -= A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs -= temp_result;
        }
      };

    } // namespace detail
  } // namespace linalg

}

#endif
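// ---------------------------------------------------------------------------
// Usage sketch (editor's addition, not part of the original header): assemble
// a small sparse matrix on the host, copy it into a compressed_matrix, run a
// sparse matrix-vector product, and copy the result back. The float scalar
// type, the 4x4 dimensions, and the sample entries are illustrative
// assumptions only.
//
//   #include "viennacl/compressed_matrix.hpp"
//   #include "viennacl/vector.hpp"
//   #include "viennacl/linalg/prod.hpp"
//
//   std::vector< std::map<unsigned int, float> > host_A(4);   // 4x4 host matrix
//   host_A[0][0] = 2.0f;  host_A[1][1] = 3.0f;
//   host_A[2][3] = 1.5f;  host_A[3][2] = -1.0f;
//
//   viennacl::compressed_matrix<float> A(4, 4);
//   viennacl::copy(host_A, A);                                 // host -> device (CSR assembly)
//
//   std::vector<float> host_x(4, 1.0f);
//   viennacl::vector<float> x(4);
//   viennacl::copy(host_x, x);
//
//   viennacl::vector<float> y = viennacl::linalg::prod(A, x);  // sparse matrix-vector product
//
//   std::vector<float> host_y(4);
//   viennacl::copy(y, host_y);                                 // device -> host
// ---------------------------------------------------------------------------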