ViennaCL - The Vienna Computing Library
1.5.1
|
00001 #ifndef VIENNACL_MATRIX_HPP_ 00002 #define VIENNACL_MATRIX_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include "viennacl/forwards.h" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/linalg/matrix_operations.hpp" 00029 #include "viennacl/linalg/sparse_matrix_operations.hpp" 00030 #include "viennacl/tools/tools.hpp" 00031 #include "viennacl/tools/matrix_size_deducer.hpp" 00032 #include "viennacl/meta/result_of.hpp" 00033 #include "viennacl/meta/enable_if.hpp" 00034 //#include "viennacl/rand/utils.hpp" 00035 #include "viennacl/traits/handle.hpp" 00036 00037 namespace viennacl 00038 { 00043 template<typename SCALARTYPE> 00044 class implicit_matrix_base 00045 { 00046 protected: 00047 typedef vcl_size_t size_type; 00048 implicit_matrix_base(size_type size1, size_type size2, std::pair<SCALARTYPE, bool> value, bool diag) : size1_(size1), size2_(size2), value_(value), diag_(diag){ } 00049 public: 00050 typedef SCALARTYPE const & const_reference; 00051 typedef SCALARTYPE cpu_value_type; 00052 00053 size_type size1() const { return size1_; } 00054 size_type size2() const { return size2_; } 00055 00056 SCALARTYPE value() const { return value_.first; } 00057 bool is_value_static( ) const { return value_.second; } 00058 bool diag() const { return diag_; } 00059 00060 
const_reference operator()(size_type i, size_type j) const { 00061 if(diag_) return (i == j) ? value_.first : 0; 00062 return value_.first; 00063 } 00064 00065 protected: 00066 size_type size1_; 00067 size_type size2_; 00068 std::pair<SCALARTYPE, bool> value_; 00069 bool diag_; 00070 }; 00071 00072 // 00073 // Initializer types 00074 // 00076 template <typename SCALARTYPE> 00077 class identity_matrix 00078 { 00079 public: 00080 typedef vcl_size_t size_type; 00081 typedef SCALARTYPE const & const_reference; 00082 00083 identity_matrix(size_type s, viennacl::context ctx = viennacl::context()) : size_(s), diag_(1), off_diag_(0), ctx_(ctx) {} 00084 00085 size_type size1() const { return size_; } 00086 size_type size2() const { return size_; } 00087 const_reference operator()(size_type i, size_type j) const { return (i == j) ? diag_ : off_diag_; } 00088 00089 viennacl::context context() const { return ctx_; } 00090 00091 private: 00092 size_type size_; 00093 SCALARTYPE diag_; 00094 SCALARTYPE off_diag_; 00095 viennacl::context ctx_; 00096 }; 00097 00098 00100 template <typename SCALARTYPE> 00101 class zero_matrix 00102 { 00103 public: 00104 typedef vcl_size_t size_type; 00105 typedef SCALARTYPE const & const_reference; 00106 00107 zero_matrix(size_type s1, size_type s2, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), val_(0), ctx_(ctx) {} 00108 00109 size_type size1() const { return size1_; } 00110 size_type size2() const { return size2_; } 00111 const_reference operator()(size_type /*i*/, size_type /*j*/) const { return val_; } 00112 00113 viennacl::context context() const { return ctx_; } 00114 00115 private: 00116 size_type size1_; 00117 size_type size2_; 00118 SCALARTYPE val_; 00119 viennacl::context ctx_; 00120 }; 00121 00122 00124 template <typename SCALARTYPE> 00125 class scalar_matrix 00126 { 00127 public: 00128 typedef vcl_size_t size_type; 00129 typedef SCALARTYPE const & const_reference; 00130 00131 scalar_matrix(size_type s1, size_type 
s2, const_reference val, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), value_(val), ctx_(ctx) {} 00132 00133 size_type size1() const { return size1_; } 00134 size_type size2() const { return size2_; } 00135 const_reference operator()(size_type /*i*/, size_type /*j*/) const { return value_; } 00136 00137 viennacl::context context() const { return ctx_; } 00138 00139 private: 00140 size_type size1_; 00141 size_type size2_; 00142 SCALARTYPE value_; 00143 viennacl::context ctx_; 00144 }; 00145 00146 00147 00148 //#ifdef VIENNACL_WITH_OPENCL 00149 // template<class SCALARTYPE, class DISTRIBUTION> 00150 // rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){ 00151 // return rand::random_matrix_t<SCALARTYPE,DISTRIBUTION>(size1,size2,distribution); 00152 // } 00153 //#endif 00154 00161 template <typename LHS, typename RHS, typename OP> 00162 class matrix_expression 00163 { 00164 typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type; 00165 typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type; 00166 00167 public: 00168 typedef vcl_size_t size_type; 00169 00170 matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} 00171 00174 LHS & lhs() const { return lhs_; } 00177 RHS & rhs() const { return rhs_; } 00178 00180 vcl_size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(lhs_, rhs_); } 00181 vcl_size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(lhs_, rhs_); } 00182 00183 private: 00185 lhs_reference_type lhs_; 00187 rhs_reference_type rhs_; 00188 }; 00189 00190 00192 struct row_iteration {}; 00193 00195 struct col_iteration {}; 00196 00197 //STL-like iterator. TODO: STL-compliance... 
00199 template <typename ROWCOL, typename MATRIXTYPE> 00200 class matrix_iterator 00201 { 00202 typedef matrix_iterator<ROWCOL, MATRIXTYPE> self_type; 00203 public: 00204 typedef typename MATRIXTYPE::value_type value_type; 00205 00206 matrix_iterator(MATRIXTYPE & mat, 00207 vcl_size_t start_row, 00208 vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {} 00209 00210 value_type operator*(void) { return mat_(row_, col_); } 00211 self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(mat_, row_, col_); return *this; } 00212 self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; } 00213 00214 bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); } 00215 bool operator!=(self_type const & other) { return !(*this == other); } 00216 00217 vcl_size_t index1() { return row_; } 00218 vcl_size_t index2() { return col_; } 00219 00220 MATRIXTYPE & operator()(void) const { return mat_; } 00221 00222 private: 00223 MATRIXTYPE & mat_; 00224 vcl_size_t row_; 00225 vcl_size_t col_; 00226 }; 00227 00228 00235 template <class SCALARTYPE, typename F, typename SizeType /* see forwards.h for default type */, typename DistanceType /* see forwards.h for default type */> 00236 class matrix_base 00237 { 00238 typedef matrix_base<SCALARTYPE, F, SizeType, DistanceType> self_type; 00239 public: 00240 00241 typedef matrix_iterator<row_iteration, self_type > iterator1; 00242 typedef matrix_iterator<col_iteration, self_type > iterator2; 00243 typedef scalar<SCALARTYPE> value_type; 00244 typedef SCALARTYPE cpu_value_type; 00245 typedef SizeType size_type; 00246 typedef DistanceType difference_type; 00247 typedef viennacl::backend::mem_handle handle_type; 00248 typedef F orientation_functor; 00249 typedef typename F::orientation_category orientation_category; 00250 00251 static const size_type alignment = 128; 00252 00253 00255 explicit matrix_base() : size1_(0), size2_(0), 
start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0) {} 00256 00263 explicit matrix_base(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) 00264 : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), 00265 internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, alignment)), 00266 internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, alignment)) 00267 { 00268 if (rows > 0 && columns > 0) 00269 { 00270 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), ctx); 00271 clear(); 00272 } 00273 } 00274 00275 00277 explicit matrix_base(viennacl::backend::mem_handle & h, 00278 size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, 00279 size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) 00280 : size1_(mat_size1), size2_(mat_size2), 00281 start1_(mat_start1), start2_(mat_start2), 00282 stride1_(mat_stride1), stride2_(mat_stride2), 00283 internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2), 00284 elements_(h) {} 00285 00286 template <typename LHS, typename RHS, typename OP> 00287 explicit matrix_base(matrix_expression<const LHS, const RHS, OP> const & proxy) : 00288 size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1), 00289 internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, alignment)), 00290 internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, alignment)) 00291 { 00292 elements_.switch_active_handle_id(viennacl::traits::active_handle_id(proxy)); 00293 if (internal_size() > 0) 00294 { 00295 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy)); 00296 clear(); 00297 self_type::operator=(proxy); 00298 } 00299 } 00300 00301 // CUDA or host memory: 
00302 explicit matrix_base(SCALARTYPE * ptr_to_mem, viennacl::memory_types mem_type, 00303 size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, 00304 size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) 00305 : size1_(mat_size1), size2_(mat_size2), 00306 start1_(mat_start1), start2_(mat_start2), 00307 stride1_(mat_stride1), stride2_(mat_stride2), 00308 internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2) 00309 { 00310 if (mem_type == viennacl::CUDA_MEMORY) 00311 { 00312 #ifdef VIENNACL_WITH_CUDA 00313 elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); 00314 elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem)); 00315 elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 00316 #else 00317 throw cuda_not_available_exception(); 00318 #endif 00319 } 00320 else if (mem_type == viennacl::MAIN_MEMORY) 00321 { 00322 elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); 00323 elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem)); 00324 elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 00325 } 00326 00327 elements_.raw_size(sizeof(SCALARTYPE) * internal_size()); 00328 } 00329 00330 #ifdef VIENNACL_WITH_OPENCL 00331 explicit matrix_base(cl_mem mem, size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) 00332 : size1_(rows), size2_(columns), 00333 start1_(0), start2_(0), 00334 stride1_(1), stride2_(1), 00335 internal_size1_(rows), internal_size2_(columns) 00336 { 00337 elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); 00338 elements_.opencl_handle() = mem; 00339 elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 
00340 elements_.opencl_handle().context(ctx.opencl_context()); 00341 elements_.raw_size(sizeof(SCALARTYPE)*internal_size()); 00342 } 00343 00344 explicit matrix_base(cl_mem mem, viennacl::context ctx, 00345 size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, 00346 size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) 00347 : size1_(mat_size1), size2_(mat_size2), 00348 start1_(mat_start1), start2_(mat_start2), 00349 stride1_(mat_stride1), stride2_(mat_stride2), 00350 internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2) 00351 { 00352 elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); 00353 elements_.opencl_handle() = mem; 00354 elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 00355 elements_.opencl_handle().context(ctx.opencl_context()); 00356 elements_.raw_size(sizeof(SCALARTYPE)*internal_size()); 00357 } 00358 #endif 00359 00360 00361 self_type & operator=(const self_type & other) //enables implicit conversions 00362 { 00363 if (internal_size() == 0) 00364 { 00365 if (other.internal_size() == 0) 00366 return *this; 00367 resize(other.size1(), other.size2(), false); 00368 } 00369 00370 viennacl::linalg::am(*this, 00371 other, cpu_value_type(1.0), 1, false, false); 00372 return *this; 00373 } 00374 00376 /*template<class DISTRIBUTION> 00377 matrix(rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> const & m) : rows_(m.size1), columns_(m.size2) 00378 { 00379 if (internal_size() > 0) 00380 { 00381 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size()); 00382 rand::buffer_dumper<SCALARTYPE, DISTRIBUTION>::dump(elements_,m.distribution,0,internal_size()); 00383 } 00384 }*/ 00385 00386 00387 00392 template <typename LHS, typename RHS, typename OP> 00393 self_type & operator=(const matrix_expression<const LHS, const RHS, OP> & proxy) 00394 { 00395 assert( 
(viennacl::traits::size1(proxy) == size1() || size1() == 0) 00396 && (viennacl::traits::size2(proxy) == size2() || size2() == 0) 00397 && bool("Incompatible matrix sizes!")); 00398 00399 if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0) 00400 { 00401 size1_ = viennacl::traits::size1(proxy); 00402 size2_ = viennacl::traits::size2(proxy); 00403 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00404 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00405 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy)); 00406 if (size1_ != internal_size1_ || size2_ != internal_size2_) 00407 clear(); 00408 } 00409 00410 if (internal_size() > 0) 00411 linalg::detail::op_executor<self_type, op_assign, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy); 00412 00413 return *this; 00414 } 00415 00416 00417 // A = trans(B). 
Currently achieved in CPU memory 00418 self_type & operator=(const matrix_expression< const self_type, 00419 const self_type, 00420 op_trans> & proxy) 00421 { 00422 assert( (handle() != proxy.lhs().handle()) && bool("Self-assignment of matrix transpose not implemented")); 00423 assert( ( (proxy.lhs().size1() == size2()) || (size2() == 0) ) && bool("Matrix dimensions do not match!")); 00424 assert( ( (proxy.lhs().size2() == size1()) || (size1() == 0) ) && bool("Matrix dimensions do not match!")); 00425 00426 if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0) 00427 { 00428 size1_ = viennacl::traits::size1(proxy); 00429 size2_ = viennacl::traits::size2(proxy); 00430 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00431 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00432 } 00433 00434 std::vector<SCALARTYPE> temp(proxy.lhs().internal_size()); 00435 00436 viennacl::backend::memory_read(proxy.lhs().handle(), 0, sizeof(SCALARTYPE)*proxy.lhs().internal_size(), &(temp[0])); 00437 00438 // now transpose it 00439 std::vector<SCALARTYPE> temp_trans(internal_size()); 00440 00441 for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i) 00442 for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j) 00443 temp_trans[F::mem_index(start2() + stride2() * j, 00444 start1() + stride1() * i, 00445 internal_size1(), internal_size2())] 00446 = temp[F::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * i, 00447 proxy.lhs().start2() + proxy.lhs().stride2() * j, 00448 proxy.lhs().internal_size1(), proxy.lhs().internal_size2())]; 00449 00450 // write back 00451 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy), &(temp_trans[0])); 00452 00453 return *this; 00454 } 00455 00456 template <typename LHS, typename RHS, typename OP> 00457 self_type & operator+=(const matrix_expression<const LHS, const RHS, OP> & proxy) 00458 
{ 00459 assert( (viennacl::traits::size1(proxy) == size1()) 00460 && (viennacl::traits::size2(proxy) == size2()) 00461 && bool("Incompatible matrix sizes!")); 00462 assert( (size1() > 0) && bool("Vector not yet initialized!") ); 00463 assert( (size2() > 0) && bool("Vector not yet initialized!") ); 00464 00465 linalg::detail::op_executor<self_type, op_inplace_add, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy); 00466 00467 return *this; 00468 } 00469 00470 template <typename LHS, typename RHS, typename OP> 00471 self_type & operator-=(const matrix_expression<const LHS, const RHS, OP> & proxy) 00472 { 00473 assert( (viennacl::traits::size1(proxy) == size1()) 00474 && (viennacl::traits::size2(proxy) == size2()) 00475 && bool("Incompatible matrix sizes!")); 00476 assert( (size1() > 0) && bool("Vector not yet initialized!") ); 00477 assert( (size2() > 0) && bool("Vector not yet initialized!") ); 00478 00479 linalg::detail::op_executor<self_type, op_inplace_sub, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy); 00480 00481 return *this; 00482 } 00483 00485 self_type & operator = (identity_matrix<SCALARTYPE> const & m) 00486 { 00487 assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); 00488 assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); 00489 00490 if (internal_size() == 0) 00491 { 00492 size1_ = m.size1(); 00493 size2_ = m.size2(); 00494 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00495 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00496 if (internal_size() > 0) 00497 { 00498 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); 00499 clear(); 00500 } 00501 } 00502 else 00503 viennacl::linalg::matrix_assign(*this, SCALARTYPE(0)); 00504 00505 if (internal_size() > 0) 00506 viennacl::linalg::matrix_diagonal_assign(*this, m(0,0)); 00507 00508 return *this; 00509 } 00510 
00512 self_type & operator = (zero_matrix<SCALARTYPE> const & m) 00513 { 00514 assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); 00515 assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); 00516 00517 if (internal_size() == 0) 00518 { 00519 size1_ = m.size1(); 00520 size2_ = m.size2(); 00521 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00522 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00523 if (internal_size() > 0) 00524 { 00525 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); 00526 clear(); 00527 } 00528 } 00529 else 00530 viennacl::linalg::matrix_assign(*this, SCALARTYPE(0)); 00531 00532 return *this; 00533 } 00534 00536 self_type & operator = (scalar_matrix<SCALARTYPE> const & m) 00537 { 00538 assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); 00539 assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); 00540 00541 if (internal_size() == 0) 00542 { 00543 size1_ = m.size1(); 00544 size2_ = m.size2(); 00545 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00546 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00547 if (internal_size() > 0) 00548 { 00549 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); 00550 clear(); 00551 } 00552 } 00553 00554 if (internal_size() > 0) 00555 { 00556 viennacl::linalg::matrix_assign(*this, m(0,0)); 00557 } 00558 00559 return *this; 00560 } 00561 00562 00563 //read-write access to an element of the matrix/matrix_range/matrix_slice 00566 entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index) 00567 { 00568 return entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); 00569 } 00570 00573 
const_entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index) const 00574 { 00575 return const_entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); 00576 } 00577 00578 // 00579 // Operator overloads for enabling implicit conversions: 00580 // 00581 self_type & operator += (const self_type & other) 00582 { 00583 viennacl::linalg::ambm(*this, 00584 *this, SCALARTYPE(1.0), 1, false, false, 00585 other, SCALARTYPE(1.0), 1, false, false); 00586 return *this; 00587 } 00588 00589 self_type & operator -= (const self_type & other) 00590 { 00591 viennacl::linalg::ambm(*this, 00592 *this, SCALARTYPE(1.0), 1, false, false, 00593 other, SCALARTYPE(1.0), 1, false, true); 00594 return *this; 00595 } 00596 00599 self_type & operator *= (SCALARTYPE val) 00600 { 00601 //viennacl::linalg::inplace_mult(*this, val); 00602 viennacl::linalg::am(*this, 00603 *this, val, 1, false, false); 00604 return *this; 00605 } 00606 00609 self_type & operator /= (SCALARTYPE val) 00610 { 00611 //viennacl::linalg::inplace_mult(*this, static_cast<SCALARTYPE>(1) / val); 00612 viennacl::linalg::am(*this, 00613 *this, val, 1, true, false); 00614 return *this; 00615 } 00616 00617 00619 matrix_expression<const self_type, const SCALARTYPE, op_mult> operator-() const 00620 { 00621 return matrix_expression<const self_type, const SCALARTYPE, op_mult>(*this, SCALARTYPE(-1)); 00622 } 00623 00625 size_type size1() const { return size1_;} 00627 size_type size2() const { return size2_; } 00628 00630 size_type start1() const { return start1_;} 00632 size_type start2() const { return start2_; } 00633 00635 size_type stride1() const { return stride1_;} 00637 size_type stride2() const { return stride2_; } 00638 00640 void clear() 00641 { 00642 viennacl::linalg::matrix_assign(*this, SCALARTYPE(0), true); 00643 } 00644 00645 00647 size_type internal_size1() const { return internal_size1_; } 00649 size_type 
internal_size2() const { return internal_size2_; } 00651 size_type internal_size() const { return internal_size1() * internal_size2(); } 00652 00654 handle_type & handle() { return elements_; } 00656 const handle_type & handle() const { return elements_; } 00657 00658 00659 viennacl::memory_types memory_domain() const 00660 { 00661 return elements_.get_active_handle_id(); 00662 } 00663 00664 protected: 00665 00666 void set_handle(viennacl::backend::mem_handle const & h) 00667 { 00668 elements_ = h; 00669 } 00670 00671 void switch_memory_context(viennacl::context new_ctx) 00672 { 00673 viennacl::backend::switch_memory_context<SCALARTYPE>(elements_, new_ctx); 00674 } 00675 00676 00684 void resize(size_type rows, size_type columns, bool preserve = true) 00685 { 00686 assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!")); 00687 00688 if (preserve && internal_size() > 0) 00689 { 00690 //get old entries: 00691 std::vector< SCALARTYPE > old_entries(internal_size()); 00692 viennacl::backend::memory_read(elements_, 0, sizeof(SCALARTYPE)*internal_size(), &(old_entries[0])); 00693 00694 //set up entries of new matrix: 00695 std::vector< SCALARTYPE > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, alignment) 00696 * viennacl::tools::align_to_multiple<vcl_size_t>(columns, alignment)); 00697 for (size_type i=0; i<rows; ++i) 00698 { 00699 if (i >= size1_) 00700 continue; 00701 00702 for (size_type j=0; j<columns; ++j) 00703 { 00704 if (j >= size2_) 00705 continue; 00706 new_entries[F::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, alignment), viennacl::tools::align_to_multiple<vcl_size_t>(columns, alignment))] 00707 = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())]; 00708 } 00709 } 00710 00711 //copy new entries to GPU: 00712 size1_ = rows; 00713 size2_ = columns; 00714 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, 
alignment); 00715 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00716 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0])); 00717 } 00718 else //discard old entries: 00719 { 00720 size1_ = rows; 00721 size2_ = columns; 00722 internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment); 00723 internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment); 00724 00725 viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(elements_)); 00726 clear(); 00727 } 00728 } 00729 00730 private: 00731 size_type size1_; 00732 size_type size2_; 00733 size_type start1_; 00734 size_type start2_; 00735 difference_type stride1_; 00736 difference_type stride2_; 00737 size_type internal_size1_; 00738 size_type internal_size2_; 00739 handle_type elements_; 00740 }; //matrix 00741 00742 00743 00750 template <class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00751 class matrix : public matrix_base<SCALARTYPE, F> 00752 { 00753 typedef matrix<SCALARTYPE, F, ALIGNMENT> self_type; 00754 typedef matrix_base<SCALARTYPE, F> base_type; 00755 public: 00756 typedef typename base_type::size_type size_type; 00757 00759 explicit matrix() : base_type() {} 00760 00767 explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, ctx) {} 00768 00769 #ifdef VIENNACL_WITH_OPENCL 00770 explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns) {} 00771 #endif 00772 00773 template <typename LHS, typename RHS, typename OP> 00774 matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {} 00775 00777 matrix(identity_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context()) 00778 { 00779 if (base_type::internal_size() > 0) 00780 base_type::operator=(m); 00781 } 
00782 00784 matrix(zero_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context()) 00785 { 00786 if (base_type::internal_size() > 0) 00787 base_type::operator=(m); 00788 } 00789 00791 matrix(scalar_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context()) 00792 { 00793 if (base_type::internal_size() > 0) 00794 base_type::operator=(m); 00795 } 00796 00797 matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other)) 00798 { 00799 base_type::operator=(other); 00800 } 00801 00802 00803 //copy constructor: 00804 matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other)) 00805 { 00806 base_type::operator=(other); 00807 } 00808 00809 00810 /*template <typename M1> 00811 self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy) 00812 { 00813 self_type temp(proxy.lhs()); 00814 *this = trans(temp); 00815 return *this; 00816 }*/ 00817 00818 using base_type::operator=; 00819 00827 void resize(size_type rows, size_type columns, bool preserve = true) 00828 { 00829 base_type::resize(rows, columns, preserve); 00830 } 00831 00832 }; //matrix 00833 00834 00835 00841 template<class SCALARTYPE, typename F> 00842 std::ostream & operator<<(std::ostream & s, const matrix_base<SCALARTYPE, F> & gpu_matrix) 00843 { 00844 typedef typename matrix_base<SCALARTYPE, F>::size_type size_type; 00845 00846 std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size()); 00847 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &(tmp[0])); 00848 00849 s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]"; 00850 00851 s << "("; 00852 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 00853 { 00854 s << "("; 00855 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 00856 { 00857 s << tmp[F::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + 
gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; 00858 if (j < gpu_matrix.size2() - 1) 00859 s << ","; 00860 } 00861 s << ")"; 00862 if (i < gpu_matrix.size1() - 1) 00863 s << ","; 00864 } 00865 s << ")"; 00866 return s; 00867 } 00868 00874 template<typename LHS, typename RHS, typename OP> 00875 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr) 00876 { 00877 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType ScalarType; 00878 00879 matrix<ScalarType> temp = expr; 00880 s << temp; 00881 return s; 00882 } 00883 00885 template<typename NumericT, typename F> 00886 matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans> 00887 trans(const matrix_base<NumericT, F> & mat) 00888 { 00889 return matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans>(mat, mat); 00890 } 00891 00892 //diag(): 00893 template<typename NumericT, typename F> 00894 vector_expression< const matrix_base<NumericT, F>, const int, op_matrix_diag> 00895 diag(const matrix_base<NumericT, F> & A, int k = 0) 00896 { 00897 return vector_expression< const matrix_base<NumericT, F>, const int, op_matrix_diag>(A, k); 00898 } 00899 00900 template<typename NumericT> 00901 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag> 00902 diag(const vector_base<NumericT> & v, int k = 0) 00903 { 00904 return matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>(v, k); 00905 } 00906 00907 // row(): 00908 template<typename NumericT, typename F> 00909 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row> 00910 row(const matrix_base<NumericT, F> & A, unsigned int i) 00911 { 00912 return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i); 00913 } 00914 00915 // column(): 00916 template<typename NumericT, typename F> 
00917 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column> 00918 column(const matrix_base<NumericT, F> & A, unsigned int j) 00919 { 00920 return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j); 00921 } 00922 00924 00925 // 00926 //cpu to gpu, generic type: 00927 // 00933 template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 00934 void copy(const CPU_MATRIX & cpu_matrix, 00935 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix ) 00936 { 00937 typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type; 00938 00939 //std::cout << "Copying CPU_MATRIX!" << std::endl; 00940 //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; 00941 if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) 00942 { 00943 gpu_matrix.resize(cpu_matrix.size1(), 00944 cpu_matrix.size2(), false); 00945 } 00946 00947 assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") ); 00948 00949 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 00950 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 00951 { 00952 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 00953 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 00954 } 00955 00956 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); 00957 //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00958 //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; 00959 } 00960 00961 // 00962 //cpu to gpu, STL type: 00963 // 00969 template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT> 00970 void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & 
cpu_matrix, 00971 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix ) 00972 { 00973 typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type; 00974 00975 if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) 00976 { 00977 gpu_matrix.resize(cpu_matrix.size(), 00978 cpu_matrix[0].size(), 00979 false); 00980 } 00981 00982 assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") ); 00983 00984 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 00985 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 00986 { 00987 assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") ); 00988 00989 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 00990 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; 00991 } 00992 00993 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); 00994 //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00995 } 00996 00997 00998 // 00999 //cpu to gpu, another STL type: 01000 // 01007 template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 01008 void fast_copy(SCALARTYPE * cpu_matrix_begin, 01009 SCALARTYPE * cpu_matrix_end, 01010 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix) 01011 { 01012 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin); 01013 /*gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, 01014 sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), 01015 cpu_matrix_begin);*/ 01016 } 01017 01018 01019 #ifdef VIENNACL_WITH_EIGEN 01020 01025 template <typename F, unsigned int ALIGNMENT> 01026 void copy(const Eigen::MatrixXf & cpu_matrix, 01027 matrix<float, F, ALIGNMENT> & gpu_matrix) 01028 { 01029 typedef typename 
matrix<float, F, ALIGNMENT>::size_type size_type; 01030 01031 if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) 01032 { 01033 gpu_matrix.resize(cpu_matrix.rows(), 01034 cpu_matrix.cols(), 01035 false); 01036 } 01037 else 01038 { 01039 assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows())) 01040 && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols())) 01041 && bool("matrix size mismatch") 01042 ); 01043 } 01044 01045 std::vector<float> data(gpu_matrix.internal_size()); 01046 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 01047 { 01048 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 01049 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 01050 } 01051 01052 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(float) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); 01053 //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 01054 } 01055 01061 template <typename F, unsigned int ALIGNMENT> 01062 void copy(const Eigen::MatrixXd & cpu_matrix, 01063 matrix<double, F, ALIGNMENT> & gpu_matrix) 01064 { 01065 typedef typename matrix<double, F, ALIGNMENT>::size_type size_type; 01066 01067 if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) 01068 { 01069 gpu_matrix.resize(cpu_matrix.rows(), 01070 cpu_matrix.cols(), 01071 false); 01072 } 01073 else 01074 { 01075 assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows())) 01076 && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols())) 01077 && bool("matrix size mismatch") 01078 ); 01079 } 01080 01081 std::vector<double> data(gpu_matrix.internal_size()); 01082 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 01083 { 01084 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 01085 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 01086 } 01087 01088 
viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(double) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); 01089 //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 01090 } 01091 #endif 01092 01093 #ifdef VIENNACL_WITH_MTL4 01094 01099 template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT> 01100 void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix, 01101 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix) 01102 { 01103 typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type; 01104 01105 if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) 01106 { 01107 gpu_matrix.resize(cpu_matrix.num_rows(), 01108 cpu_matrix.num_cols(), 01109 false); 01110 } 01111 else 01112 { 01113 assert( (gpu_matrix.size1() == cpu_matrix.num_rows()) 01114 && (gpu_matrix.size2() == cpu_matrix.num_cols()) 01115 && bool("matrix size mismatch") 01116 ); 01117 } 01118 01119 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 01120 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 01121 { 01122 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 01123 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; 01124 } 01125 01126 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); 01127 //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 01128 } 01129 #endif 01130 01131 01132 01133 01134 // 01135 //gpu to cpu, generic type 01136 // 01142 template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 01143 void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 01144 CPU_MATRIX & cpu_matrix ) 01145 { 01146 typedef typename matrix<float, F, ALIGNMENT>::size_type size_type; 01147 01148 if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) ) 01149 { 01150 assert( 
viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows")); 01151 01152 std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size()); 01153 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0])); 01154 01155 //now copy entries to cpu_matrix: 01156 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 01157 { 01158 assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns")); 01159 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 01160 cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; 01161 } 01162 } 01163 } 01164 01165 //gpu to cpu, STL type 01171 template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT> 01172 void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 01173 std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix) 01174 { 01175 typedef typename matrix<float, F, ALIGNMENT>::size_type size_type; 01176 01177 if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) ) 01178 { 01179 assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows")); 01180 01181 std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size()); 01182 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0])); 01183 01184 //now copy entries to cpu_matrix: 01185 for (size_type i = 0; i < gpu_matrix.size1(); ++i) 01186 { 01187 assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns")); 01188 01189 for (size_type j = 0; j < gpu_matrix.size2(); ++j) 01190 cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; 01191 } 01192 } 01193 } 01194 01195 //gpu to cpu, STL type 01201 template <typename SCALARTYPE, typename F, unsigned int 
ALIGNMENT> 01202 void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 01203 SCALARTYPE * cpu_matrix_begin) 01204 { 01205 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), cpu_matrix_begin); 01206 } 01207 01208 01209 01211 01212 01213 // operator + 01215 template <typename LHS1, typename RHS1, typename OP1, 01216 typename LHS2, typename RHS2, typename OP2> 01217 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01218 const matrix_expression<const LHS2, const RHS2, OP2>, 01219 op_add> 01220 operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1, 01221 matrix_expression<const LHS2, const RHS2, OP2> const & proxy2) 01222 { 01223 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01224 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01225 && bool("Incompatible matrix sizes!")); 01226 return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01227 const matrix_expression<const LHS2, const RHS2, OP2>, 01228 op_add>(proxy1, proxy2); 01229 } 01230 01231 template <typename LHS1, typename RHS1, typename OP1, 01232 typename NumericT, typename F> 01233 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01234 const matrix_base<NumericT, F>, 01235 op_add> 01236 operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1, 01237 matrix_base<NumericT, F> const & proxy2) 01238 { 01239 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01240 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01241 && bool("Incompatible matrix sizes!")); 01242 return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01243 const matrix_base<NumericT, F>, 01244 op_add>(proxy1, proxy2); 01245 } 01246 01247 template <typename NumericT, typename F, 01248 typename LHS2, typename RHS2, typename OP2> 01249 matrix_expression< 
const matrix_base<NumericT, F>, 01250 const matrix_expression<const LHS2, const RHS2, OP2>, 01251 op_add> 01252 operator + (matrix_base<NumericT, F> const & proxy1, 01253 matrix_expression<const LHS2, const RHS2, OP2> const & proxy2) 01254 { 01255 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01256 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01257 && bool("Incompatible matrix sizes!")); 01258 return matrix_expression< const matrix_base<NumericT, F>, 01259 const matrix_expression<const LHS2, const RHS2, OP2>, 01260 op_add>(proxy1, proxy2); 01261 } 01262 01264 template <typename NumericT, typename F> 01265 matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_add > 01266 operator + (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2) 01267 { 01268 return matrix_expression< const matrix_base<NumericT, F>, 01269 const matrix_base<NumericT, F>, 01270 op_add > (m1, m2); 01271 } 01272 01273 01274 // operator - 01275 template <typename LHS1, typename RHS1, typename OP1, 01276 typename LHS2, typename RHS2, typename OP2> 01277 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01278 const matrix_expression<const LHS2, const RHS2, OP2>, 01279 op_sub> 01280 operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1, 01281 matrix_expression<const LHS2, const RHS2, OP2> const & proxy2) 01282 { 01283 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01284 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01285 && bool("Incompatible matrix sizes!")); 01286 return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01287 const matrix_expression<const LHS2, const RHS2, OP2>, 01288 op_sub>(proxy1, proxy2); 01289 } 01290 01291 template <typename LHS1, typename RHS1, typename OP1, 01292 typename NumericT, typename F> 01293 matrix_expression< const 
matrix_expression<const LHS1, const RHS1, OP1>, 01294 const matrix_base<NumericT, F>, 01295 op_sub> 01296 operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1, 01297 matrix_base<NumericT, F> const & proxy2) 01298 { 01299 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01300 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01301 && bool("Incompatible matrix sizes!")); 01302 return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>, 01303 const matrix_base<NumericT, F>, 01304 op_sub>(proxy1, proxy2); 01305 } 01306 01307 template <typename NumericT, typename F, 01308 typename LHS2, typename RHS2, typename OP2> 01309 matrix_expression< const matrix_base<NumericT, F>, 01310 const matrix_expression<const LHS2, const RHS2, OP2>, 01311 op_sub> 01312 operator - (matrix_base<NumericT, F> const & proxy1, 01313 matrix_expression<const LHS2, const RHS2, OP2> const & proxy2) 01314 { 01315 assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) 01316 && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) 01317 && bool("Incompatible matrix sizes!")); 01318 return matrix_expression< const matrix_base<NumericT, F>, 01319 const matrix_expression<const LHS2, const RHS2, OP2>, 01320 op_sub>(proxy1, proxy2); 01321 } 01322 01324 template <typename NumericT, typename F> 01325 matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_sub > 01326 operator - (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2) 01327 { 01328 return matrix_expression< const matrix_base<NumericT, F>, 01329 const matrix_base<NumericT, F>, 01330 op_sub > (m1, m2); 01331 } 01332 01333 01334 01335 // operator * 01341 template <typename S1, typename NumericT, typename F> 01342 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01343 matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult> 01344 >::type 01345 
operator * (S1 const & value, matrix_base<NumericT, F> const & m1) 01346 { 01347 return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, value); 01348 } 01349 01350 01356 template <typename LHS, typename RHS, typename OP, typename S1> 01357 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01358 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type 01359 operator * (matrix_expression< LHS, RHS, OP> const & proxy, 01360 S1 const & val) 01361 { 01362 return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val); 01363 } 01364 01365 01371 template <typename S1, typename LHS, typename RHS, typename OP> 01372 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01373 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type 01374 operator * (S1 const & val, 01375 matrix_expression< LHS, RHS, OP> const & proxy) 01376 { 01377 return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val); 01378 } 01379 01382 template <typename NumericT, typename F, typename S1> 01383 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01384 matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult> >::type 01385 operator * (matrix_base<NumericT, F> const & m1, S1 const & s1) 01386 { 01387 return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, s1); 01388 } 01389 01390 01391 // operator *= 01392 01395 template <typename NumericT, typename F, typename S1> 01396 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, 01397 matrix_base<NumericT, F> & 01398 >::type 01399 operator *= (matrix_base<NumericT, F> & m1, S1 const & gpu_val) 01400 { 01401 //viennacl::linalg::inplace_mult(*this, gpu_val); 01402 viennacl::linalg::am(m1, 01403 m1, gpu_val, 1, false, (viennacl::is_flip_sign_scalar<S1>::value ? 
true : false)); 01404 return m1; 01405 } 01406 01407 01408 // operator / 01409 01410 01416 template <typename LHS, typename RHS, typename OP, typename S1> 01417 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01418 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type 01419 operator / (matrix_expression<const LHS, const RHS, OP> const & proxy, 01420 S1 const & val) 01421 { 01422 return matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>(proxy, val); 01423 } 01424 01425 01428 template <typename NumericT, typename F, typename S1> 01429 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value, 01430 matrix_expression< const matrix_base<NumericT, F>, const S1, op_div> >::type 01431 operator / (matrix_base<NumericT, F> const & m1, S1 const & s1) 01432 { 01433 return matrix_expression< const matrix_base<NumericT, F>, const S1, op_div>(m1, s1); 01434 } 01435 01436 01437 // operator /= 01438 01441 template <typename NumericT, typename F, typename S1> 01442 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, 01443 matrix_base<NumericT, F> & 01444 >::type 01445 operator /= (matrix_base<NumericT, F> & m1, S1 const & gpu_val) 01446 { 01447 //viennacl::linalg::inplace_divide(*this, gpu_val); 01448 viennacl::linalg::am(m1, 01449 m1, gpu_val, 1, true, (viennacl::is_flip_sign_scalar<S1>::value ? 
true : false)); 01450 return m1; 01451 } 01452 01453 01454 01455 01456 01457 // outer_prod(v1, v2) * val; 01458 template <typename NumericT, typename S1> 01459 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, 01460 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01461 const S1, 01462 op_mult> 01463 >::type 01464 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy, 01465 const S1 & val) 01466 { 01467 return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01468 const S1, 01469 op_mult>(proxy, val); 01470 } 01471 01472 template <typename NumericT, typename S1> 01473 typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value, 01474 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01475 const NumericT, 01476 op_mult> 01477 >::type 01478 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy, 01479 const S1 & val) 01480 { 01481 return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01482 const NumericT, 01483 op_mult>(proxy, NumericT(val)); 01484 } 01485 01486 // val * outer_prod(v1, v2); 01487 template <typename NumericT, typename S1> 01488 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, 01489 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01490 const S1, 01491 op_mult> 01492 >::type 01493 operator*(const S1 & val, 01494 const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) 01495 { 01496 return viennacl::matrix_expression< const 
viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01497 const S1, 01498 op_mult>(proxy, val); 01499 } 01500 01501 template<typename NumericT, typename S1> 01502 typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value, 01503 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01504 const NumericT, 01505 op_mult> 01506 >::type 01507 operator*(const S1 & val, 01508 const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) 01509 { 01510 return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>, 01511 const NumericT, 01512 op_mult>(proxy, NumericT(val)); 01513 } 01514 01515 01516 01517 // 01518 // Specify available operations: 01519 // 01520 01523 namespace linalg 01524 { 01525 namespace detail 01526 { 01527 01528 // x = y 01529 template <typename T, typename F> 01530 struct op_executor<matrix_base<T, F>, op_assign, matrix_base<T, F> > 01531 { 01532 static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs) 01533 { 01534 viennacl::linalg::am(lhs, rhs, T(1), 1, false, false); 01535 } 01536 }; 01537 01538 // x += y 01539 template <typename T, typename F> 01540 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_base<T, F> > 01541 { 01542 static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs) 01543 { 01544 viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false); 01545 } 01546 }; 01547 01548 // x -= y 01549 template <typename T, typename F> 01550 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_base<T, F> > 01551 { 01552 static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs) 01553 { 01554 viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true); 01555 } 01556 }; 01557 01559 01560 
01561 // x = alpha * y 01562 template <typename T, typename F, typename ScalarType> 01563 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> > 01564 { 01565 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy) 01566 { 01567 viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false); 01568 } 01569 }; 01570 01571 // x += alpha * y 01572 template <typename T, typename F, typename ScalarType> 01573 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> > 01574 { 01575 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy) 01576 { 01577 viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false); 01578 } 01579 }; 01580 01581 // x -= alpha * y 01582 template <typename T, typename F, typename ScalarType> 01583 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> > 01584 { 01585 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy) 01586 { 01587 viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true); 01588 } 01589 }; 01590 01591 01593 01594 // x = alpha * vec_expr 01595 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01596 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> > 01597 { 01598 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy) 01599 { 01600 matrix<T, F> temp(proxy.lhs()); 01601 lhs = temp * proxy.rhs(); 01602 
} 01603 }; 01604 01605 // x += alpha * vec_expr 01606 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01607 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> > 01608 { 01609 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy) 01610 { 01611 matrix<T, F> temp(proxy.lhs()); 01612 lhs += temp * proxy.rhs(); 01613 } 01614 }; 01615 01616 // x -= alpha * vec_expr 01617 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01618 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> > 01619 { 01620 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy) 01621 { 01622 matrix<T, F> temp(proxy.lhs()); 01623 lhs -= temp * proxy.rhs(); 01624 } 01625 }; 01626 01627 01629 01630 // x = y / alpha 01631 template <typename T, typename F, typename ScalarType> 01632 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> > 01633 { 01634 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy) 01635 { 01636 viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false); 01637 } 01638 }; 01639 01640 // x += y / alpha 01641 template <typename T, typename F, typename ScalarType> 01642 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> > 01643 { 01644 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy) 01645 { 01646 
viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false); 01647 } 01648 }; 01649 01650 // x -= y / alpha 01651 template <typename T, typename F, typename ScalarType> 01652 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> > 01653 { 01654 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy) 01655 { 01656 viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true); 01657 } 01658 }; 01659 01660 01662 01663 // x = vec_expr / alpha 01664 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01665 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> > 01666 { 01667 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy) 01668 { 01669 matrix<T, F> temp(proxy.lhs()); 01670 lhs = temp / proxy.rhs(); 01671 } 01672 }; 01673 01674 // x += vec_expr / alpha 01675 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01676 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> > 01677 { 01678 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy) 01679 { 01680 matrix<T, F> temp(proxy.lhs()); 01681 lhs += temp / proxy.rhs(); 01682 } 01683 }; 01684 01685 // x -= vec_expr / alpha 01686 template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType> 01687 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, 
const ScalarType, op_div> > 01688 { 01689 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy) 01690 { 01691 matrix<T, F> temp(proxy.lhs()); 01692 lhs -= temp / proxy.rhs(); 01693 } 01694 }; 01695 01696 01697 01698 // generic x = vec_expr1 + vec_expr2: 01699 template <typename T, typename F, typename LHS, typename RHS> 01700 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_add> > 01701 { 01702 // generic x = vec_expr1 + vec_expr2: 01703 template <typename LHS1, typename RHS1> 01704 static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy) 01705 { 01706 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 01707 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 01708 01709 if (op_aliasing_lhs || op_aliasing_rhs) 01710 { 01711 matrix_base<T, F> temp(proxy.lhs()); 01712 op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs()); 01713 lhs = temp; 01714 } 01715 else 01716 { 01717 op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs()); 01718 op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs()); 01719 } 01720 } 01721 01722 // x = y + z 01723 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy) 01724 { 01725 viennacl::linalg::ambm(lhs, 01726 proxy.lhs(), T(1), 1, false, false, 01727 proxy.rhs(), T(1), 1, false, false); 01728 } 01729 01730 // x = alpha * y + z 01731 template <typename ScalarType> 01732 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 01733 const matrix_base<T, F>, 01734 op_add> const & proxy) 01735 { 01736 viennacl::linalg::ambm(lhs, 01737 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01738 proxy.rhs(), T(1), 1, false, false); 
01739 } 01740 01741 // x = y / alpha + z 01742 template <typename ScalarType> 01743 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 01744 const matrix_base<T, F>, 01745 op_add> const & proxy) 01746 { 01747 viennacl::linalg::ambm(lhs, 01748 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01749 proxy.rhs(), T(1), 1, false, false); 01750 } 01751 01752 // x = y + beta * z 01753 template <typename ScalarType> 01754 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 01755 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 01756 op_add> const & proxy) 01757 { 01758 viennacl::linalg::ambm(lhs, 01759 proxy.lhs(), T(1), 1, false, false, 01760 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01761 } 01762 01763 // x = y + z / beta 01764 template <typename ScalarType> 01765 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 01766 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 01767 op_add> const & proxy) 01768 { 01769 viennacl::linalg::ambm(lhs, 01770 proxy.lhs(), T(1), 1, false, false, 01771 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01772 } 01773 01774 // x = alpha * y + beta * z 01775 template <typename ScalarType1, typename ScalarType2> 01776 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 01777 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 01778 op_add> const & proxy) 01779 { 01780 viennacl::linalg::ambm(lhs, 01781 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01782 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01783 } 01784 01785 // x = alpha * y + z / beta 01786 template <typename ScalarType1, typename ScalarType2> 01787 static void apply(matrix_base<T, F> & lhs, matrix_expression<const 
matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 01788 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 01789 op_add> const & proxy) 01790 { 01791 viennacl::linalg::ambm(lhs, 01792 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01793 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01794 } 01795 01796 // x = y / alpha + beta * z 01797 template <typename ScalarType1, typename ScalarType2> 01798 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 01799 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 01800 op_add> const & proxy) 01801 { 01802 viennacl::linalg::ambm(lhs, 01803 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01804 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01805 } 01806 01807 // x = y / alpha + z / beta 01808 template <typename ScalarType1, typename ScalarType2> 01809 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 01810 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 01811 op_add> const & proxy) 01812 { 01813 viennacl::linalg::ambm(lhs, 01814 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01815 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01816 } 01817 }; 01818 01819 // dense = sparse * dense 01820 template <typename T, typename F1, typename LHS, typename RHS> 01821 struct op_executor<matrix_base<T, F1>, op_assign, matrix_expression<const LHS, const RHS, op_prod> > 01822 { 01823 template < typename SparseMatrixType, typename F2 > 01824 static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType, 01825 const viennacl::matrix_base<T, F2>, 01826 viennacl::op_prod> const & proxy) 01827 { 01828 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs); 01829 } 01830 01831 // dense = sparse * 
trans(dense) 01832 template < typename SparseMatrixType, typename F2 > 01833 static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType, 01834 const viennacl::matrix_expression< const viennacl::matrix_base<T, F2>, 01835 const viennacl::matrix_base<T, F2>, 01836 viennacl::op_trans >, 01837 viennacl::op_prod> const & proxy) 01838 { 01839 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs); 01840 } 01841 01842 }; 01843 01844 // generic x += vec_expr1 + vec_expr2: 01845 template <typename T, typename F, typename LHS, typename RHS> 01846 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> > 01847 { 01848 // generic x += vec_expr1 + vec_expr2: 01849 template <typename LHS1, typename RHS1> 01850 static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy) 01851 { 01852 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 01853 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 01854 01855 if (op_aliasing_lhs || op_aliasing_rhs) 01856 { 01857 matrix_base<T, F> temp(proxy.lhs()); 01858 op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs()); 01859 lhs += temp; 01860 } 01861 else 01862 { 01863 op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs()); 01864 op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs()); 01865 } 01866 } 01867 01868 // x += y + z 01869 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy) 01870 { 01871 viennacl::linalg::ambm_m(lhs, 01872 proxy.lhs(), T(1), 1, false, false, 01873 proxy.rhs(), T(1), 1, false, false); 01874 } 01875 01876 // x += alpha * y + z 01877 template <typename ScalarType> 01878 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 01879 const matrix_base<T, F>, 01880 op_add> 
const & proxy) 01881 { 01882 viennacl::linalg::ambm_m(lhs, 01883 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01884 proxy.rhs(), T(1), 1, false, false); 01885 } 01886 01887 // x += y / alpha + z 01888 template <typename ScalarType> 01889 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 01890 const matrix_base<T, F>, 01891 op_add> const & proxy) 01892 { 01893 viennacl::linalg::ambm_m(lhs, 01894 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01895 proxy.rhs(), T(1), 1, false, false); 01896 } 01897 01898 // x += y + beta * z 01899 template <typename ScalarType> 01900 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 01901 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 01902 op_add> const & proxy) 01903 { 01904 viennacl::linalg::ambm_m(lhs, 01905 proxy.lhs(), T(1), 1, false, false, 01906 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01907 } 01908 01909 // x += y + z / beta 01910 template <typename ScalarType> 01911 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 01912 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 01913 op_add> const & proxy) 01914 { 01915 viennacl::linalg::ambm_m(lhs, 01916 proxy.lhs(), T(1), 1, false, false, 01917 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01918 } 01919 01920 // x += alpha * y + beta * z 01921 template <typename ScalarType1, typename ScalarType2> 01922 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 01923 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 01924 op_add> const & proxy) 01925 { 01926 viennacl::linalg::ambm_m(lhs, 01927 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01928 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01929 } 01930 
01931 // x += alpha * y + z / beta 01932 template <typename ScalarType1, typename ScalarType2> 01933 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 01934 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 01935 op_add> const & proxy) 01936 { 01937 viennacl::linalg::ambm_m(lhs, 01938 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 01939 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01940 } 01941 01942 // x += y / alpha + beta * z 01943 template <typename ScalarType1, typename ScalarType2> 01944 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 01945 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 01946 op_add> const & proxy) 01947 { 01948 viennacl::linalg::ambm_m(lhs, 01949 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01950 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 01951 } 01952 01953 // x += y / alpha + z / beta 01954 template <typename ScalarType1, typename ScalarType2> 01955 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 01956 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 01957 op_add> const & proxy) 01958 { 01959 viennacl::linalg::ambm_m(lhs, 01960 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 01961 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 01962 } 01963 }; 01964 01965 01966 01967 // generic x -= mat_expr1 + mat_expr2: 01968 template <typename T, typename F, typename LHS, typename RHS> 01969 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> > 01970 { 01971 // generic x -= mat_expr1 + mat_expr2: 01972 template <typename LHS1, typename RHS1> 01973 static void apply(matrix_base<T, F> & lhs, 
matrix_expression<const LHS1, const RHS1, op_add> const & proxy) 01974 { 01975 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 01976 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 01977 01978 if (op_aliasing_lhs || op_aliasing_rhs) 01979 { 01980 matrix_base<T, F> temp(proxy.lhs()); 01981 op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs()); 01982 lhs -= temp; 01983 } 01984 else 01985 { 01986 op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs()); 01987 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); 01988 } 01989 } 01990 01991 // x -= y + z 01992 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy) 01993 { 01994 viennacl::linalg::ambm_m(lhs, 01995 proxy.lhs(), T(1), 1, false, true, 01996 proxy.rhs(), T(1), 1, false, true); 01997 } 01998 01999 // x -= alpha * y + z 02000 template <typename ScalarType> 02001 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02002 const matrix_base<T, F>, 02003 op_add> const & proxy) 02004 { 02005 viennacl::linalg::ambm_m(lhs, 02006 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02007 proxy.rhs(), T(1), 1, false, true); 02008 } 02009 02010 // x -= y / alpha + z 02011 template <typename ScalarType> 02012 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02013 const matrix_base<T, F>, 02014 op_add> const & proxy) 02015 { 02016 viennacl::linalg::ambm_m(lhs, 02017 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02018 proxy.rhs(), T(1), 1, false, true); 02019 } 02020 02021 // x -= y + beta * z 02022 template <typename ScalarType> 02023 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02024 const matrix_expression<const matrix_base<T, F>, const 
ScalarType, op_mult>, 02025 op_add> const & proxy) 02026 { 02027 viennacl::linalg::ambm_m(lhs, 02028 proxy.lhs(), T(1), 1, false, true, 02029 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02030 } 02031 02032 // x -= y + z / beta 02033 template <typename ScalarType> 02034 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02035 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02036 op_add> const & proxy) 02037 { 02038 viennacl::linalg::ambm_m(lhs, 02039 proxy.lhs(), T(1), 1, false, true, 02040 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02041 } 02042 02043 // x -= alpha * y + beta * z 02044 template <typename ScalarType1, typename ScalarType2> 02045 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02046 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02047 op_add> const & proxy) 02048 { 02049 viennacl::linalg::ambm_m(lhs, 02050 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02051 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02052 } 02053 02054 // x -= alpha * y + z / beta 02055 template <typename ScalarType1, typename ScalarType2> 02056 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02057 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02058 op_add> const & proxy) 02059 { 02060 viennacl::linalg::ambm_m(lhs, 02061 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02062 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02063 } 02064 02065 // x -= y / alpha + beta * z 02066 template <typename ScalarType1, typename ScalarType2> 02067 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02068 const matrix_expression<const matrix_base<T, F>, const 
ScalarType2, op_mult>, 02069 op_add> const & proxy) 02070 { 02071 viennacl::linalg::ambm_m(lhs, 02072 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02073 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02074 } 02075 02076 // x -= y / alpha + z / beta 02077 template <typename ScalarType1, typename ScalarType2> 02078 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02079 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02080 op_add> const & proxy) 02081 { 02082 viennacl::linalg::ambm_m(lhs, 02083 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02084 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02085 } 02086 }; 02087 02088 02089 02091 02092 02093 02094 // generic x = mat_expr1 - mat_expr2: 02095 template <typename T, typename F, typename LHS, typename RHS> 02096 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_sub> > 02097 { 02098 // generic x = mat_expr1 - mat_expr2 (if x aliases either operand, the difference is built in a temporary and then assigned): 02099 template <typename LHS1, typename RHS1> 02100 static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy) 02101 { 02102 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 02103 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 02104 02105 if (op_aliasing_lhs || op_aliasing_rhs) 02106 { 02107 matrix_base<T, F> temp(proxy.lhs()); 02108 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); 02109 lhs = temp; 02110 } 02111 else 02112 { 02113 op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs()); 02114 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); 02115 } 02116 } 02117 02118 // x = y - z 02119 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy) 02120 { 02121 viennacl::linalg::ambm(lhs, 02122 proxy.lhs(), T(1), 
1, false, false, 02123 proxy.rhs(), T(1), 1, false, true); 02124 } 02125 02126 // x = alpha * y - z 02127 template <typename ScalarType> 02128 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02129 const matrix_base<T, F>, 02130 op_sub> const & proxy) 02131 { 02132 viennacl::linalg::ambm(lhs, 02133 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02134 proxy.rhs(), T(1), 1, false, true); 02135 } 02136 02137 // x = y / alpha - z 02138 template <typename ScalarType> 02139 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02140 const matrix_base<T, F>, 02141 op_sub> const & proxy) 02142 { 02143 viennacl::linalg::ambm(lhs, 02144 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 02145 proxy.rhs(), T(1), 1, false, true); 02146 } 02147 02148 // x = y - beta * z 02149 template <typename ScalarType> 02150 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02151 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02152 op_sub> const & proxy) 02153 { 02154 viennacl::linalg::ambm(lhs, 02155 proxy.lhs(), T(1), 1, false, false, 02156 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02157 } 02158 02159 // x = y - z / beta 02160 template <typename ScalarType> 02161 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02162 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02163 op_sub> const & proxy) 02164 { 02165 viennacl::linalg::ambm(lhs, 02166 proxy.lhs(), T(1), 1, false, false, 02167 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02168 } 02169 02170 // x = alpha * y - beta * z 02171 template <typename ScalarType1, typename ScalarType2> 02172 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const 
ScalarType1, op_mult>, 02173 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02174 op_sub> const & proxy) 02175 { 02176 viennacl::linalg::ambm(lhs, 02177 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02178 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02179 } 02180 02181 // x = alpha * y - z / beta 02182 template <typename ScalarType1, typename ScalarType2> 02183 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02184 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02185 op_sub> const & proxy) 02186 { 02187 viennacl::linalg::ambm(lhs, 02188 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02189 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02190 } 02191 02192 // x = y / alpha - beta * z 02193 template <typename ScalarType1, typename ScalarType2> 02194 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02195 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02196 op_sub> const & proxy) 02197 { 02198 viennacl::linalg::ambm(lhs, 02199 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 02200 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02201 } 02202 02203 // x = y / alpha - z / beta 02204 template <typename ScalarType1, typename ScalarType2> 02205 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02206 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02207 op_sub> const & proxy) 02208 { 02209 viennacl::linalg::ambm(lhs, 02210 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 02211 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02212 } 02213 }; 02214 02215 02216 // generic x += mat_expr1 - mat_expr2: 02217 template <typename T, typename F, 
typename LHS, typename RHS> 02218 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> > 02219 { 02220 // generic x += mat_expr1 - mat_expr2 (if x aliases either operand, the difference is built in a temporary first): 02221 template <typename LHS1, typename RHS1> 02222 static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy) 02223 { 02224 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 02225 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 02226 02227 if (op_aliasing_lhs || op_aliasing_rhs) 02228 { 02229 matrix_base<T, F> temp(proxy.lhs()); 02230 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); 02231 lhs += temp; 02232 } 02233 else 02234 { 02235 op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs()); 02236 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); 02237 } 02238 } 02239 02240 // x += y - z 02241 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy) 02242 { 02243 viennacl::linalg::ambm_m(lhs, 02244 proxy.lhs(), T(1), 1, false, false, 02245 proxy.rhs(), T(1), 1, false, true); 02246 } 02247 02248 // x += alpha * y - z 02249 template <typename ScalarType> 02250 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02251 const matrix_base<T, F>, 02252 op_sub> const & proxy) 02253 { 02254 viennacl::linalg::ambm_m(lhs, 02255 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02256 proxy.rhs(), T(1), 1, false, true); 02257 } 02258 02259 // x += y / alpha - z 02260 template <typename ScalarType> 02261 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02262 const matrix_base<T, F>, 02263 op_sub> const & proxy) 02264 { 02265 viennacl::linalg::ambm_m(lhs, 02266 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, 
false, 02267 proxy.rhs(), T(1), 1, false, true); 02268 } 02269 02270 // x += y - beta * z 02271 template <typename ScalarType> 02272 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02273 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02274 op_sub> const & proxy) 02275 { 02276 viennacl::linalg::ambm_m(lhs, 02277 proxy.lhs(), T(1), 1, false, false, 02278 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02279 } 02280 02281 // x += y - z / beta 02282 template <typename ScalarType> 02283 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02284 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02285 op_sub> const & proxy) 02286 { 02287 viennacl::linalg::ambm_m(lhs, 02288 proxy.lhs(), T(1), 1, false, false, 02289 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02290 } 02291 02292 // x += alpha * y - beta * z 02293 template <typename ScalarType1, typename ScalarType2> 02294 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02295 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02296 op_sub> const & proxy) 02297 { 02298 viennacl::linalg::ambm_m(lhs, 02299 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02300 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02301 } 02302 02303 // x += alpha * y - z / beta 02304 template <typename ScalarType1, typename ScalarType2> 02305 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02306 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02307 op_sub> const & proxy) 02308 { 02309 viennacl::linalg::ambm_m(lhs, 02310 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, 02311 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02312 } 02313 02314 // x += y 
/ alpha - beta * z 02315 template <typename ScalarType1, typename ScalarType2> 02316 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02317 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02318 op_sub> const & proxy) 02319 { 02320 viennacl::linalg::ambm_m(lhs, 02321 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 02322 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); 02323 } 02324 02325 // x += y / alpha - z / beta 02326 template <typename ScalarType1, typename ScalarType2> 02327 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02328 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02329 op_sub> const & proxy) 02330 { 02331 viennacl::linalg::ambm_m(lhs, 02332 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, 02333 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); 02334 } 02335 }; 02336 02337 02338 02339 // generic x -= mat_expr1 - mat_expr2: 02340 template <typename T, typename F, typename LHS, typename RHS> 02341 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> > 02342 { 02343 // generic x -= mat_expr1 - mat_expr2 (if x aliases either operand, the difference is built in a temporary and then subtracted): 02344 template <typename LHS1, typename RHS1> 02345 static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy) 02346 { 02347 bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); 02348 bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); 02349 02350 if (op_aliasing_lhs || op_aliasing_rhs) 02351 { 02352 matrix_base<T, F> temp(proxy.lhs()); 02353 op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); 02354 lhs -= temp; 02355 } 02356 else 02357 { 02358 op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs()); 02359 op_executor<matrix_base<T, F>, op_inplace_add, 
RHS>::apply(lhs, proxy.rhs()); 02360 } 02361 } 02362 02363 // x -= y - z 02364 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy) 02365 { 02366 viennacl::linalg::ambm_m(lhs, 02367 proxy.lhs(), T(1), 1, false, true, 02368 proxy.rhs(), T(1), 1, false, false); 02369 } 02370 02371 // x -= alpha * y - z 02372 template <typename ScalarType> 02373 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02374 const matrix_base<T, F>, 02375 op_sub> const & proxy) 02376 { 02377 viennacl::linalg::ambm_m(lhs, 02378 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02379 proxy.rhs(), T(1), 1, false, false); 02380 } 02381 02382 // x -= y / alpha - z 02383 template <typename ScalarType> 02384 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02385 const matrix_base<T, F>, 02386 op_sub> const & proxy) 02387 { 02388 viennacl::linalg::ambm_m(lhs, 02389 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02390 proxy.rhs(), T(1), 1, false, false); 02391 } 02392 02393 // x -= y - beta * z 02394 template <typename ScalarType> 02395 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02396 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>, 02397 op_sub> const & proxy) 02398 { 02399 viennacl::linalg::ambm_m(lhs, 02400 proxy.lhs(), T(1), 1, false, true, 02401 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 02402 } 02403 02404 // x -= y - z / beta 02405 template <typename ScalarType> 02406 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, 02407 const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>, 02408 op_sub> const & proxy) 02409 { 02410 viennacl::linalg::ambm_m(lhs, 02411 proxy.lhs(), T(1), 1, false, true, 02412 
proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 02413 } 02414 02415 // x -= alpha * y - beta * z 02416 template <typename ScalarType1, typename ScalarType2> 02417 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02418 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02419 op_sub> const & proxy) 02420 { 02421 viennacl::linalg::ambm_m(lhs, 02422 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02423 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 02424 } 02425 02426 // x -= alpha * y - z / beta 02427 template <typename ScalarType1, typename ScalarType2> 02428 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>, 02429 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02430 op_sub> const & proxy) 02431 { 02432 viennacl::linalg::ambm_m(lhs, 02433 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, 02434 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 02435 } 02436 02437 // x -= y / alpha - beta * z 02438 template <typename ScalarType1, typename ScalarType2> 02439 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02440 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>, 02441 op_sub> const & proxy) 02442 { 02443 viennacl::linalg::ambm_m(lhs, 02444 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02445 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); 02446 } 02447 02448 // x -= y / alpha - z / beta 02449 template <typename ScalarType1, typename ScalarType2> 02450 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>, 02451 const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>, 02452 op_sub> 
const & proxy) 02453 { 02454 viennacl::linalg::ambm_m(lhs, 02455 proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, 02456 proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); 02457 } 02458 }; 02459 02460 02462 02463 /* matrix = (vector, int) expression tagged op_vector_diag: hands proxy.lhs(), proxy.rhs() and the target matrix to viennacl::linalg::matrix_diag_from_vector */ template <typename T, typename F, typename LHS> 02464 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> > 02465 { 02466 static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy) 02467 { 02468 viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs); 02469 } 02470 }; 02471 02472 02473 /* vector = (matrix, int) expression tagged op_matrix_diag: hands proxy.lhs(), proxy.rhs() and the target vector to viennacl::linalg::matrix_diag_to_vector */ template <typename T, typename LHS> 02474 struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> > 02475 { 02476 template <typename F> 02477 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const int, op_matrix_diag> const & proxy) 02478 { 02479 viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs); 02480 } 02481 }; 02482 02483 /* vector = (matrix, unsigned int) expression tagged op_row: hands proxy.lhs(), proxy.rhs() and the target vector to viennacl::linalg::matrix_row */ template <typename T, typename LHS> 02484 struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> > 02485 { 02486 template <typename F> 02487 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_row> const & proxy) 02488 { 02489 viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs); 02490 } 02491 }; 02492 02493 02494 /* vector = (matrix, unsigned int) expression tagged op_column: hands proxy.lhs(), proxy.rhs() and the target vector to viennacl::linalg::matrix_column */ template <typename T, typename LHS> 02495 struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> > 02496 { 02497 template <typename F> 02498 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_column> const & proxy) 02499 { 02500 viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs); 02501 } 02502 }; 02503 02504 02506 02507 // generic x = mat_expr1 .* mat_expr2: 02508 template <typename 
T, typename F, typename LHS, typename RHS, typename OP> 02509 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > > 02510 { 02511 // x = y .* z 02512 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02513 { 02514 viennacl::linalg::element_op(lhs, proxy); 02515 } 02516 02517 // x = y .* mat_expr 02518 template <typename LHS2, typename RHS2, typename OP2> 02519 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy) 02520 { 02521 matrix<T, F> temp(proxy.rhs()); 02522 viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp)); 02523 } 02524 02525 // x = mat_expr .* z 02526 template <typename LHS1, typename RHS1, typename OP1> 02527 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02528 { 02529 matrix<T, F> temp(proxy.lhs()); 02530 viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs())); 02531 } 02532 02533 // x = mat_expr .* mat_expr 02534 template <typename LHS1, typename RHS1, typename OP1, 02535 typename LHS2, typename RHS2, typename OP2> 02536 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, 02537 const matrix_expression<const LHS2, const RHS2, OP2>, 02538 op_element_binary<OP> > const & proxy) 02539 { 02540 matrix<T, F> temp1(proxy.lhs()); 02541 matrix<T, F> temp2(proxy.rhs()); 02542 viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, 
op_element_binary<OP> >(temp1, temp2)); 02543 } 02544 }; 02545 02546 // generic x += mat_expr .* mat_expr: 02547 template <typename T, typename F, typename LHS, typename RHS, typename OP> 02548 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > > 02549 { 02550 // x += y .* z 02551 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02552 { 02553 viennacl::matrix<T, F> temp(proxy); 02554 lhs += temp; 02555 } 02556 02557 // x += y .* mat_expr 02558 template <typename LHS2, typename RHS2, typename OP2> 02559 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy) 02560 { 02561 matrix<T, F> temp(proxy.rhs()); 02562 matrix<T, F> temp2(temp.size1(), temp.size2()); 02563 viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp)); 02564 lhs += temp2; 02565 } 02566 02567 // x += mat_expr .* z 02568 template <typename LHS1, typename RHS1, typename OP1> 02569 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02570 { 02571 matrix<T, F> temp(proxy.lhs()); 02572 matrix<T, F> temp2(temp.size1(), temp.size2()); 02573 viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs())); 02574 lhs += temp2; 02575 } 02576 02577 // x += mat_expr .* mat_expr 02578 template <typename LHS1, typename RHS1, typename OP1, 02579 typename LHS2, typename RHS2, typename OP2> 02580 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, 02581 
const matrix_expression<const LHS2, const RHS2, OP2>, 02582 op_element_binary<OP> > const & proxy) 02583 { 02584 matrix<T, F> temp1(proxy.lhs()); 02585 matrix<T, F> temp2(proxy.rhs()); 02586 matrix<T, F> temp3(temp1.size1(), temp1.size2()); 02587 viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2)); 02588 lhs += temp3; 02589 } 02590 }; 02591 02592 // generic x -= mat_expr1 .* mat_expr2: 02593 template <typename T, typename F, typename LHS, typename RHS, typename OP> 02594 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > > 02595 { 02596 02597 // x -= y .* z 02598 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02599 { 02600 viennacl::matrix<T, F> temp(proxy); 02601 lhs -= temp; 02602 } 02603 02604 // x -= y .* mat_expr 02605 template <typename LHS2, typename RHS2, typename OP2> 02606 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy) 02607 { 02608 matrix<T, F> temp(proxy.rhs()); 02609 matrix<T, F> temp2(temp.size1(), temp.size2()); 02610 viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp)); 02611 lhs -= temp2; 02612 } 02613 02614 // x -= mat_expr .* z 02615 template <typename LHS1, typename RHS1, typename OP1> 02616 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy) 02617 { 02618 matrix<T, F> temp(proxy.lhs()); 02619 matrix<T, F> temp2(temp.size1(), temp.size2()); 02620 viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const 
matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs())); 02621 lhs -= temp2; 02622 } 02623 02624 // x -= mat_expr .* mat_expr 02625 template <typename LHS1, typename RHS1, typename OP1, 02626 typename LHS2, typename RHS2, typename OP2> 02627 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, 02628 const matrix_expression<const LHS2, const RHS2, OP2>, 02629 op_element_binary<OP> > const & proxy) 02630 { 02631 matrix<T, F> temp1(proxy.lhs()); 02632 matrix<T, F> temp2(proxy.rhs()); 02633 matrix<T, F> temp3(temp1.size1(), temp1.size2()); 02634 viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2)); 02635 lhs -= temp3; 02636 } 02637 }; 02638 02640 02641 template <typename T, typename F, typename LHS, typename RHS, typename OP> 02642 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > > 02643 { 02644 // x = OP(y) 02645 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy) 02646 { 02647 viennacl::linalg::element_op(lhs, proxy); 02648 } 02649 02650 // x = OP(vec_expr) 02651 template <typename LHS2, typename RHS2, typename OP2> 02652 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>, 02653 const matrix_expression<const LHS2, const RHS2, OP2>, 02654 op_element_unary<OP> > const & proxy) 02655 { 02656 matrix<T, F> temp(proxy.rhs()); 02657 viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); 02658 } 02659 }; 02660 02661 template <typename T, typename F, typename LHS, typename RHS, typename OP> 02662 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const 
LHS, const RHS, op_element_unary<OP> > > 02663 { 02664 // x += OP(y) 02665 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy) 02666 { 02667 matrix<T, F> temp(proxy); 02668 lhs += temp; 02669 } 02670 02671 // x += OP(vec_expr) 02672 template <typename LHS2, typename RHS2, typename OP2> 02673 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>, 02674 const matrix_expression<const LHS2, const RHS2, OP2>, 02675 op_element_unary<OP> > const & proxy) 02676 { 02677 matrix<T, F> temp(proxy.rhs()); 02678 viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here 02679 lhs += temp; 02680 } 02681 }; 02682 02683 template <typename T, typename F, typename LHS, typename RHS, typename OP> 02684 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > > 02685 { 02686 // x -= OP(y) 02687 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy) 02688 { 02689 matrix<T, F> temp(proxy); 02690 lhs -= temp; 02691 } 02692 02693 // x -= OP(vec_expr) 02694 template <typename LHS2, typename RHS2, typename OP2> 02695 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>, 02696 const matrix_expression<const LHS2, const RHS2, OP2>, 02697 op_element_unary<OP> > const & proxy) 02698 { 02699 matrix<T, F> temp(proxy.rhs()); 02700 viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here 02701 lhs -= temp; 02702 } 02703 }; 02704 02705 02706 02708 02709 // C = A * B 02710 template 
<typename T, typename F, typename F1, typename F2> 02711 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> > 02712 { 02713 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs) 02714 { 02715 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); 02716 } 02717 }; 02718 02719 // C = A * B^T 02720 template <typename T, typename F, typename F1, typename F2> 02721 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>, 02722 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02723 op_mat_mat_prod> > 02724 { 02725 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, 02726 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02727 op_mat_mat_prod> const & rhs) 02728 { 02729 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); 02730 } 02731 }; 02732 02733 // C = A^T * B 02734 template <typename T, typename F, typename F1, typename F2> 02735 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02736 const matrix_base<T, F2>, 02737 op_mat_mat_prod> > 02738 { 02739 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02740 const matrix_base<T, F2>, 02741 op_mat_mat_prod> const & rhs) 02742 { 02743 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); 02744 } 02745 }; 02746 02747 // C = A^T * B^T 02748 template <typename T, typename F, typename F1, typename F2> 02749 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02750 
const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02751 op_mat_mat_prod> > 02752 { 02753 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02754 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02755 op_mat_mat_prod> const & rhs) 02756 { 02757 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); 02758 } 02759 }; 02760 02761 02762 // C += A * B 02763 template <typename T, typename F, typename F1, typename F2> 02764 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> > 02765 { 02766 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs) 02767 { 02768 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); 02769 } 02770 }; 02771 02772 // C += A * B^T 02773 template <typename T, typename F, typename F1, typename F2> 02774 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>, 02775 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02776 op_mat_mat_prod> > 02777 { 02778 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, 02779 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02780 op_mat_mat_prod> const & rhs) 02781 { 02782 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); 02783 } 02784 }; 02785 02786 // C += A^T * B 02787 template <typename T, typename F, typename F1, typename F2> 02788 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02789 const matrix_base<T, F2>, 02790 op_mat_mat_prod> > 02791 { 02792 static 
void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02793 const matrix_base<T, F2>, 02794 op_mat_mat_prod> const & rhs) 02795 { 02796 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); 02797 } 02798 }; 02799 02800 // C += A^T * B^T 02801 template <typename T, typename F, typename F1, typename F2> 02802 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02803 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02804 op_mat_mat_prod> > 02805 { 02806 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02807 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02808 op_mat_mat_prod> const & rhs) 02809 { 02810 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); 02811 } 02812 }; 02813 02814 02815 // C -= A * B 02816 template <typename T, typename F, typename F1, typename F2> 02817 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> > 02818 { 02819 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs) 02820 { 02821 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); 02822 } 02823 }; 02824 02825 // C -= A * B^T 02826 template <typename T, typename F, typename F1, typename F2> 02827 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>, 02828 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02829 op_mat_mat_prod> > 02830 { 02831 static void apply(matrix_base<T, F> & lhs, matrix_expression<const 
matrix_base<T, F1>, 02832 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02833 op_mat_mat_prod> const & rhs) 02834 { 02835 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); 02836 } 02837 }; 02838 02839 // C -= A^T * B 02840 template <typename T, typename F, typename F1, typename F2> 02841 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02842 const matrix_base<T, F2>, 02843 op_mat_mat_prod> > 02844 { 02845 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02846 const matrix_base<T, F2>, 02847 op_mat_mat_prod> const & rhs) 02848 { 02849 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); 02850 } 02851 }; 02852 02853 // C -= A^T * B^T 02854 template <typename T, typename F, typename F1, typename F2> 02855 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02856 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02857 op_mat_mat_prod> > 02858 { 02859 static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>, 02860 const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>, 02861 op_mat_mat_prod> const & rhs) 02862 { 02863 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); 02864 } 02865 }; 02866 02868 02869 // y = A * x 02870 template <typename T, typename F> 02871 struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> > 02872 { 02873 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, 
op_prod> const & rhs) 02874 { 02875 // check for x = A * x 02876 if (op_aliasing(lhs, rhs.rhs())) 02877 { 02878 vector_base<T> temp(rhs); 02879 lhs = temp; 02880 } 02881 else 02882 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); 02883 } 02884 }; 02885 02886 // y = A^T * x 02887 template <typename T, typename F> 02888 struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02889 const vector_base<T>, 02890 op_prod> > 02891 { 02892 static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02893 const vector_base<T>, 02894 op_prod> const & rhs) 02895 { 02896 // check for x = A^T * x 02897 if (op_aliasing(lhs, rhs.rhs())) 02898 { 02899 vector_base<T> temp(rhs); 02900 lhs = temp; 02901 } 02902 else 02903 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); 02904 } 02905 }; 02906 02907 02908 // y += A * x 02909 template <typename T, typename F> 02910 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> > 02911 { 02912 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs) 02913 { 02914 vector_base<T> temp(rhs); 02915 lhs += temp; 02916 } 02917 }; 02918 02919 // y += A^T * x 02920 template <typename T, typename F> 02921 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02922 const vector_base<T>, 02923 op_prod> > 02924 { 02925 static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02926 const vector_base<T>, 02927 op_prod> const & rhs) 02928 { 02929 vector_base<T> temp(rhs); 02930 lhs += temp; 02931 } 02932 }; 02933 02934 02935 // y -= A * x 02936 template 
<typename T, typename F> 02937 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> > 02938 { 02939 static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs) 02940 { 02941 vector_base<T> temp(rhs); 02942 lhs -= temp; 02943 } 02944 }; 02945 02946 // y -= A^T * x 02947 template <typename T, typename F> 02948 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02949 const vector_base<T>, 02950 op_prod> > 02951 { 02952 static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>, 02953 const vector_base<T>, 02954 op_prod> const & rhs) 02955 { 02956 vector_base<T> temp(rhs); 02957 lhs -= temp; 02958 } 02959 }; 02960 02961 02962 02964 02965 // A = v1 * v2^T 02966 template <typename T, typename F> 02967 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> > 02968 { 02969 static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs) 02970 { 02971 lhs.clear(); 02972 viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs()); 02973 } 02974 }; 02975 02976 // A = alpha * v1 * v2^T 02977 template <typename T, typename F, typename ScalarType> 02978 struct op_executor<matrix_base<T, F>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 02979 const ScalarType, 02980 op_mult> > 02981 { 02982 static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 02983 const ScalarType, 02984 op_mult> const & rhs) 02985 { 02986 lhs.clear(); 02987 
viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs()); 02988 } 02989 }; 02990 02991 // A += v1 * v2^T 02992 template <typename T, typename F> 02993 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> > 02994 { 02995 static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs) 02996 { 02997 viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs()); 02998 } 02999 }; 03000 03001 // A += alpha * v1 * v2^T 03002 template <typename T, typename F, typename ScalarType> 03003 struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 03004 const ScalarType, 03005 op_mult> > 03006 { 03007 static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 03008 const ScalarType, 03009 op_mult> const & rhs) 03010 { 03011 viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs()); 03012 } 03013 }; 03014 03015 // A -= v1 * v2^T 03016 template <typename T, typename F> 03017 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> > 03018 { 03019 static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs) 03020 { 03021 viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs()); 03022 } 03023 }; 03024 03025 // A -= alpha * v1 * v2^T 03026 template <typename T, typename F, typename ScalarType> 03027 struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 03028 const ScalarType, 03029 op_mult> > 
03030 { 03031 static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>, 03032 const ScalarType, 03033 op_mult> const & rhs) 03034 { 03035 viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs()); 03036 } 03037 }; 03038 03039 03040 } // namespace detail 03041 03042 } // namespace linalg 03043 03046 } //namespace viennacl 03047 03048 #endif