ViennaCL - The Vienna Computing Library 1.5.1
Source listing: viennacl/linalg/host_based/sparse_matrix_operations.hpp
#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <list>

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/host_based/common.hpp"
#include "viennacl/linalg/host_based/vector_operations.hpp"

namespace viennacl
{
  namespace linalg
  {
    namespace host_based
    {
      //
      // Compressed matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType         * result_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
          unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            ScalarType value = 0;
            unsigned int row_end = row_buffer[row+1];

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += elements[i] * elements[i];
                value = std::sqrt(value);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                {
                  if (col_buffer[i] == row)
                  {
                    value = elements[i];
                    break;
                  }
                }
                break;

              default:
                break;
            }
            result_buf[row] = value;
          }
        }
      }


      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long row = 0; row < static_cast<long>(mat.size1()); ++row)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[row+1];
          for (vcl_size_t i = row_buffer[row]; i < row_end; ++i)
            dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];
          result_buf[row * result.stride() + result.start()] = dot_prod;
        }
      }
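
      /* Usage sketch (not part of the original header): the function above is the
         host backend of the convenience expression y = prod(A, x) for CSR matrices.
         A minimal example, assuming the usual public ViennaCL 1.5 headers:

           #include "viennacl/compressed_matrix.hpp"
           #include "viennacl/vector.hpp"
           #include "viennacl/linalg/prod.hpp"

           viennacl::compressed_matrix<double> A(m, n);  // filled via viennacl::copy()
           viennacl::vector<double> x(n), y(m);
           y = viennacl::linalg::prod(A, x);             // dispatches to prod_impl() above
      */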

      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements   = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end   = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
            {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k)
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              result_wrapper(row, col) = temp;
            }
          }
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
          {
            for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end   = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k)
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              result_wrapper(row, col) = temp;
            }
          }
        }
      }


      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements   = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end   = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
            {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k)
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              result_wrapper(row, col) = temp;
            }
          }
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
          {
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
            {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end   = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k)
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              result_wrapper(row, col) = temp;
            }
          }
        }
      }
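
      /* Usage sketch (not part of the original header): the two overloads above back
         the expressions C = prod(A, B) and C = prod(A, trans(B)) for a CSR matrix A
         and dense matrices; the same expressions exist for the coordinate_matrix,
         ell_matrix and hyb_matrix types further below. A hedged example, assuming
         the public ViennaCL 1.5 headers:

           #include "viennacl/compressed_matrix.hpp"
           #include "viennacl/matrix.hpp"
           #include "viennacl/linalg/prod.hpp"

           viennacl::compressed_matrix<double> A(m, n);
           viennacl::matrix<double> B(n, k), Bt(k, n), C(m, k);
           C = viennacl::linalg::prod(A, B);
           C = viennacl::linalg::prod(A, viennacl::trans(Bt));
      */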


      //
      // Triangular solve for compressed_matrix, A \ b
      //
      namespace detail
      {
        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t row_begin = row_buffer[1];
          for (vcl_size_t row = 1; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_end = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
            row_begin = row_end;
          }
        }

        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::lower_tag)
        {
          vcl_size_t row_begin = row_buffer[0];
          for (vcl_size_t row = 0; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_end = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
            row_begin = row_end;
          }
        }


        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end   = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
          }
        }

        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::upper_tag)
        {
          for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end   = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
          }
        }

      } //namespace detail
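
      /* Sketch (not part of the original header): the detail routines above operate
         on raw CSR arrays and overwrite the right-hand side in place. For the 3x3
         lower-triangular matrix [[2,0,0],[1,3,0],[0,4,5]], assuming the types shown:

           unsigned int row_buf[4] = {0, 1, 3, 5};
           unsigned int col_buf[5] = {0, 0, 1, 1, 2};
           double       values[5]  = {2., 1., 3., 4., 5.};
           double       b[3]       = {2., 7., 13.};   // expected solution: (1, 2, 1)

           detail::csr_inplace_solve<double>(&row_buf[0], &col_buf[0], &values[0],
                                             b, 3, viennacl::linalg::lower_tag());
      */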


      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }

      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }


      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }

      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }
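
      /* Usage sketch (not part of the original header): through the public API the
         overloads above are reached via viennacl::linalg::inplace_solve, which
         overwrites the right-hand side with the solution. Assuming a triangular
         CSR matrix:

           viennacl::compressed_matrix<double> L;  // lower-triangular
           viennacl::vector<double> b;             // right-hand side
           viennacl::linalg::inplace_solve(L, b, viennacl::linalg::lower_tag());
           // unit_lower_tag(), unit_upper_tag(), upper_tag() select the other variants
      */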


      //
      // Triangular solve for compressed_matrix, A^T \ b
      //

      namespace detail
      {
        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;

            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end   = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
          }
        }

        template<typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end   = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
          }
        }


        //
        // block solves
        //
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* L_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & L_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer      = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer      = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType   const * elements        = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType   const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(L_diagonal.handle());
          ScalarType         * vec_buffer      = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }


        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* U_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;

            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end   = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & U_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer      = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer      = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType   const * elements        = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType   const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(U_diagonal.handle());
          ScalarType         * vec_buffer      = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end   = row_buffer[col+1];

            // Stage 2: Substitute
            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
          }
        }

      } //namespace detail

      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }

      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }
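
      /* Usage sketch (not part of the original header): the transposed systems
         A^T x = b are addressed by wrapping the matrix in trans(), which produces
         exactly the matrix_expression<..., op_trans> accepted by the overloads
         above. Assuming trans() is available for compressed_matrix:

           viennacl::compressed_matrix<double> U;  // upper-triangular
           viennacl::vector<double> b;
           viennacl::linalg::inplace_solve(viennacl::trans(U), b,
                                           viennacl::linalg::lower_tag()); // U^T is lower-triangular
      */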


      //
      // Compressed Compressed Matrix
      //

      template<class ScalarType>
      void prod_impl(const viennacl::compressed_compressed_matrix<ScalarType> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf  = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf     = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements    = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * col_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle2());

        vector_assign(result, ScalarType(0));

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[i+1];
          for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
            dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];
          result_buf[row_indices[i] * result.stride() + result.start()] = dot_prod;
        }
      }
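
      /* Usage sketch (not part of the original header): compressed_compressed_matrix
         additionally compresses away rows without nonzeros (handle3() holds the
         indices of the nonzero rows, nnz1() their count), hence the indirection
         through row_indices above. Usage mirrors the plain CSR case:

           viennacl::compressed_compressed_matrix<double> A;  // filled via viennacl::copy()
           viennacl::vector<double> x, y;
           y = viennacl::linalg::prod(A, x);
      */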


      //
      // Coordinate Matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(coordinate_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType         * result_buf   = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType   const * elements     = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

          ScalarType value = 0;
          unsigned int last_row = 0;

          for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          {
            unsigned int current_row = coord_buffer[2*i];

            if (current_row != last_row)
            {
              if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
                value = std::sqrt(value);

              result_buf[last_row] = value;
              value = 0;
              last_row = current_row;
            }

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                value += elements[i] * elements[i];
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                if (coord_buffer[2*i+1] == current_row)
                  value = elements[i];
                break;

              default:
                break;
            }
          }

          if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
            value = std::sqrt(value);

          result_buf[last_row] = value;
        }
      }

      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf   = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf      = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements     = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

        for (vcl_size_t i = 0; i < result.size(); ++i)
          result_buf[i * result.stride() + result.start()] = 0;

        for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          result_buf[coord_buffer[2*i] * result.stride() + result.start()]
            += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
      }
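
      /* Usage sketch (not part of the original header): a coordinate_matrix stores
         interleaved (row, column) index pairs in handle12(). It is typically filled
         from an STL-side representation:

           std::vector< std::map<unsigned int, double> > cpu_A(m);
           cpu_A[0][2] = 3.14;                       // A(0, 2) = 3.14, etc.
           viennacl::coordinate_matrix<double> A(m, n);
           viennacl::copy(cpu_A, A);
           y = viennacl::linalg::prod(A, x);
      */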

      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

          // Note: no OpenMP parallelization over the nonzeros here: several entries
          // may carry the same row index r, so concurrent '+=' updates of the same
          // result row would constitute a data race.
          for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i)
          {
            NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
            unsigned int r = sp_mat_coords[2*i];
            unsigned int c = sp_mat_coords[2*i+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
            {
              NumericT y = d_mat_wrapper(c, col);
              result_wrapper(r, col) += x * y;
            }
          }
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
          {
            for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i)
            {
              NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
              unsigned int r = sp_mat_coords[2*i];
              unsigned int c = sp_mat_coords[2*i+1];
              NumericT y = d_mat_wrapper(c, col);

              result_wrapper(r, col) += x * y;
            }
          }
        }
      }


      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }

        // Note: no OpenMP parallelization over the nonzeros here: several entries
        // may carry the same row index r, so concurrent '+=' updates of the same
        // result row would constitute a data race.
        for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i)
        {
          NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
          unsigned int r = sp_mat_coords[2*i];
          unsigned int c = sp_mat_coords[2*i+1];
          for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          {
            NumericT y = d_mat_wrapper(col, c);
            result_wrapper(r, col) += x * y;
          }
        }
      }

      //
      // ELL Matrix
      //
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coords     = detail::extract_raw_pointer<unsigned int>(mat.handle2());

        for (vcl_size_t row = 0; row < mat.size1(); ++row)
        {
          ScalarType sum = 0;

          for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)
          {
            vcl_size_t offset = row + item_id * mat.internal_size1();
            ScalarType val = elements[offset];

            if (val != 0)
            {
              unsigned int col = coords[offset];
              sum += (vec_buf[col * vec.stride() + vec.start()] * val);
            }
          }

          result_buf[row * result.stride() + result.start()] = sum;
        }
      }
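
      /* Usage sketch (not part of the original header): ell_matrix pads every row
         to the same number of entries, which is why the loop above can skip the
         padding by testing for stored zeros. Conversion again goes through
         viennacl::copy:

           viennacl::ell_matrix<double> A;
           viennacl::copy(cpu_A, A);   // cpu_A as in the coordinate_matrix sketch
           y = viennacl::linalg::prod(A, x);
      */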

      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          {
            for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)
            {
              vcl_size_t offset = static_cast<vcl_size_t>(row) + item_id * sp_mat.internal_size1();
              NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
              unsigned int sp_mat_col = sp_mat_coords[offset];

              if (sp_mat_val != 0)
              {
                for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
                  result_wrapper(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper(sp_mat_col, col);
              }
            }
          }
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
          {
            for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id)
            {
              for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              {
                vcl_size_t offset = row + item_id * sp_mat.internal_size1();
                NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
                unsigned int sp_mat_col = sp_mat_coords[offset];

                if (sp_mat_val != 0)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(sp_mat_col, col);
              }
            }
          }
        }
      }

      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType   const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) )
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

          for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          {
            for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id)
            {
              for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              {
                vcl_size_t offset = row + item_id * sp_mat.internal_size1();
                NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
                unsigned int sp_mat_col = sp_mat_coords[offset];

                if (sp_mat_val != 0)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(col, sp_mat_col);
              }
            }
          }
        }
        else
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

          // Note: no OpenMP parallelization over item_id here: different item_id
          // values update the same result entries, so concurrent '+=' would
          // constitute a data race.
          for (vcl_size_t item_id = 0; item_id < sp_mat.maxnnz(); ++item_id)
          {
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
            {
              vcl_size_t offset = row + item_id * sp_mat.internal_size1();
              NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
              unsigned int sp_mat_col = sp_mat_coords[offset];

              if (sp_mat_val != 0)
              {
                for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(col, sp_mat_col);
              }
            }
          }
        }
      }

      //
      // Hybrid Matrix
      //
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::hyb_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf     = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf        = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements       = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        ScalarType   const * csr_elements   = detail::extract_raw_pointer<ScalarType>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t row = 0; row < mat.size1(); ++row)
        {
          ScalarType sum = 0;

          //
          // Part 1: Process ELL part
          //
          for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
          {
            vcl_size_t offset = row + item_id * mat.internal_size1();
            ScalarType val = elements[offset];

            if (val != 0)
            {
              unsigned int col = coords[offset];
              sum += (vec_buf[col * vec.stride() + vec.start()] * val);
            }
          }

          //
          // Part 2: Process CSR part (the overflow entries of the HYB format)
          //
          vcl_size_t col_begin = csr_row_buffer[row];
          vcl_size_t col_end   = csr_row_buffer[row + 1];

          for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
            sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);

          result_buf[row * result.stride() + result.start()] = sum;
        }
      }
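
      /* Usage sketch (not part of the original header): hyb_matrix combines the two
         parts processed above, an ELL part for rows of typical length and a CSR
         part (handle3/handle4/handle5) for the overflow entries:

           viennacl::hyb_matrix<double> A;
           viennacl::copy(cpu_A, A);   // cpu_A as in the coordinate_matrix sketch
           y = viennacl::linalg::prod(A, x);
      */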

      //
      // Hybrid Matrix
      //
      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
        unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
        {
          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            NumericT sum = 0;

            //
            // Part 1: Process ELL part
            //
            for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
            {
              vcl_size_t offset = row + item_id * mat.internal_size1();
              NumericT val = elements[offset];

              if (val != 0)
              {
                unsigned int col = coords[offset];
                sum += d_mat_wrapper(col, result_col) * val;
              }
            }

            //
            // Part 2: Process HYB/CSR part
            //
            vcl_size_t col_begin = csr_row_buffer[row];
            vcl_size_t col_end   = csr_row_buffer[row + 1];

            for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
              sum += d_mat_wrapper(csr_col_buffer[item_id], result_col) * csr_elements[item_id];

            result_wrapper(row, result_col) = sum;
          }
        } // for result_col
      }

      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result)
      {
        // d_mat is an expression wrapper around the transposed matrix, so the raw
        // buffer has to be taken from d_mat.lhs(), consistent with the traits below.
        NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
        unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
        {
          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            NumericT sum = 0;

            //
            // Part 1: Process ELL part
            //
            for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
            {
              vcl_size_t offset = row + item_id * mat.internal_size1();
              NumericT val = elements[offset];

              if (val != 0)
              {
                unsigned int col = coords[offset];
                sum += d_mat_wrapper(result_col, col) * val;
              }
            }

            //
            // Part 2: Process HYB/CSR part
            //
            vcl_size_t col_begin = csr_row_buffer[row];
            vcl_size_t col_end   = csr_row_buffer[row + 1];

            for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
              sum += d_mat_wrapper(result_col, csr_col_buffer[item_id]) * csr_elements[item_id];

            result_wrapper(row, result_col) = sum;
          }
        } // for result_col
      }


    } // namespace host_based
  } //namespace linalg
} //namespace viennacl


#endif