#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/host_based/sparse_matrix_operations.hpp
    @brief Implementations of operations on sparse matrices on the CPU, single-threaded or using OpenMP
*/

#include <list>
#include <cmath>      // std::fabs, std::sqrt
#include <algorithm>  // std::max

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/host_based/common.hpp"
#include "viennacl/linalg/host_based/vector_operations.hpp"

namespace viennacl
{
  namespace linalg
  {
    namespace host_based
    {
      //
      // Compressed matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType         * result_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
          unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            ScalarType value = 0;
            unsigned int row_end = row_buffer[row+1];

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += elements[i] * elements[i];
                value = std::sqrt(value);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                {
                  if (col_buffer[i] == row)
                  {
                    value = elements[i];
                    break;
                  }
                }
                break;

              default:
                break;
            }
            result_buf[row] = value;
          }
        }
      }
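
      // CSR layout recap (illustrative example, not part of the library): the 3x3 matrix
      //   [ 4 0 1 ]
      //   [ 0 2 0 ]
      //   [ 3 0 5 ]
      // is stored in the three arrays used throughout this file as
      //   row_buffer = { 0, 2, 3, 5 }    (row i occupies entries [row_buffer[i], row_buffer[i+1]) )
      //   col_buffer = { 0, 2, 1, 0, 2 }
      //   elements   = { 4, 1, 2, 3, 5 }
      // All CSR routines below traverse these arrays in exactly this fashion.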


      /** @brief Carries out matrix-vector multiplication with a compressed_matrix
      *
      * Implementation of the convenience expression result = prod(mat, vec);
      *
      * @param mat    The sparse matrix (CSR format)
      * @param vec    The vector to be multiplied
      * @param result The result vector
      */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long row = 0; row < static_cast<long>(mat.size1()); ++row)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[row+1];
          for (vcl_size_t i = row_buffer[row]; i < row_end; ++i)
            dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];
          result_buf[row * result.stride() + result.start()] = dot_prod;
        }

      }
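
      // Usage sketch (hypothetical caller; prod() is the public entry point that
      // dispatches to the prod_impl() above when the host backend is active):
      //
      //   viennacl::compressed_matrix<double> A(n, n);
      //   viennacl::vector<double> x(n), y(n);
      //   /* ... fill A and x ... */
      //   y = viennacl::linalg::prod(A, x);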

      /** @brief Carries out sparse-matrix-dense-matrix multiplication, where the sparse matrix is a compressed_matrix
      *
      * Implementation of the convenience expression result = prod(sp_mat, d_mat);
      *
      * @param sp_mat  The sparse matrix (CSR format)
      * @param d_mat   The dense matrix
      * @param result  The result matrix
      */
      template< class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType   const * sp_mat_elements   = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }

      }
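
      // Usage sketch (hypothetical caller): multiplying a CSR matrix by a dense
      // matrix goes through the same public prod() interface:
      //
      //   viennacl::compressed_matrix<double> A(m, k);
      //   viennacl::matrix<double> B(k, n), C(m, n);
      //   C = viennacl::linalg::prod(A, B);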

      /** @brief Carries out matrix-matrix multiplication of a compressed_matrix with a transposed dense matrix
      *
      * Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
      *
      * @param sp_mat  The sparse matrix (CSR format)
      * @param d_mat   The transposed dense matrix proxy
      * @param result  The result matrix
      */
      template< class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                   const viennacl::matrix_base<NumericT, F1>,
                                                   viennacl::op_trans > & d_mat,
                      viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType   const * sp_mat_elements   = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }

      }
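
      // Usage sketch (hypothetical caller): the transposed variant above is selected
      // automatically when the dense factor is wrapped in trans():
      //
      //   C = viennacl::linalg::prod(A, viennacl::trans(B));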


      //
      // Triangular solve for compressed_matrix, A \ b
      //
      namespace detail
      {
        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t row_begin = row_buffer[1];
          for (vcl_size_t row = 1; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_end = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
            row_begin = row_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::lower_tag)
        {
          vcl_size_t row_begin = row_buffer[0];
          for (vcl_size_t row = 0; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_end = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
            row_begin = row_end;
          }
        }


        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end   = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::upper_tag)
        {
          for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end   = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
          }
        }

      } //namespace detail
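
      // Forward substitution recap (illustrative, for the lower_tag case above):
      // for L = [ 2 0 ]  and  b = ( 4 )  the loop computes
      //         [ 1 3 ]           ( 5 )
      //   x0 = 4 / 2           = 2
      //   x1 = (5 - 1*x0) / 3  = 1
      // i.e. each row subtracts the already-solved entries and divides by the
      // diagonal; the unit_* variants skip the division. The upper_tag variants
      // run the same scheme from the last row backwards.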


      /** @brief Inplace solution of a unit lower triangular compressed_matrix. Typically used for LU substitutions.
      *
      * @param L    The lower triangular sparse matrix (unit diagonal assumed)
      * @param vec  The right-hand side vector, overwritten with the solution
      * @param tag  The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }

      /** @brief Inplace solution of a lower triangular compressed_matrix. Typically used for LU substitutions.
      *
      * @param L    The lower triangular sparse matrix
      * @param vec  The right-hand side vector, overwritten with the solution
      * @param tag  The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }


      /** @brief Inplace solution of a unit upper triangular compressed_matrix. Typically used for LU substitutions.
      *
      * @param U    The upper triangular sparse matrix (unit diagonal assumed)
      * @param vec  The right-hand side vector, overwritten with the solution
      * @param tag  The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }

      /** @brief Inplace solution of an upper triangular compressed_matrix. Typically used for LU substitutions.
      *
      * @param U    The upper triangular sparse matrix
      * @param vec  The right-hand side vector, overwritten with the solution
      * @param tag  The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }
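
      // Usage sketch (hypothetical caller): triangular solves are typically
      // invoked through the public inplace_solve() interface:
      //
      //   viennacl::compressed_matrix<double> L(n, n);  // lower triangular
      //   viennacl::vector<double> b(n);
      //   viennacl::linalg::inplace_solve(L, b, viennacl::linalg::lower_tag());
      //   // b now holds the solution of L * x = b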


      //
      // Triangular solve for compressed_matrix, A^T \ b
      //

      namespace detail
      {
        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;

            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
          }
        }

        //
        // block solves
        //
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* L_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & L_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType   const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(L_diagonal.handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }


        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* U_diagonal */, //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;

            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & U_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType   const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(U_diagonal.handle());
          ScalarType         * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];

            // Substitute (the diagonal is taken from U_diagonal, so no search is needed)
            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
          }
        }


      } //namespace detail

      /** @brief Inplace solution of a unit lower triangular system trans(L) \ b, reusing the CSR arrays of L column-wise
      *
      * @param proxy  Proxy object for the transposed matrix
      * @param vec    The right-hand side vector, overwritten with the solution
      * @param tag    The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }

      /** @brief Inplace solution of a lower triangular system trans(L) \ b
      *
      * @param proxy  Proxy object for the transposed matrix
      * @param vec    The right-hand side vector, overwritten with the solution
      * @param tag    The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      /** @brief Inplace solution of a unit upper triangular system trans(U) \ b
      *
      * @param proxy  Proxy object for the transposed matrix
      * @param vec    The right-hand side vector, overwritten with the solution
      * @param tag    The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      /** @brief Inplace solution of an upper triangular system trans(U) \ b
      *
      * @param proxy  Proxy object for the transposed matrix
      * @param vec    The right-hand side vector, overwritten with the solution
      * @param tag    The solver tag
      */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType         * vec_buf    = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements   = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }

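      // Usage sketch (hypothetical caller): transposed systems A^T x = b reuse the
      // CSR arrays of A directly via a trans() proxy; whether trans() is available
      // for compressed_matrix in a given ViennaCL version is an assumption here:
      //
      //   viennacl::linalg::inplace_solve(viennacl::trans(U), b, viennacl::linalg::upper_tag());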


      //
      // Compressed Compressed Matrix
      //

      /** @brief Carries out matrix-vector multiplication with a compressed_compressed_matrix (CSR format storing only the nonzero rows)
      *
      * Implementation of the convenience expression result = prod(mat, vec);
      *
      * @param mat    The sparse matrix
      * @param vec    The vector to be multiplied
      * @param result The result vector
      */
      template<class ScalarType>
      void prod_impl(const viennacl::compressed_compressed_matrix<ScalarType> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf  = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf     = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements    = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * col_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle2());

        vector_assign(result, ScalarType(0));

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[i+1];
          for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
            dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];
          result_buf[row_indices[i] * result.stride() + result.start()] = dot_prod;
        }

      }
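
      // Usage sketch (hypothetical caller): compressed_compressed_matrix stores only
      // the nonzero rows, which is why result is zeroed up front above before the
      // nonzero rows are scattered back:
      //
      //   viennacl::compressed_compressed_matrix<double> A(n, n);
      //   viennacl::vector<double> x(n), y(n);
      //   y = viennacl::linalg::prod(A, x);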


      //
      // Coordinate Matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(coordinate_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType         * result_buf   = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType   const * elements     = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

          ScalarType value = 0;
          unsigned int last_row = 0;

          for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          {
            unsigned int current_row = coord_buffer[2*i];

            if (current_row != last_row)
            {
              if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
                value = std::sqrt(value);

              result_buf[last_row] = value;
              value = 0;
              last_row = current_row;
            }

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                value += elements[i] * elements[i];
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                if (coord_buffer[2*i+1] == current_row)
                  value = elements[i];
                break;

              default:
                break;
            }
          }

          if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
            value = std::sqrt(value);

          result_buf[last_row] = value;
        }
      }
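
      // COO layout recap (illustrative example, not part of the library): the matrix
      // from the CSR example above would be stored as interleaved (row, col) pairs
      // plus a parallel value array,
      //   coord_buffer = { 0,0,  0,2,  1,1,  2,0,  2,2 }
      //   elements     = { 4,    1,    2,    3,    5   }
      // Entries are assumed to be sorted by row, which row_info() above relies on
      // when it flushes the accumulated value at each row change.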

      /** @brief Carries out matrix-vector multiplication with a coordinate_matrix
      *
      * Implementation of the convenience expression result = prod(mat, vec);
      *
      * @param mat    The coordinate matrix
      * @param vec    The vector to be multiplied
      * @param result The result vector
      */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                           viennacl::vector_base<ScalarType> & result)
      {
        ScalarType         * result_buf   = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType   const * vec_buf      = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType   const * elements     = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

        for (vcl_size_t i = 0; i < result.size(); ++i)
          result_buf[i * result.stride() + result.start()] = 0;

        for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          result_buf[coord_buffer[2*i] * result.stride() + result.start()]
            += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
      }
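
      // Usage sketch (hypothetical caller):
      //
      //   viennacl::coordinate_matrix<double> A(n, n);
      //   viennacl::vector<double> x(n), y(n);
      //   y = viennacl::linalg::prod(A, x);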

      /** @brief Carries out sparse-matrix-dense-matrix multiplication, where the sparse matrix is a coordinate_matrix
      *
      * Implementation of the convenience expression result = prod(sp_mat, d_mat);
      *
      * @param sp_mat  The coordinate matrix
      * @param d_mat   The dense matrix
      * @param result  The result matrix
      */
      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType   const * sp_mat_elements     = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

          // Note: no OpenMP on the nonzero loop, since several entries may carry the
          // same row index r, so concurrent updates of result_wrapper(r, col) would race.
          for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i) {
            NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
            unsigned int r = sp_mat_coords[2*i];
            unsigned int c = sp_mat_coords[2*i+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT y = d_mat_wrapper(c, col);
              result_wrapper(r, col) += x * y;
            }
          }
        }
        else {

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
            for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i) {
              NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
              unsigned int r = sp_mat_coords[2*i];
              unsigned int c = sp_mat_coords[2*i+1];
              NumericT y = d_mat_wrapper(c, col);

              result_wrapper(r, col) += x*y;
            }
          }
        }

      }


      /** @brief Carries out matrix-matrix multiplication of a coordinate_matrix with a transposed dense matrix
      *
      * Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
      *
      * @param sp_mat  The coordinate matrix
      * @param d_mat   The transposed dense matrix proxy
      * @param result  The result matrix
      */
      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                           viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType   const * sp_mat_elements     = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1   = viennacl::traits::stride1(result);
        vcl_size_t result_inc2   = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }
        else {

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }

        // Note: no OpenMP on the nonzero loop, since several entries may carry the
        // same row index r, so concurrent updates of result_wrapper(r, col) would race.
        for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i) {
          NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
          unsigned int r = sp_mat_coords[2*i];
          unsigned int c = sp_mat_coords[2*i+1];
          for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
            NumericT y = d_mat_wrapper(col, c);
            result_wrapper(r, col) += x * y;
          }
        }

      }
01140       //
01141       // ELL Matrix
01142       //
01151       template<class ScalarType, unsigned int ALIGNMENT>
01152       void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & mat,
01153                      const viennacl::vector_base<ScalarType> & vec,
01154                            viennacl::vector_base<ScalarType> & result)
01155       {
01156         ScalarType         * result_buf   = detail::extract_raw_pointer<ScalarType>(result.handle());
01157         ScalarType   const * vec_buf      = detail::extract_raw_pointer<ScalarType>(vec.handle());
01158         ScalarType   const * elements     = detail::extract_raw_pointer<ScalarType>(mat.handle());
01159         unsigned int const * coords       = detail::extract_raw_pointer<unsigned int>(mat.handle2());
01160 
01161         for(vcl_size_t row = 0; row < mat.size1(); ++row)
01162         {
01163           ScalarType sum = 0;
01164 
01165           for(unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)
01166           {
01167             vcl_size_t offset = row + item_id * mat.internal_size1();  // ELL buffers are slot-major: slot 'item_id' of all rows is contiguous
01168             ScalarType val = elements[offset];
01169 
01170             if(val != 0)  // zeros mark padded (unused) ELL slots
01171             {
01172               unsigned int col = coords[offset];
01173               sum += (vec_buf[col * vec.stride() + vec.start()] * val);
01174             }
01175           }
01176 
01177           result_buf[row * result.stride() + result.start()] = sum;
01178         }
01179       }
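      // --- Editor's worked example (illustration only) ------------------------
      // ELL stores a fixed number of slots per row, laid out slot-major over a
      // padded row count (internal_size1), with zeros in unused slots. For
      //
      //        [ 2 0 1 ]
      //    A = [ 0 3 0 ]        max nnz per row = 2,  internal_size1 = 3 (assumed unpadded)
      //        [ 4 5 0 ]
      //
      // the buffers read by the kernel above look like:
      //
      //    elements = { 2, 3, 4,   1, 0, 5 }     // slot 0 of rows 0..2, then slot 1
      //    coords   = { 0, 1, 0,   2, 0, 1 }     // matching column indices
      //
      // so offset = row + item_id * internal_size1() walks one slot across all
      // rows, and the val != 0 test skips the padded slot of row 1.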
01180 
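01181       /** @brief Carries out sparse-matrix-dense-matrix multiplication, where the sparse matrix is an ell_matrix
01182       *
01183       * Implementation of the convenience expression result = prod(sp_mat, d_mat);
01184       *
01185       *   @param sp_mat The sparse matrix (ELL format)
01186       *   @param d_mat  The dense matrix
01187       *   @param result The result matrix
01188       */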
01189       template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
01190       void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
01191                      const viennacl::matrix_base<NumericT, F1> & d_mat,
01192                            viennacl::matrix_base<NumericT, F2> & result)
01193       {
01194         ScalarType   const * sp_mat_elements     = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
01195         unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());
01196 
01197         NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
01198         NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);
01199 
01200         vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
01201         vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
01202         vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
01203         vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
01204         vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat);
01205         vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat);
01206 
01207         vcl_size_t result_start1 = viennacl::traits::start1(result);
01208         vcl_size_t result_start2 = viennacl::traits::start2(result);
01209         vcl_size_t result_inc1   = viennacl::traits::stride1(result);
01210         vcl_size_t result_inc2   = viennacl::traits::stride2(result);
01211         vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
01212         vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);
01213 
01214         detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
01215             d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
01216         detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
01217             result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
01218 
01219         if ( detail::is_row_major(typename F1::orientation_category()) ) {
01220 #ifdef VIENNACL_WITH_OPENMP
01221         #pragma omp parallel for
01222 #endif
01223           for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
01224             for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
01225               result_wrapper( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
01226 
01227 #ifdef VIENNACL_WITH_OPENMP
01228         #pragma omp parallel for
01229 #endif
01230           for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
01231           {
01232             for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)
01233             {
01234               vcl_size_t offset = static_cast<vcl_size_t>(row) + item_id * sp_mat.internal_size1();
01235               NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
01236               unsigned int sp_mat_col = sp_mat_coords[offset];
01237 
01238               if( sp_mat_val != 0)
01239               {
01240                 for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
01241                   result_wrapper(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper( sp_mat_col, col);
01242               }
01243             }
01244           }
01245         }
01246         else {
01247 #ifdef VIENNACL_WITH_OPENMP
01248         #pragma omp parallel for
01249 #endif
01250           for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
01251             for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
01252                 result_wrapper( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
01253 
01254 #ifdef VIENNACL_WITH_OPENMP
01255         #pragma omp parallel for
01256 #endif
01257           for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
01258 
01259             for(unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {
01260 
01261               for(vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
01262 
01263                 vcl_size_t offset = row + item_id * sp_mat.internal_size1();
01264                 NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
01265                 unsigned int sp_mat_col = sp_mat_coords[offset];
01266 
01267                 if( sp_mat_val != 0) {
01268 
01269                   result_wrapper( row, col) += sp_mat_val * d_mat_wrapper( sp_mat_col, col);
01270                 }
01271               }
01272             }
01273           }
01274         }
01275 
01276       }
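      // --- Editor's usage sketch (illustration only; assumes the public headers
      // viennacl/ell_matrix.hpp, viennacl/matrix.hpp and viennacl/linalg/prod.hpp,
      // and a viennacl::copy() overload for std::vector< std::map<...> >) --------
      // The overload above is reached through the generic front end
      // viennacl::linalg::prod(). A minimal driver could look like:
      //
      //   #include <map>
      //   #include <vector>
      //   #include "viennacl/ell_matrix.hpp"
      //   #include "viennacl/matrix.hpp"
      //   #include "viennacl/linalg/prod.hpp"
      //
      //   int main()
      //   {
      //     std::vector<std::map<unsigned int, double> > cpu_A(3);   // 3x3 sparse
      //     cpu_A[0][0] = 2.0; cpu_A[0][2] = 1.0; cpu_A[1][1] = 3.0;
      //
      //     viennacl::ell_matrix<double> A;
      //     viennacl::copy(cpu_A, A);
      //
      //     viennacl::matrix<double> B(3, 4), C(3, 4);
      //     C = viennacl::linalg::prod(A, B);   // dispatches to prod_impl(A, B, C) above
      //     return 0;
      //   }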
01277 
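01278       /** @brief Carries out matrix-matrix multiplication C = A * B^T, where A is a sparse ell_matrix
01279       *          and B^T is a transposed dense matrix
01280       *
01281       * Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
01282       *
01283       *   @param sp_mat The sparse matrix (ELL format)
01284       *   @param d_mat  The transposed dense matrix proxy
01285       *   @param result The result matrix
01286       */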
01287       template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
01288       void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
01289                      const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
01290                                                         const viennacl::matrix_base<NumericT, F1>,
01291                                                         viennacl::op_trans > & d_mat,
01292                            viennacl::matrix_base<NumericT, F2> & result) {
01293 
01294         ScalarType   const * sp_mat_elements     = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
01295         unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());
01296 
01297         NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
01298         NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);
01299 
01300         vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
01301         vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
01302         vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
01303         vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
01304         vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat.lhs());
01305         vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat.lhs());
01306 
01307         vcl_size_t result_start1 = viennacl::traits::start1(result);
01308         vcl_size_t result_start2 = viennacl::traits::start2(result);
01309         vcl_size_t result_inc1   = viennacl::traits::stride1(result);
01310         vcl_size_t result_inc2   = viennacl::traits::stride2(result);
01311         vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
01312         vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);
01313 
01314         detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
01315             d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
01316         detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
01317             result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
01318 
01319         if ( detail::is_row_major(typename F1::orientation_category()) ) {
01320 #ifdef VIENNACL_WITH_OPENMP
01321         #pragma omp parallel for
01322 #endif
01323           for(long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
01324             for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
01325               result_wrapper( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
01326 
01327           for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
01328 
01329             for(unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {
01330 
01331               for(vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
01332 
01333                 vcl_size_t offset = row + item_id * sp_mat.internal_size1();
01334                 NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
01335                 unsigned int sp_mat_col = sp_mat_coords[offset];
01336 
01337                 if( sp_mat_val != 0) {
01338 
01339                   result_wrapper( row, col) += sp_mat_val * d_mat_wrapper( col, sp_mat_col);
01340                 }
01341               }
01342             }
01343           }
01344         }
01345         else {
01346 #ifdef VIENNACL_WITH_OPENMP
01347         #pragma omp parallel for
01348 #endif
01349           for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
01350             for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
01351               result_wrapper( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
01352 
01353         // note: intentionally no OpenMP pragma here: every ELL slot (item_id)
01354         // updates the same set of result rows, so parallelizing this loop
01355         // would race on result_wrapper(row, col).
01356           for(long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {
01357 
01358             for(vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
01359 
01360               vcl_size_t offset = row + item_id * sp_mat.internal_size1();
01361               NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
01362               unsigned int sp_mat_col = sp_mat_coords[offset];
01363 
01364               if( sp_mat_val != 0) {
01365 
01366                 for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
01367 
01368                   result_wrapper( row, col) += sp_mat_val * d_mat_wrapper( col, sp_mat_col);
01369                 }
01370               }
01371             }
01372           }
01373         }
01374 
01375       }
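      // --- Editor's note (illustration only) ----------------------------------
      // Same kernel family as the untransposed variant above, but the dense
      // factor is consumed through a trans() proxy, hence the swapped index
      // order d_mat_wrapper(col, sp_mat_col). Typical call site (names
      // illustrative):
      //
      //   C = viennacl::linalg::prod(A, viennacl::trans(B));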
01376 
01377       //
01378       // Hybrid Matrix
01379       //
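01380       /** @brief Carries out matrix-vector multiplication with a hyb_matrix
01381       *
01382       * Implementation of the convenience expression result = prod(mat, vec);
01383       *
01384       *   @param mat    The matrix
01385       *   @param vec    The vector
01386       *   @param result The result vector
01387       */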
01388       template<class ScalarType, unsigned int ALIGNMENT>
01389       void prod_impl(const viennacl::hyb_matrix<ScalarType, ALIGNMENT> & mat,
01390                      const viennacl::vector_base<ScalarType> & vec,
01391                            viennacl::vector_base<ScalarType> & result)
01392       {
01393         ScalarType         * result_buf     = detail::extract_raw_pointer<ScalarType>(result.handle());
01394         ScalarType   const * vec_buf        = detail::extract_raw_pointer<ScalarType>(vec.handle());
01395         ScalarType   const * elements       = detail::extract_raw_pointer<ScalarType>(mat.handle());
01396         unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
01397         ScalarType   const * csr_elements   = detail::extract_raw_pointer<ScalarType>(mat.handle5());
01398         unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
01399         unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());
01400 
01401 
01402         for(vcl_size_t row = 0; row < mat.size1(); ++row)
01403         {
01404           ScalarType sum = 0;
01405 
01406           //
01407           // Part 1: Process ELL part
01408           //
01409           for(unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
01410           {
01411             vcl_size_t offset = row + item_id * mat.internal_size1();
01412             ScalarType val = elements[offset];
01413 
01414             if(val != 0)
01415             {
01416               unsigned int col = coords[offset];
01417               sum += (vec_buf[col * vec.stride() + vec.start()] * val);
01418             }
01419           }
01420 
01421           //
01422           // Part 2: Process CSR part
01423           //
01424           vcl_size_t col_begin = csr_row_buffer[row];
01425           vcl_size_t col_end   = csr_row_buffer[row + 1];
01426 
01427           for(vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
01428           {
01429             sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);
01430           }
01431 
01432           result_buf[row * result.stride() + result.start()] = sum;
01433         }
01434 
01435       }
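      // --- Editor's worked example (illustration only) ------------------------
      // HYB = ELL for the regular part + CSR for the overflow entries. With an
      // ELL width of 1 (the actual width is chosen internally), the matrix
      //
      //        [ 2 1 0 ]
      //    A = [ 0 3 0 ]
      //        [ 4 5 6 ]
      //
      // could be split as (layout assumed unpadded):
      //
      //    ELL:  elements = { 2, 3, 4 },  coords = { 0, 1, 0 }   // one slot per row
      //    CSR:  csr_row_buffer = { 0, 1, 1, 3 }                 // overflow entries per row
      //          csr_col_buffer = { 1,       1, 2 }
      //          csr_elements   = { 1,       5, 6 }
      //
      // The loop above accumulates both parts into the same per-row sum.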
01436 
01437       //
01438       // Hybrid Matrix
01439       //
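01440       /** @brief Carries out sparse-matrix-dense-matrix multiplication, where the sparse matrix is a hyb_matrix
01441       *
01442       * Implementation of the convenience expression result = prod(mat, d_mat);
01443       *
01444       *   @param mat    The sparse matrix (HYB format)
01445       *   @param d_mat  The dense matrix
01446       *   @param result The result matrix
01447       */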
01448       template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
01449       void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
01450                      const viennacl::matrix_base<NumericT, F1> & d_mat,
01451                            viennacl::matrix_base<NumericT, F2> & result)
01452       {
01453         NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
01454         NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);
01455 
01456         vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
01457         vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
01458         vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat);
01459         vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat);
01460         vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat);
01461         vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat);
01462 
01463         vcl_size_t result_start1 = viennacl::traits::start1(result);
01464         vcl_size_t result_start2 = viennacl::traits::start2(result);
01465         vcl_size_t result_inc1   = viennacl::traits::stride1(result);
01466         vcl_size_t result_inc2   = viennacl::traits::stride2(result);
01467         vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
01468         vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);
01469 
01470         detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
01471             d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
01472         detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
01473             result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
01474 
01475         NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
01476         unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
01477         NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
01478         unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
01479         unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());
01480 
01481 
01482         for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
01483         {
01484           for(vcl_size_t row = 0; row < mat.size1(); ++row)
01485           {
01486             NumericT sum = 0;
01487 
01488             //
01489             // Part 1: Process ELL part
01490             //
01491             for(unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
01492             {
01493               vcl_size_t offset = row + item_id * mat.internal_size1();
01494               NumericT val = elements[offset];
01495 
01496               if(val != 0)
01497               {
01498                 unsigned int col = coords[offset];
01499                 sum += d_mat_wrapper(col, result_col) * val;
01500               }
01501             }
01502 
01503             //
01504             // Part 2: Process HYB/CSR part
01505             //
01506             vcl_size_t col_begin = csr_row_buffer[row];
01507             vcl_size_t col_end   = csr_row_buffer[row + 1];
01508 
01509             for(vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
01510               sum += d_mat_wrapper(csr_col_buffer[item_id], result_col) * csr_elements[item_id];
01511 
01512             result_wrapper(row, result_col) = sum;
01513           }
01514         } // for result_col
01515       }
01516 
01517 
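01518       /** @brief Carries out sparse-matrix-transposed-dense-matrix multiplication, where the sparse matrix is a hyb_matrix
01519       *
01520       * Implementation of the convenience expression result = prod(mat, trans(d_mat));
01521       *
01522       *   @param mat    The sparse matrix (HYB format)
01523       *   @param d_mat  The transposed dense matrix proxy
01524       *   @param result The result matrix
01525       */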
01526       template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
01527       void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
01528                      const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
01529                                                         const viennacl::matrix_base<NumericT, F1>,
01530                                                         viennacl::op_trans > & d_mat,
01531                            viennacl::matrix_base<NumericT, F2> & result)
01532       {
01533         NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());  // take the pointer from the wrapped lhs, not from the trans() proxy
01534         NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);
01535 
01536         vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
01537         vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
01538         vcl_size_t d_mat_inc1   = viennacl::traits::stride1(d_mat.lhs());
01539         vcl_size_t d_mat_inc2   = viennacl::traits::stride2(d_mat.lhs());
01540         vcl_size_t d_mat_internal_size1  = viennacl::traits::internal_size1(d_mat.lhs());
01541         vcl_size_t d_mat_internal_size2  = viennacl::traits::internal_size2(d_mat.lhs());
01542 
01543         vcl_size_t result_start1 = viennacl::traits::start1(result);
01544         vcl_size_t result_start2 = viennacl::traits::start2(result);
01545         vcl_size_t result_inc1   = viennacl::traits::stride1(result);
01546         vcl_size_t result_inc2   = viennacl::traits::stride2(result);
01547         vcl_size_t result_internal_size1  = viennacl::traits::internal_size1(result);
01548         vcl_size_t result_internal_size2  = viennacl::traits::internal_size2(result);
01549 
01550         detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
01551             d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
01552         detail::matrix_array_wrapper<NumericT,       typename F2::orientation_category, false>
01553             result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
01554 
01555         NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
01556         unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
01557         NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
01558         unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
01559         unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());
01560 
01561 
01562         for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
01563         {
01564           for(vcl_size_t row = 0; row < mat.size1(); ++row)
01565           {
01566             NumericT sum = 0;
01567 
01568             //
01569             // Part 1: Process ELL part
01570             //
01571             for(unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
01572             {
01573               vcl_size_t offset = row + item_id * mat.internal_size1();
01574               NumericT val = elements[offset];
01575 
01576               if(val != 0)
01577               {
01578                 unsigned int col = coords[offset];
01579                 sum += d_mat_wrapper(result_col, col) * val;
01580               }
01581             }
01582 
01583             //
01584             // Part 2: Process HYB/CSR part
01585             //
01586             vcl_size_t col_begin = csr_row_buffer[row];
01587             vcl_size_t col_end   = csr_row_buffer[row + 1];
01588 
01589             for(vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
01590               sum += d_mat_wrapper(result_col, csr_col_buffer[item_id]) * csr_elements[item_id];
01591 
01592             result_wrapper(row, result_col) = sum;
01593           }
01594         } // for result_col
01595       }
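      // --- Editor's usage sketch (illustration only; assumes the public headers
      // viennacl/hyb_matrix.hpp, viennacl/matrix.hpp, viennacl/vector.hpp and
      // viennacl/linalg/prod.hpp are included at the call site) -----------------
      //
      //   viennacl::hyb_matrix<double> A;                       // filled via viennacl::copy(...)
      //   viennacl::matrix<double>     B(3, 3), C(3, 3);
      //   viennacl::vector<double>     x(3), y(3);
      //
      //   y = viennacl::linalg::prod(A, x);                     // hyb * vector
      //   C = viennacl::linalg::prod(A, B);                     // hyb * dense
      //   C = viennacl::linalg::prod(A, viennacl::trans(B));    // hyb * dense^T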
01596 
01597 
01598     } // namespace host_based
01599   } //namespace linalg
01600 } //namespace viennacl
01601 
01602 
01603 #endif