ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/host_based/matrix_operations.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
00002 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include "viennacl/forwards.h"
00026 #include "viennacl/scalar.hpp"
00027 #include "viennacl/vector.hpp"
00028 #include "viennacl/vector_proxy.hpp"
00029 #include "viennacl/tools/tools.hpp"
00030 #include "viennacl/meta/enable_if.hpp"
00031 #include "viennacl/meta/predicate.hpp"
00032 #include "viennacl/meta/result_of.hpp"
00033 #include "viennacl/traits/size.hpp"
00034 #include "viennacl/traits/start.hpp"
00035 #include "viennacl/traits/handle.hpp"
00036 #include "viennacl/traits/stride.hpp"
00037 #include "viennacl/linalg/detail/op_applier.hpp"
00038 #include "viennacl/linalg/host_based/common.hpp"
00039 
00040 namespace viennacl
00041 {
00042   namespace linalg
00043   {
00044     namespace host_based
00045     {
00046 
00047       //
00048       // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
00049       //
00050 
00051       template <typename NumericT, typename F, typename ScalarType1>
00052       void am(matrix_base<NumericT, F> & mat1,
00053               matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
00054       {
00055         typedef NumericT        value_type;
00056 
00057         value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
00058         value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
00059 
00060         value_type data_alpha = alpha;
00061         if (flip_sign_alpha)
00062           data_alpha = -data_alpha;
00063 
00064         vcl_size_t A_start1 = viennacl::traits::start1(mat1);
00065         vcl_size_t A_start2 = viennacl::traits::start2(mat1);
00066         vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
00067         vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
00068         vcl_size_t A_size1  = viennacl::traits::size1(mat1);
00069         vcl_size_t A_size2  = viennacl::traits::size2(mat1);
00070         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat1);
00071         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat1);
00072 
00073         vcl_size_t B_start1 = viennacl::traits::start1(mat2);
00074         vcl_size_t B_start2 = viennacl::traits::start2(mat2);
00075         vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
00076         vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
00077         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(mat2);
00078         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(mat2);
00079 
00080         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00081         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00082         //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A;
00083         //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B;
00084 
00085         if (detail::is_row_major(typename F::orientation_category()))
00086         {
00087           if (reciprocal_alpha)
00088           {
00089 #ifdef VIENNACL_WITH_OPENMP
00090             #pragma omp parallel for
00091 #endif
00092             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00093               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00094                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
00095           }
00096           else
00097           {
00098 #ifdef VIENNACL_WITH_OPENMP
00099             #pragma omp parallel for
00100 #endif
00101             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00102               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00103                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
00104           }
00105         }
00106         else
00107         {
00108           if (reciprocal_alpha)
00109           {
00110 #ifdef VIENNACL_WITH_OPENMP
00111             #pragma omp parallel for
00112 #endif
00113             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00114               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00115                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
00116           }
00117           else
00118           {
00119 #ifdef VIENNACL_WITH_OPENMP
00120             #pragma omp parallel for
00121 #endif
00122             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00123               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00124                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
00125           }
00126         }
00127       }
00128 
00129 
00130       template <typename NumericT, typename F,
00131                 typename ScalarType1, typename ScalarType2>
00132       void ambm(matrix_base<NumericT, F> & mat1,
00133                 matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
00134                 matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
00135       {
00136         typedef NumericT        value_type;
00137 
00138         value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
00139         value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
00140         value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
00141 
00142         value_type data_alpha = alpha;
00143         if (flip_sign_alpha)
00144           data_alpha = -data_alpha;
00145 
00146         value_type data_beta = beta;
00147         if (flip_sign_beta)
00148           data_beta = -data_beta;
00149 
00150         vcl_size_t A_start1 = viennacl::traits::start1(mat1);
00151         vcl_size_t A_start2 = viennacl::traits::start2(mat1);
00152         vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
00153         vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
00154         vcl_size_t A_size1  = viennacl::traits::size1(mat1);
00155         vcl_size_t A_size2  = viennacl::traits::size2(mat1);
00156         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat1);
00157         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat1);
00158 
00159         vcl_size_t B_start1 = viennacl::traits::start1(mat2);
00160         vcl_size_t B_start2 = viennacl::traits::start2(mat2);
00161         vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
00162         vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
00163         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(mat2);
00164         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(mat2);
00165 
00166         vcl_size_t C_start1 = viennacl::traits::start1(mat3);
00167         vcl_size_t C_start2 = viennacl::traits::start2(mat3);
00168         vcl_size_t C_inc1   = viennacl::traits::stride1(mat3);
00169         vcl_size_t C_inc2   = viennacl::traits::stride2(mat3);
00170         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(mat3);
00171         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(mat3);
00172 
00173         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00174         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00175         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
00176 
00177         if (detail::is_row_major(typename F::orientation_category()))
00178         {
00179           if (reciprocal_alpha && reciprocal_beta)
00180           {
00181 #ifdef VIENNACL_WITH_OPENMP
00182             #pragma omp parallel for
00183 #endif
00184             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00185               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00186                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
00187           }
00188           else if (reciprocal_alpha && !reciprocal_beta)
00189           {
00190 #ifdef VIENNACL_WITH_OPENMP
00191             #pragma omp parallel for
00192 #endif
00193             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00194               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00195                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
00196           }
00197           else if (!reciprocal_alpha && reciprocal_beta)
00198           {
00199 #ifdef VIENNACL_WITH_OPENMP
00200             #pragma omp parallel for
00201 #endif
00202             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00203               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00204                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
00205           }
00206           else if (!reciprocal_alpha && !reciprocal_beta)
00207           {
00208 #ifdef VIENNACL_WITH_OPENMP
00209             #pragma omp parallel for
00210 #endif
00211             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00212               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00213                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
00214           }
00215         }
00216         else
00217         {
00218           if (reciprocal_alpha && reciprocal_beta)
00219           {
00220 #ifdef VIENNACL_WITH_OPENMP
00221             #pragma omp parallel for
00222 #endif
00223             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00224               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00225                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
00226           }
00227           else if (reciprocal_alpha && !reciprocal_beta)
00228           {
00229 #ifdef VIENNACL_WITH_OPENMP
00230             #pragma omp parallel for
00231 #endif
00232             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00233               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00234                 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
00235           }
00236           else if (!reciprocal_alpha && reciprocal_beta)
00237           {
00238 #ifdef VIENNACL_WITH_OPENMP
00239             #pragma omp parallel for
00240 #endif
00241             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00242               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00243                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
00244           }
00245           else if (!reciprocal_alpha && !reciprocal_beta)
00246           {
00247 #ifdef VIENNACL_WITH_OPENMP
00248             #pragma omp parallel for
00249 #endif
00250             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00251               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00252                 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
00253           }
00254         }
00255 
00256       }
00257 
00258 
00259       template <typename NumericT, typename F,
00260                 typename ScalarType1, typename ScalarType2>
00261       void ambm_m(matrix_base<NumericT, F> & mat1,
00262                   matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
00263                   matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
00264       {
00265         typedef NumericT        value_type;
00266 
00267         value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
00268         value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
00269         value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
00270 
00271         value_type data_alpha = alpha;
00272         if (flip_sign_alpha)
00273           data_alpha = -data_alpha;
00274 
00275         value_type data_beta = beta;
00276         if (flip_sign_beta)
00277           data_beta = -data_beta;
00278 
00279         vcl_size_t A_start1 = viennacl::traits::start1(mat1);
00280         vcl_size_t A_start2 = viennacl::traits::start2(mat1);
00281         vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
00282         vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
00283         vcl_size_t A_size1  = viennacl::traits::size1(mat1);
00284         vcl_size_t A_size2  = viennacl::traits::size2(mat1);
00285         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat1);
00286         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat1);
00287 
00288         vcl_size_t B_start1 = viennacl::traits::start1(mat2);
00289         vcl_size_t B_start2 = viennacl::traits::start2(mat2);
00290         vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
00291         vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
00292         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(mat2);
00293         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(mat2);
00294 
00295         vcl_size_t C_start1 = viennacl::traits::start1(mat3);
00296         vcl_size_t C_start2 = viennacl::traits::start2(mat3);
00297         vcl_size_t C_inc1   = viennacl::traits::stride1(mat3);
00298         vcl_size_t C_inc2   = viennacl::traits::stride2(mat3);
00299         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(mat3);
00300         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(mat3);
00301 
00302         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00303         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00304         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
00305 
00306         //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A;
00307         //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B;
00308         //typedef typename detail::majority_struct_for_orientation<typename M3::orientation_category>::type index_generator_C;
00309 
00310         if (detail::is_row_major(typename F::orientation_category()))
00311         {
00312           if (reciprocal_alpha && reciprocal_beta)
00313           {
00314 #ifdef VIENNACL_WITH_OPENMP
00315             #pragma omp parallel for
00316 #endif
00317             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00318               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00319                 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
00320           }
00321           else if (reciprocal_alpha && !reciprocal_beta)
00322           {
00323 #ifdef VIENNACL_WITH_OPENMP
00324             #pragma omp parallel for
00325 #endif
00326             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00327               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00328                 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
00329           }
00330           else if (!reciprocal_alpha && reciprocal_beta)
00331           {
00332 #ifdef VIENNACL_WITH_OPENMP
00333             #pragma omp parallel for
00334 #endif
00335             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00336               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00337                 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
00338           }
00339           else if (!reciprocal_alpha && !reciprocal_beta)
00340           {
00341 #ifdef VIENNACL_WITH_OPENMP
00342             #pragma omp parallel for
00343 #endif
00344             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00345               for (long col = 0; col < static_cast<long>(A_size2); ++col)
00346                 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
00347           }
00348         }
00349         else
00350         {
00351           if (reciprocal_alpha && reciprocal_beta)
00352           {
00353 #ifdef VIENNACL_WITH_OPENMP
00354             #pragma omp parallel for
00355 #endif
00356             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00357               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00358                 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
00359           }
00360           else if (reciprocal_alpha && !reciprocal_beta)
00361           {
00362 #ifdef VIENNACL_WITH_OPENMP
00363             #pragma omp parallel for
00364 #endif
00365             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00366               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00367                 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
00368           }
00369           else if (!reciprocal_alpha && reciprocal_beta)
00370           {
00371 #ifdef VIENNACL_WITH_OPENMP
00372             #pragma omp parallel for
00373 #endif
00374             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00375               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00376                 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
00377           }
00378           else if (!reciprocal_alpha && !reciprocal_beta)
00379           {
00380 #ifdef VIENNACL_WITH_OPENMP
00381             #pragma omp parallel for
00382 #endif
00383             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00384               for (long row = 0; row < static_cast<long>(A_size1); ++row)
00385                 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
00386           }
00387         }
00388 
00389       }
00390 
00391 
00392 
00393 
00394       template <typename NumericT, typename F>
00395       void matrix_assign(matrix_base<NumericT, F> & mat, NumericT s, bool clear = false)
00396       {
00397         typedef NumericT        value_type;
00398 
00399         value_type       * data_A = detail::extract_raw_pointer<value_type>(mat);
00400         value_type alpha = static_cast<value_type>(s);
00401 
00402         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00403         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00404         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00405         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00406         vcl_size_t A_size1  = clear ? viennacl::traits::internal_size1(mat) : viennacl::traits::size1(mat);
00407         vcl_size_t A_size2  = clear ? viennacl::traits::internal_size2(mat) : viennacl::traits::size2(mat);
00408         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00409         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00410 
00411         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00412 
00413         if (detail::is_row_major(typename F::orientation_category()))
00414         {
00415 #ifdef VIENNACL_WITH_OPENMP
00416           #pragma omp parallel for
00417 #endif
00418           for (long row = 0; row < static_cast<long>(A_size1); ++row)
00419             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00420               wrapper_A(row, col) = alpha;
00421               //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
00422               // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
00423         }
00424         else
00425         {
00426 #ifdef VIENNACL_WITH_OPENMP
00427           #pragma omp parallel for
00428 #endif
00429           for (long col = 0; col < static_cast<long>(A_size2); ++col)
00430             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00431               wrapper_A(row, col) = alpha;
00432               //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
00433               // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
00434         }
00435       }
00436 
00437 
00438 
00439       template <typename NumericT, typename F>
00440       void matrix_diagonal_assign(matrix_base<NumericT, F> & mat, NumericT s)
00441       {
00442         typedef NumericT        value_type;
00443 
00444         value_type       * data_A = detail::extract_raw_pointer<value_type>(mat);
00445         value_type alpha = static_cast<value_type>(s);
00446 
00447         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00448         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00449         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00450         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00451         vcl_size_t A_size1  = viennacl::traits::size1(mat);
00452         //vcl_size_t A_size2  = viennacl::traits::size2(mat);
00453         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00454         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00455 
00456         detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00457 
00458 #ifdef VIENNACL_WITH_OPENMP
00459         #pragma omp parallel for
00460 #endif
00461         for (long row = 0; row < static_cast<long>(A_size1); ++row)
00462           wrapper_A(row, row) = alpha;
00463       }
00464 
00465       template <typename NumericT, typename F>
00466       void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT, F> & mat)
00467       {
00468         typedef NumericT        value_type;
00469 
00470         value_type       *data_A   = detail::extract_raw_pointer<value_type>(mat);
00471         value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec);
00472 
00473         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00474         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00475         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00476         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00477         //vcl_size_t A_size1  = viennacl::traits::size1(mat);
00478         //vcl_size_t A_size2  = viennacl::traits::size2(mat);
00479         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00480         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00481 
00482         vcl_size_t v_start = viennacl::traits::start(vec);
00483         vcl_size_t v_inc   = viennacl::traits::stride(vec);
00484         vcl_size_t v_size  = viennacl::traits::size(vec);
00485 
00486         detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00487 
00488         vcl_size_t row_start = 0;
00489         vcl_size_t col_start = 0;
00490 
00491         if (k >= 0)
00492           col_start = static_cast<vcl_size_t>(k);
00493         else
00494           row_start = static_cast<vcl_size_t>(-k);
00495 
00496         matrix_assign(mat, NumericT(0));
00497 
00498         for (vcl_size_t i = 0; i < v_size; ++i)
00499           wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
00500 
00501       }
00502 
00503       template <typename NumericT, typename F>
00504       void matrix_diag_to_vector(const matrix_base<NumericT, F> & mat, int k, vector_base<NumericT> & vec)
00505       {
00506         typedef NumericT        value_type;
00507 
00508         value_type const *data_A   = detail::extract_raw_pointer<value_type>(mat);
00509         value_type       *data_vec = detail::extract_raw_pointer<value_type>(vec);
00510 
00511         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00512         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00513         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00514         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00515         //vcl_size_t A_size1  = viennacl::traits::size1(mat);
00516         //vcl_size_t A_size2  = viennacl::traits::size2(mat);
00517         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00518         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00519 
00520         vcl_size_t v_start = viennacl::traits::start(vec);
00521         vcl_size_t v_inc   = viennacl::traits::stride(vec);
00522         vcl_size_t v_size  = viennacl::traits::size(vec);
00523 
00524         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00525 
00526         vcl_size_t row_start = 0;
00527         vcl_size_t col_start = 0;
00528 
00529         if (k >= 0)
00530           col_start = static_cast<vcl_size_t>(k);
00531         else
00532           row_start = static_cast<vcl_size_t>(-k);
00533 
00534         for (vcl_size_t i = 0; i < v_size; ++i)
00535           data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
00536       }
00537 
00538       template <typename NumericT, typename F>
00539       void matrix_row(const matrix_base<NumericT, F> & mat, unsigned int i, vector_base<NumericT> & vec)
00540       {
00541         typedef NumericT        value_type;
00542 
00543         value_type const *data_A   = detail::extract_raw_pointer<value_type>(mat);
00544         value_type       *data_vec = detail::extract_raw_pointer<value_type>(vec);
00545 
00546         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00547         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00548         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00549         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00550         //vcl_size_t A_size1  = viennacl::traits::size1(mat);
00551         //vcl_size_t A_size2  = viennacl::traits::size2(mat);
00552         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00553         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00554 
00555         vcl_size_t v_start = viennacl::traits::start(vec);
00556         vcl_size_t v_inc   = viennacl::traits::stride(vec);
00557         vcl_size_t v_size  = viennacl::traits::size(vec);
00558 
00559         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00560 
00561         for (vcl_size_t j = 0; j < v_size; ++j)
00562           data_vec[v_start + j * v_inc] = wrapper_A(i, j);
00563       }
00564 
00565       template <typename NumericT, typename F>
00566       void matrix_column(const matrix_base<NumericT, F> & mat, unsigned int j, vector_base<NumericT> & vec)
00567       {
00568         typedef NumericT        value_type;
00569 
00570         value_type const *data_A   = detail::extract_raw_pointer<value_type>(mat);
00571         value_type       *data_vec = detail::extract_raw_pointer<value_type>(vec);
00572 
00573         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00574         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00575         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00576         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00577         //vcl_size_t A_size1  = viennacl::traits::size1(mat);
00578         //vcl_size_t A_size2  = viennacl::traits::size2(mat);
00579         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00580         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00581 
00582         vcl_size_t v_start = viennacl::traits::start(vec);
00583         vcl_size_t v_inc   = viennacl::traits::stride(vec);
00584         vcl_size_t v_size  = viennacl::traits::size(vec);
00585 
00586         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00587 
00588         for (vcl_size_t i = 0; i < v_size; ++i)
00589           data_vec[v_start + i * v_inc] = wrapper_A(i, j);
00590       }
00591 
00592       //
00594       //
00595 
00596       // Binary operations A = B .* C and A = B ./ C
00597 
00603       template <typename NumericT, typename F, typename OP>
00604       void element_op(matrix_base<NumericT, F> & A,
00605                       matrix_expression<const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_element_binary<OP> > const & proxy)
00606       {
00607         typedef NumericT        value_type;
00608         typedef viennacl::linalg::detail::op_applier<op_element_binary<OP> >    OpFunctor;
00609 
00610         value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
00611         value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
00612         value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());
00613 
00614         vcl_size_t A_start1 = viennacl::traits::start1(A);
00615         vcl_size_t A_start2 = viennacl::traits::start2(A);
00616         vcl_size_t A_inc1   = viennacl::traits::stride1(A);
00617         vcl_size_t A_inc2   = viennacl::traits::stride2(A);
00618         vcl_size_t A_size1  = viennacl::traits::size1(A);
00619         vcl_size_t A_size2  = viennacl::traits::size2(A);
00620         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A);
00621         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A);
00622 
00623         vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
00624         vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
00625         vcl_size_t B_inc1   = viennacl::traits::stride1(proxy.lhs());
00626         vcl_size_t B_inc2   = viennacl::traits::stride2(proxy.lhs());
00627         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(proxy.lhs());
00628         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(proxy.lhs());
00629 
00630         vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs());
00631         vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs());
00632         vcl_size_t C_inc1   = viennacl::traits::stride1(proxy.rhs());
00633         vcl_size_t C_inc2   = viennacl::traits::stride2(proxy.rhs());
00634         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(proxy.rhs());
00635         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(proxy.rhs());
00636 
00637         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00638         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00639         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
00640 
00641         if (detail::is_row_major(typename F::orientation_category()))
00642         {
00643 #ifdef VIENNACL_WITH_OPENMP
00644           #pragma omp parallel for
00645 #endif
00646           for (long row = 0; row < static_cast<long>(A_size1); ++row)
00647             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00648               OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
00649               //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
00650               // =   data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
00651               //   + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
00652         }
00653         else
00654         {
00655 #ifdef VIENNACL_WITH_OPENMP
00656           #pragma omp parallel for
00657 #endif
00658           for (long col = 0; col < static_cast<long>(A_size2); ++col)
00659             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00660               OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
00661 
00662               //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
00663               // =   data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
00664               //   + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
00665         }
00666       }
00667 
00668       // Unary operations
00669 
00670       // A = op(B)
00671       template <typename NumericT, typename F, typename OP>
00672       void element_op(matrix_base<NumericT, F> & A,
00673                       matrix_expression<const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_element_unary<OP> > const & proxy)
00674       {
00675         typedef NumericT        value_type;
00676         typedef viennacl::linalg::detail::op_applier<op_element_unary<OP> >    OpFunctor;
00677 
00678         value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
00679         value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
00680 
00681         vcl_size_t A_start1 = viennacl::traits::start1(A);
00682         vcl_size_t A_start2 = viennacl::traits::start2(A);
00683         vcl_size_t A_inc1   = viennacl::traits::stride1(A);
00684         vcl_size_t A_inc2   = viennacl::traits::stride2(A);
00685         vcl_size_t A_size1  = viennacl::traits::size1(A);
00686         vcl_size_t A_size2  = viennacl::traits::size2(A);
00687         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A);
00688         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A);
00689 
00690         vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
00691         vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
00692         vcl_size_t B_inc1   = viennacl::traits::stride1(proxy.lhs());
00693         vcl_size_t B_inc2   = viennacl::traits::stride2(proxy.lhs());
00694         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(proxy.lhs());
00695         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(proxy.lhs());
00696 
00697         detail::matrix_array_wrapper<value_type,       typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00698         detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00699 
00700         if (detail::is_row_major(typename F::orientation_category()))
00701         {
00702 #ifdef VIENNACL_WITH_OPENMP
00703           #pragma omp parallel for
00704 #endif
00705           for (long row = 0; row < static_cast<long>(A_size1); ++row)
00706             for (long col = 0; col < static_cast<long>(A_size2); ++col)
00707               OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
00708         }
00709         else
00710         {
00711 #ifdef VIENNACL_WITH_OPENMP
00712           #pragma omp parallel for
00713 #endif
00714           for (long col = 0; col < static_cast<long>(A_size2); ++col)
00715             for (long row = 0; row < static_cast<long>(A_size1); ++row)
00716               OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
00717         }
00718       }
00719 
00720 
00721 
00722       //
00724       //
00725 
00726       // A * x
00727 
00736       template <typename NumericT, typename F>
00737       void prod_impl(const matrix_base<NumericT, F> & mat,
00738                      const vector_base<NumericT> & vec,
00739                            vector_base<NumericT> & result)
00740       {
00741         typedef NumericT        value_type;
00742 
00743         value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
00744         value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
00745         value_type       * data_result = detail::extract_raw_pointer<value_type>(result);
00746 
00747         vcl_size_t A_start1 = viennacl::traits::start1(mat);
00748         vcl_size_t A_start2 = viennacl::traits::start2(mat);
00749         vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
00750         vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
00751         vcl_size_t A_size1  = viennacl::traits::size1(mat);
00752         vcl_size_t A_size2  = viennacl::traits::size2(mat);
00753         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat);
00754         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat);
00755 
00756         vcl_size_t start1 = viennacl::traits::start(vec);
00757         vcl_size_t inc1   = viennacl::traits::stride(vec);
00758 
00759         vcl_size_t start2 = viennacl::traits::start(result);
00760         vcl_size_t inc2   = viennacl::traits::stride(result);
00761 
00762         if (detail::is_row_major(typename F::orientation_category()))
00763         {
00764 #ifdef VIENNACL_WITH_OPENMP
00765           #pragma omp parallel for
00766 #endif
00767           for (long row = 0; row < static_cast<long>(A_size1); ++row)
00768           {
00769             value_type temp = 0;
00770             for (vcl_size_t col = 0; col < A_size2; ++col)
00771               temp += data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
00772 
00773             data_result[row * inc2 + start2] = temp;
00774           }
00775         }
00776         else
00777         {
00778           {
00779             value_type temp = data_x[start1];
00780             for (vcl_size_t row = 0; row < A_size1; ++row)
00781               data_result[row * inc2 + start2] = data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, A_start2, A_internal_size1, A_internal_size2)] * temp;
00782           }
00783           for (vcl_size_t col = 1; col < A_size2; ++col)  //run through matrix sequentially
00784           {
00785             value_type temp = data_x[col * inc1 + start1];
00786             for (vcl_size_t row = 0; row < A_size1; ++row)
00787               data_result[row * inc2 + start2] += data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
00788           }
00789         }
00790       }
00791 
00792 
00793       // trans(A) * x
00794 
00803       template <typename NumericT, typename F>
00804       void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans> & mat_trans,
00805                      const vector_base<NumericT> & vec,
00806                            vector_base<NumericT> & result)
00807       {
00808         typedef NumericT        value_type;
00809 
00810         value_type const * data_A = detail::extract_raw_pointer<value_type>(mat_trans.lhs());
00811         value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
00812         value_type       * data_result = detail::extract_raw_pointer<value_type>(result);
00813 
00814         vcl_size_t A_start1 = viennacl::traits::start1(mat_trans.lhs());
00815         vcl_size_t A_start2 = viennacl::traits::start2(mat_trans.lhs());
00816         vcl_size_t A_inc1   = viennacl::traits::stride1(mat_trans.lhs());
00817         vcl_size_t A_inc2   = viennacl::traits::stride2(mat_trans.lhs());
00818         vcl_size_t A_size1  = viennacl::traits::size1(mat_trans.lhs());
00819         vcl_size_t A_size2  = viennacl::traits::size2(mat_trans.lhs());
00820         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat_trans.lhs());
00821         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat_trans.lhs());
00822 
00823         vcl_size_t start1 = viennacl::traits::start(vec);
00824         vcl_size_t inc1   = viennacl::traits::stride(vec);
00825 
00826         vcl_size_t start2 = viennacl::traits::start(result);
00827         vcl_size_t inc2   = viennacl::traits::stride(result);
00828 
00829         if (detail::is_row_major(typename F::orientation_category()))
00830         {
00831           {
00832             value_type temp = data_x[start1];
00833             for (vcl_size_t row = 0; row < A_size2; ++row)
00834               data_result[row * inc2 + start2] = data_A[viennacl::row_major::mem_index(A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
00835           }
00836 
00837           for (vcl_size_t col = 1; col < A_size1; ++col)  //run through matrix sequentially
00838           {
00839             value_type temp = data_x[col * inc1 + start1];
00840             for (vcl_size_t row = 0; row < A_size2; ++row)
00841             {
00842               data_result[row * inc2 + start2] += data_A[viennacl::row_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
00843             }
00844           }
00845         }
00846         else
00847         {
00848 #ifdef VIENNACL_WITH_OPENMP
00849           #pragma omp parallel for
00850 #endif
00851           for (long row = 0; row < static_cast<long>(A_size2); ++row)
00852           {
00853             value_type temp = 0;
00854             for (vcl_size_t col = 0; col < A_size1; ++col)
00855               temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
00856 
00857             data_result[row * inc2 + start2] = temp;
00858           }
00859         }
00860       }
00861 
00862 
00863       //
00865       //
00866 
00867       namespace detail
00868       {
00869         template <typename A, typename B, typename C, typename NumericT>
00870         void prod(A & a, B & b, C & c,
00871                   vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
00872                   NumericT alpha, NumericT beta)
00873         {
00874 #ifdef VIENNACL_WITH_OPENMP
00875           #pragma omp parallel for
00876 #endif
00877           for (long i=0; i<static_cast<long>(C_size1); ++i)
00878           {
00879             for (vcl_size_t j=0; j<C_size2; ++j)
00880             {
00881               NumericT temp = 0;
00882               for (vcl_size_t k=0; k<A_size2; ++k)
00883                 temp += a(i, k) * b(k, j);
00884 
00885               temp *= alpha;
00886               if (beta != 0)
00887                 temp += beta * c(i,j);
00888               c(i,j) = temp;
00889             }
00890           }
00891         }
00892 
00893       }
00894 
00900       template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
00901       void prod_impl(const matrix_base<NumericT, F1> & A,
00902                      const matrix_base<NumericT, F2> & B,
00903                            matrix_base<NumericT, F3> & C,
00904                      ScalarType alpha,
00905                      ScalarType beta)
00906       {
00907         typedef NumericT        value_type;
00908 
00909         value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
00910         value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
00911         value_type       * data_C = detail::extract_raw_pointer<value_type>(C);
00912 
00913         vcl_size_t A_start1 = viennacl::traits::start1(A);
00914         vcl_size_t A_start2 = viennacl::traits::start2(A);
00915         vcl_size_t A_inc1   = viennacl::traits::stride1(A);
00916         vcl_size_t A_inc2   = viennacl::traits::stride2(A);
00917         vcl_size_t A_size2  = viennacl::traits::size2(A);
00918         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A);
00919         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A);
00920 
00921         vcl_size_t B_start1 = viennacl::traits::start1(B);
00922         vcl_size_t B_start2 = viennacl::traits::start2(B);
00923         vcl_size_t B_inc1   = viennacl::traits::stride1(B);
00924         vcl_size_t B_inc2   = viennacl::traits::stride2(B);
00925         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(B);
00926         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(B);
00927 
00928         vcl_size_t C_start1 = viennacl::traits::start1(C);
00929         vcl_size_t C_start2 = viennacl::traits::start2(C);
00930         vcl_size_t C_inc1   = viennacl::traits::stride1(C);
00931         vcl_size_t C_inc2   = viennacl::traits::stride2(C);
00932         vcl_size_t C_size1  = viennacl::traits::size1(C);
00933         vcl_size_t C_size2  = viennacl::traits::size2(C);
00934         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(C);
00935         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(C);
00936 
00937         detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false>   wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00938         detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false>   wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00939         detail::matrix_array_wrapper<value_type,       typename F3::orientation_category, false>   wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
00940 
00941         detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
00942       }
00943 
00944 
00945 
00951       template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
00952       void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F1>,
00953                                                         const matrix_base<NumericT, F1>,
00954                                                         op_trans> & A,
00955                      const matrix_base<NumericT, F2> & B,
00956                            matrix_base<NumericT, F3> & C,
00957                      ScalarType alpha,
00958                      ScalarType beta)
00959       {
00960         typedef NumericT        value_type;
00961 
00962         value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs());
00963         value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
00964         value_type       * data_C = detail::extract_raw_pointer<value_type>(C);
00965 
00966         vcl_size_t A_start1 = viennacl::traits::start1(A.lhs());
00967         vcl_size_t A_start2 = viennacl::traits::start2(A.lhs());
00968         vcl_size_t A_inc1   = viennacl::traits::stride1(A.lhs());
00969         vcl_size_t A_inc2   = viennacl::traits::stride2(A.lhs());
00970         vcl_size_t A_size1  = viennacl::traits::size1(A.lhs());
00971         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A.lhs());
00972         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A.lhs());
00973 
00974         vcl_size_t B_start1 = viennacl::traits::start1(B);
00975         vcl_size_t B_start2 = viennacl::traits::start2(B);
00976         vcl_size_t B_inc1   = viennacl::traits::stride1(B);
00977         vcl_size_t B_inc2   = viennacl::traits::stride2(B);
00978         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(B);
00979         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(B);
00980 
00981         vcl_size_t C_start1 = viennacl::traits::start1(C);
00982         vcl_size_t C_start2 = viennacl::traits::start2(C);
00983         vcl_size_t C_inc1   = viennacl::traits::stride1(C);
00984         vcl_size_t C_inc2   = viennacl::traits::stride2(C);
00985         vcl_size_t C_size1  = viennacl::traits::size1(C);
00986         vcl_size_t C_size2  = viennacl::traits::size2(C);
00987         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(C);
00988         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(C);
00989 
00990         detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true>    wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
00991         detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false>   wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
00992         detail::matrix_array_wrapper<value_type,       typename F3::orientation_category, false>   wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
00993 
00994         detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
00995       }
00996 
00997 
00998 
00999 
01005       template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
01006       void prod_impl(const matrix_base<NumericT, F1> & A,
01007                      const viennacl::matrix_expression< const matrix_base<NumericT, F2>, const matrix_base<NumericT, F2>, op_trans> & B,
01008                            matrix_base<NumericT, F3> & C,
01009                      ScalarType alpha,
01010                      ScalarType beta)
01011       {
01012         typedef NumericT        value_type;
01013 
01014         value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
01015         value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs());
01016         value_type       * data_C = detail::extract_raw_pointer<value_type>(C);
01017 
01018         vcl_size_t A_start1 = viennacl::traits::start1(A);
01019         vcl_size_t A_start2 = viennacl::traits::start2(A);
01020         vcl_size_t A_inc1   = viennacl::traits::stride1(A);
01021         vcl_size_t A_inc2   = viennacl::traits::stride2(A);
01022         vcl_size_t A_size2  = viennacl::traits::size2(A);
01023         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A);
01024         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A);
01025 
01026         vcl_size_t B_start1 = viennacl::traits::start1(B.lhs());
01027         vcl_size_t B_start2 = viennacl::traits::start2(B.lhs());
01028         vcl_size_t B_inc1   = viennacl::traits::stride1(B.lhs());
01029         vcl_size_t B_inc2   = viennacl::traits::stride2(B.lhs());
01030         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(B.lhs());
01031         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(B.lhs());
01032 
01033         vcl_size_t C_start1 = viennacl::traits::start1(C);
01034         vcl_size_t C_start2 = viennacl::traits::start2(C);
01035         vcl_size_t C_inc1   = viennacl::traits::stride1(C);
01036         vcl_size_t C_inc2   = viennacl::traits::stride2(C);
01037         vcl_size_t C_size1  = viennacl::traits::size1(C);
01038         vcl_size_t C_size2  = viennacl::traits::size2(C);
01039         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(C);
01040         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(C);
01041 
01042         detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false>   wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
01043         detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true>    wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
01044         detail::matrix_array_wrapper<value_type,       typename F3::orientation_category, false>   wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
01045 
01046         detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
01047       }
01048 
01049 
01050 
01056       template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
01057       void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F1>, const matrix_base<NumericT, F1>, op_trans> & A,
01058                      const viennacl::matrix_expression< const matrix_base<NumericT, F2>, const matrix_base<NumericT, F2>, op_trans> & B,
01059                      matrix_base<NumericT, F3> & C,
01060                      ScalarType alpha,
01061                      ScalarType beta)
01062       {
01063         typedef NumericT        value_type;
01064 
01065         value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs());
01066         value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs());
01067         value_type       * data_C = detail::extract_raw_pointer<value_type>(C);
01068 
01069         vcl_size_t A_start1 = viennacl::traits::start1(A.lhs());
01070         vcl_size_t A_start2 = viennacl::traits::start2(A.lhs());
01071         vcl_size_t A_inc1   = viennacl::traits::stride1(A.lhs());
01072         vcl_size_t A_inc2   = viennacl::traits::stride2(A.lhs());
01073         vcl_size_t A_size1  = viennacl::traits::size1(A.lhs());
01074         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(A.lhs());
01075         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(A.lhs());
01076 
01077         vcl_size_t B_start1 = viennacl::traits::start1(B.lhs());
01078         vcl_size_t B_start2 = viennacl::traits::start2(B.lhs());
01079         vcl_size_t B_inc1   = viennacl::traits::stride1(B.lhs());
01080         vcl_size_t B_inc2   = viennacl::traits::stride2(B.lhs());
01081         vcl_size_t B_internal_size1  = viennacl::traits::internal_size1(B.lhs());
01082         vcl_size_t B_internal_size2  = viennacl::traits::internal_size2(B.lhs());
01083 
01084         vcl_size_t C_start1 = viennacl::traits::start1(C);
01085         vcl_size_t C_start2 = viennacl::traits::start2(C);
01086         vcl_size_t C_inc1   = viennacl::traits::stride1(C);
01087         vcl_size_t C_inc2   = viennacl::traits::stride2(C);
01088         vcl_size_t C_size1  = viennacl::traits::size1(C);
01089         vcl_size_t C_size2  = viennacl::traits::size2(C);
01090         vcl_size_t C_internal_size1  = viennacl::traits::internal_size1(C);
01091         vcl_size_t C_internal_size2  = viennacl::traits::internal_size2(C);
01092 
01093         detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true>    wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
01094         detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true>    wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
01095         detail::matrix_array_wrapper<value_type,       typename F3::orientation_category, false>   wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
01096 
01097         detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
01098       }
01099 
01100 
01101 
01102 
01103       //
01105       //
01106 
01107 
01119       template <typename NumericT, typename F, typename S1>
01120       void scaled_rank_1_update(matrix_base<NumericT, F> & mat1,
01121                                 S1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
01122                                 const vector_base<NumericT> & vec1,
01123                                 const vector_base<NumericT> & vec2)
01124       {
01125         typedef NumericT        value_type;
01126 
01127         value_type       * data_A  = detail::extract_raw_pointer<value_type>(mat1);
01128         value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
01129         value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
01130 
01131         vcl_size_t A_start1 = viennacl::traits::start1(mat1);
01132         vcl_size_t A_start2 = viennacl::traits::start2(mat1);
01133         vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
01134         vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
01135         vcl_size_t A_size1  = viennacl::traits::size1(mat1);
01136         vcl_size_t A_size2  = viennacl::traits::size2(mat1);
01137         vcl_size_t A_internal_size1  = viennacl::traits::internal_size1(mat1);
01138         vcl_size_t A_internal_size2  = viennacl::traits::internal_size2(mat1);
01139 
01140         vcl_size_t start1 = viennacl::traits::start(vec1);
01141         vcl_size_t inc1   = viennacl::traits::stride(vec1);
01142 
01143         vcl_size_t start2 = viennacl::traits::start(vec2);
01144         vcl_size_t inc2   = viennacl::traits::stride(vec2);
01145 
01146         value_type data_alpha = alpha;
01147         if (flip_sign_alpha)
01148           data_alpha = -data_alpha;
01149         if (reciprocal_alpha)
01150           data_alpha = static_cast<value_type>(1) / data_alpha;
01151 
01152         if (detail::is_row_major(typename F::orientation_category()))
01153         {
01154           for (vcl_size_t row = 0; row < A_size1; ++row)
01155           {
01156             value_type value_v1 = data_alpha * data_v1[row * inc1 + start1];
01157             for (vcl_size_t col = 0; col < A_size2; ++col)
01158               data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
01159           }
01160         }
01161         else
01162         {
01163           for (vcl_size_t col = 0; col < A_size2; ++col)  //run through matrix sequentially
01164           {
01165             value_type value_v2 = data_alpha * data_v2[col * inc2 + start2];
01166             for (vcl_size_t row = 0; row < A_size1; ++row)
01167               data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
01168           }
01169         }
01170       }
01171 
01172     } // namespace host_based
01173   } //namespace linalg
01174 } //namespace viennacl
01175 
01176 
01177 #endif