ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/cuda/scalar_operations.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
00002 #define VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include "viennacl/forwards.h"
00026 #include "viennacl/tools/tools.hpp"
00027 #include "viennacl/meta/predicate.hpp"
00028 #include "viennacl/meta/enable_if.hpp"
00029 #include "viennacl/traits/size.hpp"
00030 #include "viennacl/traits/start.hpp"
00031 #include "viennacl/traits/stride.hpp"
00032 #include "viennacl/linalg/cuda/common.hpp"
00033 
00034 // includes CUDA
00035 #include <cuda_runtime.h>
00036 
00037 
00038 namespace viennacl
00039 {
00040   namespace linalg
00041   {
00042     namespace cuda
00043     {
00044 
00045       namespace detail
00046       {
00047 
00048       }
00049 
00051 
/** @brief Scalar scaling kernel: *s1 = *s2 * alpha, where alpha is read through a pointer.
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      Pointer to the scaling factor alpha (dereferenced inside the kernel)
* @param options2  Bit 0 set: alpha is negated. Bit 1 set: alpha is replaced by 1/alpha. Negation is applied first.
* @param s2        Source scalar (read)
*/
template <typename T>
__global__ void as_kernel(T * s1, const T * fac2, unsigned int options2, const T * s2)
{
  const unsigned int negate_bit     = 1u << 0;
  const unsigned int reciprocal_bit = 1u << 1;

  T factor = *fac2;
  if ((options2 & negate_bit) != 0)
  {
    factor = -factor;
  }
  if ((options2 & reciprocal_bit) != 0)
  {
    factor = static_cast<T>(1) / factor;
  }

  *s1 = *s2 * factor;
}
00063 
/** @brief Scalar scaling kernel: *s1 = *s2 * alpha, where alpha is passed by value as a kernel argument.
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      The scaling factor alpha
* @param options2  Bit 0 set: alpha is negated. Bit 1 set: alpha is replaced by 1/alpha. Negation is applied first.
* @param s2        Source scalar (read)
*/
template <typename T>
__global__ void as_kernel(T * s1, T fac2, unsigned int options2, const T * s2)
{
  const bool negate     = (options2 & 1u) != 0;   // bit 0
  const bool reciprocal = (options2 & 2u) != 0;   // bit 1

  T factor = fac2;
  if (negate)
    factor = -factor;
  if (reciprocal)
    factor = static_cast<T>(1) / factor;

  *s1 = *s2 * factor;
}
00075 
/** @brief Host entry point for s1 = s2 * alpha (alpha optionally negated and/or inverted); launches as_kernel with a single thread.
*
* @param s1                Result scalar
* @param s2                Input scalar
* @param alpha             Scaling factor; any type accepted by viennacl::is_any_scalar
* @param len_alpha         Forwarded to detail::make_options (its meaning is defined there, not in this file)
* @param reciprocal_alpha  Forwarded to detail::make_options; presumably requests 1/alpha (kernel option bit 1)
* @param flip_sign_alpha   Forwarded to detail::make_options; presumably requests -alpha (kernel option bit 0)
*/
template <typename S1,
          typename S2, typename ScalarType1>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                            >::type
as(S1 & s1,
   S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type   cpu_type;

  // Host-side copy of alpha; only filled in when alpha is a CPU scalar.
  // detail::arg_reference() receives both alpha and this copy and hands one of them to the kernel.
  cpu_type host_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
  {
    host_alpha = alpha;
  }

  const unsigned int alpha_flags = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);

  // A single thread is enough: the kernel operates on one scalar value.
  as_kernel<<<1, 1>>>(detail::cuda_arg<cpu_type>(s1),
                      detail::cuda_arg<cpu_type>(detail::arg_reference(alpha, host_alpha)),
                      alpha_flags,
                      detail::cuda_arg<cpu_type>(s2));
  VIENNACL_CUDA_LAST_ERROR_CHECK("as_kernel");
}
00099 
00101 
/** @brief Kernel for *s1 = *s2 * alpha + *s3 * beta, with both alpha and beta read through pointers (factors on the GPU).
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      Pointer to alpha
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      Pointer to beta
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            const T * fac2, unsigned int options2, const T * s2,
                            const T * fac3, unsigned int options3, const T * s3)
{
  const unsigned int negate_bit     = 1u << 0;
  const unsigned int reciprocal_bit = 1u << 1;

  T scale2 = *fac2;
  if ((options2 & negate_bit) != 0)
    scale2 = -scale2;
  if ((options2 & reciprocal_bit) != 0)
    scale2 = static_cast<T>(1) / scale2;

  T scale3 = *fac3;
  if ((options3 & negate_bit) != 0)
    scale3 = -scale3;
  if ((options3 & reciprocal_bit) != 0)
    scale3 = static_cast<T>(1) / scale3;

  *s1 = *s2 * scale2 + *s3 * scale3;
}
00122 
/** @brief Kernel for *s1 = *s2 * alpha + *s3 * beta, with alpha passed by value (CPU factor) and beta read through a pointer (GPU factor).
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      alpha, passed by value
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      Pointer to beta
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            T fac2, unsigned int options2, const T * s2,
                            const T * fac3, unsigned int options3, const T * s3)
{
  const bool negate2     = (options2 & 1u) != 0;   // bit 0
  const bool reciprocal2 = (options2 & 2u) != 0;   // bit 1
  T scale2 = fac2;
  if (negate2)
    scale2 = -scale2;
  if (reciprocal2)
    scale2 = static_cast<T>(1) / scale2;

  const bool negate3     = (options3 & 1u) != 0;
  const bool reciprocal3 = (options3 & 2u) != 0;
  T scale3 = *fac3;
  if (negate3)
    scale3 = -scale3;
  if (reciprocal3)
    scale3 = static_cast<T>(1) / scale3;

  *s1 = *s2 * scale2 + *s3 * scale3;
}
00143 
/** @brief Kernel for *s1 = *s2 * alpha + *s3 * beta, with alpha read through a pointer (GPU factor) and beta passed by value (CPU factor).
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      Pointer to alpha
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      beta, passed by value
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            const T * fac2, unsigned int options2, const T * s2,
                            T fac3, unsigned int options3, const T * s3)
{
  const unsigned int negate_bit     = 1u << 0;
  const unsigned int reciprocal_bit = 1u << 1;

  T scale2 = *fac2;
  if ((options2 & negate_bit) != 0)
  {
    scale2 = -scale2;
  }
  if ((options2 & reciprocal_bit) != 0)
  {
    scale2 = static_cast<T>(1) / scale2;
  }

  T scale3 = fac3;
  if ((options3 & negate_bit) != 0)
  {
    scale3 = -scale3;
  }
  if ((options3 & reciprocal_bit) != 0)
  {
    scale3 = static_cast<T>(1) / scale3;
  }

  *s1 = *s2 * scale2 + *s3 * scale3;
}
00164 
/** @brief Kernel for *s1 = *s2 * alpha + *s3 * beta, with both alpha and beta passed by value (factors on the CPU).
*
* The body contains no thread indexing, so every launched thread performs the same write.
*
* @param s1        Destination scalar (written)
* @param fac2      alpha, passed by value
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      beta, passed by value
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            T fac2, unsigned int options2, const T * s2,
                            T fac3, unsigned int options3, const T * s3)
{
  const T one = static_cast<T>(1);

  // Option bit 0 requests a sign flip, option bit 1 a reciprocal.
  T scale2 = fac2;
  if ((options2 & 1u) != 0)
    scale2 = -scale2;
  if ((options2 & 2u) != 0)
    scale2 = one / scale2;

  T scale3 = fac3;
  if ((options3 & 1u) != 0)
    scale3 = -scale3;
  if ((options3 & 2u) != 0)
    scale3 = one / scale3;

  *s1 = *s2 * scale2 + *s3 * scale3;
}
00185 
00186 
/** @brief Host entry point for s1 = s2 * alpha + s3 * beta (each factor optionally negated and/or inverted); launches asbs_kernel with a single thread.
*
* Which asbs_kernel overload is launched (factor by pointer or by value) follows from the argument
* types produced by detail::cuda_arg / detail::arg_reference.
*
* @param s1                Result scalar
* @param s2                First input scalar
* @param alpha             Factor applied to s2; any type accepted by viennacl::is_any_scalar
* @param len_alpha         Forwarded to detail::make_options (its meaning is defined there, not in this file)
* @param reciprocal_alpha  Forwarded to detail::make_options; presumably requests 1/alpha (kernel option bit 1)
* @param flip_sign_alpha   Forwarded to detail::make_options; presumably requests -alpha (kernel option bit 0)
* @param s3                Second input scalar
* @param beta              Factor applied to s3; any type accepted by viennacl::is_any_scalar
* @param len_beta          Counterpart of len_alpha for beta
* @param reciprocal_beta   Counterpart of reciprocal_alpha for beta
* @param flip_sign_beta    Counterpart of flip_sign_alpha for beta
*/
template <typename S1,
          typename S2, typename ScalarType1,
          typename S3, typename ScalarType2>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_scalar<S3>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                              && viennacl::is_any_scalar<ScalarType2>::value
                            >::type
asbs(S1 & s1,
     S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
     S3 const & s3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type   cpu_type;

  // Host-side copies of the factors; only filled in when the respective factor is a CPU scalar.
  // detail::arg_reference() receives both the factor and its copy and hands one of them to the kernel.
  cpu_type host_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
  {
    host_alpha = alpha;
  }

  cpu_type host_beta = 0;
  if (viennacl::is_cpu_scalar<ScalarType2>::value)
  {
    host_beta = beta;
  }

  const unsigned int alpha_flags = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
  const unsigned int beta_flags  = detail::make_options(len_beta,  reciprocal_beta,  flip_sign_beta);

  // A single thread is enough: the kernel operates on one scalar value.
  asbs_kernel<<<1, 1>>>(detail::cuda_arg<cpu_type>(s1),
                        detail::cuda_arg<cpu_type>(detail::arg_reference(alpha, host_alpha)),
                        alpha_flags,
                        detail::cuda_arg<cpu_type>(s2),
                        detail::cuda_arg<cpu_type>(detail::arg_reference(beta, host_beta)),
                        beta_flags,
                        detail::cuda_arg<cpu_type>(s3) );
  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_kernel");
}
00222 
00224 
/** @brief Accumulating kernel: *s1 += *s2 * alpha + *s3 * beta, with both alpha and beta read through pointers (factors on the GPU).
*
* The body contains no thread indexing, so every launched thread performs the same read-modify-write on *s1.
*
* @param s1        Scalar that is read and updated in place
* @param fac2      Pointer to alpha
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      Pointer to beta
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              const T * fac2, unsigned int options2, const T * s2,
                              const T * fac3, unsigned int options3, const T * s3)
{
  const unsigned int negate_bit     = 1u << 0;
  const unsigned int reciprocal_bit = 1u << 1;

  T scale2 = *fac2;
  if ((options2 & negate_bit) != 0)
    scale2 = -scale2;
  if ((options2 & reciprocal_bit) != 0)
    scale2 = static_cast<T>(1) / scale2;

  T scale3 = *fac3;
  if ((options3 & negate_bit) != 0)
    scale3 = -scale3;
  if ((options3 & reciprocal_bit) != 0)
    scale3 = static_cast<T>(1) / scale3;

  *s1 += *s2 * scale2 + *s3 * scale3;
}
00245 
/** @brief Accumulating kernel: *s1 += *s2 * alpha + *s3 * beta, with alpha passed by value (CPU factor) and beta read through a pointer (GPU factor).
*
* The body contains no thread indexing, so every launched thread performs the same read-modify-write on *s1.
*
* @param s1        Scalar that is read and updated in place
* @param fac2      alpha, passed by value
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      Pointer to beta
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              T fac2, unsigned int options2, const T * s2,
                              const T * fac3, unsigned int options3, const T * s3)
{
  const bool negate2     = (options2 & 1u) != 0;   // bit 0
  const bool reciprocal2 = (options2 & 2u) != 0;   // bit 1
  T scale2 = fac2;
  if (negate2)
    scale2 = -scale2;
  if (reciprocal2)
    scale2 = static_cast<T>(1) / scale2;

  const bool negate3     = (options3 & 1u) != 0;
  const bool reciprocal3 = (options3 & 2u) != 0;
  T scale3 = *fac3;
  if (negate3)
    scale3 = -scale3;
  if (reciprocal3)
    scale3 = static_cast<T>(1) / scale3;

  *s1 += *s2 * scale2 + *s3 * scale3;
}
00266 
/** @brief Accumulating kernel: *s1 += *s2 * alpha + *s3 * beta, with alpha read through a pointer (GPU factor) and beta passed by value (CPU factor).
*
* The body contains no thread indexing, so every launched thread performs the same read-modify-write on *s1.
*
* @param s1        Scalar that is read and updated in place
* @param fac2      Pointer to alpha
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      beta, passed by value
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              const T * fac2, unsigned int options2, const T * s2,
                              T fac3, unsigned int options3, const T * s3)
{
  const unsigned int negate_bit     = 1u << 0;
  const unsigned int reciprocal_bit = 1u << 1;

  T scale2 = *fac2;
  if ((options2 & negate_bit) != 0)
  {
    scale2 = -scale2;
  }
  if ((options2 & reciprocal_bit) != 0)
  {
    scale2 = static_cast<T>(1) / scale2;
  }

  T scale3 = fac3;
  if ((options3 & negate_bit) != 0)
  {
    scale3 = -scale3;
  }
  if ((options3 & reciprocal_bit) != 0)
  {
    scale3 = static_cast<T>(1) / scale3;
  }

  *s1 += *s2 * scale2 + *s3 * scale3;
}
00287 
/** @brief Accumulating kernel: *s1 += *s2 * alpha + *s3 * beta, with both alpha and beta passed by value (factors on the CPU).
*
* The body contains no thread indexing, so every launched thread performs the same read-modify-write on *s1.
*
* @param s1        Scalar that is read and updated in place
* @param fac2      alpha, passed by value
* @param options2  Bit 0 set: negate alpha. Bit 1 set: use 1/alpha. Negation is applied first.
* @param s2        First source scalar
* @param fac3      beta, passed by value
* @param options3  Same encoding as options2, applied to beta
* @param s3        Second source scalar
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              T fac2, unsigned int options2, const T * s2,
                              T fac3, unsigned int options3, const T * s3)
{
  const T one = static_cast<T>(1);

  // Option bit 0 requests a sign flip, option bit 1 a reciprocal.
  T scale2 = fac2;
  if ((options2 & 1u) != 0)
    scale2 = -scale2;
  if ((options2 & 2u) != 0)
    scale2 = one / scale2;

  T scale3 = fac3;
  if ((options3 & 1u) != 0)
    scale3 = -scale3;
  if ((options3 & 2u) != 0)
    scale3 = one / scale3;

  *s1 += *s2 * scale2 + *s3 * scale3;
}
00308 
00309 
/** @brief Host entry point for s1 += s2 * alpha + s3 * beta (each factor optionally negated and/or inverted); launches asbs_s_kernel with a single thread.
*
* Which asbs_s_kernel overload is launched (factor by pointer or by value) follows from the argument
* types produced by detail::cuda_arg / detail::arg_reference.
*
* @param s1                Scalar that is updated in place
* @param s2                First input scalar
* @param alpha             Factor applied to s2; any type accepted by viennacl::is_any_scalar
* @param len_alpha         Forwarded to detail::make_options (its meaning is defined there, not in this file)
* @param reciprocal_alpha  Forwarded to detail::make_options; presumably requests 1/alpha (kernel option bit 1)
* @param flip_sign_alpha   Forwarded to detail::make_options; presumably requests -alpha (kernel option bit 0)
* @param s3                Second input scalar
* @param beta              Factor applied to s3; any type accepted by viennacl::is_any_scalar
* @param len_beta          Counterpart of len_alpha for beta
* @param reciprocal_beta   Counterpart of reciprocal_alpha for beta
* @param flip_sign_beta    Counterpart of flip_sign_alpha for beta
*/
template <typename S1,
          typename S2, typename ScalarType1,
          typename S3, typename ScalarType2>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_scalar<S3>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                              && viennacl::is_any_scalar<ScalarType2>::value
                            >::type
asbs_s(S1 & s1,
       S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
       S3 const & s3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
  unsigned int options_beta  = detail::make_options(len_beta,  reciprocal_beta,  flip_sign_beta);

  // Host-side copies of the factors; only filled in when the respective factor is a CPU scalar.
  // detail::arg_reference() receives both the factor and its copy and hands one of them to the kernel.
  value_type temporary_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
    temporary_alpha = alpha;

  value_type temporary_beta = 0;
  if (viennacl::is_cpu_scalar<ScalarType2>::value)
    temporary_beta = beta;

  // No console output here: a library routine must not write to stdout on every call.
  // A single thread is enough: the kernel operates on one scalar value.
  asbs_s_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
                          detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
                          options_alpha,
                          detail::cuda_arg<value_type>(s2),
                          detail::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
                          options_beta,
                          detail::cuda_arg<value_type>(s3) );
  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_s_kernel");
}
00346 
00348 
/** @brief Exchanges the values stored at s1 and s2.
*
* The body contains no thread indexing, so every launched thread performs the same exchange.
*
* @param s1  First scalar (read, then overwritten with the old value of *s2)
* @param s2  Second scalar (read, then overwritten with the old value of *s1)
*/
template <typename T>
__global__ void scalar_swap_kernel(T * s1, T * s2)
{
  const T old_first = *s1;
  *s1 = *s2;
  *s2 = old_first;
}
00356 
/** @brief Swaps the values of two scalars by launching scalar_swap_kernel with a single thread.
*
* @param s1  First scalar
* @param s2  Second scalar
*/
template <typename S1, typename S2>
typename viennacl::enable_if<    viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                            >::type
swap(S1 & s1, S2 & s2)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  // A single thread is enough: the kernel exchanges exactly two values.
  scalar_swap_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),detail::cuda_arg<value_type>(s2));
  // Kernel launches do not return an error code, so check explicitly,
  // in the same way as(), asbs() and asbs_s() do after their launches.
  VIENNACL_CUDA_LAST_ERROR_CHECK("scalar_swap_kernel");
}
00372 
00373 
00374 
00375     } //namespace cuda
00376   } //namespace linalg
00377 } //namespace viennacl
00378 
00379 
00380 #endif