ViennaCL - The Vienna Computing Library  1.5.1
Namespaces | Defines | Functions
viennacl/linalg/cuda/vector_operations.hpp File Reference

Implementations of vector operations using CUDA. More...

#include <cmath>
#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"

Go to the source code of this file.

Namespaces

namespace  viennacl
 

Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.


namespace  viennacl::linalg
 

Provides all linear algebra operations which are not covered by operator overloads.


namespace  viennacl::linalg::cuda
 

Holds all CUDA compute kernels used by ViennaCL.


namespace  viennacl::linalg::cuda::detail
 

Helper functions for the CUDA linear algebra backend.


Defines

#define VIENNACL_MDOT_WORKGROUP_SIZE   128
#define VIENNACL_MDOT_WORKGROUP_NUM   128

Functions

template<typename T >
__global__ void av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
__global__ void av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2)
template<typename T , typename ScalarType1 >
void av (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T , typename ScalarType1 , typename ScalarType2 >
void avbv (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
template<typename T , typename ScalarType1 , typename ScalarType2 >
void avbv_v (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
template<typename T >
__global__ void vector_assign_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, T alpha)
template<typename T , typename S1 >
void vector_assign (vector_base< T > &vec1, const S1 &alpha, bool up_to_internal_size=false)
 Assigns a constant value to a vector (or to a vector range or slice).
template<typename T >
__global__ void vector_swap_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void vector_swap (vector_base< T > &vec1, vector_base< T > &vec2)
 Swaps the contents of two vectors; the data is copied.
template<typename T >
__global__ void element_op_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
template<typename T >
__global__ void element_op_int_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
template<typename T , typename OP >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const &proxy)
 Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)
template<typename OP >
void element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const &proxy)
template<typename OP >
void element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const &proxy)
template<typename T >
__global__ void vec_element_acos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const &proxy)
template<typename T >
__global__ void vec_element_asin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const &proxy)
template<typename T >
__global__ void vec_element_atan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const &proxy)
template<typename T >
__global__ void vec_element_ceil_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const &proxy)
template<typename T >
__global__ void vec_element_cos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const &proxy)
template<typename T >
__global__ void vec_element_cosh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const &proxy)
template<typename T >
__global__ void vec_element_exp_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const &proxy)
template<typename T >
__global__ void vec_element_fabs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const &proxy)
template<typename T >
__global__ void vec_element_abs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const &proxy)
template<typename T >
__global__ void vec_element_floor_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const &proxy)
template<typename T >
__global__ void vec_element_log_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const &proxy)
template<typename T >
__global__ void vec_element_log10_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const &proxy)
template<typename T >
__global__ void vec_element_sin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const &proxy)
template<typename T >
__global__ void vec_element_sinh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const &proxy)
template<typename T >
__global__ void vec_element_sqrt_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const &proxy)
template<typename T >
__global__ void vec_element_tan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const &proxy)
template<typename T >
__global__ void vec_element_tanh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const &proxy)
template<typename T >
__global__ void inner_prod_kernel (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T *group_buffer)
template<typename T >
__global__ void vector_sum_kernel_floats (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
template<typename T >
__global__ void vector_sum_kernel_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
template<typename T >
__global__ void vector_sum_kernel_unsigned_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
template<typename T , typename S3 >
void inner_prod_impl (vector_base< T > const &vec1, vector_base< T > const &vec2, S3 &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
template<typename T >
void inner_prod_cpu (vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
 Computes the inner product of two vectors, storing the result in a plain T value (result) - implementation. Library users should call inner_prod(vec1, vec2).
template<typename NumericT >
__global__ void inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
template<typename NumericT >
__global__ void inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
template<typename NumericT >
__global__ void inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
template<typename NumericT >
__global__ void inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
template<typename T >
__global__ void vector_multi_sum_kernel (T const *vec1, T *result, unsigned int start_result, unsigned int inc_result)
template<typename T >
void inner_prod_impl (vector_base< T > const &x, vector_tuple< T > const &vec_tuple, vector_base< T > &result)
template<typename T >
__global__ void norm_kernel_floats (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
template<typename T >
__global__ void norm_kernel_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
template<typename T >
__global__ void norm_kernel_unsigned_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
template<typename T >
void norm_1_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the l^1-norm of a vector.
template<typename T >
void norm_1_cpu (vector_base< T > const &vec1, T &result)
 Computes the l^1-norm of a vector, storing the result in a plain T value (result) rather than a scalar< T >.
template<typename T >
void norm_2_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the l^2-norm of a vector - implementation.
template<typename T >
void norm_2_cpu (vector_base< T > const &vec1, T &result)
 Computes the l^2-norm of a vector, storing the result in a plain T value (result) rather than a scalar< T > - implementation.
template<typename T >
void norm_inf_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the supremum-norm of a vector.
template<typename T >
void norm_inf_cpu (vector_base< T > const &vec1, T &result)
 Computes the supremum-norm of a vector, storing the result in a plain T value (result) rather than a scalar< T >.
template<typename T >
__device__ T cuda_abs (T val)
__device__ unsigned long cuda_abs (unsigned long val)
__device__ unsigned int cuda_abs (unsigned int val)
__device__ unsigned short cuda_abs (unsigned short val)
__device__ unsigned char cuda_abs (unsigned char val)
template<typename T >
__global__ void index_norm_inf_kernel (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
template<typename T >
vcl_size_t index_norm_inf (vector_base< T > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus.
template<typename T >
__global__ void plane_rotation_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T alpha, T beta)
template<typename T >
void plane_rotation (vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
 Computes a plane rotation of two vectors.

Detailed Description

Implementations of vector operations using CUDA.


Define Documentation

#define VIENNACL_MDOT_WORKGROUP_NUM   128
#define VIENNACL_MDOT_WORKGROUP_SIZE   128