ViennaCL - The Vienna Computing Library
1.5.1
|
Implementations of vector operations using CUDA. More...
#include <cmath>
#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
Go to the source code of this file.
Namespaces | |
namespace | viennacl |
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. | |
namespace | viennacl::linalg |
Provides all linear algebra operations which are not covered by operator overloads. | |
namespace | viennacl::linalg::cuda |
Holds all CUDA compute kernels used by ViennaCL. | |
namespace | viennacl::linalg::cuda::detail |
Helper functions for the CUDA linear algebra backend. | |
Defines | |
#define | VIENNACL_MDOT_WORKGROUP_SIZE 128 |
#define | VIENNACL_MDOT_WORKGROUP_NUM 128 |
Functions | |
template<typename T > | |
__global__ void | av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
__global__ void | av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T , typename ScalarType1 > | |
void | av (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T , typename ScalarType1 , typename ScalarType2 > | |
void | avbv (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T , typename ScalarType1 , typename ScalarType2 > | |
void | avbv_v (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | vector_assign_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, T alpha) |
template<typename T , typename S1 > | |
void | vector_assign (vector_base< T > &vec1, const S1 &alpha, bool up_to_internal_size=false) |
Assign a constant value to a vector (or to a vector range/slice). | |
template<typename T > | |
__global__ void | vector_swap_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | vector_swap (vector_base< T > &vec1, vector_base< T > &vec2) |
Swaps the contents of two vectors, data is copied. | |
template<typename T > | |
__global__ void | element_op_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename T > | |
__global__ void | element_op_int_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename T , typename OP > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const &proxy) |
Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) | |
template<typename OP > | |
void | element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const &proxy) |
template<typename OP > | |
void | element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_acos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_asin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_atan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_ceil_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_cos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_cosh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_exp_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_fabs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_abs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_floor_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_log_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_log10_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sinh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sqrt_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_tan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_tanh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const &proxy) |
template<typename T > | |
__global__ void | inner_prod_kernel (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T *group_buffer) |
template<typename T > | |
__global__ void | vector_sum_kernel_floats (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T > | |
__global__ void | vector_sum_kernel_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T > | |
__global__ void | vector_sum_kernel_unsigned_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T , typename S3 > | |
void | inner_prod_impl (vector_base< T > const &vec1, vector_base< T > const &vec2, S3 &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). | |
template<typename T > | |
void | inner_prod_cpu (vector_base< T > const &vec1, vector_base< T > const &vec2, T &result) |
Computes the inner product of two vectors and writes the result to a plain value of type T - implementation. Library users should call inner_prod(vec1, vec2). | |
template<typename NumericT > | |
__global__ void | inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
template<typename T > | |
__global__ void | vector_multi_sum_kernel (T const *vec1, T *result, unsigned int start_result, unsigned int inc_result) |
template<typename T > | |
void | inner_prod_impl (vector_base< T > const &x, vector_tuple< T > const &vec_tuple, vector_base< T > &result) |
template<typename T > | |
__global__ void | norm_kernel_floats (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
__global__ void | norm_kernel_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
__global__ void | norm_kernel_unsigned_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
void | norm_1_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the l^1-norm of a vector. | |
template<typename T > | |
void | norm_1_cpu (vector_base< T > const &vec1, T &result) |
Computes the l^1-norm of a vector and writes the result to a plain value of type T instead of a scalar<T>. | |
template<typename T > | |
void | norm_2_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the l^2-norm of a vector - implementation. | |
template<typename T > | |
void | norm_2_cpu (vector_base< T > const &vec1, T &result) |
Computes the l^2-norm of a vector and writes the result to a plain value of type T instead of a scalar<T> - implementation. | |
template<typename T > | |
void | norm_inf_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the supremum-norm of a vector. | |
template<typename T > | |
void | norm_inf_cpu (vector_base< T > const &vec1, T &result) |
Computes the supremum-norm of a vector and writes the result to a plain value of type T instead of a scalar<T>. | |
template<typename T > | |
__device__ T | cuda_abs (T val) |
__device__ unsigned long | cuda_abs (unsigned long val) |
__device__ unsigned int | cuda_abs (unsigned int val) |
__device__ unsigned short | cuda_abs (unsigned short val) |
__device__ unsigned char | cuda_abs (unsigned char val) |
template<typename T > | |
__global__ void | index_norm_inf_kernel (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
template<typename T > | |
vcl_size_t | index_norm_inf (vector_base< T > const &vec1) |
Computes the index of the first entry that is equal to the supremum-norm in modulus. | |
template<typename T > | |
__global__ void | plane_rotation_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T alpha, T beta) |
template<typename T > | |
void | plane_rotation (vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta) |
Computes a plane rotation of two vectors. |
Implementations of vector operations using CUDA.
#define VIENNACL_MDOT_WORKGROUP_NUM 128 |
#define VIENNACL_MDOT_WORKGROUP_SIZE 128 |