// ViennaCL - The Vienna Computing Library, version 1.5.1
00001 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_ELL_MATRIX_HPP 00002 #define VIENNACL_LINALG_OPENCL_KERNELS_ELL_MATRIX_HPP 00003 00004 #include "viennacl/tools/tools.hpp" 00005 #include "viennacl/ocl/kernel.hpp" 00006 #include "viennacl/ocl/platform.hpp" 00007 #include "viennacl/ocl/utils.hpp" 00008 00009 #include "viennacl/linalg/opencl/common.hpp" 00010 00013 namespace viennacl 00014 { 00015 namespace linalg 00016 { 00017 namespace opencl 00018 { 00019 namespace kernels 00020 { 00021 00023 00024 template <typename StringType> 00025 void generate_ell_vec_mul(StringType & source, std::string const & numeric_string) 00026 { 00027 source.append("__kernel void vec_mul( \n"); 00028 source.append(" __global const unsigned int * coords, \n"); 00029 source.append(" __global const "); source.append(numeric_string); source.append(" * elements, \n"); 00030 source.append(" __global const "); source.append(numeric_string); source.append(" * x, \n"); 00031 source.append(" uint4 layout_x, \n"); 00032 source.append(" __global "); source.append(numeric_string); source.append(" * result, \n"); 00033 source.append(" uint4 layout_result, \n"); 00034 source.append(" unsigned int row_num, \n"); 00035 source.append(" unsigned int col_num, \n"); 00036 source.append(" unsigned int internal_row_num, \n"); 00037 source.append(" unsigned int items_per_row, \n"); 00038 source.append(" unsigned int aligned_items_per_row) \n"); 00039 source.append("{ \n"); 00040 source.append(" uint glb_id = get_global_id(0); \n"); 00041 source.append(" uint glb_sz = get_global_size(0); \n"); 00042 00043 source.append(" for(uint row_id = glb_id; row_id < row_num; row_id += glb_sz) { \n"); 00044 source.append(" "); source.append(numeric_string); source.append(" sum = 0; \n"); 00045 00046 source.append(" uint offset = row_id; \n"); 00047 source.append(" for(uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num) { \n"); 00048 source.append(" "); source.append(numeric_string); 
source.append(" val = elements[offset]; \n"); 00049 00050 source.append(" if(val != 0.0f) { \n"); 00051 source.append(" int col = coords[offset]; \n"); 00052 source.append(" sum += (x[col * layout_x.y + layout_x.x] * val); \n"); 00053 source.append(" } \n"); 00054 00055 source.append(" } \n"); 00056 00057 source.append(" result[row_id * layout_result.y + layout_result.x] = sum; \n"); 00058 source.append(" } \n"); 00059 source.append("} \n"); 00060 } 00061 00062 namespace detail 00063 { 00064 template <typename StringType> 00065 void generate_ell_matrix_dense_matrix_mul(StringType & source, std::string const & numeric_string, 00066 bool B_transposed, bool B_row_major, bool C_row_major) 00067 { 00068 source.append("__kernel void "); 00069 source.append(viennacl::linalg::opencl::detail::sparse_dense_matmult_kernel_name(B_transposed, B_row_major, C_row_major)); 00070 source.append("( \n"); 00071 source.append(" __global const unsigned int * sp_mat_coords, \n"); 00072 source.append(" __global const "); source.append(numeric_string); source.append(" * sp_mat_elems, \n"); 00073 source.append(" unsigned int sp_mat_row_num, \n"); 00074 source.append(" unsigned int sp_mat_col_num, \n"); 00075 source.append(" unsigned int sp_mat_internal_row_num, \n"); 00076 source.append(" unsigned int sp_mat_items_per_row, \n"); 00077 source.append(" unsigned int sp_mat_aligned_items_per_row, \n"); 00078 source.append(" __global const "); source.append(numeric_string); source.append("* d_mat, \n"); 00079 source.append(" unsigned int d_mat_row_start, \n"); 00080 source.append(" unsigned int d_mat_col_start, \n"); 00081 source.append(" unsigned int d_mat_row_inc, \n"); 00082 source.append(" unsigned int d_mat_col_inc, \n"); 00083 source.append(" unsigned int d_mat_row_size, \n"); 00084 source.append(" unsigned int d_mat_col_size, \n"); 00085 source.append(" unsigned int d_mat_internal_rows, \n"); 00086 source.append(" unsigned int d_mat_internal_cols, \n"); 00087 source.append(" __global "); 
source.append(numeric_string); source.append(" * result, \n"); 00088 source.append(" unsigned int result_row_start, \n"); 00089 source.append(" unsigned int result_col_start, \n"); 00090 source.append(" unsigned int result_row_inc, \n"); 00091 source.append(" unsigned int result_col_inc, \n"); 00092 source.append(" unsigned int result_row_size, \n"); 00093 source.append(" unsigned int result_col_size, \n"); 00094 source.append(" unsigned int result_internal_rows, \n"); 00095 source.append(" unsigned int result_internal_cols) { \n"); 00096 00097 source.append(" uint glb_id = get_global_id(0); \n"); 00098 source.append(" uint glb_sz = get_global_size(0); \n"); 00099 00100 source.append(" for( uint rc = glb_id; rc < (sp_mat_row_num * result_col_size); rc += glb_sz) { \n"); 00101 source.append(" uint row = rc % sp_mat_row_num; \n"); 00102 source.append(" uint col = rc / sp_mat_row_num; \n"); 00103 00104 source.append(" uint offset = row; \n"); 00105 source.append(" "); source.append(numeric_string); source.append(" r = ("); source.append(numeric_string); source.append(")0; \n"); 00106 00107 source.append(" for( uint k = 0; k < sp_mat_items_per_row; k++, offset += sp_mat_internal_row_num) { \n"); 00108 00109 source.append(" uint j = sp_mat_coords[offset]; \n"); 00110 source.append(" "); source.append(numeric_string); source.append(" x = sp_mat_elems[offset]; \n"); 00111 00112 source.append(" if(x != ("); source.append(numeric_string); source.append(")0) { \n"); 00113 source.append(" "); source.append(numeric_string); 00114 if (B_transposed && B_row_major) 00115 source.append(" y = d_mat[ (d_mat_row_start + col * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + j * d_mat_col_inc ]; \n"); 00116 else if (B_transposed && !B_row_major) 00117 source.append(" y = d_mat[ (d_mat_row_start + col * d_mat_row_inc) + (d_mat_col_start + j * d_mat_col_inc) * d_mat_internal_rows ]; \n"); 00118 else if (!B_transposed && B_row_major) 00119 source.append(" y = d_mat[ 
(d_mat_row_start + j * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + col * d_mat_col_inc ]; \n"); 00120 else 00121 source.append(" y = d_mat[ (d_mat_row_start + j * d_mat_row_inc) + (d_mat_col_start + col * d_mat_col_inc) * d_mat_internal_rows ]; \n"); 00122 00123 source.append(" r += x*y; \n"); 00124 source.append(" } \n"); 00125 source.append(" } \n"); 00126 00127 if (C_row_major) 00128 source.append(" result[ (result_row_start + row * result_row_inc) * result_internal_cols + result_col_start + col * result_col_inc ] = r; \n"); 00129 else 00130 source.append(" result[ (result_row_start + row * result_row_inc) + (result_col_start + col * result_col_inc) * result_internal_rows ] = r; \n"); 00131 source.append(" } \n"); 00132 source.append("} \n"); 00133 00134 } 00135 } 00136 00137 template <typename StringType> 00138 void generate_ell_matrix_dense_matrix_multiplication(StringType & source, std::string const & numeric_string) 00139 { 00140 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, false, false); 00141 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, false, true); 00142 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, true, false); 00143 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, true, true); 00144 00145 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, false, false); 00146 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, false, true); 00147 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, true, false); 00148 detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, true, true); 00149 } 00150 00152 00153 // main kernel class 00155 template <typename NumericT> 00156 struct ell_matrix 00157 { 00158 static std::string program_name() 00159 { 00160 return viennacl::ocl::type_to_string<NumericT>::apply() + "_ell_matrix"; 00161 } 00162 00163 static 
void init(viennacl::ocl::context & ctx) 00164 { 00165 viennacl::ocl::DOUBLE_PRECISION_CHECKER<NumericT>::apply(ctx); 00166 std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply(); 00167 00168 static std::map<cl_context, bool> init_done; 00169 if (!init_done[ctx.handle().get()]) 00170 { 00171 std::string source; 00172 source.reserve(1024); 00173 00174 viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source); 00175 00176 // fully parametrized kernels: 00177 generate_ell_vec_mul(source, numeric_string); 00178 generate_ell_matrix_dense_matrix_multiplication(source, numeric_string); 00179 00180 std::string prog_name = program_name(); 00181 #ifdef VIENNACL_BUILD_INFO 00182 std::cout << "Creating program " << prog_name << std::endl; 00183 #endif 00184 ctx.add_program(source, prog_name); 00185 init_done[ctx.handle().get()] = true; 00186 } //if 00187 } //init 00188 }; 00189 00190 } // namespace kernels 00191 } // namespace opencl 00192 } // namespace linalg 00193 } // namespace viennacl 00194 #endif 00195