#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/detail/spai/spai.hpp
    @brief Main implementation of SPAI (not FSPAI). Experimental.
*/

#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <vector>
#include <math.h>
#include <map>

// local includes
#include "viennacl/linalg/detail/spai/spai_tag.hpp"
#include "viennacl/linalg/qr.hpp"
#include "viennacl/linalg/detail/spai/spai-dynamic.hpp"
#include "viennacl/linalg/detail/spai/spai-static.hpp"
#include "viennacl/linalg/detail/spai/sparse_vector.hpp"
#include "viennacl/linalg/detail/spai/block_matrix.hpp"
#include "viennacl/linalg/detail/spai/block_vector.hpp"

// boost includes
#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/matrix_proxy.hpp"
#include "boost/numeric/ublas/vector_proxy.hpp"
#include "boost/numeric/ublas/storage.hpp"
#include "boost/numeric/ublas/io.hpp"
#include "boost/numeric/ublas/lu.hpp"
#include "boost/numeric/ublas/triangular.hpp"
#include "boost/numeric/ublas/matrix_expression.hpp"

// ViennaCL includes
#include "viennacl/linalg/prod.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/linalg/sparse_matrix_operations.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/ilu.hpp"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/linalg/opencl/kernels/spai.hpp"

// number of columns of M processed per sweep in the CPU implementation
#define VIENNACL_SPAI_K_b 20

namespace viennacl
{
namespace linalg
{
namespace detail
{
namespace spai
{

/** @brief Debug helper: prints the (index, value) pairs of a sparse vector */
template<typename SparseVectorType>
void print_sparse_vector(const SparseVectorType & v)
{
  for (typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it)
    std::cout << "[ " << vec_it->first << " ]: " << vec_it->second << std::endl;
}

/** @brief Debug helper: prints a dense matrix column by column (i.e., the transpose of the usual layout) */
template<typename DenseMatrixType>
void print_matrix(const DenseMatrixType & m)
{
  for (vcl_size_t i = 0; i < m.size2(); ++i)
  {
    for (vcl_size_t j = 0; j < m.size1(); ++j)
      std::cout << m(j, i) << " ";
    std::cout << std::endl;
  }
}

/** @brief Adds a scaled sparse vector to another sparse vector: res_v += b * v */
template<typename SparseVectorType, typename ScalarType>
void add_sparse_vectors(const SparseVectorType & v, const ScalarType b, SparseVectorType & res_v)
{
  for (typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
    res_v[v_it->first] += b * v_it->second;
}
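// Worked example for add_sparse_vectors (hypothetical values, not part of the
// original sources): with v = {(0, 2.0), (3, -1.0)} and b = 0.5, the call
//   add_sparse_vectors(v, 0.5, res_v);
// accumulates res_v[0] += 1.0 and res_v[3] += -0.5, leaving all other entries
// of res_v untouched. This is the primitive from which the sparse residual
// A*v - e_ind below is built up, one column of A at a time.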
/** @brief Sparse-matrix/sparse-vector product residual: res = A*v - e_ind, with A given column-wise in A_v_c */
template<typename SparseVectorType, typename ScalarType>
void compute_spai_residual(const std::vector<SparseVectorType> & A_v_c, const SparseVectorType & v,
                           const unsigned int ind, SparseVectorType & res)
{
  for (typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
    add_sparse_vectors(A_v_c[v_it->first], v_it->second, res);
  res[ind] -= static_cast<ScalarType>(1);
}

/** @brief Builds the column index set J from the pattern of v and the corresponding row index set I */
template<typename SparseVectorType>
void build_index_set(const std::vector<SparseVectorType> & A_v_c, const SparseVectorType & v,
                     std::vector<unsigned int> & J, std::vector<unsigned int> & I)
{
  buildColumnIndexSet(v, J);
  projectRows(A_v_c, J, I);
}

/** @brief Extracts the dense submatrix A(I, J) from the sparse matrix A_in */
template<typename SparseMatrixType, typename DenseMatrixType>
void initProjectSubMatrix(const SparseMatrixType & A_in, const std::vector<unsigned int> & J,
                          std::vector<unsigned int> & I, DenseMatrixType & A_out)
{
  A_out.resize(I.size(), J.size(), false);
  for (vcl_size_t j = 0; j < J.size(); ++j)
  {
    for (vcl_size_t i = 0; i < I.size(); ++i)
      A_out(i, j) = A_in(I[i], J[j]);
  }
}


/************************************************** CPU BLOCK SET UP ***************************************/

/** @brief Assembles the index sets, the dense blocks A(I, J), and their QR factorizations for all columns of M */
template<typename SparseMatrixType, typename DenseMatrixType, typename SparseVectorType, typename VectorType>
void block_set_up(const SparseMatrixType & A,
                  const std::vector<SparseVectorType> & A_v_c,
                  const std::vector<SparseVectorType> & M_v,
                  std::vector<std::vector<unsigned int> > & g_I,
                  std::vector<std::vector<unsigned int> > & g_J,
                  std::vector<DenseMatrixType> & g_A_I_J,
                  std::vector<VectorType> & g_b_v)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
  {
    build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
    initProjectSubMatrix(A, g_J[i], g_I[i], g_A_I_J[i]);
    single_qr(g_A_I_J[i], g_b_v[i]);
  }
}

/** @brief Sets up the index sets I and J for all columns of M */
template<typename SparseVectorType>
void index_set_up(const std::vector<SparseVectorType> & A_v_c,
                  const std::vector<SparseVectorType> & M_v,
                  std::vector<std::vector<unsigned int> > & g_J,
                  std::vector<std::vector<unsigned int> > & g_I)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
    build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
}

/************************************************** GPU BLOCK SET UP ***************************************/

/** @brief GPU variant: assembles all blocks A(I, J) on the device and QR-factorizes them in batch */
template<typename ScalarType, unsigned int MAT_ALIGNMENT, typename SparseVectorType>
void block_set_up(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> & A,
                  const std::vector<SparseVectorType> & A_v_c,
                  const std::vector<SparseVectorType> & M_v,
                  std::vector<cl_uint> g_is_update,
                  std::vector<std::vector<unsigned int> > & g_I,
                  std::vector<std::vector<unsigned int> > & g_J,
                  block_matrix & g_A_I_J,
                  block_vector & g_bv)
{
  viennacl::context ctx = viennacl::traits::context(A);
  bool is_empty_block;
  // build the index sets, then assemble and factorize the blocks on the GPU
  index_set_up(A_v_c, M_v, g_J, g_I);
  block_assembly(A, g_J, g_I, g_A_I_J, g_is_update, is_empty_block);
  block_qr<ScalarType>(g_I, g_J, g_A_I_J, g_bv, g_is_update, ctx);
}
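// Background (summary of the construction above, following the SPAI algorithm):
// each column m_k of the approximate inverse M is obtained by minimizing
//   || A(I, J) * m_k(J) - e_k(I) ||_2
// where J is the current sparsity pattern of m_k and I = projectRows(A_v_c, J)
// collects the rows of A with a nonzero in some column of J. The QR
// factorization computed during block set-up reduces each of these small dense
// least-squares problems to a triangular solve, which is what the
// least_square_solve() routines below perform.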
/***************************************************************************************************/
/******************************** SOLVING LS PROBLEMS ON GPU ***************************************/
/***************************************************************************************************/

/** @brief Scatters the dense solution segment starting at start_m_ind onto the sparse vector m according to the index set J */
template<typename ScalarType, typename SparseVectorType>
void custom_fan_out(const std::vector<ScalarType> & m_in,
                    unsigned int start_m_ind,
                    const std::vector<unsigned int> & J,
                    SparseVectorType & m)
{
  unsigned int cnt = 0;
  for (vcl_size_t i = 0; i < J.size(); ++i)
    m[J[i]] = m_in[start_m_ind + cnt++];
}


/** @brief GPU-based solution of the batched least-squares problems */
template<typename SparseVectorType, typename ScalarType>
void least_square_solve(std::vector<SparseVectorType> & A_v_c,
                        std::vector<SparseVectorType> & M_v,
                        std::vector<std::vector<unsigned int> > & g_I,
                        std::vector<std::vector<unsigned int> > & g_J,
                        block_matrix & g_A_I_J_vcl,
                        block_vector & g_bv_vcl,
                        std::vector<SparseVectorType> & g_res,
                        std::vector<cl_uint> & g_is_update,
                        const spai_tag & tag,
                        viennacl::context ctx)
{
  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
  unsigned int y_sz, m_sz;
  std::vector<cl_uint> y_inds(M_v.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> m_inds(M_v.size() + 1, static_cast<cl_uint>(0));
  get_size(g_I, y_sz);
  init_start_inds(g_I, y_inds);
  init_start_inds(g_J, m_inds);

  // create y_v: concatenation of the right-hand sides e_i restricted to the row sets I
  std::vector<ScalarType> y_v(y_sz, static_cast<ScalarType>(0));
  for (vcl_size_t i = 0; i < M_v.size(); ++i)
  {
    for (vcl_size_t j = 0; j < g_I[i].size(); ++j)
    {
      if (g_I[i][j] == i)
        y_v[y_inds[i] + j] = static_cast<ScalarType>(1.0);
    }
  }

  // m_v: concatenation of the solution segments
  get_size(g_J, m_sz);
  std::vector<ScalarType> m_v(m_sz, static_cast<ScalarType>(0));

  block_vector y_v_vcl;
  block_vector m_v_vcl;
  // preparing memory for the least-squares problems on the GPU
  y_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                              static_cast<unsigned int>(sizeof(ScalarType)*y_v.size()),
                                              &(y_v[0]));
  m_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                              static_cast<unsigned int>(sizeof(ScalarType)*m_v.size()),
                                              &(m_v[0]));
  y_v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                               static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                               &(y_inds[0]));
  viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                           static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
                                                                           &(g_is_update[0]));
  viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
  viennacl::ocl::kernel & ls_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_least_squares");
  ls_kernel.local_work_size(0, 1);
  ls_kernel.global_work_size(0, 256);
  viennacl::ocl::enqueue(ls_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_bv_vcl.handle(), g_bv_vcl.handle1(),
                                   m_v_vcl.handle(), y_v_vcl.handle(), y_v_vcl.handle1(),
                                   g_A_I_J_vcl.handle1(), g_is_update_vcl,
                                   static_cast<unsigned int>(M_v.size())));
  // copy vector m_v back from GPU to CPU
  cl_int vcl_err = clEnqueueReadBuffer(opencl_ctx.get_queue().handle().get(),
                                       m_v_vcl.handle().get(), CL_TRUE, 0,
                                       sizeof(ScalarType)*(m_v.size()),
                                       &(m_v[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(vcl_err);

  // fan the solution segments out onto the sparse columns of M
  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
  {
    if (g_is_update[i])
    {
      custom_fan_out(m_v, m_inds[i], g_J[i], M_v[i]);
      g_res[i].clear();
      compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(i), g_res[i]);
      ScalarType res_norm = 0;
      // compute the residual norm to decide whether this column needs another update
      sparse_norm_2(g_res[i], res_norm);
      g_is_update[i] = (res_norm > tag.getResidualNormThreshold()) && (!tag.getIsStatic()) ? (1) : (0);
    }
  }
}

/** @brief CPU-based solution of the least-squares problems, one per column of M */
template<typename SparseVectorType, typename DenseMatrixType, typename VectorType>
void least_square_solve(const std::vector<SparseVectorType> & A_v_c,
                        std::vector<DenseMatrixType> & g_R,
                        std::vector<VectorType> & g_b_v,
                        std::vector<std::vector<unsigned int> > & g_I,
                        std::vector<std::vector<unsigned int> > & g_J,
                        std::vector<SparseVectorType> & g_res,
                        std::vector<bool> & g_is_update,
                        std::vector<SparseVectorType> & M_v,
                        const spai_tag & tag)
{
  typedef typename DenseMatrixType::value_type ScalarType;
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
  {
    if (g_is_update[i])
    {
      // right-hand side: e_i restricted to the row index set I
      VectorType y = boost::numeric::ublas::zero_vector<ScalarType>(g_I[i].size());
      projectI<VectorType, ScalarType>(g_I[i], y, static_cast<unsigned int>(tag.getBegInd() + i));
      apply_q_trans_vec(g_R[i], g_b_v[i], y);
      // back substitution with the R factor yields the new column entries
      VectorType m_new = boost::numeric::ublas::zero_vector<ScalarType>(g_R[i].size2());
      backwardSolve(g_R[i], y, m_new);
      fanOutVector(m_new, g_J[i], M_v[i]);
      g_res[i].clear();
      compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(tag.getBegInd() + i), g_res[i]);
      ScalarType res_norm = 0;
      sparse_norm_2(g_res[i], res_norm);
      g_is_update[i] = (res_norm > tag.getResidualNormThreshold()) && (!tag.getIsStatic());
    }
  }
}

//************************************ UPDATE CHECK ***************************************************//

/** @brief Returns true if at least one column is still flagged for an update */
template<typename VectorType>
bool is_all_update(VectorType & parallel_is_update)
{
  for (unsigned int i = 0; i < parallel_is_update.size(); ++i)
  {
    if (parallel_is_update[i])
      return true;
  }
  return false;
}

//********************************** MATRIX VECTORIZATION ***********************************************//
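// Worked example for the column vectorization below (hypothetical 2x2 matrix,
// not part of the original sources): for M_in = [[4, 0], [1, 5]],
// vectorize_column_matrix(M_in, M_v) produces
//   M_v[0] = {(0, 4), (1, 1)}   and   M_v[1] = {(1, 5)},
// i.e., one sparse vector per column, keyed by row index. This column-wise
// view is what build_index_set() and compute_spai_residual() operate on.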
/** @brief Vectorizes a sparse matrix into one sparse vector per column: M_v[col][row] = value */
template<typename SparseMatrixType, typename SparseVectorType>
void vectorize_column_matrix(const SparseMatrixType & M_in, std::vector<SparseVectorType> & M_v)
{
  for (typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it)
  {
    for (typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      M_v[static_cast<unsigned int>(col_it.index2())][static_cast<unsigned int>(col_it.index1())] = *col_it;
  }
}

/** @brief Vectorizes a sparse matrix into one sparse vector per row: M_v[row][col] = value */
template<typename SparseMatrixType, typename SparseVectorType>
void vectorize_row_matrix(const SparseMatrixType & M_in, std::vector<SparseVectorType> & M_v)
{
  for (typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it)
  {
    for (typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      M_v[static_cast<unsigned int>(col_it.index1())][static_cast<unsigned int>(col_it.index2())] = *col_it;
  }
}

//************************************* BLOCK ASSEMBLY CODE *********************************************//

/** @brief Flattens a collection of index sets into a single contiguous cl_uint array */
template<typename SizeType>
void write_set_to_array(const std::vector<std::vector<SizeType> > & ind_set, std::vector<cl_uint> & a)
{
  vcl_size_t cnt = 0;
  for (vcl_size_t i = 0; i < ind_set.size(); ++i)
  {
    for (vcl_size_t j = 0; j < ind_set[i].size(); ++j)
      a[cnt++] = static_cast<cl_uint>(ind_set[i][j]);
  }
}


/** @brief Assembles the dense blocks A(I, J) on the GPU into one contiguous memory buffer */
template<typename ScalarType, unsigned int MAT_ALIGNMENT>
void block_assembly(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> & A,
                    const std::vector<std::vector<unsigned int> > & g_J,
                    const std::vector<std::vector<unsigned int> > & g_I,
                    block_matrix & g_A_I_J_vcl,
                    std::vector<cl_uint> & g_is_update,
                    bool & is_empty_block)
{
  // computing start indices for the index sets and for the block matrices
  unsigned int sz_I, sz_J, sz_blocks;
  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
  std::vector<cl_uint> i_ind(g_I.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> j_ind(g_I.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));

  init_start_inds(g_J, j_ind);
  init_start_inds(g_I, i_ind);

  get_size(g_J, sz_J);
  get_size(g_I, sz_I);
  std::vector<cl_uint> I_set(sz_I, static_cast<cl_uint>(0));
  std::vector<cl_uint> J_set(sz_J, static_cast<cl_uint>(0));

  // writing the index sets to flat arrays
  write_set_to_array(g_I, I_set);
  write_set_to_array(g_J, J_set);

  // proceed only if there is at least one block to assemble
  if (I_set.size() > 0 && J_set.size() > 0)
  {
    viennacl::context ctx = viennacl::traits::context(A);
    viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
    compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
    std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));

    block_vector set_I_vcl, set_J_vcl;
    // init memory on GPU: contiguous g_A_I_J
    g_A_I_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                    static_cast<unsigned int>(sizeof(ScalarType)*(sz_blocks)),
                                                    &(con_A_I_J[0]));
    g_A_I_J_vcl.handle().context(opencl_ctx);

    // matrix dimensions
    g_A_I_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                     static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<cl_uint>(g_I.size())),
                                                     &(matrix_dims[0]));
    g_A_I_J_vcl.handle1().context(opencl_ctx);

    // start indices of the blocks
    g_A_I_J_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                     static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                     &(blocks_ind[0]));
    g_A_I_J_vcl.handle2().context(opencl_ctx);

    // set_I
    set_I_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                  static_cast<unsigned int>(sizeof(cl_uint)*sz_I),
                                                  &(I_set[0]));
    set_I_vcl.handle().context(opencl_ctx);

    // set_J
    set_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                  static_cast<unsigned int>(sizeof(cl_uint)*sz_J),
                                                  &(J_set[0]));
    set_J_vcl.handle().context(opencl_ctx);

    // i_ind
    set_I_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                   &(i_ind[0]));
    set_I_vcl.handle1().context(opencl_ctx);

    // j_ind
    set_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                   &(j_ind[0]));
    set_J_vcl.handle1().context(opencl_ctx);

    viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                             static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
                                                                             &(g_is_update[0]));

    viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
    viennacl::ocl::kernel & assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "assemble_blocks");
    assembly_kernel.local_work_size(0, 1);
    assembly_kernel.global_work_size(0, 256);
    viennacl::ocl::enqueue(assembly_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
                                           set_I_vcl.handle(), set_J_vcl.handle(), set_I_vcl.handle1(),
                                           set_J_vcl.handle1(),
                                           g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(), g_A_I_J_vcl.handle(),
                                           g_is_update_vcl,
                                           static_cast<unsigned int>(g_I.size())));
    is_empty_block = false;
  }
  else
    is_empty_block = true;
}

/************************************************************************************************************************/

/** @brief Inserts the sparse columns M_v into the sparse matrix M; for is_right == false the fill is transposed */
template<typename SparseMatrixType, typename SparseVectorType>
void insert_sparse_columns(const std::vector<SparseVectorType> & M_v,
                           SparseMatrixType & M,
                           bool is_right)
{
  if (is_right)
  {
    for (unsigned int i = 0; i < M_v.size(); ++i)
    {
      for (typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it)
        M(vec_it->first, i) = vec_it->second;
    }
  }
  else // transposed fill of M
  {
    for (unsigned int i = 0; i < M_v.size(); ++i)
    {
      for (typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it)
        M(i, vec_it->first) = vec_it->second;
    }
  }
}
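// Worked example for insert_sparse_columns above (hypothetical values, not part
// of the original sources): with M_v[0] = {(2, 3.0)} and is_right == true, the
// call writes M(2, 0) = 3.0; with is_right == false the fill is transposed and
// M(0, 2) = 3.0 is written instead. This matches whether M is intended as a
// right or a left approximate-inverse preconditioner.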
/** @brief Transposes a sparse matrix: A = A_in^T */
template<typename MatrixType>
void sparse_transpose(const MatrixType & A_in, MatrixType & A)
{
  typedef typename MatrixType::value_type ScalarType;
  std::vector<std::map<vcl_size_t, ScalarType> > temp_A(A_in.size2());
  A.resize(A_in.size2(), A_in.size1(), false);

  for (typename MatrixType::const_iterator1 row_it = A_in.begin1();
       row_it != A_in.end1();
       ++row_it)
  {
    for (typename MatrixType::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      temp_A[col_it.index2()][col_it.index1()] = *col_it;
    }
  }

  for (vcl_size_t i = 0; i < temp_A.size(); ++i)
  {
    for (typename std::map<vcl_size_t, ScalarType>::const_iterator it = temp_A[i].begin();
         it != temp_A[i].end();
         ++it)
      A(i, it->first) = it->second;
  }
}


/** @brief CPU-based SPAI: computes a sparse approximate inverse M of A, processing VIENNACL_SPAI_K_b columns per sweep */
template<typename MatrixType>
void computeSPAI(const MatrixType & A, MatrixType & M, spai_tag & tag)
{
  typedef typename MatrixType::value_type ScalarType;
  typedef typename boost::numeric::ublas::vector<ScalarType> VectorType;
  typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;
  typedef typename boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;

  unsigned int cur_iter = 0;
  tag.setBegInd(0);
  tag.setEndInd(VIENNACL_SPAI_K_b);
  bool go_on = true;
  std::vector<SparseVectorType> A_v_c(M.size2());
  std::vector<SparseVectorType> M_v(M.size2());
  // vectorization of the matrices: one sparse vector per column
  vectorize_column_matrix(A, A_v_c);
  vectorize_column_matrix(M, M_v);

  while (go_on)
  {
    go_on = (tag.getEndInd() < static_cast<long>(M.size2()));
    cur_iter = 0;
    unsigned int l_sz = tag.getEndInd() - tag.getBegInd();
    std::vector<bool> g_is_update(l_sz, true);

    // local copy of the columns processed in this sweep
    std::vector<SparseVectorType> l_M_v(l_sz);
    std::copy(M_v.begin() + tag.getBegInd(), M_v.begin() + tag.getEndInd(), l_M_v.begin());

    // working blocks for this sweep
    std::vector<DenseMatrixType> g_A_I_J(l_sz);
    std::vector<VectorType> g_b_v(l_sz);
    std::vector<SparseVectorType> g_res(l_sz);
    std::vector<std::vector<unsigned int> > g_I(l_sz);
    std::vector<std::vector<unsigned int> > g_J(l_sz);

    while ((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update))
    {
      // SET UP THE BLOCKS
      // PHASE ONE
      if (cur_iter == 0)
        block_set_up(A, A_v_c, l_M_v, g_I, g_J, g_A_I_J, g_b_v);
      else
        block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);

      // PHASE TWO: LEAST-SQUARES SOLUTION
      least_square_solve(A_v_c, g_A_I_J, g_b_v, g_I, g_J, g_res, g_is_update, l_M_v, tag);

      if (tag.getIsStatic())
        break;
      cur_iter++;
    }

    // write the updated columns back and advance to the next sweep
    std::copy(l_M_v.begin(), l_M_v.end(), M_v.begin() + tag.getBegInd());
    tag.setBegInd(tag.getEndInd());
    tag.setEndInd(std::min(static_cast<long>(tag.getBegInd() + VIENNACL_SPAI_K_b), static_cast<long>(M.size2())));
  }
  M.resize(M.size1(), M.size2(), false);
  insert_sparse_columns(M_v, M, tag.getIsRight());
}


/** @brief GPU-based SPAI: computes a sparse approximate inverse M of A, using the CPU copies cpu_A and cpu_M for pattern handling */
template<typename ScalarType, unsigned int MAT_ALIGNMENT>
void computeSPAI(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> & A,              // input
                 const boost::numeric::ublas::compressed_matrix<ScalarType> & cpu_A,
                 boost::numeric::ublas::compressed_matrix<ScalarType> & cpu_M,                  // output
                 viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> & M,
                 const spai_tag & tag)
{
  typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;

  unsigned int cur_iter = 0;
  std::vector<cl_uint> g_is_update(cpu_M.size2(), static_cast<cl_uint>(1));
  std::vector<SparseVectorType> A_v_c(cpu_M.size2());
  std::vector<SparseVectorType> M_v(cpu_M.size2());
  vectorize_column_matrix(cpu_A, A_v_c);
  vectorize_column_matrix(cpu_M, M_v);
  std::vector<SparseVectorType> g_res(cpu_M.size2());
  std::vector<std::vector<unsigned int> > g_I(cpu_M.size2());
  std::vector<std::vector<unsigned int> > g_J(cpu_M.size2());

  // OpenCL variables
  block_matrix g_A_I_J_vcl;
  block_vector g_bv_vcl;
  while ((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update))
  {
    // PHASE ONE: set up the blocks (index sets on the CPU, assembly and QR on the GPU)
    if (cur_iter == 0)
      block_set_up(A, A_v_c, M_v, g_is_update, g_I, g_J, g_A_I_J_vcl, g_bv_vcl);
    else
      block_update(A, A_v_c, g_is_update, g_res, g_J, g_I, g_A_I_J_vcl, g_bv_vcl, tag);

    // PHASE TWO: solve the least-squares problems
    least_square_solve<SparseVectorType, ScalarType>(A_v_c, M_v, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, g_res, g_is_update, tag, viennacl::traits::context(A));

    if (tag.getIsStatic())
      break;
    cur_iter++;
  }
  cpu_M.resize(cpu_M.size1(), cpu_M.size2(), false);
  insert_sparse_columns(M_v, cpu_M, tag.getIsRight());
  // copy the result back to the GPU
  M.resize(static_cast<unsigned int>(cpu_M.size1()), static_cast<unsigned int>(cpu_M.size2()));
  viennacl::copy(cpu_M, M);
}

} // namespace spai
} // namespace detail
} // namespace linalg
} // namespace viennacl
#endif
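// Minimal usage sketch for the CPU entry point above (a hedged illustration, not
// part of the original sources; it assumes M is seeded with the identity pattern,
// the usual starting guess, and that spai_tag's default parameters are acceptable):
//
//   boost::numeric::ublas::compressed_matrix<double> A /* = ... system matrix ... */;
//   boost::numeric::ublas::compressed_matrix<double> M(A.size1(), A.size2());
//   for (std::size_t i = 0; i < A.size1(); ++i)
//     M(i, i) = 1.0;                                   // initial sparsity pattern
//   viennacl::linalg::detail::spai::spai_tag tag;      // default SPAI parameters
//   viennacl::linalg::detail::spai::computeSPAI(A, M, tag);
//
// In practice these routines are driven by ViennaCL's SPAI preconditioner wrapper
// rather than called directly.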