ViennaCL - The Vienna Computing Library, version 1.5.1
00001 #ifndef VIENNACL_LINALG_AMG_HPP_ 00002 #define VIENNACL_LINALG_AMG_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00027 #include <boost/numeric/ublas/matrix.hpp> 00028 #include <boost/numeric/ublas/lu.hpp> 00029 #include <boost/numeric/ublas/operation.hpp> 00030 #include <boost/numeric/ublas/vector_proxy.hpp> 00031 #include <boost/numeric/ublas/matrix_proxy.hpp> 00032 #include <boost/numeric/ublas/vector.hpp> 00033 #include <boost/numeric/ublas/triangular.hpp> 00034 #include <vector> 00035 #include <cmath> 00036 #include "viennacl/forwards.h" 00037 #include "viennacl/tools/tools.hpp" 00038 #include "viennacl/linalg/prod.hpp" 00039 #include "viennacl/linalg/direct_solve.hpp" 00040 00041 #include "viennacl/linalg/detail/amg/amg_base.hpp" 00042 #include "viennacl/linalg/detail/amg/amg_coarse.hpp" 00043 #include "viennacl/linalg/detail/amg/amg_interpol.hpp" 00044 00045 #include <map> 00046 00047 #ifdef VIENNACL_WITH_OPENMP 00048 #include <omp.h> 00049 #endif 00050 00051 #include "viennacl/linalg/detail/amg/amg_debug.hpp" 00052 00053 #define VIENNACL_AMG_COARSE_LIMIT 50 00054 #define VIENNACL_AMG_MAX_LEVELS 100 00055 00056 namespace viennacl 00057 { 00058 namespace linalg 00059 { 00060 typedef detail::amg::amg_tag amg_tag; 00061 00062 00063 00071 template <typename InternalType1, typename InternalType2> 00072 void 
amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) 00073 { 00074 typedef typename InternalType2::value_type PointVectorType; 00075 00076 unsigned int i, iterations, c_points, f_points; 00077 detail::amg::amg_slicing<InternalType1,InternalType2> Slicing; 00078 00079 // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process. 00080 iterations = tag.get_coarselevels(); 00081 if (iterations == 0) 00082 iterations = VIENNACL_AMG_MAX_LEVELS; 00083 00084 // For parallel coarsenings build data structures (number of threads set automatically). 00085 if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3) 00086 Slicing.init(iterations); 00087 00088 for (i=0; i<iterations; ++i) 00089 { 00090 // Initialize Pointvector on level i and construct points. 00091 Pointvector[i] = PointVectorType(static_cast<unsigned int>(A[i].size1())); 00092 Pointvector[i].init_points(); 00093 00094 // Construct C and F points on coarse level (i is fine level, i+1 coarse level). 00095 detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag); 00096 00097 // Calculate number of C and F points on level i. 00098 c_points = Pointvector[i].get_cpoints(); 00099 f_points = Pointvector[i].get_fpoints(); 00100 00101 #if defined (VIENNACL_AMG_DEBUG) //or defined(VIENNACL_AMG_DEBUGBENCH) 00102 std::cout << "Level " << i << ": "; 00103 std::cout << "No of C points = " << c_points << ", "; 00104 std::cout << "No of F points = " << f_points << std::endl; 00105 #endif 00106 00107 // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i. 00108 if (c_points == 0 || f_points == 0) 00109 break; 00110 00111 // Construct interpolation matrix for level i. 00112 detail::amg::amg_interpol (i, A, P, Pointvector, tag); 00113 00114 // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P). 
00115 detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]); 00116 00117 // Test triple matrix product. Very slow for large matrix sizes (ublas). 00118 // test_triplematprod(A[i],P[i],A[i+1]); 00119 00120 Pointvector[i].delete_points(); 00121 00122 #ifdef VIENNACL_AMG_DEBUG 00123 std::cout << "Coarse Grid Operator Matrix:" << std::endl; 00124 printmatrix (A[i+1]); 00125 #endif 00126 00127 // If Limit of coarse points is reached then stop. Coarsest level is level i+1. 00128 if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT) 00129 { 00130 tag.set_coarselevels(i+1); 00131 return; 00132 } 00133 } 00134 tag.set_coarselevels(i); 00135 } 00136 00145 template <typename MatrixType, typename InternalType1, typename InternalType2> 00146 void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) 00147 { 00148 //typedef typename MatrixType::value_type ScalarType; 00149 typedef typename InternalType1::value_type SparseMatrixType; 00150 00151 if (tag.get_coarselevels() > 0) 00152 { 00153 A.resize(tag.get_coarselevels()+1); 00154 P.resize(tag.get_coarselevels()); 00155 Pointvector.resize(tag.get_coarselevels()); 00156 } 00157 else 00158 { 00159 A.resize(VIENNACL_AMG_MAX_LEVELS+1); 00160 P.resize(VIENNACL_AMG_MAX_LEVELS); 00161 Pointvector.resize(VIENNACL_AMG_MAX_LEVELS); 00162 } 00163 00164 // Insert operator matrix as operator for finest level. 00165 SparseMatrixType A0 (mat); 00166 A.insert_element (0, A0); 00167 } 00168 00178 template <typename InternalType1, typename InternalType2> 00179 void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag) 00180 { 00181 //typedef typename InternalType1::value_type MatrixType; 00182 00183 // Resize internal data structures to actual size. 
00184 A.resize(tag.get_coarselevels()+1); 00185 P.resize(tag.get_coarselevels()); 00186 R.resize(tag.get_coarselevels()); 00187 00188 // Transform into matrix type. 00189 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i) 00190 { 00191 A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false); 00192 A[i] = A_setup[i]; 00193 } 00194 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00195 { 00196 P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false); 00197 P[i] = P_setup[i]; 00198 } 00199 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00200 { 00201 R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false); 00202 P_setup[i].set_trans(true); 00203 R[i] = P_setup[i]; 00204 P_setup[i].set_trans(false); 00205 } 00206 } 00207 00218 template <typename InternalType1, typename InternalType2> 00219 void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag, viennacl::context ctx) 00220 { 00221 // Resize internal data structures to actual size. 00222 A.resize(tag.get_coarselevels()+1); 00223 P.resize(tag.get_coarselevels()); 00224 R.resize(tag.get_coarselevels()); 00225 00226 // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>. 
00227 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i) 00228 { 00229 viennacl::switch_memory_context(A[i], ctx); 00230 //A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false); 00231 viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]); 00232 } 00233 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00234 { 00235 viennacl::switch_memory_context(P[i], ctx); 00236 //P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false); 00237 viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]); 00238 //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]); 00239 } 00240 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00241 { 00242 viennacl::switch_memory_context(R[i], ctx); 00243 //R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false); 00244 P_setup[i].set_trans(true); 00245 viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]); 00246 P_setup[i].set_trans(false); 00247 } 00248 } 00249 00258 template <typename InternalVectorType, typename SparseMatrixType> 00259 void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag) 00260 { 00261 typedef typename InternalVectorType::value_type VectorType; 00262 00263 result.resize(tag.get_coarselevels()+1); 00264 rhs.resize(tag.get_coarselevels()+1); 00265 residual.resize(tag.get_coarselevels()); 00266 00267 for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level) 00268 { 00269 result[level] = VectorType(A[level].size1()); 00270 result[level].clear(); 00271 rhs[level] = VectorType(A[level].size1()); 00272 rhs[level].clear(); 00273 } 00274 for (unsigned int level=0; level < tag.get_coarselevels(); ++level) 00275 { 00276 residual[level] = VectorType(A[level].size1()); 00277 residual[level].clear(); 00278 } 00279 } 00280 00281 00291 template <typename InternalVectorType, typename SparseMatrixType> 00292 void amg_setup_apply (InternalVectorType & result, 
InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag, viennacl::context ctx) 00293 { 00294 typedef typename InternalVectorType::value_type VectorType; 00295 00296 result.resize(tag.get_coarselevels()+1); 00297 rhs.resize(tag.get_coarselevels()+1); 00298 residual.resize(tag.get_coarselevels()); 00299 00300 for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level) 00301 { 00302 result[level] = VectorType(A[level].size1(), ctx); 00303 rhs[level] = VectorType(A[level].size1(), ctx); 00304 } 00305 for (unsigned int level=0; level < tag.get_coarselevels(); ++level) 00306 { 00307 residual[level] = VectorType(A[level].size1(), ctx); 00308 } 00309 } 00310 00311 00319 template <typename ScalarType, typename SparseMatrixType> 00320 void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<> & Permutation, SparseMatrixType const & A) 00321 { 00322 typedef typename SparseMatrixType::const_iterator1 ConstRowIterator; 00323 typedef typename SparseMatrixType::const_iterator2 ConstColIterator; 00324 00325 // Copy to operator matrix. Needed 00326 op.resize(A.size1(),A.size2(),false); 00327 for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter) 00328 for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00329 op (col_iter.index1(), col_iter.index2()) = *col_iter; 00330 00331 // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()! 
00332 Permutation = boost::numeric::ublas::permutation_matrix<> (op.size1()); 00333 boost::numeric::ublas::lu_factorize(op,Permutation); 00334 } 00335 00338 template <typename MatrixType> 00339 class amg_precond 00340 { 00341 typedef typename MatrixType::value_type ScalarType; 00342 typedef boost::numeric::ublas::vector<ScalarType> VectorType; 00343 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType; 00344 typedef detail::amg::amg_pointvector PointVectorType; 00345 00346 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; 00347 typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator; 00348 typedef typename SparseMatrixType::iterator1 InternalRowIterator; 00349 typedef typename SparseMatrixType::iterator2 InternalColIterator; 00350 00351 boost::numeric::ublas::vector <SparseMatrixType> A_setup; 00352 boost::numeric::ublas::vector <SparseMatrixType> P_setup; 00353 boost::numeric::ublas::vector <MatrixType> A; 00354 boost::numeric::ublas::vector <MatrixType> P; 00355 boost::numeric::ublas::vector <MatrixType> R; 00356 boost::numeric::ublas::vector <PointVectorType> Pointvector; 00357 00358 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op; 00359 mutable boost::numeric::ublas::permutation_matrix<> Permutation; 00360 00361 mutable boost::numeric::ublas::vector <VectorType> result; 00362 mutable boost::numeric::ublas::vector <VectorType> rhs; 00363 mutable boost::numeric::ublas::vector <VectorType> residual; 00364 00365 mutable bool done_init_apply; 00366 00367 amg_tag tag_; 00368 public: 00369 00370 amg_precond(): Permutation(0) {} 00376 amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0) 00377 { 00378 tag_ = tag; 00379 // Initialize data structures. 00380 amg_init (mat,A_setup,P_setup,Pointvector,tag_); 00381 00382 done_init_apply = false; 00383 } 00384 00387 void setup() 00388 { 00389 // Start setup phase. 
00390 amg_setup(A_setup,P_setup,Pointvector,tag_); 00391 // Transform to CPU-Matrixtype for precondition phase. 00392 amg_transform_cpu(A,P,R,A_setup,P_setup,tag_); 00393 00394 done_init_apply = false; 00395 } 00396 00401 void init_apply() const 00402 { 00403 // Setup precondition phase (Data structures). 00404 amg_setup_apply(result,rhs,residual,A_setup,tag_); 00405 // Do LU factorization for direct solve. 00406 amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]); 00407 00408 done_init_apply = true; 00409 } 00410 00416 template <typename VectorType> 00417 ScalarType calc_complexity(VectorType & avgstencil) 00418 { 00419 avgstencil = VectorType (tag_.get_coarselevels()+1); 00420 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; 00421 00422 for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level) 00423 { 00424 level_coefficients = 0; 00425 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) 00426 { 00427 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00428 { 00429 if (level == 0) 00430 systemmat_nonzero++; 00431 nonzero++; 00432 level_coefficients++; 00433 } 00434 } 00435 avgstencil[level] = level_coefficients/static_cast<ScalarType>(A_setup[level].size1()); 00436 } 00437 return nonzero/static_cast<ScalarType>(systemmat_nonzero); 00438 } 00439 00444 template <typename VectorType> 00445 void apply(VectorType & vec) const 00446 { 00447 // Build data structures and do lu factorization before first iteration step. 00448 if (!done_init_apply) 00449 init_apply(); 00450 00451 int level; 00452 00453 // Precondition operation (Yang, p.3) 00454 rhs[0] = vec; 00455 for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++) 00456 { 00457 result[level].clear(); 00458 00459 // Apply Smoother presmooth_ times. 
00460 smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]); 00461 00462 #ifdef VIENNACL_AMG_DEBUG 00463 std::cout << "After presmooth:" << std::endl; 00464 printvector(result[level]); 00465 #endif 00466 00467 // Compute residual. 00468 residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]); 00469 00470 #ifdef VIENNACL_AMG_DEBUG 00471 std::cout << "Residual:" << std::endl; 00472 printvector(residual[level]); 00473 #endif 00474 00475 // Restrict to coarse level. Restricted residual is RHS of coarse level. 00476 rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]); 00477 00478 #ifdef VIENNACL_AMG_DEBUG 00479 std::cout << "Restricted Residual: " << std::endl; 00480 printvector(rhs[level+1]); 00481 #endif 00482 } 00483 00484 // On highest level use direct solve to solve equation. 00485 result[level] = rhs[level]; 00486 boost::numeric::ublas::lu_substitute(op,Permutation,result[level]); 00487 00488 #ifdef VIENNACL_AMG_DEBUG 00489 std::cout << "After direct solve: " << std::endl; 00490 printvector (result[level]); 00491 #endif 00492 00493 for (level=tag_.get_coarselevels()-1; level >= 0; level--) 00494 { 00495 #ifdef VIENNACL_AMG_DEBUG 00496 std::cout << "Coarse Error: " << std::endl; 00497 printvector(result[level+1]); 00498 #endif 00499 00500 // Interpolate error to fine level. Correct solution by adding error. 00501 result[level] += boost::numeric::ublas::prod (P[level], result[level+1]); 00502 00503 #ifdef VIENNACL_AMG_DEBUG 00504 std::cout << "Corrected Result: " << std::endl; 00505 printvector (result[level]); 00506 #endif 00507 00508 // Apply Smoother postsmooth_ times. 
00509 smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]); 00510 00511 #ifdef VIENNACL_AMG_DEBUG 00512 std::cout << "After postsmooth: " << std::endl; 00513 printvector (result[level]); 00514 #endif 00515 } 00516 vec = result[0]; 00517 } 00518 00525 template <typename VectorType> 00526 void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const 00527 { 00528 VectorType old_result (x.size()); 00529 long index; 00530 ScalarType sum = 0, diag = 1; 00531 00532 for (int i=0; i<iterations; ++i) 00533 { 00534 old_result = x; 00535 x.clear(); 00536 #ifdef VIENNACL_WITH_OPENMP 00537 #pragma omp parallel for private (sum,diag) shared (rhs,x) 00538 #endif 00539 for (index=0; index < static_cast<long>(A_setup[level].size1()); ++index) 00540 { 00541 InternalConstRowIterator row_iter = A_setup[level].begin1(); 00542 row_iter += index; 00543 sum = 0; 00544 diag = 1; 00545 for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00546 { 00547 if (col_iter.index1() == col_iter.index2()) 00548 diag = *col_iter; 00549 else 00550 sum += *col_iter * old_result[col_iter.index2()]; 00551 } 00552 x[index]= static_cast<ScalarType>(tag_.get_jacobiweight()) * (rhs[index] - sum) / diag + (1-static_cast<ScalarType>(tag_.get_jacobiweight())) * old_result[index]; 00553 } 00554 } 00555 } 00556 00557 amg_tag & tag() { return tag_; } 00558 }; 00559 00564 template <typename ScalarType, unsigned int MAT_ALIGNMENT> 00565 class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> > 00566 { 00567 typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType; 00568 typedef viennacl::vector<ScalarType> VectorType; 00569 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType; 00570 typedef detail::amg::amg_pointvector PointVectorType; 00571 00572 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; 00573 typedef typename SparseMatrixType::const_iterator2 
InternalConstColIterator; 00574 typedef typename SparseMatrixType::iterator1 InternalRowIterator; 00575 typedef typename SparseMatrixType::iterator2 InternalColIterator; 00576 00577 boost::numeric::ublas::vector <SparseMatrixType> A_setup; 00578 boost::numeric::ublas::vector <SparseMatrixType> P_setup; 00579 boost::numeric::ublas::vector <MatrixType> A; 00580 boost::numeric::ublas::vector <MatrixType> P; 00581 boost::numeric::ublas::vector <MatrixType> R; 00582 boost::numeric::ublas::vector <PointVectorType> Pointvector; 00583 00584 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op; 00585 mutable boost::numeric::ublas::permutation_matrix<> Permutation; 00586 00587 mutable boost::numeric::ublas::vector <VectorType> result; 00588 mutable boost::numeric::ublas::vector <VectorType> rhs; 00589 mutable boost::numeric::ublas::vector <VectorType> residual; 00590 00591 viennacl::context ctx_; 00592 00593 mutable bool done_init_apply; 00594 00595 amg_tag tag_; 00596 00597 public: 00598 00599 amg_precond(): Permutation(0) {} 00600 00606 amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0), ctx_(viennacl::traits::context(mat)) 00607 { 00608 tag_ = tag; 00609 00610 // Copy to CPU. Internal structure of sparse matrix is used for copy operation. 00611 std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1()); 00612 viennacl::copy(mat, mat2); 00613 00614 // Initialize data structures. 00615 amg_init (mat2,A_setup,P_setup,Pointvector,tag_); 00616 00617 done_init_apply = false; 00618 } 00619 00622 void setup() 00623 { 00624 // Start setup phase. 00625 amg_setup(A_setup,P_setup,Pointvector, tag_); 00626 // Transform to GPU-Matrixtype for precondition phase. 00627 amg_transform_gpu(A,P,R,A_setup,P_setup, tag_, ctx_); 00628 00629 done_init_apply = false; 00630 } 00631 00636 void init_apply() const 00637 { 00638 // Setup precondition phase (Data structures). 
00639 amg_setup_apply(result,rhs,residual,A_setup,tag_, ctx_); 00640 // Do LU factorization for direct solve. 00641 amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]); 00642 00643 done_init_apply = true; 00644 } 00645 00651 template <typename VectorType> 00652 ScalarType calc_complexity(VectorType & avgstencil) 00653 { 00654 avgstencil = VectorType (tag_.get_coarselevels()+1); 00655 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; 00656 00657 for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level) 00658 { 00659 level_coefficients = 0; 00660 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) 00661 { 00662 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00663 { 00664 if (level == 0) 00665 systemmat_nonzero++; 00666 nonzero++; 00667 level_coefficients++; 00668 } 00669 } 00670 avgstencil[level] = level_coefficients/(double)A[level].size1(); 00671 } 00672 return nonzero/static_cast<double>(systemmat_nonzero); 00673 } 00674 00679 template <typename VectorType> 00680 void apply(VectorType & vec) const 00681 { 00682 if (!done_init_apply) 00683 init_apply(); 00684 00685 int level; 00686 00687 // Precondition operation (Yang, p.3). 00688 rhs[0] = vec; 00689 for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++) 00690 { 00691 result[level].clear(); 00692 00693 // Apply Smoother presmooth_ times. 00694 smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]); 00695 00696 #ifdef VIENNACL_AMG_DEBUG 00697 std::cout << "After presmooth: " << std::endl; 00698 printvector(result[level]); 00699 #endif 00700 00701 // Compute residual. 
00702 //residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]); 00703 residual[level] = viennacl::linalg::prod (A[level],result[level]); 00704 residual[level] = rhs[level] - residual[level]; 00705 00706 #ifdef VIENNACL_AMG_DEBUG 00707 std::cout << "Residual: " << std::endl; 00708 printvector(residual[level]); 00709 #endif 00710 00711 // Restrict to coarse level. Result is RHS of coarse level equation. 00712 //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]); 00713 rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]); 00714 00715 #ifdef VIENNACL_AMG_DEBUG 00716 std::cout << "Restricted Residual: " << std::endl; 00717 printvector(rhs[level+1]); 00718 #endif 00719 } 00720 00721 // On highest level use direct solve to solve equation (on the CPU) 00722 //TODO: Use GPU direct solve! 00723 result[level] = rhs[level]; 00724 boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size()); 00725 00726 copy (result[level],result_cpu); 00727 boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu); 00728 copy (result_cpu, result[level]); 00729 00730 #ifdef VIENNACL_AMG_DEBUG 00731 std::cout << "After direct solve: " << std::endl; 00732 printvector (result[level]); 00733 #endif 00734 00735 for (level=tag_.get_coarselevels()-1; level >= 0; level--) 00736 { 00737 #ifdef VIENNACL_AMG_DEBUG 00738 std::cout << "Coarse Error: " << std::endl; 00739 printvector(result[level+1]); 00740 #endif 00741 00742 // Interpolate error to fine level and correct solution. 00743 result[level] += viennacl::linalg::prod(P[level],result[level+1]); 00744 00745 #ifdef VIENNACL_AMG_DEBUG 00746 std::cout << "Corrected Result: " << std::endl; 00747 printvector (result[level]); 00748 #endif 00749 00750 // Apply Smoother postsmooth_ times. 
00751 smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]); 00752 00753 #ifdef VIENNACL_AMG_DEBUG 00754 std::cout << "After postsmooth: " << std::endl; 00755 printvector (result[level]); 00756 #endif 00757 } 00758 vec = result[0]; 00759 } 00760 00767 template <typename VectorType> 00768 void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const 00769 { 00770 VectorType old_result = x; 00771 00772 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context()); 00773 viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::init(ctx); 00774 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::program_name(), "jacobi"); 00775 00776 for (unsigned int i=0; i<iterations; ++i) 00777 { 00778 if (i > 0) 00779 old_result = x; 00780 x.clear(); 00781 viennacl::ocl::enqueue(k(A[level].handle1().opencl_handle(), A[level].handle2().opencl_handle(), A[level].handle().opencl_handle(), 00782 static_cast<ScalarType>(tag_.get_jacobiweight()), 00783 viennacl::traits::opencl_handle(old_result), 00784 viennacl::traits::opencl_handle(x), 00785 viennacl::traits::opencl_handle(rhs), 00786 static_cast<cl_uint>(rhs.size()))); 00787 00788 } 00789 } 00790 00791 amg_tag & tag() { return tag_; } 00792 }; 00793 00794 } 00795 } 00796 00797 00798 00799 #endif 00800