ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/amg.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_AMG_HPP_
00002 #define VIENNACL_LINALG_AMG_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00027 #include <boost/numeric/ublas/matrix.hpp>
00028 #include <boost/numeric/ublas/lu.hpp>
00029 #include <boost/numeric/ublas/operation.hpp>
00030 #include <boost/numeric/ublas/vector_proxy.hpp>
00031 #include <boost/numeric/ublas/matrix_proxy.hpp>
00032 #include <boost/numeric/ublas/vector.hpp>
00033 #include <boost/numeric/ublas/triangular.hpp>
00034 #include <vector>
00035 #include <cmath>
00036 #include "viennacl/forwards.h"
00037 #include "viennacl/tools/tools.hpp"
00038 #include "viennacl/linalg/prod.hpp"
00039 #include "viennacl/linalg/direct_solve.hpp"
00040 
00041 #include "viennacl/linalg/detail/amg/amg_base.hpp"
00042 #include "viennacl/linalg/detail/amg/amg_coarse.hpp"
00043 #include "viennacl/linalg/detail/amg/amg_interpol.hpp"
00044 
00045 #include <map>
00046 
00047 #ifdef VIENNACL_WITH_OPENMP
00048  #include <omp.h>
00049 #endif
00050 
00051 #include "viennacl/linalg/detail/amg/amg_debug.hpp"
00052 
00053 #define VIENNACL_AMG_COARSE_LIMIT 50
00054 #define VIENNACL_AMG_MAX_LEVELS 100
00055 
00056 namespace viennacl
00057 {
00058   namespace linalg
00059   {
00060     typedef detail::amg::amg_tag          amg_tag;
00061 
00062 
00063 
00071     template <typename InternalType1, typename InternalType2>
00072     void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00073     {
00074       typedef typename InternalType2::value_type PointVectorType;
00075 
00076       unsigned int i, iterations, c_points, f_points;
00077       detail::amg::amg_slicing<InternalType1,InternalType2> Slicing;
00078 
00079       // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process.
00080       iterations = tag.get_coarselevels();
00081       if (iterations == 0)
00082         iterations = VIENNACL_AMG_MAX_LEVELS;
00083 
00084       // For parallel coarsenings build data structures (number of threads set automatically).
00085       if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3)
00086         Slicing.init(iterations);
00087 
00088       for (i=0; i<iterations; ++i)
00089       {
00090         // Initialize Pointvector on level i and construct points.
00091         Pointvector[i] = PointVectorType(static_cast<unsigned int>(A[i].size1()));
00092         Pointvector[i].init_points();
00093 
00094         // Construct C and F points on coarse level (i is fine level, i+1 coarse level).
00095         detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag);
00096 
00097         // Calculate number of C and F points on level i.
00098         c_points = Pointvector[i].get_cpoints();
00099         f_points = Pointvector[i].get_fpoints();
00100 
00101         #if defined (VIENNACL_AMG_DEBUG) //or defined(VIENNACL_AMG_DEBUGBENCH)
00102         std::cout << "Level " << i << ": ";
00103         std::cout << "No of C points = " << c_points << ", ";
00104         std::cout << "No of F points = " << f_points << std::endl;
00105         #endif
00106 
00107         // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i.
00108         if (c_points == 0 || f_points == 0)
00109           break;
00110 
00111         // Construct interpolation matrix for level i.
00112         detail::amg::amg_interpol (i, A, P, Pointvector, tag);
00113 
00114         // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
00115         detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);
00116 
00117         // Test triple matrix product. Very slow for large matrix sizes (ublas).
00118         // test_triplematprod(A[i],P[i],A[i+1]);
00119 
00120         Pointvector[i].delete_points();
00121 
00122         #ifdef VIENNACL_AMG_DEBUG
00123         std::cout << "Coarse Grid Operator Matrix:" << std::endl;
00124         printmatrix (A[i+1]);
00125         #endif
00126 
00127         // If Limit of coarse points is reached then stop. Coarsest level is level i+1.
00128         if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
00129         {
00130           tag.set_coarselevels(i+1);
00131           return;
00132         }
00133       }
00134       tag.set_coarselevels(i);
00135     }
00136 
00145     template <typename MatrixType, typename InternalType1, typename InternalType2>
00146     void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00147     {
00148       //typedef typename MatrixType::value_type ScalarType;
00149       typedef typename InternalType1::value_type SparseMatrixType;
00150 
00151       if (tag.get_coarselevels() > 0)
00152       {
00153         A.resize(tag.get_coarselevels()+1);
00154         P.resize(tag.get_coarselevels());
00155         Pointvector.resize(tag.get_coarselevels());
00156       }
00157       else
00158       {
00159         A.resize(VIENNACL_AMG_MAX_LEVELS+1);
00160         P.resize(VIENNACL_AMG_MAX_LEVELS);
00161         Pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
00162       }
00163 
00164       // Insert operator matrix as operator for finest level.
00165       SparseMatrixType A0 (mat);
00166       A.insert_element (0, A0);
00167     }
00168 
00178     template <typename InternalType1, typename InternalType2>
00179     void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00180     {
00181       //typedef typename InternalType1::value_type MatrixType;
00182 
00183       // Resize internal data structures to actual size.
00184       A.resize(tag.get_coarselevels()+1);
00185       P.resize(tag.get_coarselevels());
00186       R.resize(tag.get_coarselevels());
00187 
00188       // Transform into matrix type.
00189       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00190       {
00191         A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00192         A[i] = A_setup[i];
00193       }
00194       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00195       {
00196         P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00197         P[i] = P_setup[i];
00198       }
00199       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00200       {
00201         R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00202         P_setup[i].set_trans(true);
00203         R[i] = P_setup[i];
00204         P_setup[i].set_trans(false);
00205       }
00206     }
00207 
00218     template <typename InternalType1, typename InternalType2>
00219     void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag, viennacl::context ctx)
00220     {
00221       // Resize internal data structures to actual size.
00222       A.resize(tag.get_coarselevels()+1);
00223       P.resize(tag.get_coarselevels());
00224       R.resize(tag.get_coarselevels());
00225 
00226       // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>.
00227       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00228       {
00229         viennacl::switch_memory_context(A[i], ctx);
00230         //A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00231         viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
00232       }
00233       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00234       {
00235         viennacl::switch_memory_context(P[i], ctx);
00236         //P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00237         viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]);
00238         //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]);
00239       }
00240       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00241       {
00242         viennacl::switch_memory_context(R[i], ctx);
00243         //R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00244         P_setup[i].set_trans(true);
00245         viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]);
00246         P_setup[i].set_trans(false);
00247       }
00248     }
00249 
00258     template <typename InternalVectorType, typename SparseMatrixType>
00259     void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag)
00260     {
00261       typedef typename InternalVectorType::value_type VectorType;
00262 
00263       result.resize(tag.get_coarselevels()+1);
00264       rhs.resize(tag.get_coarselevels()+1);
00265       residual.resize(tag.get_coarselevels());
00266 
00267       for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
00268       {
00269         result[level] = VectorType(A[level].size1());
00270         result[level].clear();
00271         rhs[level] = VectorType(A[level].size1());
00272         rhs[level].clear();
00273       }
00274       for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
00275       {
00276         residual[level] = VectorType(A[level].size1());
00277         residual[level].clear();
00278       }
00279     }
00280 
00281 
00291     template <typename InternalVectorType, typename SparseMatrixType>
00292     void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag, viennacl::context ctx)
00293     {
00294       typedef typename InternalVectorType::value_type VectorType;
00295 
00296       result.resize(tag.get_coarselevels()+1);
00297       rhs.resize(tag.get_coarselevels()+1);
00298       residual.resize(tag.get_coarselevels());
00299 
00300       for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
00301       {
00302         result[level] = VectorType(A[level].size1(), ctx);
00303         rhs[level] = VectorType(A[level].size1(), ctx);
00304       }
00305       for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
00306       {
00307         residual[level] = VectorType(A[level].size1(), ctx);
00308       }
00309     }
00310 
00311 
00319     template <typename ScalarType, typename SparseMatrixType>
00320     void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<> & Permutation, SparseMatrixType const & A)
00321     {
00322       typedef typename SparseMatrixType::const_iterator1 ConstRowIterator;
00323       typedef typename SparseMatrixType::const_iterator2 ConstColIterator;
00324 
00325       // Copy to operator matrix. Needed
00326       op.resize(A.size1(),A.size2(),false);
00327       for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
00328         for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00329           op (col_iter.index1(), col_iter.index2()) = *col_iter;
00330 
00331       // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()!
00332       Permutation = boost::numeric::ublas::permutation_matrix<> (op.size1());
00333       boost::numeric::ublas::lu_factorize(op,Permutation);
00334     }
00335 
00338     template <typename MatrixType>
00339     class amg_precond
00340     {
00341       typedef typename MatrixType::value_type ScalarType;
00342       typedef boost::numeric::ublas::vector<ScalarType> VectorType;
00343       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00344       typedef detail::amg::amg_pointvector PointVectorType;
00345 
00346       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00347       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00348       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00349       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00350 
00351       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00352       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00353       boost::numeric::ublas::vector <MatrixType> A;
00354       boost::numeric::ublas::vector <MatrixType> P;
00355       boost::numeric::ublas::vector <MatrixType> R;
00356       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00357 
00358       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00359       mutable boost::numeric::ublas::permutation_matrix<> Permutation;
00360 
00361       mutable boost::numeric::ublas::vector <VectorType> result;
00362       mutable boost::numeric::ublas::vector <VectorType> rhs;
00363       mutable boost::numeric::ublas::vector <VectorType> residual;
00364 
00365       mutable bool done_init_apply;
00366 
00367       amg_tag tag_;
00368     public:
00369 
00370       amg_precond(): Permutation(0) {}
00376       amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0)
00377       {
00378         tag_ = tag;
00379         // Initialize data structures.
00380         amg_init (mat,A_setup,P_setup,Pointvector,tag_);
00381 
00382         done_init_apply = false;
00383       }
00384 
00387       void setup()
00388       {
00389         // Start setup phase.
00390         amg_setup(A_setup,P_setup,Pointvector,tag_);
00391         // Transform to CPU-Matrixtype for precondition phase.
00392         amg_transform_cpu(A,P,R,A_setup,P_setup,tag_);
00393 
00394         done_init_apply = false;
00395       }
00396 
00401       void init_apply() const
00402       {
00403         // Setup precondition phase (Data structures).
00404         amg_setup_apply(result,rhs,residual,A_setup,tag_);
00405         // Do LU factorization for direct solve.
00406         amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]);
00407 
00408         done_init_apply = true;
00409       }
00410 
00416       template <typename VectorType>
00417       ScalarType calc_complexity(VectorType & avgstencil)
00418       {
00419         avgstencil = VectorType (tag_.get_coarselevels()+1);
00420         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00421 
00422         for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
00423         {
00424           level_coefficients = 0;
00425           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00426           {
00427             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00428             {
00429               if (level == 0)
00430                 systemmat_nonzero++;
00431               nonzero++;
00432               level_coefficients++;
00433             }
00434           }
00435           avgstencil[level] = level_coefficients/static_cast<ScalarType>(A_setup[level].size1());
00436         }
00437         return nonzero/static_cast<ScalarType>(systemmat_nonzero);
00438       }
00439 
00444       template <typename VectorType>
00445       void apply(VectorType & vec) const
00446       {
00447         // Build data structures and do lu factorization before first iteration step.
00448         if (!done_init_apply)
00449           init_apply();
00450 
00451         int level;
00452 
00453         // Precondition operation (Yang, p.3)
00454         rhs[0] = vec;
00455         for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++)
00456         {
00457           result[level].clear();
00458 
00459           // Apply Smoother presmooth_ times.
00460           smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]);
00461 
00462           #ifdef VIENNACL_AMG_DEBUG
00463           std::cout << "After presmooth:" << std::endl;
00464           printvector(result[level]);
00465           #endif
00466 
00467           // Compute residual.
00468           residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]);
00469 
00470           #ifdef VIENNACL_AMG_DEBUG
00471           std::cout << "Residual:" << std::endl;
00472           printvector(residual[level]);
00473           #endif
00474 
00475           // Restrict to coarse level. Restricted residual is RHS of coarse level.
00476           rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]);
00477 
00478           #ifdef VIENNACL_AMG_DEBUG
00479           std::cout << "Restricted Residual: " << std::endl;
00480           printvector(rhs[level+1]);
00481           #endif
00482         }
00483 
00484         // On highest level use direct solve to solve equation.
00485         result[level] = rhs[level];
00486         boost::numeric::ublas::lu_substitute(op,Permutation,result[level]);
00487 
00488         #ifdef VIENNACL_AMG_DEBUG
00489         std::cout << "After direct solve: " << std::endl;
00490         printvector (result[level]);
00491         #endif
00492 
00493         for (level=tag_.get_coarselevels()-1; level >= 0; level--)
00494         {
00495           #ifdef VIENNACL_AMG_DEBUG
00496           std::cout << "Coarse Error: " << std::endl;
00497           printvector(result[level+1]);
00498           #endif
00499 
00500           // Interpolate error to fine level. Correct solution by adding error.
00501           result[level] += boost::numeric::ublas::prod (P[level], result[level+1]);
00502 
00503           #ifdef VIENNACL_AMG_DEBUG
00504           std::cout << "Corrected Result: " << std::endl;
00505           printvector (result[level]);
00506           #endif
00507 
00508           // Apply Smoother postsmooth_ times.
00509           smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]);
00510 
00511           #ifdef VIENNACL_AMG_DEBUG
00512           std::cout << "After postsmooth: " << std::endl;
00513           printvector (result[level]);
00514           #endif
00515         }
00516         vec = result[0];
00517       }
00518 
00525       template <typename VectorType>
00526       void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const
00527       {
00528         VectorType old_result (x.size());
00529         long index;
00530         ScalarType sum = 0, diag = 1;
00531 
00532         for (int i=0; i<iterations; ++i)
00533         {
00534           old_result = x;
00535           x.clear();
00536 #ifdef VIENNACL_WITH_OPENMP
00537           #pragma omp parallel for private (sum,diag) shared (rhs,x)
00538 #endif
00539           for (index=0; index < static_cast<long>(A_setup[level].size1()); ++index)
00540           {
00541             InternalConstRowIterator row_iter = A_setup[level].begin1();
00542             row_iter += index;
00543             sum = 0;
00544             diag = 1;
00545             for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00546             {
00547               if (col_iter.index1() == col_iter.index2())
00548                 diag = *col_iter;
00549               else
00550                 sum += *col_iter * old_result[col_iter.index2()];
00551             }
00552             x[index]= static_cast<ScalarType>(tag_.get_jacobiweight()) * (rhs[index] - sum) / diag + (1-static_cast<ScalarType>(tag_.get_jacobiweight())) * old_result[index];
00553           }
00554         }
00555       }
00556 
00557       amg_tag & tag() { return tag_; }
00558     };
00559 
00564     template <typename ScalarType, unsigned int MAT_ALIGNMENT>
00565     class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> >
00566     {
00567       typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
00568       typedef viennacl::vector<ScalarType> VectorType;
00569       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00570       typedef detail::amg::amg_pointvector PointVectorType;
00571 
00572       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00573       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00574       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00575       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00576 
00577       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00578       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00579       boost::numeric::ublas::vector <MatrixType> A;
00580       boost::numeric::ublas::vector <MatrixType> P;
00581       boost::numeric::ublas::vector <MatrixType> R;
00582       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00583 
00584       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00585       mutable boost::numeric::ublas::permutation_matrix<> Permutation;
00586 
00587       mutable boost::numeric::ublas::vector <VectorType> result;
00588       mutable boost::numeric::ublas::vector <VectorType> rhs;
00589       mutable boost::numeric::ublas::vector <VectorType> residual;
00590 
00591       viennacl::context ctx_;
00592 
00593       mutable bool done_init_apply;
00594 
00595       amg_tag tag_;
00596 
00597     public:
00598 
00599       amg_precond(): Permutation(0) {}
00600 
00606       amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0), ctx_(viennacl::traits::context(mat))
00607       {
00608         tag_ = tag;
00609 
00610         // Copy to CPU. Internal structure of sparse matrix is used for copy operation.
00611         std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1());
00612         viennacl::copy(mat, mat2);
00613 
00614         // Initialize data structures.
00615         amg_init (mat2,A_setup,P_setup,Pointvector,tag_);
00616 
00617         done_init_apply = false;
00618       }
00619 
00622       void setup()
00623       {
00624         // Start setup phase.
00625         amg_setup(A_setup,P_setup,Pointvector, tag_);
00626         // Transform to GPU-Matrixtype for precondition phase.
00627         amg_transform_gpu(A,P,R,A_setup,P_setup, tag_, ctx_);
00628 
00629         done_init_apply = false;
00630       }
00631 
00636       void init_apply() const
00637       {
00638         // Setup precondition phase (Data structures).
00639         amg_setup_apply(result,rhs,residual,A_setup,tag_, ctx_);
00640         // Do LU factorization for direct solve.
00641         amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]);
00642 
00643         done_init_apply = true;
00644       }
00645 
00651       template <typename VectorType>
00652       ScalarType calc_complexity(VectorType & avgstencil)
00653       {
00654         avgstencil = VectorType (tag_.get_coarselevels()+1);
00655         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00656 
00657         for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
00658         {
00659           level_coefficients = 0;
00660           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00661           {
00662             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00663             {
00664               if (level == 0)
00665                 systemmat_nonzero++;
00666               nonzero++;
00667               level_coefficients++;
00668             }
00669           }
00670           avgstencil[level] = level_coefficients/(double)A[level].size1();
00671         }
00672         return nonzero/static_cast<double>(systemmat_nonzero);
00673       }
00674 
00679       template <typename VectorType>
00680       void apply(VectorType & vec) const
00681       {
00682         if (!done_init_apply)
00683           init_apply();
00684 
00685         int level;
00686 
00687         // Precondition operation (Yang, p.3).
00688         rhs[0] = vec;
00689         for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++)
00690         {
00691           result[level].clear();
00692 
00693           // Apply Smoother presmooth_ times.
00694           smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]);
00695 
00696           #ifdef VIENNACL_AMG_DEBUG
00697           std::cout << "After presmooth: " << std::endl;
00698           printvector(result[level]);
00699           #endif
00700 
00701           // Compute residual.
00702           //residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]);
00703           residual[level] = viennacl::linalg::prod (A[level],result[level]);
00704           residual[level] = rhs[level] - residual[level];
00705 
00706           #ifdef VIENNACL_AMG_DEBUG
00707           std::cout << "Residual: " << std::endl;
00708           printvector(residual[level]);
00709           #endif
00710 
00711           // Restrict to coarse level. Result is RHS of coarse level equation.
00712           //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
00713           rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]);
00714 
00715           #ifdef VIENNACL_AMG_DEBUG
00716           std::cout << "Restricted Residual: " << std::endl;
00717           printvector(rhs[level+1]);
00718           #endif
00719         }
00720 
00721         // On highest level use direct solve to solve equation (on the CPU)
00722         //TODO: Use GPU direct solve!
00723         result[level] = rhs[level];
00724         boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size());
00725 
00726         copy (result[level],result_cpu);
00727         boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu);
00728         copy (result_cpu, result[level]);
00729 
00730         #ifdef VIENNACL_AMG_DEBUG
00731         std::cout << "After direct solve: " << std::endl;
00732         printvector (result[level]);
00733         #endif
00734 
00735         for (level=tag_.get_coarselevels()-1; level >= 0; level--)
00736         {
00737           #ifdef VIENNACL_AMG_DEBUG
00738           std::cout << "Coarse Error: " << std::endl;
00739           printvector(result[level+1]);
00740           #endif
00741 
00742           // Interpolate error to fine level and correct solution.
00743           result[level] += viennacl::linalg::prod(P[level],result[level+1]);
00744 
00745           #ifdef VIENNACL_AMG_DEBUG
00746           std::cout << "Corrected Result: " << std::endl;
00747           printvector (result[level]);
00748           #endif
00749 
00750           // Apply Smoother postsmooth_ times.
00751           smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]);
00752 
00753           #ifdef VIENNACL_AMG_DEBUG
00754           std::cout << "After postsmooth: " << std::endl;
00755           printvector (result[level]);
00756           #endif
00757         }
00758         vec = result[0];
00759       }
00760 
00767       template <typename VectorType>
00768       void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const
00769       {
00770         VectorType old_result = x;
00771 
00772         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());
00773         viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::init(ctx);
00774         viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::program_name(), "jacobi");
00775 
00776         for (unsigned int i=0; i<iterations; ++i)
00777         {
00778           if (i > 0)
00779             old_result = x;
00780           x.clear();
00781           viennacl::ocl::enqueue(k(A[level].handle1().opencl_handle(), A[level].handle2().opencl_handle(), A[level].handle().opencl_handle(),
00782                                   static_cast<ScalarType>(tag_.get_jacobiweight()),
00783                                   viennacl::traits::opencl_handle(old_result),
00784                                   viennacl::traits::opencl_handle(x),
00785                                   viennacl::traits::opencl_handle(rhs),
00786                                   static_cast<cl_uint>(rhs.size())));
00787 
00788         }
00789       }
00790 
00791       amg_tag & tag() { return tag_; }
00792     };
00793 
00794   }
00795 }
00796 
00797 
00798 
00799 #endif
00800