ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/gmres.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_GMRES_HPP_
00002 #define VIENNACL_GMRES_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <vector>
00026 #include <cmath>
00027 #include <limits>
00028 #include "viennacl/forwards.h"
00029 #include "viennacl/tools/tools.hpp"
00030 #include "viennacl/linalg/norm_2.hpp"
00031 #include "viennacl/linalg/prod.hpp"
00032 #include "viennacl/linalg/inner_prod.hpp"
00033 #include "viennacl/traits/clear.hpp"
00034 #include "viennacl/traits/size.hpp"
00035 #include "viennacl/meta/result_of.hpp"
00036 
00037 namespace viennacl
00038 {
00039   namespace linalg
00040   {
00041 
00044     class gmres_tag       //generalized minimum residual
00045     {
00046       public:
00053         gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
00054          : tol_(tol), iterations_(max_iterations), krylov_dim_(krylov_dim), iters_taken_(0) {}
00055 
00057         double tolerance() const { return tol_; }
00059         unsigned int max_iterations() const { return iterations_; }
00061         unsigned int krylov_dim() const { return krylov_dim_; }
00063         unsigned int max_restarts() const
00064         {
00065           unsigned int ret = iterations_ / krylov_dim_;
00066           if (ret > 0 && (ret * krylov_dim_ == iterations_) )
00067             return ret - 1;
00068           return ret;
00069         }
00070 
00072         unsigned int iters() const { return iters_taken_; }
00074         void iters(unsigned int i) const { iters_taken_ = i; }
00075 
00077         double error() const { return last_error_; }
00079         void error(double e) const { last_error_ = e; }
00080 
00081       private:
00082         double tol_;
00083         unsigned int iterations_;
00084         unsigned int krylov_dim_;
00085 
00086         //return values from solver
00087         mutable unsigned int iters_taken_;
00088         mutable double last_error_;
00089     };
00090 
00091     namespace detail
00092     {
00093 
00094       template <typename SRC_VECTOR, typename DEST_VECTOR>
00095       void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0)
00096       {
00097         for (vcl_size_t i=0; i<len; ++i)
00098           dest[start+i] = src[start+i];
00099       }
00100 
00101       template <typename ScalarType, typename DEST_VECTOR>
00102       void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0)
00103       {
00104         typedef typename viennacl::vector<ScalarType>::difference_type   difference_type;
00105         viennacl::copy( src.begin() + static_cast<difference_type>(start),
00106                         src.begin() + static_cast<difference_type>(start + len),
00107                        dest.begin() + static_cast<difference_type>(start));
00108       }
00109 
00118       template <typename VectorType, typename ScalarType>
00119       void gmres_setup_householder_vector(VectorType const & input_vec, VectorType & hh_vec, ScalarType & beta, ScalarType & mu, vcl_size_t j)
00120       {
00121         ScalarType input_j = input_vec(j);
00122 
00123         // copy entries from input vector to householder vector:
00124         detail::gmres_copy_helper(input_vec, hh_vec, viennacl::traits::size(hh_vec) - (j+1), j+1);
00125 
00126         ScalarType sigma = viennacl::linalg::norm_2(hh_vec);
00127         sigma *= sigma;
00128 
00129         if (sigma == 0)
00130         {
00131           beta = 0;
00132           mu = input_j;
00133         }
00134         else
00135         {
00136           mu = std::sqrt(sigma + input_j*input_j);
00137 
00138           ScalarType hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
00139 
00140           beta = ScalarType(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
00141 
00142           //divide hh_vec by its diagonal element hh_vec_0
00143           hh_vec /= hh_vec_0;
00144           hh_vec[j] = ScalarType(1);
00145         }
00146       }
00147 
00148       // Apply (I - beta h h^T) to x (Householder reflection with Householder vector h)
00149       template <typename VectorType, typename ScalarType>
00150       void gmres_householder_reflect(VectorType & x, VectorType const & h, ScalarType beta)
00151       {
00152         ScalarType hT_in_x = viennacl::linalg::inner_prod(h, x);
00153         x -= (beta * hT_in_x) * h;
00154       }
00155 
00156     }
00157 
00168     template <typename MatrixType, typename VectorType, typename PreconditionerType>
00169     VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
00170     {
00171       typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
00172       typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00173       unsigned int problem_size = static_cast<unsigned int>(viennacl::traits::size(rhs));
00174       VectorType result = rhs;
00175       viennacl::traits::clear(result);
00176 
00177       unsigned int krylov_dim = tag.krylov_dim();
00178       if (problem_size < tag.krylov_dim())
00179         krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
00180 
00181       VectorType res = rhs;
00182       VectorType v_k_tilde = rhs;
00183       VectorType v_k_tilde_temp = rhs;
00184 
00185       std::vector< std::vector<CPU_ScalarType> > R(krylov_dim, std::vector<CPU_ScalarType>(tag.krylov_dim()));
00186       std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
00187 
00188       std::vector<VectorType>      householder_reflectors(krylov_dim, rhs);
00189       std::vector<CPU_ScalarType>  betas(krylov_dim);
00190 
00191       CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
00192 
00193       if (norm_rhs == 0) //solution is zero if RHS norm is zero
00194         return result;
00195 
00196       tag.iters(0);
00197 
00198       for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
00199       {
00200         //
00201         // (Re-)Initialize residual: r = b - A*x (without temporary for the result of A*x)
00202         //
00203         res = rhs;
00204         res -= viennacl::linalg::prod(matrix, result);  //initial guess zero
00205         precond.apply(res);
00206 
00207         CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res);
00208 
00209         //
00210         // Check for premature convergence
00211         //
00212         if (rho_0 / norm_rhs < tag.tolerance() ) // norm_rhs is known to be nonzero here
00213         {
00214           tag.error(rho_0 / norm_rhs);
00215           return result;
00216         }
00217 
00218         //
00219         // Normalize residual and set 'rho' to 1 as requested in 'A Simpler GMRES' by Walker and Zhou.
00220         //
00221         res /= rho_0;
00222         CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
00223 
00224 
00225         //
00226         // Iterate up until maximal Krylove space dimension is reached:
00227         //
00228         unsigned int k = 0;
00229         for (k = 0; k < krylov_dim; ++k)
00230         {
00231           tag.iters( tag.iters() + 1 ); //increase iteration counter
00232 
00233           // prepare storage:
00234           viennacl::traits::clear(R[k]);
00235           viennacl::traits::clear(householder_reflectors[k]);
00236 
00237           //compute v_k = A * v_{k-1} via Householder matrices
00238           if (k == 0)
00239           {
00240             v_k_tilde = viennacl::linalg::prod(matrix, res);
00241             precond.apply(v_k_tilde);
00242           }
00243           else
00244           {
00245             viennacl::traits::clear(v_k_tilde);
00246             v_k_tilde[k-1] = CPU_ScalarType(1);
00247 
00248             //Householder rotations, part 1: Compute P_1 * P_2 * ... * P_{k-1} * e_{k-1}
00249             for (int i = k-1; i > -1; --i)
00250               detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
00251 
00252             v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
00253             precond.apply(v_k_tilde_temp);
00254             v_k_tilde = v_k_tilde_temp;
00255 
00256             //Householder rotations, part 2: Compute P_{k-1} * ... * P_{1} * v_k_tilde
00257             for (unsigned int i = 0; i < k; ++i)
00258               detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
00259           }
00260 
00261           //
00262           // Compute Householder reflection for v_k_tilde such that all entries below k-th entry are zero:
00263           //
00264           CPU_ScalarType rho_k_k = 0;
00265           detail::gmres_setup_householder_vector(v_k_tilde, householder_reflectors[k], betas[k], rho_k_k, k);
00266 
00267           //
00268           // copy first k entries from v_k_tilde to R[k] in order to fill k-th column with result of
00269           // P_k * v_k_tilde = (v[0], ... , v[k-1], norm(v), 0, 0, ...) =: (rho_{1,k}, rho_{2,k}, ..., rho_{k,k}, 0, ..., 0);
00270           //
00271           detail::gmres_copy_helper(v_k_tilde, R[k], k);
00272           R[k][k] = rho_k_k;
00273 
00274           //
00275           // Update residual: r = P_k r
00276           // Set zeta_k = r[k] including machine precision considerations: mathematically we have |r[k]| <= rho
00277           // Set rho *= sin(acos(r[k] / rho))
00278           //
00279           detail::gmres_householder_reflect(res, householder_reflectors[k], betas[k]);
00280 
00281           if (res[k] > rho) //machine precision reached
00282             res[k] = rho;
00283           if (res[k] < -rho) //machine precision reached
00284             res[k] = -rho;
00285           projection_rhs[k] = res[k];
00286 
00287           rho *= std::sin( std::acos(projection_rhs[k] / rho) );
00288 
00289           if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())  // Residual is sufficiently reduced, stop here
00290           {
00291             tag.error( std::fabs(rho*rho_0 / norm_rhs) );
00292             ++k;
00293             break;
00294           }
00295         } // for k
00296 
00297         //
00298         // Triangular solver stage:
00299         //
00300 
00301         for (int i=k-1; i>-1; --i)
00302         {
00303           for (unsigned int j=i+1; j<k; ++j)
00304             projection_rhs[i] -= R[j][i] * projection_rhs[j];     //R is transposed
00305 
00306           projection_rhs[i] /= R[i][i];
00307         }
00308 
00309         //
00310         // Note: 'projection_rhs' now holds the solution (eta_1, ..., eta_k)
00311         //
00312 
00313         res *= projection_rhs[0];
00314 
00315         if (k > 0)
00316         {
00317           for (unsigned int i = 0; i < k-1; ++i)
00318             res[i] += projection_rhs[i+1];
00319         }
00320 
00321         //
00322         // Form z inplace in 'res' by applying P_1 * ... * P_{k}
00323         //
00324         for (int i=k-1; i>=0; --i)
00325           detail::gmres_householder_reflect(res, householder_reflectors[i], betas[i]);
00326 
00327         res *= rho_0;
00328         result += res;  // x += rho_0 * z    in the paper
00329 
00330         //
00331         // Check for convergence:
00332         //
00333         tag.error(std::fabs(rho*rho_0 / norm_rhs));
00334         if ( tag.error() < tag.tolerance() )
00335           return result;
00336       }
00337 
00338       return result;
00339     }
00340 
00343     template <typename MatrixType, typename VectorType>
00344     VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
00345     {
00346       return solve(matrix, rhs, tag, no_precond());
00347     }
00348 
00349 
00350   }
00351 }
00352 
00353 #endif