ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/detail/ilu/block_ilu.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
00002 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <vector>
00026 #include <cmath>
00027 #include "viennacl/forwards.h"
00028 #include "viennacl/tools/tools.hpp"
00029 #include "viennacl/linalg/detail/ilu/common.hpp"
00030 #include "viennacl/linalg/detail/ilu/ilu0.hpp"
00031 #include "viennacl/linalg/detail/ilu/ilut.hpp"
00032 
00033 #include <map>
00034 
00035 namespace viennacl
00036 {
00037   namespace linalg
00038   {
00039     namespace detail
00040     {
00042       template <typename VectorType, typename ValueType, typename SizeType = vcl_size_t>
00043       class ilu_vector_range
00044       {
00045         public:
00046           //typedef typename VectorType::value_type      value_type;
00047           //typedef typename VectorType::size_type       size_type;
00048 
00049           ilu_vector_range(VectorType & v,
00050                            SizeType start_index,
00051                            SizeType vec_size
00052                           ) : vec_(v), start_(start_index), size_(vec_size) {}
00053 
00054           ValueType & operator()(SizeType index)
00055           {
00056             assert(index < size_ && bool("Index out of bounds!"));
00057             return vec_[start_ + index];
00058           }
00059 
00060           ValueType & operator[](SizeType index)
00061           {
00062             assert(index < size_ && bool("Index out of bounds!"));
00063             return vec_[start_ + index];
00064           }
00065 
00066           SizeType size() const { return size_; }
00067 
00068         private:
00069           VectorType & vec_;
00070           SizeType start_;
00071           SizeType size_;
00072       };
00073 
00081       template <typename ScalarType>
00082       void extract_block_matrix(viennacl::compressed_matrix<ScalarType> const & A,
00083                                 viennacl::compressed_matrix<ScalarType> & diagonal_block_A,
00084                                 vcl_size_t start_index,
00085                                 vcl_size_t stop_index
00086                                 )
00087       {
00088 
00089         assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
00090         assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
00091         assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
00092 
00093         ScalarType   const * A_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(A.handle());
00094         unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
00095         unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
00096 
00097         ScalarType   * output_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(diagonal_block_A.handle());
00098         unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
00099         unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
00100 
00101         vcl_size_t output_counter = 0;
00102         for (vcl_size_t row = start_index; row < stop_index; ++row)
00103         {
00104           unsigned int buffer_col_start = A_row_buffer[row];
00105           unsigned int buffer_col_end   = A_row_buffer[row+1];
00106 
00107           output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);
00108 
00109           for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
00110           {
00111             unsigned int col = A_col_buffer[buf_index];
00112             if (col < start_index)
00113               continue;
00114 
00115             if (col >= static_cast<unsigned int>(stop_index))
00116               continue;
00117 
00118             output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
00119             output_elements[output_counter] = A_elements[buf_index];
00120             ++output_counter;
00121           }
00122           output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
00123         }
00124       }
00125 
00126 
00127     }
00128 
00134     template <typename MatrixType, typename ILUTag>
00135     class block_ilu_precond
00136     {
00137       typedef typename MatrixType::value_type      ScalarType;
00138 
00139       public:
00140         typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >    index_vector_type;   //the pair refers to index range [a, b) of each block
00141 
00142 
00143         block_ilu_precond(MatrixType const & mat,
00144                           ILUTag const & tag,
00145                           vcl_size_t num_blocks = 8
00146                          ) : tag_(tag), LU_blocks(num_blocks)
00147         {
00148 
00149           // Set up vector of block indices:
00150           block_indices_.resize(num_blocks);
00151           for (vcl_size_t i=0; i<num_blocks; ++i)
00152           {
00153             vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
00154             vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
00155 
00156             block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
00157           }
00158 
00159           //initialize preconditioner:
00160           //std::cout << "Start CPU precond" << std::endl;
00161           init(mat);
00162           //std::cout << "End CPU precond" << std::endl;
00163         }
00164 
00165         block_ilu_precond(MatrixType const & mat,
00166                           ILUTag const & tag,
00167                           index_vector_type const & block_boundaries
00168                          ) : tag_(tag), block_indices_(block_boundaries), LU_blocks(block_boundaries.size())
00169         {
00170           //initialize preconditioner:
00171           //std::cout << "Start CPU precond" << std::endl;
00172           init(mat);
00173           //std::cout << "End CPU precond" << std::endl;
00174         }
00175 
00176 
00177         template <typename VectorType>
00178         void apply(VectorType & vec) const
00179         {
00180           for (vcl_size_t i=0; i<block_indices_.size(); ++i)
00181           {
00182             detail::ilu_vector_range<VectorType, ScalarType>  vec_range(vec, block_indices_[i].first, LU_blocks[i].size2());
00183 
00184             unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
00185             unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
00186             ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
00187 
00188             viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
00189             viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
00190 
00191           }
00192         }
00193 
00194       private:
00195         void init(MatrixType const & A)
00196         {
00197           viennacl::context host_context(viennacl::MAIN_MEMORY);
00198           viennacl::compressed_matrix<ScalarType> mat(host_context);
00199 
00200           viennacl::copy(A, mat);
00201 
00202           unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
00203 
00204 #ifdef VIENNACL_WITH_OPENMP
00205           #pragma omp parallel for
00206 #endif
00207           for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
00208           {
00209             // Step 1: Extract blocks
00210             vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
00211             vcl_size_t block_nnz  = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
00212             viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
00213 
00214             detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
00215 
00216             // Step 2: Precondition blocks:
00217             viennacl::switch_memory_context(LU_blocks[i], host_context);
00218             preconditioner_dispatch(mat_block, LU_blocks[i], tag_);
00219           }
00220 
00221         }
00222 
00223         void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
00224                                      viennacl::compressed_matrix<ScalarType> & LU,
00225                                      viennacl::linalg::ilu0_tag)
00226         {
00227           LU = mat_block;
00228           viennacl::linalg::precondition(LU, tag_);
00229         }
00230 
00231         void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
00232                                      viennacl::compressed_matrix<ScalarType> & LU,
00233                                      viennacl::linalg::ilut_tag)
00234         {
00235           std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1());
00236 
00237           viennacl::linalg::precondition(mat_block, temp, tag_);
00238 
00239           viennacl::copy(temp, LU);
00240         }
00241 
00242         ILUTag const & tag_;
00243         index_vector_type block_indices_;
00244         std::vector< viennacl::compressed_matrix<ScalarType> > LU_blocks;
00245     };
00246 
00247 
00248 
00249 
00250 
00255     template <typename ScalarType, unsigned int MAT_ALIGNMENT, typename ILUTag>
00256     class block_ilu_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT>, ILUTag >
00257     {
00258         typedef compressed_matrix<ScalarType, MAT_ALIGNMENT>        MatrixType;
00259         //typedef std::vector<ScalarType>                             STLVectorType;
00260 
00261       public:
00262         typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >    index_vector_type;   //the pair refers to index range [a, b) of each block
00263 
00264 
00265         block_ilu_precond(MatrixType const & mat,
00266                           ILUTag const & tag,
00267                           vcl_size_t num_blocks = 8
00268                          ) : tag_(tag),
00269                              block_indices_(num_blocks),
00270                              gpu_block_indices(),
00271                              gpu_L_trans(0,0, viennacl::traits::context(mat)),
00272                              gpu_U_trans(0,0, viennacl::traits::context(mat)),
00273                              gpu_D(mat.size1(), viennacl::traits::context(mat)),
00274                              LU_blocks(num_blocks)
00275         {
00276           // Set up vector of block indices:
00277           block_indices_.resize(num_blocks);
00278           for (vcl_size_t i=0; i<num_blocks; ++i)
00279           {
00280             vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
00281             vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
00282 
00283             block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
00284           }
00285 
00286           //initialize preconditioner:
00287           //std::cout << "Start CPU precond" << std::endl;
00288           init(mat);
00289           //std::cout << "End CPU precond" << std::endl;
00290         }
00291 
00292         block_ilu_precond(MatrixType const & mat,
00293                           ILUTag const & tag,
00294                           index_vector_type const & block_boundaries
00295                          ) : tag_(tag),
00296                              block_indices_(block_boundaries),
00297                              gpu_block_indices(viennacl::traits::context(mat)),
00298                              gpu_L_trans(0,0,viennacl::traits::context(mat)),
00299                              gpu_U_trans(0,0,viennacl::traits::context(mat)),
00300                              gpu_D(0,viennacl::traits::context(mat)),
00301                              LU_blocks(block_boundaries.size())
00302         {
00303           //initialize preconditioner:
00304           //std::cout << "Start CPU precond" << std::endl;
00305           init(mat);
00306           //std::cout << "End CPU precond" << std::endl;
00307         }
00308 
00309 
00310         void apply(vector<ScalarType> & vec) const
00311         {
00312           viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans), gpu_block_indices, block_indices_.size(), gpu_D,
00313                                                         vec,
00314                                                         viennacl::linalg::unit_lower_tag());
00315 
00316           viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans), gpu_block_indices, block_indices_.size(), gpu_D,
00317                                                         vec,
00318                                                         viennacl::linalg::upper_tag());
00319 
00320           //apply_cpu(vec);
00321         }
00322 
00323 
00324       private:
00325 
00326         void init(MatrixType const & A)
00327         {
00328           viennacl::context host_context(viennacl::MAIN_MEMORY);
00329           viennacl::compressed_matrix<ScalarType> mat(host_context);
00330 
00331           mat = A;
00332 
00333           unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
00334 
00335 #ifdef VIENNACL_WITH_OPENMP
00336           #pragma omp parallel for
00337 #endif
00338           for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
00339           {
00340             // Step 1: Extract blocks
00341             vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
00342             vcl_size_t block_nnz  = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
00343             viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
00344 
00345             detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
00346 
00347             // Step 2: Precondition blocks:
00348             viennacl::switch_memory_context(LU_blocks[i], host_context);
00349             preconditioner_dispatch(mat_block, LU_blocks[i], tag_);
00350           }
00351 
00352           /*
00353            * copy resulting preconditioner back to GPU:
00354            */
00355 
00356           viennacl::switch_memory_context(gpu_L_trans, viennacl::traits::context(A));
00357           viennacl::switch_memory_context(gpu_U_trans, viennacl::traits::context(A));
00358           viennacl::switch_memory_context(gpu_D, viennacl::traits::context(A));
00359 
00360           viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices, 2 * block_indices_.size());
00361           for (vcl_size_t i=0; i<block_indices_.size(); ++i)
00362           {
00363             block_indices_uint.set(2*i, block_indices_[i].first);
00364             block_indices_uint.set(2*i + 1, block_indices_[i].second);
00365           }
00366 
00367           viennacl::backend::memory_create(gpu_block_indices, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());
00368 
00369           blocks_to_device(mat.size1());
00370 
00371         }
00372 
00373         // Copy computed preconditioned blocks to OpenCL device
00374         void blocks_to_device(vcl_size_t matrix_size)
00375         {
00376           std::vector< std::map<unsigned int, ScalarType> > L_transposed(matrix_size);
00377           std::vector< std::map<unsigned int, ScalarType> > U_transposed(matrix_size);
00378           std::vector<ScalarType> entries_D(matrix_size);
00379 
00380           //
00381           // Transpose individual blocks into a single large matrix:
00382           //
00383           for (vcl_size_t block_index = 0; block_index < LU_blocks.size(); ++block_index)
00384           {
00385             MatrixType const & current_block = LU_blocks[block_index];
00386 
00387             unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle1());
00388             unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle2());
00389             ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(current_block.handle());
00390 
00391             vcl_size_t block_start = block_indices_[block_index].first;
00392 
00393             //transpose L and U:
00394             for (vcl_size_t row = 0; row < current_block.size1(); ++row)
00395             {
00396               unsigned int buffer_col_start = row_buffer[row];
00397               unsigned int buffer_col_end   = row_buffer[row+1];
00398 
00399               for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
00400               {
00401                 unsigned int col = col_buffer[buf_index];
00402 
00403                 if (row > col) //entry for L
00404                   L_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
00405                 else if (row == col)
00406                   entries_D[row + block_start] = elements[buf_index];
00407                 else //entry for U
00408                   U_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
00409               }
00410             }
00411           }
00412 
00413           //
00414           // Move data to GPU:
00415           //
00416           tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_L_transposed(L_transposed, matrix_size, matrix_size);
00417           tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_U_transposed(U_transposed, matrix_size, matrix_size);
00418           viennacl::copy(adapted_L_transposed, gpu_L_trans);
00419           viennacl::copy(adapted_U_transposed, gpu_U_trans);
00420           viennacl::copy(entries_D, gpu_D);
00421         }
00422 
00423         void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
00424                                      viennacl::compressed_matrix<ScalarType> & LU,
00425                                      viennacl::linalg::ilu0_tag)
00426         {
00427           LU = mat_block;
00428           viennacl::linalg::precondition(LU, tag_);
00429         }
00430 
00431         void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
00432                                      viennacl::compressed_matrix<ScalarType> & LU,
00433                                      viennacl::linalg::ilut_tag)
00434         {
00435           std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1());
00436 
00437           viennacl::linalg::precondition(mat_block, temp, tag_);
00438 
00439           viennacl::copy(temp, LU);
00440         }
00441 
00442 
00443         ILUTag const & tag_;
00444         index_vector_type block_indices_;
00445         viennacl::backend::mem_handle gpu_block_indices;
00446         viennacl::compressed_matrix<ScalarType> gpu_L_trans;
00447         viennacl::compressed_matrix<ScalarType> gpu_U_trans;
00448         viennacl::vector<ScalarType> gpu_D;
00449 
00450         std::vector< MatrixType > LU_blocks;
00451     };
00452 
00453 
00454   }
00455 }
00456 
00457 
00458 
00459 
00460 #endif
00461 
00462 
00463