ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/lu.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_LU_HPP
00002 #define VIENNACL_LINALG_LU_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <algorithm>    //for std::min
00026 
00027 #include "viennacl/matrix.hpp"
00028 #include "viennacl/matrix_proxy.hpp"
00029 
00030 #include "viennacl/linalg/prod.hpp"
00031 #include "viennacl/linalg/direct_solve.hpp"
00032 
00033 namespace viennacl
00034 {
00035   namespace linalg
00036   {
00041     template<typename SCALARTYPE>
00042     void lu_factorize(matrix<SCALARTYPE, viennacl::row_major> & A)
00043     {
00044       typedef matrix<SCALARTYPE, viennacl::row_major>  MatrixType;
00045 
00046       vcl_size_t max_block_size = 32;
00047       vcl_size_t num_blocks = (A.size2() - 1) / max_block_size + 1;
00048       std::vector<SCALARTYPE> temp_buffer(A.internal_size2() * max_block_size);
00049 
00050       // Iterate over panels
00051       for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id)
00052       {
00053         vcl_size_t row_start = panel_id * max_block_size;
00054         vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - row_start, max_block_size);
00055 
00056         viennacl::range     block_range(row_start, row_start + current_block_size);
00057         viennacl::range remainder_range(row_start + current_block_size, A.size1());
00058 
00059         //
00060         // Perform LU factorization on panel:
00061         //
00062 
00063 
00064         // Read from matrix to buffer:
00065         viennacl::backend::memory_read(A.handle(),
00066                                        sizeof(SCALARTYPE) * row_start          * A.internal_size2(),
00067                                        sizeof(SCALARTYPE) * current_block_size * A.internal_size2(),
00068                                        &(temp_buffer[0]));
00069 
00070         // Factorize (kij-version):
00071         for (vcl_size_t k=0; k < current_block_size - 1; ++k)
00072         {
00073           for (vcl_size_t i=k+1; i < current_block_size; ++i)
00074           {
00075             temp_buffer[row_start + i * A.internal_size2() + k] /= temp_buffer[row_start + k * A.internal_size2() + k];  // write l_ik
00076 
00077             SCALARTYPE l_ik = temp_buffer[row_start + i * A.internal_size2() + k];
00078 
00079             for (vcl_size_t j = row_start + k + 1; j < A.size1(); ++j)
00080               temp_buffer[i * A.internal_size2() + j] -= l_ik * temp_buffer[k * A.internal_size2() + j];  // l_ik * a_kj
00081           }
00082         }
00083 
00084         // Write back:
00085         viennacl::backend::memory_write(A.handle(),
00086                                         sizeof(SCALARTYPE) * row_start          * A.internal_size2(),
00087                                         sizeof(SCALARTYPE) * current_block_size * A.internal_size2(),
00088                                         &(temp_buffer[0]));
00089 
00090         if (remainder_range.size() > 0)
00091         {
00092           //
00093           // Compute L_12 = [ (U_11)^{T}^{-1} A_{21}^T ]^T
00094           //
00095           viennacl::matrix_range<MatrixType> U_11(A, block_range,     block_range);
00096           viennacl::matrix_range<MatrixType> A_21(A, remainder_range, block_range);
00097           viennacl::linalg::inplace_solve(trans(U_11), trans(A_21), viennacl::linalg::lower_tag());
00098 
00099           //
00100           // Update remainder of A
00101           //
00102           viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range);
00103           viennacl::matrix_range<MatrixType> U_12(A, block_range,     remainder_range);
00104           viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range);
00105 
00106           A_22 -= viennacl::linalg::prod(L_21, U_12);
00107         }
00108       }
00109 
00110     }
00111 
00112 
00117     template<typename SCALARTYPE>
00118     void lu_factorize(matrix<SCALARTYPE, viennacl::column_major> & A)
00119     {
00120       typedef matrix<SCALARTYPE, viennacl::column_major>  MatrixType;
00121 
00122       vcl_size_t max_block_size = 32;
00123       vcl_size_t num_blocks = (A.size1() - 1) / max_block_size + 1;
00124       std::vector<SCALARTYPE> temp_buffer(A.internal_size1() * max_block_size);
00125 
00126       // Iterate over panels
00127       for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id)
00128       {
00129         vcl_size_t col_start = panel_id * max_block_size;
00130         vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - col_start, max_block_size);
00131 
00132         viennacl::range     block_range(col_start, col_start + current_block_size);
00133         viennacl::range remainder_range(col_start + current_block_size, A.size1());
00134 
00135         //
00136         // Perform LU factorization on panel:
00137         //
00138 
00139 
00140         // Read from matrix to buffer:
00141         viennacl::backend::memory_read(A.handle(),
00142                                        sizeof(SCALARTYPE) * col_start          * A.internal_size1(),
00143                                        sizeof(SCALARTYPE) * current_block_size * A.internal_size1(),
00144                                        &(temp_buffer[0]));
00145 
00146         // Factorize (kji-version):
00147         for (vcl_size_t k=0; k < current_block_size; ++k)
00148         {
00149           SCALARTYPE a_kk = temp_buffer[col_start + k + k * A.internal_size1()];
00150           for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i)
00151             temp_buffer[i + k * A.internal_size1()] /= a_kk;  // write l_ik
00152 
00153           for (vcl_size_t j=k+1; j < current_block_size; ++j)
00154           {
00155             SCALARTYPE a_kj = temp_buffer[col_start + k + j * A.internal_size1()];
00156             for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i)
00157               temp_buffer[i + j * A.internal_size1()] -= temp_buffer[i + k * A.internal_size1()] * a_kj;  // l_ik * a_kj
00158           }
00159         }
00160 
00161         // Write back:
00162         viennacl::backend::memory_write(A.handle(),
00163                                         sizeof(SCALARTYPE) * col_start          * A.internal_size1(),
00164                                         sizeof(SCALARTYPE) * current_block_size * A.internal_size1(),
00165                                         &(temp_buffer[0]));
00166 
00167         if (remainder_range.size() > 0)
00168         {
00169           //
00170           // Compute U_12:
00171           //
00172           viennacl::matrix_range<MatrixType> L_11(A, block_range,     block_range);
00173           viennacl::matrix_range<MatrixType> A_12(A, block_range, remainder_range);
00174           viennacl::linalg::inplace_solve(L_11, A_12, viennacl::linalg::unit_lower_tag());
00175 
00176           //
00177           // Update remainder of A
00178           //
00179           viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range);
00180           viennacl::matrix_range<MatrixType> U_12(A, block_range,     remainder_range);
00181           viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range);
00182 
00183           A_22 -= viennacl::linalg::prod(L_21, U_12);
00184         }
00185 
00186       }
00187 
00188     }
00189 
00190 
00191     //
00192     // Convenience layer:
00193     //
00194 
00200     template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B>
00201     void lu_substitute(matrix<SCALARTYPE, F1, ALIGNMENT_A> const & A,
00202                        matrix<SCALARTYPE, F2, ALIGNMENT_B> & B)
00203     {
00204       assert(A.size1() == A.size2() && bool("Matrix must be square"));
00205       assert(A.size1() == B.size1() && bool("Matrix must be square"));
00206       inplace_solve(A, B, unit_lower_tag());
00207       inplace_solve(A, B, upper_tag());
00208     }
00209 
00215     template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT>
00216     void lu_substitute(matrix<SCALARTYPE, F, ALIGNMENT> const & A,
00217                        vector<SCALARTYPE, VEC_ALIGNMENT> & vec)
00218     {
00219       assert(A.size1() == A.size2() && bool("Matrix must be square"));
00220       inplace_solve(A, vec, unit_lower_tag());
00221       inplace_solve(A, vec, upper_tag());
00222     }
00223 
00224   }
00225 }
00226 
00227 #endif