ViennaCL - The Vienna Computing Library
1.5.1
|
00001 #ifndef VIENNACL_LINALG_LU_HPP 00002 #define VIENNACL_LINALG_LU_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <algorithm> //for std::min 00026 00027 #include "viennacl/matrix.hpp" 00028 #include "viennacl/matrix_proxy.hpp" 00029 00030 #include "viennacl/linalg/prod.hpp" 00031 #include "viennacl/linalg/direct_solve.hpp" 00032 00033 namespace viennacl 00034 { 00035 namespace linalg 00036 { 00041 template<typename SCALARTYPE> 00042 void lu_factorize(matrix<SCALARTYPE, viennacl::row_major> & A) 00043 { 00044 typedef matrix<SCALARTYPE, viennacl::row_major> MatrixType; 00045 00046 vcl_size_t max_block_size = 32; 00047 vcl_size_t num_blocks = (A.size2() - 1) / max_block_size + 1; 00048 std::vector<SCALARTYPE> temp_buffer(A.internal_size2() * max_block_size); 00049 00050 // Iterate over panels 00051 for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id) 00052 { 00053 vcl_size_t row_start = panel_id * max_block_size; 00054 vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - row_start, max_block_size); 00055 00056 viennacl::range block_range(row_start, row_start + current_block_size); 00057 viennacl::range remainder_range(row_start + current_block_size, A.size1()); 00058 00059 // 00060 // Perform LU factorization on panel: 00061 // 00062 00063 00064 // Read from matrix to buffer: 00065 viennacl::backend::memory_read(A.handle(), 00066 sizeof(SCALARTYPE) * row_start * A.internal_size2(), 00067 sizeof(SCALARTYPE) * current_block_size * A.internal_size2(), 00068 &(temp_buffer[0])); 00069 00070 // Factorize (kij-version): 00071 for (vcl_size_t k=0; k < current_block_size - 1; ++k) 00072 { 00073 for (vcl_size_t i=k+1; i < current_block_size; ++i) 00074 { 00075 temp_buffer[row_start + i * A.internal_size2() + k] /= temp_buffer[row_start + k * A.internal_size2() + k]; // write l_ik 00076 00077 SCALARTYPE l_ik = temp_buffer[row_start + i * A.internal_size2() + k]; 00078 00079 for (vcl_size_t j = row_start + k + 1; j < A.size1(); ++j) 00080 temp_buffer[i * A.internal_size2() + j] -= l_ik * temp_buffer[k * A.internal_size2() + j]; // l_ik * a_kj 00081 } 00082 } 00083 00084 // Write back: 00085 viennacl::backend::memory_write(A.handle(), 00086 sizeof(SCALARTYPE) * row_start * A.internal_size2(), 00087 sizeof(SCALARTYPE) * current_block_size * A.internal_size2(), 00088 &(temp_buffer[0])); 00089 00090 if (remainder_range.size() > 0) 00091 { 00092 // 00093 // Compute L_12 = [ (U_11)^{T}^{-1} A_{21}^T ]^T 00094 // 00095 viennacl::matrix_range<MatrixType> U_11(A, block_range, block_range); 00096 viennacl::matrix_range<MatrixType> A_21(A, remainder_range, block_range); 00097 viennacl::linalg::inplace_solve(trans(U_11), trans(A_21), viennacl::linalg::lower_tag()); 00098 00099 // 00100 // Update remainder of A 00101 // 00102 viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range); 00103 viennacl::matrix_range<MatrixType> U_12(A, block_range, remainder_range); 00104 viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range); 00105 00106 A_22 -= viennacl::linalg::prod(L_21, U_12); 00107 } 00108 } 00109 00110 } 00111 00112 00117 template<typename SCALARTYPE> 00118 void lu_factorize(matrix<SCALARTYPE, viennacl::column_major> & A) 00119 { 00120 typedef matrix<SCALARTYPE, viennacl::column_major> MatrixType; 00121 00122 vcl_size_t max_block_size = 32; 00123 vcl_size_t num_blocks = (A.size1() - 1) / max_block_size + 1; 00124 std::vector<SCALARTYPE> temp_buffer(A.internal_size1() * max_block_size); 00125 00126 // Iterate over panels 00127 for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id) 00128 { 00129 vcl_size_t col_start = panel_id * max_block_size; 00130 vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - col_start, max_block_size); 00131 00132 viennacl::range block_range(col_start, col_start + current_block_size); 00133 viennacl::range remainder_range(col_start + current_block_size, A.size1()); 00134 00135 // 00136 // Perform LU factorization on panel: 00137 // 00138 00139 00140 // Read from matrix to buffer: 00141 viennacl::backend::memory_read(A.handle(), 00142 sizeof(SCALARTYPE) * col_start * A.internal_size1(), 00143 sizeof(SCALARTYPE) * current_block_size * A.internal_size1(), 00144 &(temp_buffer[0])); 00145 00146 // Factorize (kji-version): 00147 for (vcl_size_t k=0; k < current_block_size; ++k) 00148 { 00149 SCALARTYPE a_kk = temp_buffer[col_start + k + k * A.internal_size1()]; 00150 for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i) 00151 temp_buffer[i + k * A.internal_size1()] /= a_kk; // write l_ik 00152 00153 for (vcl_size_t j=k+1; j < current_block_size; ++j) 00154 { 00155 SCALARTYPE a_kj = temp_buffer[col_start + k + j * A.internal_size1()]; 00156 for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i) 00157 temp_buffer[i + j * A.internal_size1()] -= temp_buffer[i + k * A.internal_size1()] * a_kj; // l_ik * a_kj 00158 } 00159 } 00160 00161 // Write back: 00162 viennacl::backend::memory_write(A.handle(), 00163 sizeof(SCALARTYPE) * col_start * A.internal_size1(), 00164 sizeof(SCALARTYPE) * current_block_size * A.internal_size1(), 00165 &(temp_buffer[0])); 00166 00167 if (remainder_range.size() > 0) 00168 { 00169 // 00170 // Compute U_12: 00171 // 00172 viennacl::matrix_range<MatrixType> L_11(A, block_range, block_range); 00173 viennacl::matrix_range<MatrixType> A_12(A, block_range, remainder_range); 00174 viennacl::linalg::inplace_solve(L_11, A_12, viennacl::linalg::unit_lower_tag()); 00175 00176 // 00177 // Update remainder of A 00178 // 00179 viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range); 00180 viennacl::matrix_range<MatrixType> U_12(A, block_range, remainder_range); 00181 viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range); 00182 00183 A_22 -= viennacl::linalg::prod(L_21, U_12); 00184 } 00185 00186 } 00187 00188 } 00189 00190 00191 // 00192 // Convenience layer: 00193 // 00194 00200 template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B> 00201 void lu_substitute(matrix<SCALARTYPE, F1, ALIGNMENT_A> const & A, 00202 matrix<SCALARTYPE, F2, ALIGNMENT_B> & B) 00203 { 00204 assert(A.size1() == A.size2() && bool("Matrix must be square")); 00205 assert(A.size1() == B.size1() && bool("Matrix must be square")); 00206 inplace_solve(A, B, unit_lower_tag()); 00207 inplace_solve(A, B, upper_tag()); 00208 } 00209 00215 template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT> 00216 void lu_substitute(matrix<SCALARTYPE, F, ALIGNMENT> const & A, 00217 vector<SCALARTYPE, VEC_ALIGNMENT> & vec) 00218 { 00219 assert(A.size1() == A.size2() && bool("Matrix must be square")); 00220 inplace_solve(A, vec, unit_lower_tag()); 00221 inplace_solve(A, vec, upper_tag()); 00222 } 00223 00224 } 00225 } 00226 00227 #endif