ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/svd.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_SVD_HPP
00002 #define VIENNACL_LINALG_SVD_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00028 // Note: Boost.uBLAS is required at the moment
00029 #include <boost/numeric/ublas/vector.hpp>
00030 #include <boost/numeric/ublas/matrix.hpp>
00031 
00032 
00033 #include <cmath>
00034 
00035 #include "viennacl/matrix.hpp"
00036 #include "viennacl/linalg/opencl/kernels/svd.hpp"
00037 #include "viennacl/linalg/qr-method-common.hpp"
00038 
00039 namespace viennacl
00040 {
00041   namespace linalg
00042   {
00043 
00044     namespace detail
00045     {
00046 
00047       template<typename MatrixType, typename VectorType>
00048       void givens_prev(MatrixType & matrix,
00049                        VectorType & tmp1,
00050                        VectorType & tmp2,
00051                        int n,
00052                        int l,
00053                        int k
00054                       )
00055       {
00056         typedef typename MatrixType::value_type                                   ScalarType;
00057         typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00058 
00059         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
00060         viennacl::ocl::kernel & kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<CPU_ScalarType>::program_name(), SVD_GIVENS_PREV_KERNEL);
00061 
00062         kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 256));
00063         kernel.local_work_size(0, 256);
00064 
00065         viennacl::ocl::enqueue(kernel(
00066                                       matrix,
00067                                       tmp1,
00068                                       tmp2,
00069                                       static_cast<cl_uint>(n),
00070                                       static_cast<cl_uint>(matrix.internal_size1()),
00071                                       static_cast<cl_uint>(l + 1),
00072                                       static_cast<cl_uint>(k + 1)
00073                               ));
00074       }
00075 
00076 
00077       template<typename MatrixType, typename VectorType>
00078       void change_signs(MatrixType& matrix, VectorType& signs, int n)
00079       {
00080         typedef typename MatrixType::value_type                                   ScalarType;
00081         typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00082 
00083         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
00084         viennacl::ocl::kernel & kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<CPU_ScalarType>::program_name(), SVD_INVERSE_SIGNS_KERNEL);
00085 
00086         kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 16));
00087         kernel.global_work_size(1, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size2(matrix), 16));
00088 
00089         kernel.local_work_size(0, 16);
00090         kernel.local_work_size(1, 16);
00091 
00092         viennacl::ocl::enqueue(kernel(
00093                                       matrix,
00094                                       signs,
00095                                       static_cast<cl_uint>(n),
00096                                       static_cast<cl_uint>(matrix.internal_size1())
00097                               ));
00098       }
00099 
00100       template<typename MatrixType, typename CPU_VectorType>
00101       void svd_qr_shift(MatrixType & vcl_u,
00102                         MatrixType & vcl_v,
00103                         CPU_VectorType & q,
00104                         CPU_VectorType & e)
00105       {
00106         typedef typename MatrixType::value_type                                   ScalarType;
00107         typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00108 
00109         int n = static_cast<int>(q.size());
00110         int m = static_cast<int>(vcl_u.size1());
00111 
00112         detail::transpose(vcl_u);
00113         detail::transpose(vcl_v);
00114 
00115         std::vector<CPU_ScalarType> signs_v(n, 1);
00116         std::vector<CPU_ScalarType> cs1(n), ss1(n), cs2(n), ss2(n);
00117 
00118         viennacl::vector<CPU_ScalarType> tmp1(n), tmp2(n);
00119 
00120         bool goto_test_conv = false;
00121 
00122         for (int k = n - 1; k >= 0; k--)
00123         {
00124           // std::cout << "K = " << k << std::endl;
00125 
00126           vcl_size_t iter = 0;
00127           for (iter = 0; iter < detail::ITER_MAX; iter++)
00128           {
00129             // test for split
00130             int l;
00131             for (l = k; l >= 0; l--)
00132             {
00133               goto_test_conv = false;
00134               if (std::fabs(e[l]) <= detail::EPS)
00135               {
00136                 // set it
00137                 goto_test_conv = true;
00138                 break;
00139               }
00140 
00141               if (std::fabs(q[l - 1]) <= detail::EPS)
00142               {
00143                 // goto
00144                 break;
00145               }
00146             }
00147 
00148             if (!goto_test_conv)
00149             {
00150               CPU_ScalarType c = 0.0;
00151               CPU_ScalarType s = 1.0;
00152 
00153               //int l1 = l - 1;
00154               //int l2 = k;
00155 
00156               for (int i = l; i <= k; i++)
00157               {
00158                 CPU_ScalarType f = s * e[i];
00159                 e[i] = c * e[i];
00160 
00161                 if (std::fabs(f) <= detail::EPS)
00162                 {
00163                   //l2 = i - 1;
00164                   break;
00165                 }
00166 
00167                 CPU_ScalarType g = q[i];
00168                 CPU_ScalarType h = detail::pythag(f, g);
00169                 q[i] = h;
00170                 c = g / h;
00171                 s = -f / h;
00172 
00173                 cs1[i] = c;
00174                 ss1[i] = s;
00175               }
00176 
00177               // std::cout << "Hitted!" << l1 << " " << l2 << "\n";
00178 
00179               // for(int i = l; i <= l2; i++)
00180               // {
00181               //   for (int j = 0; j < m; j++)
00182               //   {
00183               //     CPU_ScalarType y = u(j, l1);
00184               //     CPU_ScalarType z = u(j, i);
00185               //     u(j, l1) = y * cs1[i] + z * ss1[i];
00186               //     u(j, i) = -y * ss1[i] + z * cs1[i];
00187               //   }
00188               // }
00189             }
00190 
00191             CPU_ScalarType z = q[k];
00192 
00193             if (l == k)
00194             {
00195               if (z < 0)
00196               {
00197                 q[k] = -z;
00198 
00199                 signs_v[k] *= -1;
00200               }
00201 
00202               break;
00203             }
00204 
00205             if (iter >= detail::ITER_MAX - 1)
00206               break;
00207 
00208             CPU_ScalarType x = q[l];
00209             CPU_ScalarType y = q[k - 1];
00210             CPU_ScalarType g = e[k - 1];
00211             CPU_ScalarType h = e[k];
00212             CPU_ScalarType f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y);
00213 
00214             g = detail::pythag<CPU_ScalarType>(f, 1);
00215 
00216             if (f < 0) {
00217               f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x;
00218             } else {
00219               f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x;
00220             }
00221 
00222             CPU_ScalarType c = 1;
00223             CPU_ScalarType s = 1;
00224 
00225             for (vcl_size_t i = l + 1; i <= static_cast<vcl_size_t>(k); i++)
00226             {
00227               g = e[i];
00228               y = q[i];
00229               h = s * g;
00230               g = c * g;
00231               CPU_ScalarType z = detail::pythag(f, h);
00232               e[i - 1] = z;
00233               c = f / z;
00234               s = h / z;
00235               f = x * c + g * s;
00236               g = -x * s + g * c;
00237               h = y * s;
00238               y = y * c;
00239 
00240               cs1[i] = c;
00241               ss1[i] = s;
00242 
00243               z = detail::pythag(f, h);
00244               q[i - 1] = z;
00245               c = f / z;
00246               s = h / z;
00247               f = c * g + s * y;
00248               x = -s * g + c * y;
00249 
00250               cs2[i] = c;
00251               ss2[i] = s;
00252             }
00253 
00254             {
00255               viennacl::copy(cs1, tmp1);
00256               viennacl::copy(ss1, tmp2);
00257 
00258               givens_prev(vcl_v, tmp1, tmp2, n, l, k);
00259             }
00260 
00261             {
00262               viennacl::copy(cs2, tmp1);
00263               viennacl::copy(ss2, tmp2);
00264 
00265               givens_prev(vcl_u, tmp1, tmp2, m, l, k);
00266             }
00267 
00268             e[l] = 0.0;
00269             e[k] = f;
00270             q[k] = x;
00271           }
00272 
00273         }
00274 
00275 
00276         viennacl::copy(signs_v, tmp1);
00277         change_signs(vcl_v, tmp1, n);
00278 
00279         // transpose singular matrices again
00280         detail::transpose(vcl_u);
00281         detail::transpose(vcl_v);
00282       }
00283 
00284 
00285       /*template <typename SCALARTYPE, unsigned int ALIGNMENT>
00286       bool householder_c(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A,
00287                           viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Q,
00288                           viennacl::vector<SCALARTYPE, ALIGNMENT> & D,
00289                           vcl_size_t start)
00290       {
00291 
00292         vcl_size_t row_start = start;
00293         vcl_size_t col_start = start;
00294 
00295         if(row_start + 1 >= A.size1())
00296           return false;
00297 
00298         std::vector<SCALARTYPE> tmp(A.size1(), 0);
00299 
00300         copy_vec(A, D, row_start, col_start, true);
00301         fast_copy(D.begin(), D.begin() + (A.size1() - row_start), tmp.begin() + row_start);
00302 
00303         detail::householder_vector(tmp, row_start);
00304 
00305         fast_copy(tmp, D);
00306 
00307         viennacl::ocl::kernel & kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_COL_KERNEL);
00308 
00309         //kernel.global_work_size(0, A.size1() << 1);
00310 
00311         viennacl::ocl::enqueue(kernel(
00312                                       A,
00313                                       Q,
00314                                       D,
00315                                       static_cast<cl_uint>(row_start),
00316                                       static_cast<cl_uint>(col_start),
00317                                       static_cast<cl_uint>(A.size1()),
00318                                       static_cast<cl_uint>(A.size2()),
00319                                       static_cast<cl_uint>(A.internal_size2()),
00320                                       static_cast<cl_uint>(Q.internal_size2()),
00321                                       viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00322                               ));
00323 
00324         return true;
00325       }*/
00326 
00327       template <typename SCALARTYPE, unsigned int ALIGNMENT>
00328       bool householder_c(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A,
00329                           viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& Q,
00330                           viennacl::vector<SCALARTYPE, ALIGNMENT>& D,
00331                           vcl_size_t row_start, vcl_size_t col_start)
00332       {
00333         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
00334 
00335         if(row_start + 1 >= A.size1())
00336           return false;
00337 
00338         prepare_householder_vector(A, D, A.size1(), row_start, col_start, row_start, true);
00339 
00340         {
00341           viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL);
00342 
00343           viennacl::ocl::enqueue(kernel(
00344                                         A,
00345                                         D,
00346                                         static_cast<cl_uint>(row_start),
00347                                         static_cast<cl_uint>(col_start),
00348                                         static_cast<cl_uint>(A.size1()),
00349                                         static_cast<cl_uint>(A.size2()),
00350                                         static_cast<cl_uint>(A.internal_size2()),
00351                                         viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00352                                 ));
00353         }
00354 
00355         {
00356           viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_QL_KERNEL);
00357 
00358           viennacl::ocl::enqueue(kernel(
00359                                         Q,
00360                                         D,
00361                                         static_cast<cl_uint>(A.size1()),
00362                                         static_cast<cl_uint>(A.size2()),
00363                                         static_cast<cl_uint>(Q.internal_size2()),
00364                                         viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00365                                 ));
00366         }
00367 
00368         return true;
00369       }
00370 
00371       /*
00372       template <typename SCALARTYPE, unsigned int ALIGNMENT>
00373       bool householder_r(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A,
00374                           viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& Q,
00375                           viennacl::vector<SCALARTYPE, ALIGNMENT>& S,
00376                           vcl_size_t start)
00377       {
00378 
00379         vcl_size_t row_start = start;
00380         vcl_size_t col_start = start + 1;
00381 
00382         if(col_start + 1 >= A.size2())
00383           return false;
00384 
00385         std::vector<SCALARTYPE> tmp(A.size2(), 0);
00386 
00387         copy_vec(A, S, row_start, col_start, false);
00388         fast_copy(S.begin(),
00389                   S.begin() + (A.size2() - col_start),
00390                   tmp.begin() + col_start);
00391 
00392         detail::householder_vector(tmp, col_start);
00393         fast_copy(tmp, S);
00394 
00395         viennacl::ocl::kernel& kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_ROW_KERNEL);
00396 
00397         viennacl::ocl::enqueue(kernel(
00398                                       A,
00399                                       Q,
00400                                       S,
00401                                       static_cast<cl_uint>(row_start),
00402                                       static_cast<cl_uint>(col_start),
00403                                       static_cast<cl_uint>(A.size1()),
00404                                       static_cast<cl_uint>(A.size2()),
00405                                       static_cast<cl_uint>(A.internal_size2()),
00406                                       static_cast<cl_uint>(Q.internal_size2()),
00407                                       viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00408                                 ));
00409         return true;
00410       } */
00411 
00412       template <typename SCALARTYPE, unsigned int ALIGNMENT>
00413       bool householder_r(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A,
00414                           viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Q,
00415                           viennacl::vector<SCALARTYPE, ALIGNMENT>& D,
00416                           vcl_size_t row_start, vcl_size_t col_start)
00417       {
00418         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
00419 
00420         if(col_start + 1 >= A.size2())
00421           return false;
00422 
00423         prepare_householder_vector(A, D, A.size2(), row_start, col_start, col_start, false);
00424 
00425         {
00426           viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL);
00427 
00428           viennacl::ocl::enqueue(kernel(
00429                                         A,
00430                                         D,
00431                                         static_cast<cl_uint>(row_start),
00432                                         static_cast<cl_uint>(col_start),
00433                                         static_cast<cl_uint>(A.size1()),
00434                                         static_cast<cl_uint>(A.size2()),
00435                                         static_cast<cl_uint>(A.internal_size2()),
00436                                         viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00437                                 ));
00438         }
00439 
00440         {
00441           viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_QR_KERNEL);
00442 
00443           viennacl::ocl::enqueue(kernel(
00444                                         Q,
00445                                         D,
00446                                         static_cast<cl_uint>(A.size1()),
00447                                         static_cast<cl_uint>(A.size2()),
00448                                         static_cast<cl_uint>(Q.internal_size2()),
00449                                         viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
00450                                 ));
00451         }
00452 
00453         return true;
00454       }
00455 
00456       template <typename SCALARTYPE, unsigned int ALIGNMENT>
00457       void bidiag(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Ai,
00458                   viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QL,
00459                   viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QR)
00460       {
00461         viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(QL).context());
00462 
00463         vcl_size_t row_num = Ai.size1();
00464         vcl_size_t col_num = Ai.size2();
00465 
00466         vcl_size_t to = std::min(row_num, col_num);
00467         vcl_size_t big_to = std::max(row_num, col_num);
00468 
00469         //for storing householder vector
00470         viennacl::vector<SCALARTYPE, ALIGNMENT> hh_vector(big_to);
00471 
00472         QL = viennacl::identity_matrix<SCALARTYPE>(QL.size1(), ctx);
00473         QR = viennacl::identity_matrix<SCALARTYPE>(QR.size1(), ctx);
00474 
00475         for(vcl_size_t i = 0; i < to; i++)
00476         {
00477           householder_c(Ai, QL, hh_vector, i, i);
00478           householder_r(Ai, QR, hh_vector, i, i+1);
00479         }
00480       }
00481 
00482     } // namespace detail
00483 
00484 
00491     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00492     void svd(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A,
00493               viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QL,
00494               viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QR)
00495     {
00496       viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
00497       viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::init(ctx);
00498 
00499       vcl_size_t row_num = A.size1();
00500       vcl_size_t col_num = A.size2();
00501 
00502       vcl_size_t to = std::min(row_num, col_num);
00503 
00504 
00505       //viennacl::vector<SCALARTYPE, ALIGNMENT> d(to);
00506       //viennacl::vector<SCALARTYPE, ALIGNMENT> s(to + 1);
00507 
00508       // first stage
00509       detail::bidiag(A, QL, QR);
00510 
00511       // second stage
00512       //std::vector<SCALARTYPE> dh(to, 0);
00513       //std::vector<SCALARTYPE> sh(to + 1, 0);
00514       boost::numeric::ublas::vector<SCALARTYPE> dh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to, 0);
00515       boost::numeric::ublas::vector<SCALARTYPE> sh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to + 1, 0);
00516 
00517       detail::bidiag_pack(A, dh, sh);
00518 
00519       detail::svd_qr_shift( QL, QR, dh, sh);
00520 
00521       // Write resulting diagonal matrix with singular values to A:
00522       boost::numeric::ublas::matrix<SCALARTYPE> h_Sigma(row_num, col_num);
00523       h_Sigma.clear();
00524 
00525       for (vcl_size_t i = 0; i < to; i++)
00526         h_Sigma(i, i) = dh[i];
00527 
00528       copy(h_Sigma, A);
00529     }
00530   }
00531 }
00532 #endif