ViennaCL - The Vienna Computing Library  1.5.1
viennacl/linalg/opencl/kernels/fft.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
00002 #define VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
00003 
00004 #include "viennacl/tools/tools.hpp"
00005 #include "viennacl/ocl/kernel.hpp"
00006 #include "viennacl/ocl/platform.hpp"
00007 #include "viennacl/ocl/utils.hpp"
00008 
00011 namespace viennacl
00012 {
00013   namespace linalg
00014   {
00015     namespace opencl
00016     {
00017       namespace kernels
00018       {
00019 
00021 
00022 
/** @brief Appends the OpenCL source of the 'bluestein_post' kernel (postprocessing phase of the Bluestein algorithm) to 'source'.
*
* For every i < size the generated kernel multiplies Z[i] by the complex factor (cos(a), sin(a)),
* where a = -pi * ((i*i) mod (2*size)) / size, and stores the product in out[i].
*
* The product i*i is evaluated in 64-bit arithmetic (ulong) inside the kernel. A 32-bit product wraps
* around for i >= 65536, and because 2*size is generally not a power of two the wrapped value yields
* a wrong remainder and therefore a wrong angle.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_bluestein_post(StringType & source, std::string const & numeric_string)
{
  // kernel signature: complex (two-component) input Z, complex output, number of entries
  source.append("__kernel void bluestein_post(__global "); source.append(numeric_string); source.append("2 *Z, \n");
  source.append("                             __global "); source.append(numeric_string); source.append("2 *out, \n");
  source.append("                             unsigned int size) \n");
  source.append("{ \n");
  source.append("  unsigned int glb_id = get_global_id(0); \n");
  source.append("  unsigned int glb_sz = get_global_size(0); \n");

  source.append("  unsigned int double_size = size << 1; \n");
  source.append("  "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
  source.append("  const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");

  source.append("  for(unsigned int i = glb_id; i < size; i += glb_sz) { \n");
  // widen to 64 bit before squaring so that i*i cannot wrap; the remainder is < 2*size and fits into 32 bit again
  source.append("    unsigned int rm = (unsigned int)(((ulong)i * i) % double_size); \n");
  source.append("    "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * (-NUM_PI); \n");

  // sincos() returns the sine and writes the cosine through the pointer argument
  source.append("    sn_a = sincos(angle, &cs_a); \n");

  // complex multiplication Z[i] * b_i
  source.append("    "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
  source.append("    out[i] = ("); source.append(numeric_string); source.append("2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x); \n");
  source.append("  } \n");
  source.append("} \n");
}
00049 
/** @brief Appends the OpenCL source of the 'bluestein_pre' kernel (preprocessing phase of the Bluestein algorithm) to 'source'.
*
* For every i < size the generated kernel computes the angle a = pi * ((i*i) mod (2*size)) / size and
*  - writes input[i] * (cos(a), -sin(a)) to A[i],
*  - writes (cos(a), sin(a)) to B[i] and, for i != 0, also to B[ext_size - i].
*
* The product i*i is evaluated in 64-bit arithmetic (ulong) inside the kernel. A 32-bit product wraps
* around for i >= 65536, and because 2*size is generally not a power of two the wrapped value yields
* a wrong remainder and therefore a wrong angle.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_bluestein_pre(StringType & source, std::string const & numeric_string)
{
  source.append("__kernel void bluestein_pre(__global "); source.append(numeric_string); source.append("2 *input, \n");
  source.append("  __global "); source.append(numeric_string); source.append("2 *A, \n");
  source.append("  __global "); source.append(numeric_string); source.append("2 *B, \n");
  source.append("  unsigned int size, \n");
  source.append("  unsigned int ext_size \n");
  source.append("  ) { \n");
  source.append("  unsigned int glb_id = get_global_id(0); \n");
  source.append("  unsigned int glb_sz = get_global_size(0); \n");

  source.append("  unsigned int double_size = size << 1; \n");

  source.append("  "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
  source.append("  const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");

  source.append("  for(unsigned int i = glb_id; i < size; i += glb_sz) { \n");
  // widen to 64 bit before squaring so that i*i cannot wrap; the remainder is < 2*size and fits into 32 bit again
  source.append("    unsigned int rm = (unsigned int)(((ulong)i * i) % double_size); \n");
  source.append("    "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * NUM_PI; \n");

  // sincos() returns sin(-angle) and writes cos(-angle) through the pointer argument
  source.append("    sn_a = sincos(-angle, &cs_a); \n");

  // a_i and b_i are complex conjugates of each other
  source.append("    "); source.append(numeric_string); source.append("2 a_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
  source.append("    "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, -sn_a); \n");

  // complex multiplication input[i] * a_i
  source.append("    A[i] = ("); source.append(numeric_string); source.append("2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x); \n");
  source.append("    B[i] = b_i; \n");

  // very bad instruction, to be fixed (original author's remark about the branch below)
  source.append("    if(i) \n");
  source.append("      B[ext_size - i] = b_i; \n");
  source.append("  } \n");
  source.append("} \n");
}
00086 
/** @brief Appends the OpenCL source of the 'complex_to_real' kernel to 'source'.
*
* The generated kernel copies the first component (.x) of each of the 'size' two-component input entries to 'out'.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_complex_to_real(StringType & source, std::string const & numeric_string)
{
  std::string code;

  // kernel signature
  code += "__kernel void complex_to_real(__global " + numeric_string + "2 *in, \n";
  code += "  __global " + numeric_string + "  *out, \n";
  code += "  unsigned int size) { \n";

  // kernel body: no type-dependent parts
  code += "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))  \n"
          "    out[i] = in[i].x; \n"
          "} \n";

  source.append(code);
}
00098 
/** @brief Appends the OpenCL source of the 'fft_div_vec_scalar' kernel to 'source'.
*
* The generated kernel divides each of the 'size' two-component entries of 'input1' in place by the scalar 'factor'.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_div_vec_scalar(StringType & source, std::string const & numeric_string)
{
  std::string code;

  // kernel signature
  code += "__kernel void fft_div_vec_scalar(__global " + numeric_string + "2 *input1, \n";
  code += "  unsigned int size, \n";
  code += "  " + numeric_string + " factor) { \n";

  // kernel body: no type-dependent parts
  code += "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))  \n"
          "    input1[i] /= factor; \n"
          "} \n";

  source.append(code);
}
00110 
/** @brief Appends the OpenCL source of the 'fft_mult_vec' kernel to 'source'.
*
* The generated kernel computes output[i] = input1[i] * input2[i] for i < size, where each entry is a
* two-component value and the product is formed like a complex multiplication (x: real part, y: imaginary part).
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_mult_vec(StringType & source, std::string const & numeric_string)
{
  std::string const pair_type = numeric_string + "2";   // two-component vector type, e.g. "float2"
  std::string code;

  // kernel signature
  code += "__kernel void fft_mult_vec(__global const " + pair_type + " *input1, \n";
  code += "  __global const " + pair_type + " *input2, \n";
  code += "  __global " + pair_type + " *output, \n";
  code += "  unsigned int size) { \n";

  // kernel body
  code += "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  code += "    " + pair_type + " in1 = input1[i]; \n";
  code += "    " + pair_type + " in2 = input2[i]; \n";
  code += "    output[i] = (" + pair_type + ")(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x); \n";
  code += "  } \n"
          "} \n";

  source.append(code);
}
00127 
/** @brief Appends the OpenCL source of the 'real_to_complex' kernel to 'source'.
*
* The generated kernel writes, for each i < size, a two-component value to out[i] whose first component (.x)
* is in[i] and whose second component is zero.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_real_to_complex(StringType & source, std::string const & numeric_string)
{
  std::string const pair_type = numeric_string + "2";   // two-component vector type, e.g. "float2"
  std::string code;

  // kernel signature
  code += "__kernel void real_to_complex(__global " + numeric_string + " *in, \n";
  code += "  __global " + pair_type + " *out, \n";
  code += "  unsigned int size) { \n";

  // kernel body
  code += "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  code += "    " + pair_type + " val = 0; \n";
  code += "    val.x = in[i]; \n"
          "    out[i] = val; \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00142 
/** @brief Appends the OpenCL source of the 'reverse_inplace' kernel to 'source'.
*
* The generated kernel reverses the order of the 'size' entries of 'vec' in place by swapping
* vec[i] with vec[size - i - 1] for all i < size/2.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL type of the vector entries as spliced into the kernel
*/
template <typename StringType>
void generate_fft_reverse_inplace(StringType & source, std::string const & numeric_string)
{
  std::string code;

  // kernel signature
  code += "__kernel void reverse_inplace(__global " + numeric_string + " *vec, uint size) { \n";

  // kernel body: load both partners first, then store them swapped
  code += "  for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) { \n";
  code += "    " + numeric_string + " val1 = vec[i]; \n";
  code += "    " + numeric_string + " val2 = vec[size - i - 1]; \n";
  code += "    vec[i] = val2; \n"
          "    vec[size - i - 1] = val1; \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00157 
/** @brief Appends the OpenCL source of the 'transpose' kernel to 'source'.
*
* The generated kernel treats 'input' as a row_num x col_num array stored row by row and writes the entry at
* (row, col) to position col * row_num + row of 'output'.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_transpose(StringType & source, std::string const & numeric_string)
{
  std::string const pair_type = numeric_string + "2";   // two-component vector type, e.g. "float2"
  std::string code;

  // kernel signature
  code += "__kernel void transpose(__global " + pair_type + " *input, \n";
  code += "  __global " + pair_type + " *output, \n";
  code += "  unsigned int row_num, \n"
          "  unsigned int col_num) { \n";

  // kernel body: no type-dependent parts
  code += "  unsigned int size = row_num * col_num; \n"
          "  for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"
          "    unsigned int row = i / col_num; \n"
          "    unsigned int col = i - row*col_num; \n"
          "    unsigned int new_pos = col * row_num + row; \n"
          "    output[new_pos] = input[i]; \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00177 
/** @brief Appends the OpenCL source of the 'transpose_inplace' kernel to 'source'.
*
* The generated kernel treats 'input' as a row_num x col_num array stored row by row. For each linear index i
* it computes new_pos = col * row_num + row and swaps input[i] with input[new_pos] whenever i < new_pos.
*
* NOTE(review): a single pairwise swap per index looks sufficient for square arrays only (row_num == col_num);
* confirm that callers never use this kernel for non-square data.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_transpose_inplace(StringType & source, std::string const & numeric_string)
{
  std::string code;

  // kernel signature
  code += "__kernel void transpose_inplace(__global " + numeric_string + "2* input, \n";
  code += "  unsigned int row_num, \n"
          "  unsigned int col_num) { \n";

  // index computation
  code += "  unsigned int size = row_num * col_num; \n"
          "  for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"
          "    unsigned int row = i / col_num; \n"
          "    unsigned int col = i - row*col_num; \n"
          "    unsigned int new_pos = col * row_num + row; \n";

  // swap; the condition i < new_pos ensures that each pair is exchanged by one index only
  code += "    if(i < new_pos) { \n";
  code += "      " + numeric_string + "2 val = input[i]; \n";
  code += "      input[i] = input[new_pos]; \n"
          "      input[new_pos] = val; \n"
          "    } \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00200 
/** @brief Appends the OpenCL source of the 'vandermonde_prod' kernel to 'source'.
*
* For each i < size the generated kernel accumulates the sum over j < size of vander[i]^j * vector[j]
* (the power is built up by repeated multiplication) and stores it in result[i].
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_vandermonde_prod(StringType & source, std::string const & numeric_string)
{
  std::string code;

  // kernel signature
  code += "__kernel void vandermonde_prod(__global " + numeric_string + " *vander, \n";
  code += "  __global " + numeric_string + " *vector, \n";
  code += "  __global " + numeric_string + " *result, \n";
  code += "  uint size) { \n";

  // per-row state: multiplier, running power and accumulated value
  code += "  for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) { \n";
  code += "    " + numeric_string + " mul = vander[i]; \n";
  code += "    " + numeric_string + " pwr = 1; \n";
  code += "    " + numeric_string + " val = 0; \n";

  // inner accumulation loop and final store: no type-dependent parts
  code += "    for(uint j = 0; j < size; j++) { \n"
          "      val = val + pwr * vector[j]; \n"
          "      pwr *= mul; \n"
          "    } \n"
          "    result[i] = val; \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00223 
/** @brief Appends the OpenCL source of the 'zero2' kernel to 'source'.
*
* The generated kernel sets the first 'size' entries of both 'input1' and 'input2' to zero.
*
* @param source          String-like object (must provide append()) the kernel code is appended to
* @param numeric_string  Name of the OpenCL scalar type spliced into the kernel (e.g. "float" or "double")
*/
template <typename StringType>
void generate_fft_zero2(StringType & source, std::string const & numeric_string)
{
  std::string const pair_type = numeric_string + "2";   // two-component vector type, e.g. "float2"
  std::string code;

  // kernel signature
  code += "__kernel void zero2(__global " + pair_type + " *input1, \n";
  code += "  __global " + pair_type + " *input2, \n";
  code += "  unsigned int size) { \n";

  // kernel body: no type-dependent parts
  code += "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n"
          "    input1[i] = 0; \n"
          "    input2[i] = 0; \n"
          "  } \n"
          "} \n";

  source.append(code);
}
00237 
00239 
00240         // main kernel class
00242         template <class NumericT>
00243         struct fft
00244         {
00245           static std::string program_name()
00246           {
00247             return viennacl::ocl::type_to_string<NumericT>::apply() + "_fft";
00248           }
00249 
00250           static void init(viennacl::ocl::context & ctx)
00251           {
00252             viennacl::ocl::DOUBLE_PRECISION_CHECKER<NumericT>::apply(ctx);
00253             std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
00254 
00255             static std::map<cl_context, bool> init_done;
00256             if (!init_done[ctx.handle().get()])
00257             {
00258               std::string source;
00259               source.reserve(8192);
00260 
00261               viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
00262 
00263               // unary operations
00264               if (numeric_string == "float" || numeric_string == "double")
00265               {
00266                 generate_fft_bluestein_post(source, numeric_string);
00267                 generate_fft_bluestein_pre(source, numeric_string);
00268                 generate_fft_complex_to_real(source, numeric_string);
00269                 generate_fft_div_vec_scalar(source, numeric_string);
00270                 generate_fft_mult_vec(source, numeric_string);
00271                 generate_fft_real_to_complex(source, numeric_string);
00272                 generate_fft_reverse_inplace(source, numeric_string);
00273                 generate_fft_transpose(source, numeric_string);
00274                 generate_fft_transpose_inplace(source, numeric_string);
00275                 generate_fft_vandermonde_prod(source, numeric_string);
00276                 generate_fft_zero2(source, numeric_string);
00277               }
00278 
00279               std::string prog_name = program_name();
00280               #ifdef VIENNACL_BUILD_INFO
00281               std::cout << "Creating program " << prog_name << std::endl;
00282               #endif
00283               ctx.add_program(source, prog_name);
00284               init_done[ctx.handle().get()] = true;
00285             } //if
00286           } //init
00287         };
00288 
00289       }  // namespace kernels
00290     }  // namespace opencl
00291   }  // namespace linalg
00292 }  // namespace viennacl
00293 #endif
00294