5 CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
6 cudaErrorCheck(cublasCreate(&cublas));
7 cudaErrorCheck(cublasSetStream(cublas, cudaStreamPerThread));
10 cufftHandle cuFFT::create_plan_fwd(uint howmany) const
13 int n[] = {(int)m_height, (int)m_width};
14 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
15 int istride = 1, ostride = 1;
16 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
19 cudaErrorCheck(cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
20 cudaErrorCheck(cufftSetStream(plan, cudaStreamPerThread));
24 cufftHandle cuFFT::create_plan_inv(uint howmany) const
27 int n[] = {(int)m_height, (int)m_width};
28 int idist = m_height * (m_width / 2 + 1), odist = m_height * m_width;
29 int istride = 1, ostride = 1;
30 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
33 cudaErrorCheck(cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
34 cudaErrorCheck(cufftSetStream(plan, cudaStreamPerThread));
39 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
41 Fft::init(width, height, num_of_feats, num_of_scales);
43 std::cout << "FFT: cuFFT" << std::endl;
45 plan_f = create_plan_fwd(1);
46 plan_fw = create_plan_fwd(m_num_of_feats);
47 plan_i_1ch = create_plan_inv(1);
50 plan_f_all_scales = create_plan_fwd(m_num_of_scales);
51 plan_fw_all_scales = create_plan_fwd(m_num_of_scales * m_num_of_feats);
52 plan_i_all_scales = create_plan_inv(m_num_of_scales);
56 void cuFFT::set_window(const MatDynMem &window)
58 Fft::set_window(window);
62 void cuFFT::forward(const MatScales &real_input, ComplexMat &complex_result)
64 Fft::forward(real_input, complex_result);
65 auto in = static_cast<cufftReal *>(const_cast<MatScales&>(real_input).deviceMem());
67 if (real_input.size[0] == 1)
68 cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_dev_data()));
71 cudaErrorCheck(cufftExecR2C(plan_f_all_scales, in, complex_result.get_dev_data()));
75 void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatScaleFeats &temp)
77 Fft::forward_window(feat, complex_result, temp);
79 cufftReal *temp_data = temp.deviceMem();
80 uint n_scales = feat.size[0];
82 for (uint s = 0; s < n_scales; ++s) {
83 for (uint ch = 0; ch < uint(feat.size[1]); ++ch) {
84 cv::Mat feat_plane = feat.plane(s, ch);
85 cv::Mat temp_plane = temp.plane(s, ch);
86 temp_plane = feat_plane.mul(m_window);
91 cudaErrorCheck(cufftExecR2C(plan_fw, temp_data, complex_result.get_dev_data()));
94 cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
98 void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
100 Fft::inverse(complex_input, real_result);
102 uint n_channels = complex_input.n_channels;
103 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_dev_data());
104 cufftReal *out = real_result.deviceMem();
105 float alpha = 1.0 / (m_width * m_height);
108 cudaErrorCheck(cufftExecC2R(plan_i_1ch, in, out));
111 cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, out));
113 cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
114 // The result is a cv::Mat, which will be accesses by CPU, so we
115 // must synchronize with the GPU here
116 CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
121 cudaErrorCheck(cublasDestroy(cublas));
123 cudaErrorCheck(cufftDestroy(plan_f));
124 cudaErrorCheck(cufftDestroy(plan_fw));
125 cudaErrorCheck(cufftDestroy(plan_i_1ch));
128 cudaErrorCheck(cufftDestroy(plan_f_all_scales));
129 cudaErrorCheck(cufftDestroy(plan_fw_all_scales));
130 cudaErrorCheck(cufftDestroy(plan_i_all_scales));