6 cudaErrorCheck(cublasCreate(&cublas));
9 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
11 Fft::init(width, height, num_of_feats, num_of_scales);
13 std::cout << "FFT: cuFFT" << std::endl;
18 int n[] = {int(m_height), int(m_width)};
20 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
21 int istride = 1, ostride = 1;
22 int *inembed = n, onembed[] = {int(m_height), int(m_width) / 2 + 1};
24 cudaErrorCheck(cufftPlanMany(&plan_f, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
25 cudaErrorCheck(cufftSetStream(plan_f, cudaStreamPerThread));
28 // FFT forward all scales
29 if (m_num_of_scales > 1) {
31 int n[] = {int(m_height), int(m_width)};
32 int howmany = m_num_of_scales;
33 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
34 int istride = 1, ostride = 1;
35 int *inembed = n, onembed[] = {int(m_height), int(m_width) / 2 + 1};
37 cudaErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
38 cudaErrorCheck(cufftSetStream(plan_f_all_scales, cudaStreamPerThread));
45 int n[] = {int(m_height), int(m_width)};
46 int howmany = m_num_of_feats;
47 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
48 int istride = 1, ostride = 1;
49 int *inembed = n, onembed[] = {int(m_height), int(m_width) / 2 + 1};
51 cudaErrorCheck(cufftPlanMany(&plan_fw, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
52 cudaErrorCheck(cufftSetStream(plan_fw, cudaStreamPerThread));
55 // FFT forward all scales
56 if (m_num_of_scales > 1) {
58 int n[] = {int(m_height), int(m_width)};
59 int howmany = m_num_of_feats * m_num_of_scales;
60 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
61 int istride = 1, ostride = 1;
62 int *inembed = n, onembed[] = {int(m_height), int(m_width) / 2 + 1};
64 cudaErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
65 cudaErrorCheck(cufftSetStream(plan_fw_all_scales, cudaStreamPerThread));
68 // FFT inverse all channels
71 int n[] = {int(m_height), int(m_width)};
72 int howmany = m_num_of_feats ;
73 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
74 int istride = 1, ostride = m_num_of_feats;
75 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
77 cudaErrorCheck(cufftPlanMany(&plan_i_features, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
78 cudaErrorCheck(cufftSetStream(plan_i_features, cudaStreamPerThread));
81 // FFT forward all scales
82 if (m_num_of_scales > 1) {
84 int n[] = {int(m_height), int(m_width)};
85 int howmany = m_num_of_feats * m_num_of_scales;
86 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
87 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
88 int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
90 cudaErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
91 cudaErrorCheck(cufftSetStream(plan_fw_all_scales, cudaStreamPerThread));
94 // FFT inverse one channel
97 int n[] = {int(m_height), int(m_width)};
98 int howmany = IF_BIG_BATCH(m_num_of_scales, 1);
99 int idist = m_height * (m_width / 2 + 1), odist = 1;
100 int istride = 1, ostride = IF_BIG_BATCH(m_num_of_scales, 1);
101 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
103 cudaErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
104 cudaErrorCheck(cufftSetStream(plan_i_1ch, cudaStreamPerThread));
108 void cuFFT::set_window(const MatDynMem &window)
110 Fft::set_window(window);
114 void cuFFT::forward(const MatScales &real_input, ComplexMat &complex_result)
116 Fft::forward(real_input, complex_result);
117 auto in = static_cast<cufftReal *>(const_cast<MatScales&>(real_input).deviceMem());
119 cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_p_data()));
121 if (BIG_BATCH_MODE && real_input.rows == int(m_height * IF_BIG_BATCH(m_num_of_scales, 1))) {
122 cudaErrorCheck(cufftExecR2C(plan_f_all_scales, in, complex_result.get_p_data()));
124 cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_p_data()));
128 void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatScaleFeats &temp)
130 Fft::forward_window(feat, complex_result, temp);
132 uint n_channels = feat.size[0];
133 cufftReal *temp_data = temp.deviceMem();
135 for (uint i = 0; i < n_channels; ++i) {
136 for (uint j = 0; j < uint(feat.size[1]); ++j) {
137 cv::Mat feat_plane = feat.plane(i, j);
138 cv::Mat temp_plane = temp.plane(i, j);
139 temp_plane = feat_plane.mul(m_window);
143 if (n_channels <= int(m_num_of_feats))
144 cudaErrorCheck(cufftExecR2C(plan_fw, temp_data, complex_result.get_p_data()));
146 cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_p_data()));
149 void cuFFT::inverse(ComplexMat &complex_input, MatDynMem &real_result)
151 Fft::inverse(complex_input, real_result);
153 uint n_channels = complex_input.n_channels;
154 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
155 cufftReal *out = real_result.deviceMem();
156 float alpha = 1.0 / (m_width * m_height);
158 if (n_channels == 1 || (BIG_BATCH_MODE && n_channels == int(IF_BIG_BATCH(m_num_of_scales, 1))))
159 cudaErrorCheck(cufftExecC2R(plan_i_1ch, in, out));
160 else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(IF_BIG_BATCH(m_num_of_scales, 1)))
161 cudaErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, out));
163 cudaErrorCheck(cufftExecC2R(plan_i_features, in, out));
164 // TODO: Investigate whether this scalling is needed or not
165 cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
170 cudaErrorCheck(cublasDestroy(cublas));
172 cudaErrorCheck(cufftDestroy(plan_f));
173 cudaErrorCheck(cufftDestroy(plan_fw));
174 cudaErrorCheck(cufftDestroy(plan_i_1ch));
175 cudaErrorCheck(cufftDestroy(plan_i_features));