// Prepares the cuFFT backend: records the feature/scale counts and the
// batch-mode flag, prints the backend name, and creates the cuFFT plans that
// forward(), forward_window() and inverse() execute later.
//
// Parameters:
//   width, height  - transform size. The plans below read it from m_width and
//                    m_height (NOTE(review): the assignments to those members
//                    are not visible in this excerpt - confirm).
//   num_of_feats   - stored in m_num_of_feats; batch count of the per-feature
//                    plans (plan_fw, plan_i_features).
//   num_of_scales  - stored in m_num_of_scales; batch count (or batch factor)
//                    of the *_all_scales plans.
//   big_batch_mode - stored in m_big_batch_mode; the *_all_scales plans are
//                    created only when this is true and m_num_of_scales > 1.
//
// NOTE(review): `rank` (and `howmany` for plan_i_1ch) are used below but their
// declarations are not visible in this excerpt; several cufftPlanMany calls are
// also cut off before their transform-type/batch arguments.
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
7 m_num_of_feats = num_of_feats;
8 m_num_of_scales = num_of_scales;
9 m_big_batch_mode = big_batch_mode;
11 std::cout << "FFT: cuFFT" << std::endl;
13 // FFT forward one scale
// Plain 2D real-to-complex plan for a single m_height x m_width input.
15 CufftErrorCheck(cufftPlan2d(&plan_f, int(m_height), int(m_width), CUFFT_R2C));
18 // FFT forward all scales
// Batched variant of plan_f, one transform per scale; only built for
// multi-scale big-batch operation.
19 if (m_num_of_scales > 1 && m_big_batch_mode) {
21 int n[] = {(int)m_height, (int)m_width};
22 int howmany = m_num_of_scales;
// Batch entries lie back to back: m_height * m_width reals in,
// m_height * (m_width / 2 + 1) complex values out.
23 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
24 int istride = 1, ostride = 1;
// Input is densely packed (inembed == n); output rows hold m_width / 2 + 1 entries.
25 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
27 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
31 // FFT forward window one scale
// Batched real-to-complex plan with one transform per feature channel,
// using the same contiguous layout as above.
34 int n[] = {int(m_height), int(m_width)};
35 int howmany = int(m_num_of_feats);
36 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
37 int istride = 1, ostride = 1;
38 int *inembed = n, onembed[] = {int(m_height), int(m_width / 2 + 1)};
41 cufftPlanMany(&plan_fw, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
44 // FFT forward window all scales all feats
// Same layout again, batched over every (scale, feature) pair.
45 if (m_num_of_scales > 1 && m_big_batch_mode) {
47 int n[] = {(int)m_height, (int)m_width};
48 int howmany = m_num_of_scales * m_num_of_feats;
49 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
50 int istride = 1, ostride = 1;
51 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
53 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
57 // FFT inverse one scale
// Batched inverse plan, one transform per feature channel. Input spectra are
// stored back to back (idist = m_height * (m_width / 2 + 1)).
60 int n[] = {int(m_height), int(m_width)};
61 int howmany = int(m_num_of_feats);
62 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
// ostride == batch count together with odist == 1: in cuFFT's advanced layout
// element k of batch b lands at k * ostride + b, so the outputs of the batch
// are interleaved element by element instead of stored one after another.
63 int istride = 1, ostride = int(m_num_of_feats);
64 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
66 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n, inembed, istride, idist, onembed, ostride, odist,
69 // FFT inverse all scales
// Same interleaved-output inverse, batched over every (scale, feature) pair.
71 if (m_num_of_scales > 1 && m_big_batch_mode) {
73 int n[] = {(int)m_height, (int)m_width};
74 int howmany = m_num_of_feats * m_num_of_scales;
75 int idist = m_height * (m_width / 2 + 1), odist = 1;
76 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
77 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
79 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n, inembed, istride, idist, onembed, ostride,
80 odist, CUFFT_C2R, howmany));
83 // FFT inverse one channel one scale
// Complex-to-real plan with unit strides.
// NOTE(review): `howmany` for this plan is declared on a line not shown here.
86 int n[] = {int(m_height), int(m_width)};
88 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
89 int istride = 1, ostride = 1;
90 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
93 cufftPlanMany(&plan_i_1ch, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
96 // FFT inverse one channel all scales
// One complex-to-real transform per scale, with the per-scale outputs
// interleaved element by element (ostride == m_num_of_scales, odist == 1).
97 if (m_num_of_scales > 1 && m_big_batch_mode) {
99 int n[] = {(int)m_height, (int)m_width};
100 int howmany = m_num_of_scales;
101 int idist = m_height * (m_width / 2 + 1), odist = 1;
102 int istride = 1, ostride = m_num_of_scales;
103 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
105 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
106 CUFFT_C2R, howmany));
// Supplies the window matrix for this FFT backend.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// `window` in m_window, which forward_window() multiplies into every feature
// patch - confirm.
111 void cuFFT::set_window(const cv::Mat &window)
// Forward real-to-complex FFT of the data at real_input_arr into complex_result.
//
// Parameters:
//   real_input     - in the visible lines only its row count is read, to pick the plan.
//   complex_result - receives the spectrum through get_p_data().
//   real_input_arr - buffer handed to cuFFT as the real input
//                    (presumably the device-visible storage behind real_input - TODO confirm).
//   stream         - CUDA stream used by the single-scale path.
//
// In big-batch mode, an input of m_height * m_num_of_scales rows is transformed
// in one call with plan_f_all_scales.
// NOTE(review): no cufftSetStream or synchronization is visible on that path - confirm
// which stream the batched plan runs on.
116 void cuFFT::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
120 if (m_big_batch_mode && real_input.rows == int(m_height * m_num_of_scales)) {
121 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
122 complex_result.get_p_data()));
// Remaining path (presumably the else branch; the line is not shown): bind the
// single-scale plan to the caller's stream, run it, and wait for the stream to finish.
126 CufftErrorCheck(cufftSetStream(plan_f, stream));
128 cufftExecR2C(plan_f, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
// NOTE(review): the return value of cudaStreamSynchronize is not checked.
129 cudaStreamSynchronize(stream);
// Applies m_window to every feature patch and runs a batched forward
// real-to-complex FFT over the result.
//
// Parameters:
//   patch_feats    - feature channels, passed by value; one cv::Mat per channel.
//   complex_result - receives the spectra through get_p_data().
//   fw_all         - staging matrix; channel i is assigned to the m_width x m_height
//                    region starting at row i * m_height.
//   real_input_arr - buffer handed to cuFFT as the real input
//                    (presumably the storage behind fw_all - TODO confirm).
//   stream         - CUDA stream used by the per-scale path.
135 void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
136 float *real_input_arr, cudaStream_t stream)
138 int n_channels = int(patch_feats.size());
// More channels than features per scale means the input spans several scales,
// so the all-scales plan is used.
// NOTE(review): plan_fw_all_scales is created in init() only when
// m_num_of_scales > 1 && m_big_batch_mode; nothing visible here checks that.
140 if (n_channels > int(m_num_of_feats)) {
141 for (uint i = 0; i < uint(n_channels); ++i) {
142 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
// NOTE(review): relies on this assignment writing the product into fw_all's
// storage rather than rebinding in_roi - confirm sizes and types always match.
143 in_roi = patch_feats[i].mul(m_window);
145 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
146 complex_result.get_p_data()));
// Per-scale path (presumably the else branch; the line is not shown): same
// windowing, then plan_fw on the caller's stream.
148 for (uint i = 0; i < uint(n_channels); ++i) {
149 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
150 in_roi = patch_feats[i].mul(m_window);
154 CufftErrorCheck(cufftSetStream(plan_fw, stream));
156 cufftExecR2C(plan_fw, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
// NOTE(review): the return value of cudaStreamSynchronize is not checked.
157 cudaStreamSynchronize(stream);
// Inverse complex-to-real FFT of complex_input into real_result_arr, choosing
// the plan from the number of input channels.
//
// Parameters:
//   complex_input   - spectra to transform; n_channels selects the plan.
//   real_result     - divided by m_width * m_height on the one-channel paths
//                     (presumably backed by real_result_arr - TODO confirm).
//   real_result_arr - buffer handed to cuFFT as the real output.
//   stream          - CUDA stream used by the single-scale plans and for synchronization.
//
// Plan selection, checked in this order:
//   1 channel                               -> plan_i_1ch
//   m_num_of_scales channels                -> plan_i_1ch_all_scales
//   m_num_of_feats * m_num_of_scales        -> plan_i_features_all_scales
//   anything else (presumably an else)      -> plan_i_features
163 void cuFFT::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
165 int n_channels = complex_input.n_channels;
166 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
168 if (n_channels == 1) {
171 CufftErrorCheck(cufftSetStream(plan_i_1ch, stream));
172 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// Wait for the transform before touching the result on the host side.
// NOTE(review): the return value of cudaStreamSynchronize is not checked.
173 cudaStreamSynchronize(stream);
// cuFFT transforms are unnormalized, so scale by the number of samples.
175 real_result = real_result / (m_width * m_height);
177 } else if (n_channels == int(m_num_of_scales)) {
// NOTE(review): no cufftSetStream is visible for this plan, yet `stream` is synchronized below.
178 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
179 cudaStreamSynchronize(stream);
181 real_result = real_result / (m_width * m_height);
183 } else if (n_channels == int(m_num_of_feats) * int(m_num_of_scales)) {
// NOTE(review): no synchronization or normalization is visible on this path or the next.
184 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
189 CufftErrorCheck(cufftSetStream(plan_i_features, stream));
190 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// The per-feature path only waits for the stream in OpenMP builds without BIG_BATCH.
191 #if defined(OPENMP) && !defined(BIG_BATCH)
192 cudaStreamSynchronize(stream);
// Teardown: releases the cuFFT plans created in init().
// The four single-scale plans are created unconditionally and are always destroyed.
200 CufftErrorCheck(cufftDestroy(plan_f));
201 CufftErrorCheck(cufftDestroy(plan_fw));
202 CufftErrorCheck(cufftDestroy(plan_i_1ch));
203 CufftErrorCheck(cufftDestroy(plan_i_features));
// The *_all_scales plans are created in init() only when
// m_num_of_scales > 1 && m_big_batch_mode, so they are destroyed under exactly
// that condition. Testing m_big_batch_mode alone would pass never-created
// handles to cufftDestroy when big-batch mode runs with a single scale.
205 if (m_num_of_scales > 1 && m_big_batch_mode) {
206 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
207 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
208 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
209 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));