9 #if (defined(BIG_BATCH) && !defined(CUFFTW)) || (!defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW))
10 #define FFTW_PLAN_WITH_THREADS() fftwf_plan_with_nthreads(4);
11 #define FFTW_INIT_THREAD() fftwf_init_threads();
12 #define FFTW_CLEAN_THREADS() fftwf_cleanup_threads();
14 #define FFTW_PLAN_WITH_THREADS()
15 #define FFTW_INIT_THREAD()
16 #define FFTW_CLEAN_THREADS()
21 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
25 m_num_of_feats = num_of_feats;
26 m_num_of_scales = num_of_scales;
27 m_big_batch_mode = big_batch_mode;
30 std::cout << "FFT: FFTW" << std::endl;
32 std::cout << "FFT: cuFFTW" << std::endl;
37 // FFT forward one scale
39 cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
40 ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
42 FFTW_PLAN_WITH_THREADS();
43 plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
44 reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
47 // FFT forward all scales
48 if (m_num_of_scales > 1 && m_big_batch_mode) {
49 cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
50 ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
51 float *in = reinterpret_cast<float *>(in_f_all.data);
52 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
54 int n[] = {(int)m_height, (int)m_width};
55 int howmany = m_num_of_scales;
56 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
57 int istride = 1, ostride = 1;
58 int *inembed = NULL, *onembed = NULL;
60 FFTW_PLAN_WITH_THREADS();
61 plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
62 ostride, odist, FFTW_PATIENT);
65 // FFT forward window one scale
67 cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
68 ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
69 float *in = reinterpret_cast<float *>(in_fw.data);
70 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
72 int n[] = {int(m_height), int(m_width)};
73 int howmany = int(m_num_of_feats);
74 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
75 int istride = 1, ostride = 1;
76 int *inembed = nullptr, *onembed = nullptr;
78 FFTW_PLAN_WITH_THREADS();
79 plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
83 // FFT forward window all scales all feats
84 if (m_num_of_scales > 1 && m_big_batch_mode) {
85 cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
86 ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
87 float *in = reinterpret_cast<float *>(in_all.data);
88 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
90 int n[] = {(int)m_height, (int)m_width};
91 int howmany = m_num_of_scales * m_num_of_feats;
92 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
93 int istride = 1, ostride = 1;
94 int *inembed = NULL, *onembed = NULL;
96 FFTW_PLAN_WITH_THREADS();
97 plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
98 ostride, odist, FFTW_PATIENT);
101 // FFT inverse one scale
103 ComplexMat in_i(m_height, m_width, m_num_of_feats);
104 cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
105 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
106 float *out = reinterpret_cast<float *>(out_i.data);
108 int n[] = {int(m_height), int(m_width)};
109 int howmany = int(m_num_of_feats);
110 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
111 int istride = 1, ostride = int(m_num_of_feats);
112 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
114 FFTW_PLAN_WITH_THREADS();
115 plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
116 odist, FFTW_PATIENT);
118 // FFT inverse all scales
120 if (m_num_of_scales > 1 && m_big_batch_mode) {
121 ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
122 cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
123 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
124 float *out = reinterpret_cast<float *>(out_i_all.data);
126 int n[] = {(int)m_height, (int)m_width};
127 int howmany = m_num_of_feats * m_num_of_scales;
128 int idist = m_height * (m_width / 2 + 1), odist = 1;
129 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
130 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
132 FFTW_PLAN_WITH_THREADS();
133 plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
134 onembed, ostride, odist, FFTW_PATIENT);
137 // FFT inver one channel one scale
139 ComplexMat in_i1(int(m_height), int(m_width), 1);
140 cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
141 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
142 float *out = reinterpret_cast<float *>(out_i1.data);
144 int n[] = {int(m_height), int(m_width)};
146 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
147 int istride = 1, ostride = 1;
148 int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
150 FFTW_PLAN_WITH_THREADS();
151 plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
152 odist, FFTW_PATIENT);
155 // FFT inver one channel all scales
156 if (m_num_of_scales > 1 && m_big_batch_mode) {
157 ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
158 cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
159 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
160 float *out = reinterpret_cast<float *>(out_i1_all.data);
162 int n[] = {(int)m_height, (int)m_width};
163 int howmany = m_num_of_scales;
164 int idist = m_height * (m_width / 2 + 1), odist = 1;
165 int istride = 1, ostride = m_num_of_scales;
166 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
168 FFTW_PLAN_WITH_THREADS();
169 plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
170 ostride, odist, FFTW_PATIENT);
175 void Fftw::set_window(const cv::Mat &window)
180 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
182 (void)real_input_arr;
185 if (m_big_batch_mode && real_input.rows == int(m_height * m_num_of_scales)) {
186 fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
187 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
189 fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
190 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
195 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
196 float *real_input_arr, cudaStream_t stream)
198 (void)real_input_arr;
201 int n_channels = int(patch_feats.size());
202 for (int i = 0; i < n_channels; ++i) {
203 cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
204 in_roi = patch_feats[uint(i)].mul(m_window);
207 float *in = reinterpret_cast<float *>(fw_all.data);
208 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
210 if (n_channels <= int(m_num_of_feats))
211 fftwf_execute_dft_r2c(plan_fw, in, out);
213 fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
217 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
219 (void)real_result_arr;
222 int n_channels = complex_input.n_channels;
223 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
224 float *out = reinterpret_cast<float *>(real_result.data);
227 fftwf_execute_dft_c2r(plan_i_1ch, in, out);
228 else if (m_big_batch_mode && n_channels == int(m_num_of_scales))
229 fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
230 else if (m_big_batch_mode && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
231 fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
233 fftwf_execute_dft_c2r(plan_i_features, in, out);
235 real_result = real_result / (m_width * m_height);
241 fftwf_destroy_plan(plan_f);
242 fftwf_destroy_plan(plan_fw);
243 fftwf_destroy_plan(plan_i_features);
244 fftwf_destroy_plan(plan_i_1ch);
246 if (m_big_batch_mode) {
247 fftwf_destroy_plan(plan_f_all_scales);
248 fftwf_destroy_plan(plan_i_features_all_scales);
249 fftwf_destroy_plan(plan_fw_all_scales);
250 fftwf_destroy_plan(plan_i_1ch_all_scales);
252 FFTW_CLEAN_THREADS();