// Thread-support hooks for FFTW. When BIG_BATCH is defined without CUFFTW, or when none of
// ASYNC, OPENMP and CUFFTW is defined, the three macros expand to FFTW's multi-threading
// calls, with the planner thread count fixed at 4.
9 #if (defined(BIG_BATCH) && !defined(CUFFTW)) || (!defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW))
10 #define FFTW_PLAN_WITH_THREADS() fftwf_plan_with_nthreads(4);
11 #define FFTW_INIT_THREAD() fftwf_init_threads();
12 #define FFTW_CLEAN_THREADS() fftwf_cleanup_threads();
// Empty fallbacks: the same macro names expand to nothing, so call sites stay unchanged.
// NOTE(review): the #else / #endif lines separating the two groups are not visible in this view.
14 #define FFTW_PLAN_WITH_THREADS()
15 #define FFTW_INIT_THREAD()
16 #define FFTW_CLEAN_THREADS()
// Creates every FFTW plan used later by forward(), forward_window() and inverse().
//
// Parameters:
//   width, height  - size of a single 2D transform (presumably stored in m_width / m_height
//                    on lines not visible in this view - TODO confirm).
//   num_of_feats   - number of feature channels; batch count of the per-feature plans.
//   num_of_scales  - number of scales; batch count of the "all scales" plans.
//
// Each plan is built against freshly allocated local buffers with the FFTW_PATIENT planner
// flag. The "all scales" plans are only built when m_num_of_scales > 1 && BIG_BATCH_MODE.
// NOTE(review): `rank` (and, for the one-channel inverse plan, `howmany`) are used below but
// declared on lines that are not visible in this view, as are the enclosing braces.
21 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
// Keep the batch dimensions; they size the batched plans created below.
25 m_num_of_feats = num_of_feats;
26 m_num_of_scales = num_of_scales;
// Report the FFT backend. NOTE(review): both messages appear here; presumably a preprocessor
// conditional that is not visible in this view selects exactly one of them.
29 std::cout << "FFT: FFTW" << std::endl;
31 std::cout << "FFT: cuFFTW" << std::endl;
36 // FFT forward one scale
// Single real-to-complex 2D transform: m_height x m_width floats in,
// m_height x (m_width / 2 + 1) complex values out.
38 cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
39 ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
41 FFTW_PLAN_WITH_THREADS();
42 plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
43 reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
46 // FFT forward all scales
// Batched variant: one transform per scale, with the scales stacked along the row axis of the input.
47 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
48 cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
49 ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
50 float *in = reinterpret_cast<float *>(in_f_all.data);
51 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
53 int n[] = {(int)m_height, (int)m_width};
54 int howmany = m_num_of_scales;
// Unit strides with idist/odist equal to one full image/spectrum: batches lie back to back.
55 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
56 int istride = 1, ostride = 1;
57 int *inembed = NULL, *onembed = NULL;
59 FFTW_PLAN_WITH_THREADS();
60 plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
61 ostride, odist, FFTW_PATIENT);
64 // FFT forward window one scale
// Batched forward plan with one transform per feature channel (howmany = m_num_of_feats);
// the channels are stacked along the row axis of the input matrix.
66 cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
67 ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
68 float *in = reinterpret_cast<float *>(in_fw.data);
69 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
71 int n[] = {int(m_height), int(m_width)};
72 int howmany = int(m_num_of_feats);
73 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
74 int istride = 1, ostride = 1;
75 int *inembed = nullptr, *onembed = nullptr;
77 FFTW_PLAN_WITH_THREADS();
78 plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
82 // FFT forward window all scales all feats
// Same layout as above, but with one transform per (scale, feature) pair.
83 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
84 cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
85 ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
86 float *in = reinterpret_cast<float *>(in_all.data);
87 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
89 int n[] = {(int)m_height, (int)m_width};
90 int howmany = m_num_of_scales * m_num_of_feats;
91 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
92 int istride = 1, ostride = 1;
93 int *inembed = NULL, *onembed = NULL;
95 FFTW_PLAN_WITH_THREADS();
96 plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
97 ostride, odist, FFTW_PATIENT);
100 // FFT inverse one scale
// Batched complex-to-real plan with one transform per feature channel.
// NOTE(review): in_i is constructed with m_width columns while inembed describes
// m_width / 2 + 1 columns - looks larger than required; confirm against ComplexMat.
102 ComplexMat in_i(m_height, m_width, m_num_of_feats);
103 cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
104 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
105 float *out = reinterpret_cast<float *>(out_i.data);
107 int n[] = {int(m_height), int(m_width)};
108 int howmany = int(m_num_of_feats);
// Output is interleaved: consecutive batches start one float apart (odist = 1) and elements of
// one batch are m_num_of_feats floats apart, matching the CV_32FC(m_num_of_feats) output matrix.
109 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
110 int istride = 1, ostride = int(m_num_of_feats);
111 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
113 FFTW_PLAN_WITH_THREADS();
114 plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
115 odist, FFTW_PATIENT);
117 // FFT inverse all scales
// Same interleaved output scheme with m_num_of_feats * m_num_of_scales channels.
119 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
120 ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
121 cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
122 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
123 float *out = reinterpret_cast<float *>(out_i_all.data);
125 int n[] = {(int)m_height, (int)m_width};
126 int howmany = m_num_of_feats * m_num_of_scales;
127 int idist = m_height * (m_width / 2 + 1), odist = 1;
128 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
129 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
131 FFTW_PLAN_WITH_THREADS();
132 plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
133 onembed, ostride, odist, FFTW_PATIENT);
136 // FFT inverse one channel one scale
// NOTE(review): `howmany` passed to the planner below is declared on a line not visible in
// this view; presumably 1 for the single-channel case - confirm.
138 ComplexMat in_i1(int(m_height), int(m_width), 1);
139 cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
140 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
141 float *out = reinterpret_cast<float *>(out_i1.data);
143 int n[] = {int(m_height), int(m_width)};
145 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
146 int istride = 1, ostride = 1;
147 int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
149 FFTW_PLAN_WITH_THREADS();
150 plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
151 odist, FFTW_PATIENT);
154 // FFT inverse one channel all scales
// One inverse transform per scale, written interleaved into a CV_32FC(m_num_of_scales) matrix.
155 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
156 ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
157 cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
158 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
159 float *out = reinterpret_cast<float *>(out_i1_all.data);
161 int n[] = {(int)m_height, (int)m_width};
162 int howmany = m_num_of_scales;
163 int idist = m_height * (m_width / 2 + 1), odist = 1;
164 int istride = 1, ostride = m_num_of_scales;
165 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
167 FFTW_PLAN_WITH_THREADS();
168 plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
169 ostride, odist, FFTW_PATIENT);
// Receives the window matrix. NOTE(review): the body is not visible in this view; presumably it
// stores `window` in m_window, which forward_window() multiplies into every patch - confirm.
174 void Fftw::set_window(const cv::Mat &window)
// Runs a forward real-to-complex FFT of `real_input` into `complex_result`.
//
// Parameters:
//   real_input      - float matrix whose data pointer is handed to FFTW as the input array.
//   complex_result  - destination; its buffer (get_p_data()) receives the spectrum.
//   real_input_arr  - not used by this implementation.
//   stream          - not referenced on any line visible in this view.
179 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
// Explicitly discard the parameter this implementation does not use.
181 (void)real_input_arr;
// An input with m_height * m_num_of_scales rows is treated as all scales stacked together
// and goes through the batched plan.
184 if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
185 fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
186 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
// Otherwise use the single-transform plan (the `else` line is not visible in this view).
188 fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
189 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
// Multiplies every feature patch by m_window, stacks the results in `fw_all`, and runs a
// batched forward FFT over them into `complex_result`.
//
// Parameters:
//   patch_feats     - one matrix per channel (passed by value).
//   complex_result  - destination; its buffer (get_p_data()) receives the spectra.
//   fw_all          - staging matrix; channel i occupies the m_height rows starting at i * m_height.
//   real_input_arr  - not used by this implementation.
//   stream          - not referenced on any line visible in this view.
194 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
195 float *real_input_arr, cudaStream_t stream)
// Explicitly discard the parameter this implementation does not use.
197 (void)real_input_arr;
200 int n_channels = int(patch_feats.size());
201 for (int i = 0; i < n_channels; ++i) {
// in_roi is a view onto the i-th m_height x m_width slab of fw_all.
// NOTE(review): this relies on the assignment writing into the existing ROI storage rather
// than rebinding in_roi to a new buffer - presumably true when size and type match; confirm.
202 cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
203 in_roi = patch_feats[uint(i)].mul(m_window);
206 float *in = reinterpret_cast<float *>(fw_all.data);
207 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
// Up to m_num_of_feats channels: per-feature plan. More channels: all-scales plan
// (the `else` line is not visible in this view).
209 if (n_channels <= int(m_num_of_feats))
210 fftwf_execute_dft_r2c(plan_fw, in, out);
212 fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
// Runs an inverse complex-to-real FFT of `complex_input` into `real_result`, choosing the plan
// from the input's channel count, then divides the result by m_width * m_height.
//
// Parameters:
//   complex_input   - source spectra; n_channels selects the plan.
//   real_result     - destination matrix; written through its data pointer, then rescaled.
//   real_result_arr - not used by this implementation.
//   stream          - not referenced on any line visible in this view.
216 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
// Explicitly discard the parameter this implementation does not use.
218 (void)real_result_arr;
221 int n_channels = complex_input.n_channels;
222 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
223 float *out = reinterpret_cast<float *>(real_result.data);
// Plan dispatch. NOTE(review): the first `if` condition and the final `else` are on lines not
// visible in this view; presumably the first branch tests for a single channel - confirm.
226 fftwf_execute_dft_c2r(plan_i_1ch, in, out);
227 else if (BIG_BATCH_MODE && n_channels == int(m_num_of_scales))
228 fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
229 else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
230 fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
232 fftwf_execute_dft_c2r(plan_i_features, in, out);
// Scale by 1 / (width * height); presumably compensates for an unnormalized inverse transform.
234 real_result = real_result / (m_width * m_height);
// Plan teardown (the enclosing function header is not visible in this view; presumably the
// destructor). Releases the plans that init() always creates.
240 fftwf_destroy_plan(plan_f);
241 fftwf_destroy_plan(plan_fw);
242 fftwf_destroy_plan(plan_i_features);
243 fftwf_destroy_plan(plan_i_1ch);
// NOTE(review): init() creates the *_all_scales plans only when
// m_num_of_scales > 1 && BIG_BATCH_MODE, but they are destroyed here whenever BIG_BATCH_MODE is
// true - confirm these members are initialized (e.g. to nullptr) when a single scale is used.
245 if (BIG_BATCH_MODE) {
246 fftwf_destroy_plan(plan_f_all_scales);
247 fftwf_destroy_plan(plan_i_features_all_scales);
248 fftwf_destroy_plan(plan_fw_all_scales);
249 fftwf_destroy_plan(plan_i_1ch_all_scales);
// Expands to fftwf_cleanup_threads() or to nothing, depending on the build configuration.
251 FFTW_CLEAN_THREADS();