// FFTW_PLAN_WITH_THREADS(): helper invoked before each batched plan creation.
// When none of ASYNC, OPENMP or CUFFTW is defined it expands to a call that
// passes m_num_threads to FFTW's planner; the second definition is empty
// (presumably the #else branch -- the #else/#endif lines are not visible here).
// The expansion already ends in ';', so a call site written as
// `FFTW_PLAN_WITH_THREADS();` yields one extra, harmless empty statement.
// NOTE(review): this calls fftw_plan_with_nthreads (double-precision API) while
// every plan in this file is created with the fftwf_ (single-precision) API;
// verify whether fftwf_plan_with_nthreads was intended and which threads
// library the build links against.
9 #if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
10 #define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(int(m_num_threads));
12 #define FFTW_PLAN_WITH_THREADS()
// Constructs the FFT wrapper with an explicit thread count.
// num_threads is stored in m_num_threads, which FFTW_PLAN_WITH_THREADS()
// forwards to the FFTW planner when that macro is enabled.
// (The constructor body is not visible in this excerpt.)
20 Fftw::Fftw(unsigned num_threads)
21 : m_num_threads(num_threads)
// Creates every FFTW plan used by forward(), forward_window() and inverse().
//
// Parameters:
//   width, height   - size of one 2-D transform (the code below reads them
//                     back as m_width / m_height; the assignments themselves
//                     are not visible in this excerpt).
//   num_of_feats    - number of feature channels transformed per batch.
//   num_of_scales   - number of scales; the "all scales" plans are only built
//                     when this is > 1 and big_batch_mode is true.
//   big_batch_mode  - enables the batched "all scales" plans.
//
// Each section allocates scratch input/output buffers that exist only so the
// planner has arrays to plan against; the plans are later executed on the
// caller's buffers through the fftwf_execute_dft_* new-array functions.
// NOTE(review): `rank`, the planner flag arguments and the closing braces of
// each section are on lines not visible in this excerpt.
25 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
29 m_num_of_feats = num_of_feats;
30 m_num_of_scales = num_of_scales;
31 m_big_batch_mode = big_batch_mode;
// Code guarded by this condition is not visible here.
33 #if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
// Reports which backend was compiled in (the selecting preprocessor lines are
// not visible; presumably keyed on CUFFTW).
38 std::cout << "FFT: FFTW" << std::endl;
40 std::cout << "FFT: cuFFTW" << std::endl;
// Single real-to-complex 2-D transform: m_height x m_width floats in,
// m_height x (m_width/2 + 1) complex values out.
43 //FFT forward one scale
45 cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
46 ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
47 plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width),
48 reinterpret_cast<float*>(in_f.data),
49 reinterpret_cast<fftwf_complex*>(out_f.get_p_data()),
// Batched variant of the plan above: one transform per scale. The input is a
// matrix of m_height*m_num_of_scales rows, so consecutive transforms start
// idist = m_height*m_width floats apart; outputs are odist complex values apart.
53 //FFT forward all scales
54 if (m_num_of_scales > 1 && m_big_batch_mode) {
55 cv::Mat in_f_all = cv::Mat::zeros(m_height*m_num_of_scales, m_width, CV_32F);
56 ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
57 float *in = reinterpret_cast<float*>(in_f_all.data);
58 fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_f_all.get_p_data());
60 int n[] = {(int)m_height, (int)m_width};
61 int howmany = m_num_of_scales;
62 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
63 int istride = 1, ostride = 1;
// NULL embed arrays: no sub-array embedding, FFTW uses its default layout.
64 int *inembed = NULL, *onembed = NULL;
66 FFTW_PLAN_WITH_THREADS();
67 plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany,
68 in, inembed, istride, idist,
69 out, onembed, ostride, odist,
// Batched real-to-complex plan with one transform per feature channel
// (howmany = m_num_of_feats); channels are stacked row-wise in the input.
73 //FFT forward window one scale
75 cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
76 ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
77 float *in = reinterpret_cast<float*>(in_fw.data);
78 fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_fw.get_p_data());
80 int n[] = {int(m_height), int(m_width)};
81 int howmany = int(m_num_of_feats);
82 int idist = int(m_height*m_width), odist = int(m_height*(m_width/2+1));
83 int istride = 1, ostride = 1;
84 int *inembed = nullptr, *onembed = nullptr;
86 FFTW_PLAN_WITH_THREADS();
87 plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany,
88 in, inembed, istride, idist,
89 out, onembed, ostride, odist,
// Same layout as plan_fw, but the batch covers every feature of every scale
// (howmany = m_num_of_scales * m_num_of_feats).
93 //FFT forward window all scales all feats
94 if (m_num_of_scales > 1 && m_big_batch_mode) {
95 cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales*m_num_of_feats), m_width, CV_32F);
96 ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales*m_num_of_feats);
97 float *in = reinterpret_cast<float*>(in_all.data);
98 fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_all.get_p_data());
100 int n[] = {(int)m_height, (int)m_width};
101 int howmany = m_num_of_scales*m_num_of_feats;
102 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
103 int istride = 1, ostride = 1;
104 int *inembed = NULL, *onembed = NULL;
106 FFTW_PLAN_WITH_THREADS();
107 plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany,
108 in, inembed, istride, idist,
109 out, onembed, ostride, odist,
// Batched complex-to-real plan, one transform per feature channel. Outputs
// are interleaved: ostride = m_num_of_feats and odist = 1, so sample k of
// transform c is written to out[c + k*m_num_of_feats], matching a cv::Mat
// with m_num_of_feats channels.
// NOTE(review): in_i is constructed with m_width columns although the plan
// describes m_width/2+1 complex columns (inembed/idist); ComplexMat's layout
// is not visible here -- confirm the over-allocation is intentional.
113 //FFT inverse one scale
115 ComplexMat in_i(m_height, m_width, m_num_of_feats);
116 cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
117 fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i.get_p_data());
118 float *out = reinterpret_cast<float*>(out_i.data);
120 int n[] = {int(m_height), int(m_width)};
121 int howmany = int(m_num_of_feats);
122 int idist = int(m_height*(m_width/2+1)), odist = 1;
123 int istride = 1, ostride = int(m_num_of_feats);
124 int inembed[] = {int(m_height), int(m_width/2+1)}, *onembed = n;
126 FFTW_PLAN_WITH_THREADS();
127 plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany,
128 in, inembed, istride, idist,
129 out, onembed, ostride, odist,
// Same as plan_i_features with the batch extended to all scales
// (howmany = ostride = m_num_of_feats * m_num_of_scales).
132 //FFT inverse all scales
134 if (m_num_of_scales > 1 && m_big_batch_mode) {
135 ComplexMat in_i_all(m_height,m_width,m_num_of_feats*m_num_of_scales);
136 cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats*m_num_of_scales));
137 fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i_all.get_p_data());
138 float *out = reinterpret_cast<float*>(out_i_all.data);
140 int n[] = {(int)m_height, (int)m_width};
141 int howmany = m_num_of_feats*m_num_of_scales;
142 int idist = m_height*(m_width/2+1), odist = 1;
143 int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
144 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
146 FFTW_PLAN_WITH_THREADS();
147 plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany,
148 in, inembed, istride, idist,
149 out, onembed, ostride, odist,
// Complex-to-real plan for a single-channel result (ostride = 1).
// NOTE(review): `howmany` is used below but its declaration is on a line not
// visible in this excerpt.
153 //FFT inverse one channel one scale
155 ComplexMat in_i1(int(m_height),int(m_width),1);
156 cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
157 fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i1.get_p_data());
158 float *out = reinterpret_cast<float*>(out_i1.data);
160 int n[] = {int(m_height), int(m_width)};
162 int idist = int(m_height*(m_width/2+1)), odist = 1;
163 int istride = 1, ostride = 1;
164 int inembed[] = {int(m_height), int(m_width)/2+1}, *onembed = n;
166 FFTW_PLAN_WITH_THREADS();
167 plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany,
168 in, inembed, istride, idist,
169 out, onembed, ostride, odist,
// One complex-to-real transform per scale, written interleaved into a
// cv::Mat with m_num_of_scales channels (ostride = m_num_of_scales).
173 //FFT inverse one channel all scales
174 if (m_num_of_scales > 1 && m_big_batch_mode) {
175 ComplexMat in_i1_all(m_height,m_width,m_num_of_scales);
176 cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
177 fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i1_all.get_p_data());
178 float *out = reinterpret_cast<float*>(out_i1_all.data);
180 int n[] = {(int)m_height, (int)m_width};
181 int howmany = m_num_of_scales;
182 int idist = m_height*(m_width/2+1), odist = 1;
183 int istride = 1, ostride = m_num_of_scales;
184 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
186 FFTW_PLAN_WITH_THREADS();
187 plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany,
188 in, inembed, istride, idist,
189 out, onembed, ostride, odist,
// Receives the window matrix for this FFT object.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// `window` in m_window, which forward_window() multiplies into every feature
// channel -- confirm against the full file.
195 void Fftw::set_window(const cv::Mat &window)
// Real-to-complex forward FFT of real_input into complex_result.
//
// real_input      - float matrix; when big-batch mode is on and its row count
//                   equals m_height * m_num_of_scales, the batched
//                   plan_f_all_scales is executed on it.
// complex_result  - destination; its buffer (get_p_data()) is written by FFTW.
// real_input_arr, stream - real_input_arr is explicitly unused in this
//                   backend; no use of stream is visible in this excerpt.
//
// NOTE(review): the second execute call is presumably the else-branch that
// runs the single-scale plan_f (the `else` line is not visible here).
// NOTE(review): fftwf_execute_dft_r2c runs a plan on arrays other than the
// ones it was planned with; FFTW requires these to match the planning arrays
// in layout and alignment -- confirm callers' buffers satisfy that.
200 void Fftw::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr, cudaStream_t stream)
// Cast to void to mark the parameter as intentionally unused.
202 (void) real_input_arr;
205 if(m_big_batch_mode && real_input.rows == int(m_height*m_num_of_scales)){
206 fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float*>(real_input.data),
207 reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
209 fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float*>(real_input.data),
210 reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
// Multiplies every feature channel by the window and runs a batched
// real-to-complex FFT over all of them.
//
// patch_feats     - one matrix per channel (the vector is taken by value).
// complex_result  - destination; its buffer (get_p_data()) is written by FFTW.
// fw_all          - staging matrix; channel i is addressed as the
//                   m_height x m_width region starting at row i * m_height.
// real_input_arr, stream - real_input_arr is explicitly unused in this
//                   backend; no use of stream is visible in this excerpt.
//
// NOTE(review): plan_fw was created with a fixed batch of m_num_of_feats
// transforms, so it processes that many slabs of fw_all even when
// n_channels is smaller -- confirm callers always pass a full set.
// NOTE(review): the last execute call is presumably the else-branch; it uses
// plan_fw_all_scales, which init() only creates when
// m_num_of_scales > 1 && m_big_batch_mode -- that condition is not re-checked
// here.
215 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr, cudaStream_t stream)
// Cast to void to mark the parameter as intentionally unused.
217 (void) real_input_arr;
220 int n_channels = int(patch_feats.size());
221 for (int i = 0; i < n_channels; ++i) {
// View onto the i-th m_height-row slab of fw_all.
222 cv::Mat in_roi(fw_all, cv::Rect(0, i*int(m_height), int(m_width), int(m_height)));
// Element-wise product with the window. NOTE(review): this relies on the
// assignment writing into the ROI's existing storage (i.e. into fw_all)
// rather than reallocating in_roi -- expected when size and type match; verify.
223 in_roi = patch_feats[uint(i)].mul(m_window);
226 float *in = reinterpret_cast<float*>(fw_all.data);
227 fftwf_complex *out = reinterpret_cast<fftwf_complex*>(complex_result.get_p_data());
// Up to m_num_of_feats channels: single-scale plan; otherwise the all-scales plan.
229 if (n_channels <= int(m_num_of_feats))
230 fftwf_execute_dft_r2c(plan_fw, in, out);
232 fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
// Complex-to-real inverse FFT of complex_input into real_result, followed by
// normalisation.
//
// complex_input   - source spectrum; its n_channels selects which plan runs.
// real_result     - destination matrix; written through its data pointer and
//                   then divided by m_width * m_height.
// real_result_arr, stream - real_result_arr is explicitly unused in this
//                   backend; no use of stream is visible in this excerpt.
//
// Plan selection as visible here:
//   (condition not visible, presumably n_channels == 1) -> plan_i_1ch
//   big batch && n_channels == m_num_of_scales            -> plan_i_1ch_all_scales
//   big batch && n_channels == feats * scales             -> plan_i_features_all_scales
//   otherwise (presumably the final else)                 -> plan_i_features
236 void Fftw::inverse(ComplexMat & complex_input, cv::Mat & real_result, float *real_result_arr, cudaStream_t stream)
// Cast to void to mark the parameter as intentionally unused.
238 (void) real_result_arr;
241 int n_channels = complex_input.n_channels;
242 fftwf_complex *in = reinterpret_cast<fftwf_complex*>(complex_input.get_p_data());
243 float *out = reinterpret_cast<float*>(real_result.data);
246 fftwf_execute_dft_c2r(plan_i_1ch, in, out);
247 else if(m_big_batch_mode && n_channels == int(m_num_of_scales))
248 fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
249 else if(m_big_batch_mode && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
250 fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
252 fftwf_execute_dft_c2r(plan_i_features, in, out);
// FFTW transforms are unnormalised, so scale by the number of samples per
// transform to obtain the true inverse.
254 real_result = real_result/(m_width*m_height);
// Releases the FFTW plans created in init(). The enclosing function header is
// not visible in this excerpt (presumably the Fftw destructor).
// The four single-scale plans are always destroyed.
260 fftwf_destroy_plan(plan_f);
261 fftwf_destroy_plan(plan_fw);
262 fftwf_destroy_plan(plan_i_features);
263 fftwf_destroy_plan(plan_i_1ch);
// NOTE(review): init() creates the *_all_scales plans only when
// m_num_of_scales > 1 && m_big_batch_mode, but this guard checks
// m_big_batch_mode alone. With big-batch mode on and a single scale these
// handles were never assigned by init(); verify they are null-initialised
// elsewhere or tighten the condition.
265 if (m_big_batch_mode) {
266 fftwf_destroy_plan(plan_f_all_scales);
267 fftwf_destroy_plan(plan_i_features_all_scales);
268 fftwf_destroy_plan(plan_fw_all_scales);
269 fftwf_destroy_plan(plan_i_1ch_all_scales);