// FFTW_PLAN_WITH_THREADS() is invoked in init() immediately before each batched
// (fftwf_plan_many_*) plan is created.
// When none of ASYNC, OPENMP or CUFFTW is defined it expands to a call that passes
// m_num_threads to the FFTW planner; the second definition expands to nothing.
// Note that the first expansion already ends in ';', so `FFTW_PLAN_WITH_THREADS();`
// produces an extra empty statement.
// NOTE(review): the expansion calls the double-precision fftw_plan_with_nthreads(),
// whereas every plan in this file is created through the single-precision fftwf_* API.
// Confirm whether fftwf_plan_with_nthreads() (and the matching threads library) was intended.
// NOTE(review): the #else / #endif lines belonging to this conditional are not visible
// in this excerpt.
9 #if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
10 #define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(int(m_num_threads));
12 #define FFTW_PLAN_WITH_THREADS()
// Default constructor: sets m_num_threads to 4. No plans are created here; that happens in init().
15 Fftw::Fftw() : m_num_threads(4) {}
// Constructor taking an explicit thread count, stored in m_num_threads.
// The value is read by FFTW_PLAN_WITH_THREADS() when that macro is non-empty.
17 Fftw::Fftw(unsigned num_threads) : m_num_threads(num_threads) {}
// Stores the transform configuration and builds the FFTW plans that forward(),
// forward_window() and inverse() later execute.
//
// Parameters:
//   width, height   - transform size; presumably stored into m_width / m_height on lines
//                     that are not visible in this excerpt (TODO confirm).
//   num_of_feats    - number of feature channels, stored in m_num_of_feats.
//   num_of_scales   - number of scales, stored in m_num_of_scales.
//   big_batch_mode  - stored in m_big_batch_mode; together with num_of_scales > 1 it
//                     enables creation of the "*_all_scales" batched plans.
//
// The cv::Mat / ComplexMat objects created below are only handed to the planner as
// input/output arrays; execution later goes through fftwf_execute_dft_r2c/c2r with
// caller-supplied buffers. Plans whose flag argument is visible are created with FFTW_PATIENT.
// NOTE(review): `rank` (used by every fftwf_plan_many_* call) and the `howmany` used for
// plan_i_1ch are declared on lines not visible in this excerpt.
19 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
23 m_num_of_feats = num_of_feats;
24 m_num_of_scales = num_of_scales;
25 m_big_batch_mode = big_batch_mode;
// NOTE(review): the statements guarded by this conditional are not visible in this excerpt.
27 #if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
// Reports which FFT backend is in use; the preprocessor lines choosing between the two
// messages are not visible here.
32 std::cout << "FFT: FFTW" << std::endl;
34 std::cout << "FFT: cuFFTW" << std::endl;
37 // FFT forward, one scale: single 2D real-to-complex transform of size height x width.
// The complex output has width / 2 + 1 columns.
39 cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
40 ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
41 plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
42 reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
45 // FFT forward, all scales: m_num_of_scales real-to-complex transforms in one batched plan.
// The planning input stacks the scales vertically (m_height * m_num_of_scales rows), so
// consecutive transforms are idist = m_height * m_width floats apart.
46 if (m_num_of_scales > 1 && m_big_batch_mode) {
47 cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
48 ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
49 float *in = reinterpret_cast<float *>(in_f_all.data);
50 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
52 int n[] = {(int)m_height, (int)m_width};
53 int howmany = m_num_of_scales;
54 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
55 int istride = 1, ostride = 1;
56 int *inembed = NULL, *onembed = NULL;
58 FFTW_PLAN_WITH_THREADS();
59 plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
60 ostride, odist, FFTW_PATIENT);
63 // FFT forward of windowed features, one scale: m_num_of_feats batched transforms,
// with the feature channels stacked vertically in the input.
65 cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
66 ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
67 float *in = reinterpret_cast<float *>(in_fw.data);
68 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
70 int n[] = {int(m_height), int(m_width)};
71 int howmany = int(m_num_of_feats);
72 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
73 int istride = 1, ostride = 1;
74 int *inembed = nullptr, *onembed = nullptr;
76 FFTW_PLAN_WITH_THREADS();
77 plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
81 // FFT forward of windowed features, all scales and all features:
// m_num_of_scales * m_num_of_feats batched transforms.
82 if (m_num_of_scales > 1 && m_big_batch_mode) {
83 cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
84 ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
85 float *in = reinterpret_cast<float *>(in_all.data);
86 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
88 int n[] = {(int)m_height, (int)m_width};
89 int howmany = m_num_of_scales * m_num_of_feats;
90 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
91 int istride = 1, ostride = 1;
92 int *inembed = NULL, *onembed = NULL;
94 FFTW_PLAN_WITH_THREADS();
95 plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
96 ostride, odist, FFTW_PATIENT);
99 // FFT inverse, one scale: m_num_of_feats complex-to-real transforms.
// Output is interleaved: ostride = m_num_of_feats and odist = 1, matching the
// CV_32FC(m_num_of_feats) planning output, so transform k writes channel k.
// NOTE(review): in_i is sized with m_width columns although idist/inembed describe
// m_width / 2 + 1 complex columns; this looks like a harmless over-allocation of the
// planning buffer - confirm against the ComplexMat constructor.
101 ComplexMat in_i(m_height, m_width, m_num_of_feats);
102 cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
103 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
104 float *out = reinterpret_cast<float *>(out_i.data);
106 int n[] = {int(m_height), int(m_width)};
107 int howmany = int(m_num_of_feats);
108 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
109 int istride = 1, ostride = int(m_num_of_feats);
110 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
112 FFTW_PLAN_WITH_THREADS();
113 plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
114 odist, FFTW_PATIENT);
116 // FFT inverse, all scales: m_num_of_feats * m_num_of_scales complex-to-real transforms,
// again written interleaved (ostride equals the total channel count, odist = 1).
118 if (m_num_of_scales > 1 && m_big_batch_mode) {
119 ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
120 cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
121 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
122 float *out = reinterpret_cast<float *>(out_i_all.data);
124 int n[] = {(int)m_height, (int)m_width};
125 int howmany = m_num_of_feats * m_num_of_scales;
126 int idist = m_height * (m_width / 2 + 1), odist = 1;
127 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
128 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
130 FFTW_PLAN_WITH_THREADS();
131 plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
132 onembed, ostride, odist, FFTW_PATIENT);
135 // FFT inverse, one channel, one scale.
// NOTE(review): the `howmany` passed to the planner below is declared on a line not
// visible in this excerpt.
137 ComplexMat in_i1(int(m_height), int(m_width), 1);
138 cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
139 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
140 float *out = reinterpret_cast<float *>(out_i1.data);
142 int n[] = {int(m_height), int(m_width)};
144 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
145 int istride = 1, ostride = 1;
146 int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
148 FFTW_PLAN_WITH_THREADS();
149 plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
150 odist, FFTW_PATIENT);
153 // FFT inverse, one channel, all scales: m_num_of_scales transforms written interleaved
// into a CV_32FC(m_num_of_scales) output (ostride = m_num_of_scales, odist = 1).
154 if (m_num_of_scales > 1 && m_big_batch_mode) {
155 ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
156 cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
157 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
158 float *out = reinterpret_cast<float *>(out_i1_all.data);
160 int n[] = {(int)m_height, (int)m_width};
161 int howmany = m_num_of_scales;
162 int idist = m_height * (m_width / 2 + 1), odist = 1;
163 int istride = 1, ostride = m_num_of_scales;
164 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
166 FFTW_PLAN_WITH_THREADS();
167 plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
168 ostride, odist, FFTW_PATIENT);
// Receives the window that forward_window() multiplies into every feature channel
// (forward_window() reads it as m_window).
// NOTE(review): the body of this function is not visible in this excerpt; presumably it
// stores `window` into m_window - confirm.
173 void Fftw::set_window(const cv::Mat &window)
// Forward real-to-complex FFT of `real_input`, written into `complex_result`.
//
// Parameters:
//   real_input      - input image; its data pointer is passed to FFTW as float*, so it must
//                     hold 32-bit floats laid out like the planning buffers used in init().
//   complex_result  - destination; its storage (get_p_data()) is passed to FFTW as
//                     fftwf_complex*.
//   real_input_arr  - explicitly unused in this implementation.
//   stream          - not referenced on any line visible in this excerpt.
//
// Plan selection: the batched plan_f_all_scales is used only when the input has
// m_height * m_num_of_scales rows AND that plan can exist. init() creates it only for
// m_num_of_scales > 1 && m_big_batch_mode, so the same condition is required here.
// Otherwise, with a single scale in big-batch mode, the row test would match and a plan
// that init() never created would be executed.
178 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
180 (void)real_input_arr;
183 if (m_big_batch_mode && m_num_of_scales > 1 && real_input.rows == int(m_height * m_num_of_scales)) {
184 fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
185 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
// Single-scale path: one height x width transform.
187 fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
188 reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
// Multiplies every feature channel by the stored window and runs a batched forward
// real-to-complex FFT over all channels.
//
// Parameters:
//   patch_feats     - feature channels, one cv::Mat per channel (taken by value).
//   complex_result  - destination; its storage is passed to FFTW as fftwf_complex*.
//   fw_all          - caller-provided buffer that receives the windowed channels stacked
//                     vertically (channel i occupies rows [i * m_height, (i + 1) * m_height));
//                     it is the FFT input.
//   real_input_arr  - explicitly unused in this implementation.
//   stream          - not referenced on any line visible in this excerpt.
193 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
194 float *real_input_arr, cudaStream_t stream)
196 (void)real_input_arr;
199 int n_channels = int(patch_feats.size());
200 for (int i = 0; i < n_channels; ++i) {
// in_roi is a header onto channel i's slice of fw_all.
// NOTE(review): this relies on the assignment from the mul() expression writing into the
// existing ROI storage rather than reallocating; that holds only when the result has the
// ROI's size and type - confirm.
201 cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
202 in_roi = patch_feats[uint(i)].mul(m_window);
205 float *in = reinterpret_cast<float *>(fw_all.data);
206 fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
// Up to m_num_of_feats channels use the single-scale plan; the remaining call uses the
// all-scales plan (the `else` line separating the two calls is not visible in this excerpt).
// NOTE(review): plan_fw_all_scales is created in init() only when
// m_num_of_scales > 1 && m_big_batch_mode, and that condition is not re-checked here.
208 if (n_channels <= int(m_num_of_feats))
209 fftwf_execute_dft_r2c(plan_fw, in, out);
211 fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
// Inverse complex-to-real FFT of `complex_input`, written into `real_result` and scaled
// by 1 / (m_width * m_height).
//
// Parameters:
//   complex_input   - spectrum to invert; its n_channels selects the plan and its storage
//                     (get_p_data()) is passed to FFTW as fftwf_complex*.
//   real_result     - destination; its data pointer is passed to FFTW as float*, so it must
//                     already be allocated with the layout the chosen plan was built for.
//   real_result_arr - explicitly unused in this implementation.
//   stream          - not referenced on any line visible in this excerpt.
215 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
217 (void)real_result_arr;
220 int n_channels = complex_input.n_channels;
221 fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
222 float *out = reinterpret_cast<float *>(real_result.data);
// Plan selection by channel count (the leading `if` line is not visible in this excerpt).
// The two "*_all_scales" plans are created in init() only when
// m_num_of_scales > 1 && m_big_batch_mode, so both conditions are required before using
// them. Without the scale check, a single-scale big-batch configuration would match
// n_channels == m_num_of_feats * 1 and execute a plan that was never created; it now
// falls through to plan_i_features instead.
225 fftwf_execute_dft_c2r(plan_i_1ch, in, out);
226 else if (m_big_batch_mode && m_num_of_scales > 1 && n_channels == int(m_num_of_scales))
227 fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
228 else if (m_big_batch_mode && m_num_of_scales > 1 && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
229 fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
231 fftwf_execute_dft_c2r(plan_i_features, in, out);
// FFTW's c2r transform is unnormalized, so divide by the number of points per transform.
233 real_result = real_result / (m_width * m_height);
// Plan teardown: releases the plans built in init().
// (The header of the enclosing function lies on a line not visible in this excerpt.)
// The four per-scale plans are created unconditionally in init() and are always destroyed.
239 fftwf_destroy_plan(plan_f);
240 fftwf_destroy_plan(plan_fw);
241 fftwf_destroy_plan(plan_i_features);
242 fftwf_destroy_plan(plan_i_1ch);
// The batched plans are created in init() only when m_num_of_scales > 1 && m_big_batch_mode.
// The same condition is used here so that plan handles which were never created are not
// passed to fftwf_destroy_plan().
244 if (m_big_batch_mode && m_num_of_scales > 1) {
245 fftwf_destroy_plan(plan_f_all_scales);
246 fftwf_destroy_plan(plan_i_features_all_scales);
247 fftwf_destroy_plan(plan_fw_all_scales);
248 fftwf_destroy_plan(plan_i_1ch_all_scales);