rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_fftw.cpp
Formatted files with Clang Format.
[hercules2020/kcf.git] / src / fft_fftw.cpp
1 #include "fft_fftw.h"
2
3 #include "fft.h"
4
5 #ifdef OPENMP
6 #include <omp.h>
7 #endif
8
// FFTW_PLAN_WITH_THREADS() requests m_num_threads threads for plans created
// after it. It must be used inside a member function, because it reads
// m_num_threads, and it is written as a plain statement:
//     FFTW_PLAN_WITH_THREADS();
// The definition deliberately carries no trailing semicolon so that the
// statement above stays a single statement (safe in an unbraced if/else).
// In ASYNC, OPENMP and CUFFTW builds it expands to a no-op expression.
//
// NOTE(review): fftw_plan_with_nthreads is the double-precision entry point,
// while every plan in this file is single precision (fftwf_*). Per the FFTW
// manual the float planner is configured through fftwf_plan_with_nthreads,
// so this call may have no effect on the plans below. Left unchanged because
// switching would also change which FFTW threads library must be linked.
#if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
#define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(int(m_num_threads))
#else
#define FFTW_PLAN_WITH_THREADS() ((void)0)
#endif
14
15 Fftw::Fftw() : m_num_threads(4) {}
16
17 Fftw::Fftw(unsigned num_threads) : m_num_threads(num_threads) {}
18
19 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
20 {
21     m_width = width;
22     m_height = height;
23     m_num_of_feats = num_of_feats;
24     m_num_of_scales = num_of_scales;
25     m_big_batch_mode = big_batch_mode;
26
27 #if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
28     fftw_init_threads();
29 #endif // OPENMP
30
31 #ifndef CUFFTW
32     std::cout << "FFT: FFTW" << std::endl;
33 #else
34     std::cout << "FFT: cuFFTW" << std::endl;
35 #endif
36     fftwf_cleanup();
37     // FFT forward one scale
38     {
39         cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
40         ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
41         plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
42                                        reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
43     }
44 #ifdef BIG_BATCH
45     // FFT forward all scales
46     if (m_num_of_scales > 1 && m_big_batch_mode) {
47         cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
48         ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
49         float *in = reinterpret_cast<float *>(in_f_all.data);
50         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
51         int rank = 2;
52         int n[] = {(int)m_height, (int)m_width};
53         int howmany = m_num_of_scales;
54         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
55         int istride = 1, ostride = 1;
56         int *inembed = NULL, *onembed = NULL;
57
58         FFTW_PLAN_WITH_THREADS();
59         plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
60                                                     ostride, odist, FFTW_PATIENT);
61     }
62 #endif
63     // FFT forward window one scale
64     {
65         cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
66         ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
67         float *in = reinterpret_cast<float *>(in_fw.data);
68         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
69         int rank = 2;
70         int n[] = {int(m_height), int(m_width)};
71         int howmany = int(m_num_of_feats);
72         int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
73         int istride = 1, ostride = 1;
74         int *inembed = nullptr, *onembed = nullptr;
75
76         FFTW_PLAN_WITH_THREADS();
77         plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
78                                           FFTW_PATIENT);
79     }
80 #ifdef BIG_BATCH
81     // FFT forward window all scales all feats
82     if (m_num_of_scales > 1 && m_big_batch_mode) {
83         cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
84         ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
85         float *in = reinterpret_cast<float *>(in_all.data);
86         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
87         int rank = 2;
88         int n[] = {(int)m_height, (int)m_width};
89         int howmany = m_num_of_scales * m_num_of_feats;
90         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
91         int istride = 1, ostride = 1;
92         int *inembed = NULL, *onembed = NULL;
93
94         FFTW_PLAN_WITH_THREADS();
95         plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
96                                                      ostride, odist, FFTW_PATIENT);
97     }
98 #endif
99     // FFT inverse one scale
100     {
101         ComplexMat in_i(m_height, m_width, m_num_of_feats);
102         cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
103         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
104         float *out = reinterpret_cast<float *>(out_i.data);
105         int rank = 2;
106         int n[] = {int(m_height), int(m_width)};
107         int howmany = int(m_num_of_feats);
108         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
109         int istride = 1, ostride = int(m_num_of_feats);
110         int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
111
112         FFTW_PLAN_WITH_THREADS();
113         plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
114                                                   odist, FFTW_PATIENT);
115     }
116     // FFT inverse all scales
117 #ifdef BIG_BATCH
118     if (m_num_of_scales > 1 && m_big_batch_mode) {
119         ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
120         cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
121         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
122         float *out = reinterpret_cast<float *>(out_i_all.data);
123         int rank = 2;
124         int n[] = {(int)m_height, (int)m_width};
125         int howmany = m_num_of_feats * m_num_of_scales;
126         int idist = m_height * (m_width / 2 + 1), odist = 1;
127         int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
128         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
129
130         FFTW_PLAN_WITH_THREADS();
131         plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
132                                                              onembed, ostride, odist, FFTW_PATIENT);
133     }
134 #endif
135     // FFT inver one channel one scale
136     {
137         ComplexMat in_i1(int(m_height), int(m_width), 1);
138         cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
139         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
140         float *out = reinterpret_cast<float *>(out_i1.data);
141         int rank = 2;
142         int n[] = {int(m_height), int(m_width)};
143         int howmany = 1;
144         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
145         int istride = 1, ostride = 1;
146         int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
147
148         FFTW_PLAN_WITH_THREADS();
149         plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
150                                              odist, FFTW_PATIENT);
151     }
152 #ifdef BIG_BATCH
153     // FFT inver one channel all scales
154     if (m_num_of_scales > 1 && m_big_batch_mode) {
155         ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
156         cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
157         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
158         float *out = reinterpret_cast<float *>(out_i1_all.data);
159         int rank = 2;
160         int n[] = {(int)m_height, (int)m_width};
161         int howmany = m_num_of_scales;
162         int idist = m_height * (m_width / 2 + 1), odist = 1;
163         int istride = 1, ostride = m_num_of_scales;
164         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
165
166         FFTW_PLAN_WITH_THREADS();
167         plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
168                                                         ostride, odist, FFTW_PATIENT);
169     }
170 #endif
171 }
172
173 void Fftw::set_window(const cv::Mat &window)
174 {
175     m_window = window;
176 }
177
178 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
179 {
180     (void)real_input_arr;
181     (void)stream;
182
183     if (m_big_batch_mode && real_input.rows == int(m_height * m_num_of_scales)) {
184         fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
185                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
186     } else {
187         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
188                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
189     }
190     return;
191 }
192
193 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
194                           float *real_input_arr, cudaStream_t stream)
195 {
196     (void)real_input_arr;
197     (void)stream;
198
199     int n_channels = int(patch_feats.size());
200     for (int i = 0; i < n_channels; ++i) {
201         cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
202         in_roi = patch_feats[uint(i)].mul(m_window);
203     }
204
205     float *in = reinterpret_cast<float *>(fw_all.data);
206     fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
207
208     if (n_channels <= int(m_num_of_feats))
209         fftwf_execute_dft_r2c(plan_fw, in, out);
210     else
211         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
212     return;
213 }
214
215 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
216 {
217     (void)real_result_arr;
218     (void)stream;
219
220     int n_channels = complex_input.n_channels;
221     fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
222     float *out = reinterpret_cast<float *>(real_result.data);
223
224     if (n_channels == 1)
225         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
226     else if (m_big_batch_mode && n_channels == int(m_num_of_scales))
227         fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
228     else if (m_big_batch_mode && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
229         fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
230     else
231         fftwf_execute_dft_c2r(plan_i_features, in, out);
232
233     real_result = real_result / (m_width * m_height);
234     return;
235 }
236
237 Fftw::~Fftw()
238 {
239     fftwf_destroy_plan(plan_f);
240     fftwf_destroy_plan(plan_fw);
241     fftwf_destroy_plan(plan_i_features);
242     fftwf_destroy_plan(plan_i_1ch);
243
244     if (m_big_batch_mode) {
245         fftwf_destroy_plan(plan_f_all_scales);
246         fftwf_destroy_plan(plan_i_features_all_scales);
247         fftwf_destroy_plan(plan_fw_all_scales);
248         fftwf_destroy_plan(plan_i_1ch_all_scales);
249     }
250 }