]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_fftw.cpp
Merge remote-tracking branch 'upstream/master' into rotation
[hercules2020/kcf.git] / src / fft_fftw.cpp
1 #include "fft_fftw.h"
2
3 #include "fft.h"
4
5 #ifdef OPENMP
6 #include <omp.h>
7 #endif
8
9 #if (defined(BIG_BATCH) && !defined(CUFFTW)) || (!defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW))
10 #define FFTW_PLAN_WITH_THREADS() fftwf_plan_with_nthreads(4);
11 #define FFTW_INIT_THREAD() fftwf_init_threads();
12 #define FFTW_CLEAN_THREADS() fftwf_cleanup_threads();
13 #else
14 #define FFTW_PLAN_WITH_THREADS()
15 #define FFTW_INIT_THREAD()
16 #define FFTW_CLEAN_THREADS()
17 #endif
18
19 Fftw::Fftw() {}
20
21 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
22 {
23     m_width = width;
24     m_height = height;
25     m_num_of_feats = num_of_feats;
26     m_num_of_scales = num_of_scales;
27     m_big_batch_mode = big_batch_mode;
28
29 #ifndef CUFFTW
30     std::cout << "FFT: FFTW" << std::endl;
31 #else
32     std::cout << "FFT: cuFFTW" << std::endl;
33 #endif
34
35      FFTW_INIT_THREAD();
36
37     // FFT forward one scale
38     {
39         cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
40         ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
41
42         FFTW_PLAN_WITH_THREADS();
43         plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
44                                        reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
45     }
46 #ifdef BIG_BATCH
47     // FFT forward all scales
48     if (m_num_of_scales > 1 && m_big_batch_mode) {
49         cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
50         ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
51         float *in = reinterpret_cast<float *>(in_f_all.data);
52         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
53         int rank = 2;
54         int n[] = {(int)m_height, (int)m_width};
55         int howmany = m_num_of_scales;
56         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
57         int istride = 1, ostride = 1;
58         int *inembed = NULL, *onembed = NULL;
59
60         FFTW_PLAN_WITH_THREADS();
61         plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
62                                                     ostride, odist, FFTW_PATIENT);
63     }
64 #endif
65     // FFT forward window one scale
66     {
67         cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
68         ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
69         float *in = reinterpret_cast<float *>(in_fw.data);
70         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
71         int rank = 2;
72         int n[] = {int(m_height), int(m_width)};
73         int howmany = int(m_num_of_feats);
74         int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
75         int istride = 1, ostride = 1;
76         int *inembed = nullptr, *onembed = nullptr;
77
78         FFTW_PLAN_WITH_THREADS();
79         plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
80                                           FFTW_PATIENT);
81     }
82 #ifdef BIG_BATCH
83     // FFT forward window all scales all feats
84     if (m_num_of_scales > 1 && m_big_batch_mode) {
85         cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
86         ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
87         float *in = reinterpret_cast<float *>(in_all.data);
88         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
89         int rank = 2;
90         int n[] = {(int)m_height, (int)m_width};
91         int howmany = m_num_of_scales * m_num_of_feats;
92         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
93         int istride = 1, ostride = 1;
94         int *inembed = NULL, *onembed = NULL;
95
96         FFTW_PLAN_WITH_THREADS();
97         plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
98                                                      ostride, odist, FFTW_PATIENT);
99     }
100 #endif
101     // FFT inverse one scale
102     {
103         ComplexMat in_i(m_height, m_width, m_num_of_feats);
104         cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
105         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
106         float *out = reinterpret_cast<float *>(out_i.data);
107         int rank = 2;
108         int n[] = {int(m_height), int(m_width)};
109         int howmany = int(m_num_of_feats);
110         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
111         int istride = 1, ostride = int(m_num_of_feats);
112         int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
113
114         FFTW_PLAN_WITH_THREADS();
115         plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
116                                                   odist, FFTW_PATIENT);
117     }
118     // FFT inverse all scales
119 #ifdef BIG_BATCH
120     if (m_num_of_scales > 1 && m_big_batch_mode) {
121         ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
122         cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
123         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
124         float *out = reinterpret_cast<float *>(out_i_all.data);
125         int rank = 2;
126         int n[] = {(int)m_height, (int)m_width};
127         int howmany = m_num_of_feats * m_num_of_scales;
128         int idist = m_height * (m_width / 2 + 1), odist = 1;
129         int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
130         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
131
132         FFTW_PLAN_WITH_THREADS();
133         plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
134                                                              onembed, ostride, odist, FFTW_PATIENT);
135     }
136 #endif
137     // FFT inver one channel one scale
138     {
139         ComplexMat in_i1(int(m_height), int(m_width), 1);
140         cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
141         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
142         float *out = reinterpret_cast<float *>(out_i1.data);
143         int rank = 2;
144         int n[] = {int(m_height), int(m_width)};
145         int howmany = 1;
146         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
147         int istride = 1, ostride = 1;
148         int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
149
150         FFTW_PLAN_WITH_THREADS();
151         plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
152                                              odist, FFTW_PATIENT);
153     }
154 #ifdef BIG_BATCH
155     // FFT inver one channel all scales
156     if (m_num_of_scales > 1 && m_big_batch_mode) {
157         ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
158         cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
159         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
160         float *out = reinterpret_cast<float *>(out_i1_all.data);
161         int rank = 2;
162         int n[] = {(int)m_height, (int)m_width};
163         int howmany = m_num_of_scales;
164         int idist = m_height * (m_width / 2 + 1), odist = 1;
165         int istride = 1, ostride = m_num_of_scales;
166         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
167
168         FFTW_PLAN_WITH_THREADS();
169         plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
170                                                         ostride, odist, FFTW_PATIENT);
171     }
172 #endif
173 }
174
175 void Fftw::set_window(const cv::Mat &window)
176 {
177     m_window = window;
178 }
179
180 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
181 {
182     (void)real_input_arr;
183     (void)stream;
184
185     if (m_big_batch_mode && real_input.rows == int(m_height * m_num_of_scales)) {
186         fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
187                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
188     } else {
189         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
190                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
191     }
192     return;
193 }
194
195 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
196                           float *real_input_arr, cudaStream_t stream)
197 {
198     (void)real_input_arr;
199     (void)stream;
200
201     int n_channels = int(patch_feats.size());
202     for (int i = 0; i < n_channels; ++i) {
203         cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
204         in_roi = patch_feats[uint(i)].mul(m_window);
205     }
206
207     float *in = reinterpret_cast<float *>(fw_all.data);
208     fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
209
210     if (n_channels <= int(m_num_of_feats))
211         fftwf_execute_dft_r2c(plan_fw, in, out);
212     else
213         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
214     return;
215 }
216
217 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
218 {
219     (void)real_result_arr;
220     (void)stream;
221
222     int n_channels = complex_input.n_channels;
223     fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
224     float *out = reinterpret_cast<float *>(real_result.data);
225
226     if (n_channels == 1)
227         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
228     else if (m_big_batch_mode && n_channels == int(m_num_of_scales))
229         fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
230     else if (m_big_batch_mode && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
231         fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
232     else
233         fftwf_execute_dft_c2r(plan_i_features, in, out);
234
235     real_result = real_result / (m_width * m_height);
236     return;
237 }
238
239 Fftw::~Fftw()
240 {
241     fftwf_destroy_plan(plan_f);
242     fftwf_destroy_plan(plan_fw);
243     fftwf_destroy_plan(plan_i_features);
244     fftwf_destroy_plan(plan_i_1ch);
245
246     if (m_big_batch_mode) {
247         fftwf_destroy_plan(plan_f_all_scales);
248         fftwf_destroy_plan(plan_i_features_all_scales);
249         fftwf_destroy_plan(plan_fw_all_scales);
250         fftwf_destroy_plan(plan_i_1ch_all_scales);
251     }
252     FFTW_CLEAN_THREADS();
253 }