]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_fftw.cpp
Merge remote-tracking branch 'upstream/master' into rotation
[hercules2020/kcf.git] / src / fft_fftw.cpp
1 #include "fft_fftw.h"
2
3 #include "fft.h"
4
5 #ifdef OPENMP
6 #include <omp.h>
7 #endif
8
// FFTW's own multithreading is used only with the real FFTW library (never cuFFTW), and
// only when either big-batch mode is compiled in or neither ASYNC nor OpenMP is enabled.
// Each macro expands to a single call expression (or to nothing); the call site supplies
// the terminating semicolon.
#if !defined(CUFFTW) && (defined(BIG_BATCH) || (!defined(ASYNC) && !defined(OPENMP)))
#define FFTW_PLAN_WITH_THREADS() fftwf_plan_with_nthreads(4)
#define FFTW_INIT_THREAD() fftwf_init_threads()
#define FFTW_CLEAN_THREADS() fftwf_cleanup_threads()
#else
#define FFTW_PLAN_WITH_THREADS()
#define FFTW_INIT_THREAD()
#define FFTW_CLEAN_THREADS()
#endif
18
19 Fftw::Fftw() {}
20
21 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
22 {
23     m_width = width;
24     m_height = height;
25     m_num_of_feats = num_of_feats;
26     m_num_of_scales = num_of_scales;
27
28 #ifndef CUFFTW
29     std::cout << "FFT: FFTW" << std::endl;
30 #else
31     std::cout << "FFT: cuFFTW" << std::endl;
32 #endif
33
34      FFTW_INIT_THREAD();
35
36     // FFT forward one scale
37     {
38         cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
39         ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
40
41         FFTW_PLAN_WITH_THREADS();
42         plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
43                                        reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
44     }
45 #ifdef BIG_BATCH
46     // FFT forward all scales
47     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
48         cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
49         ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
50         float *in = reinterpret_cast<float *>(in_f_all.data);
51         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
52         int rank = 2;
53         int n[] = {(int)m_height, (int)m_width};
54         int howmany = m_num_of_scales;
55         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
56         int istride = 1, ostride = 1;
57         int *inembed = NULL, *onembed = NULL;
58
59         FFTW_PLAN_WITH_THREADS();
60         plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
61                                                     ostride, odist, FFTW_PATIENT);
62     }
63 #endif
64     // FFT forward window one scale
65     {
66         cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
67         ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
68         float *in = reinterpret_cast<float *>(in_fw.data);
69         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
70         int rank = 2;
71         int n[] = {int(m_height), int(m_width)};
72         int howmany = int(m_num_of_feats);
73         int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
74         int istride = 1, ostride = 1;
75         int *inembed = nullptr, *onembed = nullptr;
76
77         FFTW_PLAN_WITH_THREADS();
78         plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
79                                           FFTW_PATIENT);
80     }
81 #ifdef BIG_BATCH
82     // FFT forward window all scales all feats
83     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
84         cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
85         ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
86         float *in = reinterpret_cast<float *>(in_all.data);
87         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
88         int rank = 2;
89         int n[] = {(int)m_height, (int)m_width};
90         int howmany = m_num_of_scales * m_num_of_feats;
91         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
92         int istride = 1, ostride = 1;
93         int *inembed = NULL, *onembed = NULL;
94
95         FFTW_PLAN_WITH_THREADS();
96         plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
97                                                      ostride, odist, FFTW_PATIENT);
98     }
99 #endif
100     // FFT inverse one scale
101     {
102         ComplexMat in_i(m_height, m_width, m_num_of_feats);
103         cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
104         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
105         float *out = reinterpret_cast<float *>(out_i.data);
106         int rank = 2;
107         int n[] = {int(m_height), int(m_width)};
108         int howmany = int(m_num_of_feats);
109         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
110         int istride = 1, ostride = int(m_num_of_feats);
111         int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
112
113         FFTW_PLAN_WITH_THREADS();
114         plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
115                                                   odist, FFTW_PATIENT);
116     }
117     // FFT inverse all scales
118 #ifdef BIG_BATCH
119     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
120         ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
121         cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
122         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
123         float *out = reinterpret_cast<float *>(out_i_all.data);
124         int rank = 2;
125         int n[] = {(int)m_height, (int)m_width};
126         int howmany = m_num_of_feats * m_num_of_scales;
127         int idist = m_height * (m_width / 2 + 1), odist = 1;
128         int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
129         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
130
131         FFTW_PLAN_WITH_THREADS();
132         plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
133                                                              onembed, ostride, odist, FFTW_PATIENT);
134     }
135 #endif
136     // FFT inver one channel one scale
137     {
138         ComplexMat in_i1(int(m_height), int(m_width), 1);
139         cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
140         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
141         float *out = reinterpret_cast<float *>(out_i1.data);
142         int rank = 2;
143         int n[] = {int(m_height), int(m_width)};
144         int howmany = 1;
145         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
146         int istride = 1, ostride = 1;
147         int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
148
149         FFTW_PLAN_WITH_THREADS();
150         plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
151                                              odist, FFTW_PATIENT);
152     }
153 #ifdef BIG_BATCH
154     // FFT inver one channel all scales
155     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
156         ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
157         cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
158         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
159         float *out = reinterpret_cast<float *>(out_i1_all.data);
160         int rank = 2;
161         int n[] = {(int)m_height, (int)m_width};
162         int howmany = m_num_of_scales;
163         int idist = m_height * (m_width / 2 + 1), odist = 1;
164         int istride = 1, ostride = m_num_of_scales;
165         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
166
167         FFTW_PLAN_WITH_THREADS();
168         plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
169                                                         ostride, odist, FFTW_PATIENT);
170     }
171 #endif
172 }
173
174 void Fftw::set_window(const cv::Mat &window)
175 {
176     m_window = window;
177 }
178
179 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
180 {
181     (void)real_input_arr;
182     (void)stream;
183
184     if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
185         fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
186                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
187     } else {
188         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
189                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
190     }
191     return;
192 }
193
194 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
195                           float *real_input_arr, cudaStream_t stream)
196 {
197     (void)real_input_arr;
198     (void)stream;
199
200     int n_channels = int(patch_feats.size());
201     for (int i = 0; i < n_channels; ++i) {
202         cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
203         in_roi = patch_feats[uint(i)].mul(m_window);
204     }
205
206     float *in = reinterpret_cast<float *>(fw_all.data);
207     fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
208
209     if (n_channels <= int(m_num_of_feats))
210         fftwf_execute_dft_r2c(plan_fw, in, out);
211     else
212         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
213     return;
214 }
215
216 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
217 {
218     (void)real_result_arr;
219     (void)stream;
220
221     int n_channels = complex_input.n_channels;
222     fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
223     float *out = reinterpret_cast<float *>(real_result.data);
224
225     if (n_channels == 1)
226         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
227     else if (BIG_BATCH_MODE && n_channels == int(m_num_of_scales))
228         fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
229     else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
230         fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
231     else
232         fftwf_execute_dft_c2r(plan_i_features, in, out);
233
234     real_result = real_result / (m_width * m_height);
235     return;
236 }
237
238 Fftw::~Fftw()
239 {
240     fftwf_destroy_plan(plan_f);
241     fftwf_destroy_plan(plan_fw);
242     fftwf_destroy_plan(plan_i_features);
243     fftwf_destroy_plan(plan_i_1ch);
244
245     if (BIG_BATCH_MODE) {
246         fftwf_destroy_plan(plan_f_all_scales);
247         fftwf_destroy_plan(plan_i_features_all_scales);
248         fftwf_destroy_plan(plan_fw_all_scales);
249         fftwf_destroy_plan(plan_i_1ch_all_scales);
250     }
251     FFTW_CLEAN_THREADS();
252 }