]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_fftw.cpp
CUDA: Use per-thread default streams rather than explicit streams
[hercules2020/kcf.git] / src / fft_fftw.cpp
1 #include "fft_fftw.h"
2
3 #include "fft.h"
4
5 #ifdef OPENMP
6 #include <omp.h>
7 #endif
8
9 #if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
10 #define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(4);
11 #else
12 #define FFTW_PLAN_WITH_THREADS()
13 #endif
14
15 Fftw::Fftw(){}
16
17 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
18 {
19     m_width = width;
20     m_height = height;
21     m_num_of_feats = num_of_feats;
22     m_num_of_scales = num_of_scales;
23
24 #if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
25     fftw_init_threads();
26 #endif // OPENMP
27
28 #ifndef CUFFTW
29     std::cout << "FFT: FFTW" << std::endl;
30 #else
31     std::cout << "FFT: cuFFTW" << std::endl;
32 #endif
33     fftwf_cleanup();
34     // FFT forward one scale
35     {
36         cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
37         ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
38         plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
39                                        reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
40     }
41 #ifdef BIG_BATCH
42     // FFT forward all scales
43     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
44         cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
45         ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
46         float *in = reinterpret_cast<float *>(in_f_all.data);
47         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
48         int rank = 2;
49         int n[] = {(int)m_height, (int)m_width};
50         int howmany = m_num_of_scales;
51         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
52         int istride = 1, ostride = 1;
53         int *inembed = NULL, *onembed = NULL;
54
55         FFTW_PLAN_WITH_THREADS();
56         plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
57                                                     ostride, odist, FFTW_PATIENT);
58     }
59 #endif
60     // FFT forward window one scale
61     {
62         cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
63         ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
64         float *in = reinterpret_cast<float *>(in_fw.data);
65         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
66         int rank = 2;
67         int n[] = {int(m_height), int(m_width)};
68         int howmany = int(m_num_of_feats);
69         int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
70         int istride = 1, ostride = 1;
71         int *inembed = nullptr, *onembed = nullptr;
72
73         FFTW_PLAN_WITH_THREADS();
74         plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
75                                           FFTW_PATIENT);
76     }
77 #ifdef BIG_BATCH
78     // FFT forward window all scales all feats
79     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
80         cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
81         ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
82         float *in = reinterpret_cast<float *>(in_all.data);
83         fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
84         int rank = 2;
85         int n[] = {(int)m_height, (int)m_width};
86         int howmany = m_num_of_scales * m_num_of_feats;
87         int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
88         int istride = 1, ostride = 1;
89         int *inembed = NULL, *onembed = NULL;
90
91         FFTW_PLAN_WITH_THREADS();
92         plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
93                                                      ostride, odist, FFTW_PATIENT);
94     }
95 #endif
96     // FFT inverse one scale
97     {
98         ComplexMat in_i(m_height, m_width, m_num_of_feats);
99         cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
100         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
101         float *out = reinterpret_cast<float *>(out_i.data);
102         int rank = 2;
103         int n[] = {int(m_height), int(m_width)};
104         int howmany = int(m_num_of_feats);
105         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
106         int istride = 1, ostride = int(m_num_of_feats);
107         int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
108
109         FFTW_PLAN_WITH_THREADS();
110         plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
111                                                   odist, FFTW_PATIENT);
112     }
113     // FFT inverse all scales
114 #ifdef BIG_BATCH
115     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
116         ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
117         cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
118         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
119         float *out = reinterpret_cast<float *>(out_i_all.data);
120         int rank = 2;
121         int n[] = {(int)m_height, (int)m_width};
122         int howmany = m_num_of_feats * m_num_of_scales;
123         int idist = m_height * (m_width / 2 + 1), odist = 1;
124         int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
125         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
126
127         FFTW_PLAN_WITH_THREADS();
128         plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
129                                                              onembed, ostride, odist, FFTW_PATIENT);
130     }
131 #endif
132     // FFT inver one channel one scale
133     {
134         ComplexMat in_i1(int(m_height), int(m_width), 1);
135         cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
136         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
137         float *out = reinterpret_cast<float *>(out_i1.data);
138         int rank = 2;
139         int n[] = {int(m_height), int(m_width)};
140         int howmany = 1;
141         int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
142         int istride = 1, ostride = 1;
143         int inembed[] = {int(m_height), int(m_width) / 2 + 1}, *onembed = n;
144
145         FFTW_PLAN_WITH_THREADS();
146         plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
147                                              odist, FFTW_PATIENT);
148     }
149 #ifdef BIG_BATCH
150     // FFT inver one channel all scales
151     if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
152         ComplexMat in_i1_all(m_height, m_width, m_num_of_scales);
153         cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
154         fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1_all.get_p_data());
155         float *out = reinterpret_cast<float *>(out_i1_all.data);
156         int rank = 2;
157         int n[] = {(int)m_height, (int)m_width};
158         int howmany = m_num_of_scales;
159         int idist = m_height * (m_width / 2 + 1), odist = 1;
160         int istride = 1, ostride = m_num_of_scales;
161         int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
162
163         FFTW_PLAN_WITH_THREADS();
164         plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed,
165                                                         ostride, odist, FFTW_PATIENT);
166     }
167 #endif
168 }
169
170 void Fftw::set_window(const cv::Mat &window)
171 {
172     m_window = window;
173 }
174
175 void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr)
176 {
177     (void)real_input_arr;
178
179     if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
180         fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
181                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
182     } else {
183         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
184                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
185     }
186     return;
187 }
188
189 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
190                           float *real_input_arr)
191 {
192     (void)real_input_arr;
193
194     int n_channels = int(patch_feats.size());
195     for (int i = 0; i < n_channels; ++i) {
196         cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
197         in_roi = patch_feats[uint(i)].mul(m_window);
198     }
199
200     float *in = reinterpret_cast<float *>(fw_all.data);
201     fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
202
203     if (n_channels <= int(m_num_of_feats))
204         fftwf_execute_dft_r2c(plan_fw, in, out);
205     else
206         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
207     return;
208 }
209
210 void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr)
211 {
212     (void)real_result_arr;
213
214     int n_channels = complex_input.n_channels;
215     fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
216     float *out = reinterpret_cast<float *>(real_result.data);
217
218     if (n_channels == 1)
219         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
220     else if (BIG_BATCH_MODE && n_channels == int(m_num_of_scales))
221         fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
222     else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
223         fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
224     else
225         fftwf_execute_dft_c2r(plan_i_features, in, out);
226
227     real_result = real_result / (m_width * m_height);
228     return;
229 }
230
231 Fftw::~Fftw()
232 {
233     fftwf_destroy_plan(plan_f);
234     fftwf_destroy_plan(plan_fw);
235     fftwf_destroy_plan(plan_i_features);
236     fftwf_destroy_plan(plan_i_1ch);
237
238     if (BIG_BATCH_MODE) {
239         fftwf_destroy_plan(plan_f_all_scales);
240         fftwf_destroy_plan(plan_i_features_all_scales);
241         fftwf_destroy_plan(plan_fw_all_scales);
242         fftwf_destroy_plan(plan_i_1ch_all_scales);
243     }
244 }