]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_fftw.cpp
CUDA streams works
[hercules2020/kcf.git] / src / fft_fftw.cpp
1 #include "fft_fftw.h"
2
3 #include "fft.h"
4
5 #ifdef OPENMP
6   #include <omp.h>
7 #endif
8
// FFTW_PLAN_WITH_THREADS() is placed in front of the batched plan creations in
// Fftw::init(). It expands to nothing when any of ASYNC, OPENMP or CUFFTW is
// defined; otherwise it passes m_num_threads to the FFTW planner.
// NOTE(review): the call uses the double-precision `fftw_` prefix, whereas every
// plan in this file is created through the single-precision `fftwf_` API --
// confirm that the thread count really reaches the planner that builds them.
#if defined(ASYNC) || defined(OPENMP) || defined(CUFFTW)
#define FFTW_PLAN_WITH_THREADS()
#else
#define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(int(m_num_threads));
#endif
14
15 Fftw::Fftw()
16     : m_num_threads(4)
17 {
18 }
19
20 Fftw::Fftw(unsigned num_threads)
21     : m_num_threads(num_threads)
22 {
23 }
24
25 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
26 {
27     m_width = width;
28     m_height = height;
29     m_num_of_feats = num_of_feats;
30     m_num_of_scales = num_of_scales;
31     m_big_batch_mode = big_batch_mode;
32
33 #if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
34     fftw_init_threads();
35 #endif //OPENMP
36
37 #ifndef CUFFTW
38     std::cout << "FFT: FFTW" << std::endl;
39 #else
40     std::cout << "FFT: cuFFTW" << std::endl;
41 #endif
42     fftwf_cleanup();
43     //FFT forward one scale
44     {
45         cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
46         ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
47         plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width),
48                                        reinterpret_cast<float*>(in_f.data),
49                                        reinterpret_cast<fftwf_complex*>(out_f.get_p_data()),
50                                        FFTW_PATIENT);
51     }
52 #ifdef BIG_BATCH
53     //FFT forward all scales
54     if (m_num_of_scales > 1 && m_big_batch_mode) {
55         cv::Mat in_f_all = cv::Mat::zeros(m_height*m_num_of_scales, m_width, CV_32F);
56         ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
57         float *in = reinterpret_cast<float*>(in_f_all.data);
58         fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_f_all.get_p_data());
59         int rank = 2;
60         int n[] = {(int)m_height, (int)m_width};
61         int howmany = m_num_of_scales;
62         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
63         int istride = 1, ostride = 1;
64         int *inembed = NULL, *onembed = NULL;
65
66         FFTW_PLAN_WITH_THREADS();
67         plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany,
68                                                     in, inembed, istride, idist,
69                                                     out, onembed, ostride, odist,
70                                                     FFTW_PATIENT);
71     }
72 #endif
73     //FFT forward window one scale
74     {
75         cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
76         ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
77         float *in = reinterpret_cast<float*>(in_fw.data);
78         fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_fw.get_p_data());
79         int rank = 2;
80         int n[] = {int(m_height), int(m_width)};
81         int howmany = int(m_num_of_feats);
82         int idist = int(m_height*m_width), odist = int(m_height*(m_width/2+1));
83         int istride = 1, ostride = 1;
84         int *inembed = nullptr, *onembed = nullptr;
85
86         FFTW_PLAN_WITH_THREADS();
87         plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany,
88                                           in, inembed, istride, idist,
89                                           out, onembed, ostride, odist,
90                                           FFTW_PATIENT);
91     }
92 #ifdef BIG_BATCH
93     //FFT forward window all scales all feats
94     if (m_num_of_scales > 1 && m_big_batch_mode) {
95         cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales*m_num_of_feats), m_width, CV_32F);
96         ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales*m_num_of_feats);
97         float *in = reinterpret_cast<float*>(in_all.data);
98         fftwf_complex *out = reinterpret_cast<fftwf_complex*>(out_all.get_p_data());
99         int rank = 2;
100         int n[] = {(int)m_height, (int)m_width};
101         int howmany = m_num_of_scales*m_num_of_feats;
102         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
103         int istride = 1, ostride = 1;
104         int *inembed = NULL, *onembed = NULL;
105
106         FFTW_PLAN_WITH_THREADS();
107         plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany,
108                                                      in,  inembed, istride, idist,
109                                                      out, onembed, ostride, odist,
110                                                      FFTW_PATIENT);
111     }
112 #endif
113     //FFT inverse one scale
114     {
115         ComplexMat in_i(m_height, m_width, m_num_of_feats);
116         cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
117         fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i.get_p_data());
118         float *out = reinterpret_cast<float*>(out_i.data);
119         int rank = 2;
120         int n[] = {int(m_height), int(m_width)};
121         int howmany = int(m_num_of_feats);
122         int idist = int(m_height*(m_width/2+1)), odist = 1;
123         int istride = 1, ostride = int(m_num_of_feats);
124         int inembed[] = {int(m_height), int(m_width/2+1)}, *onembed = n;
125
126         FFTW_PLAN_WITH_THREADS();
127         plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany,
128                                                   in,  inembed, istride, idist,
129                                                   out, onembed, ostride, odist,
130                                                   FFTW_PATIENT);
131     }
132     //FFT inverse all scales
133 #ifdef BIG_BATCH
134     if (m_num_of_scales > 1 && m_big_batch_mode) {
135         ComplexMat in_i_all(m_height,m_width,m_num_of_feats*m_num_of_scales);
136         cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats*m_num_of_scales));
137         fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i_all.get_p_data());
138         float *out = reinterpret_cast<float*>(out_i_all.data);
139         int rank = 2;
140         int n[] = {(int)m_height, (int)m_width};
141         int howmany = m_num_of_feats*m_num_of_scales;
142         int idist = m_height*(m_width/2+1), odist = 1;
143         int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
144         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
145
146         FFTW_PLAN_WITH_THREADS();
147         plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany,
148                                                              in,  inembed, istride, idist,
149                                                              out, onembed, ostride, odist,
150                                                              FFTW_PATIENT);
151     }
152 #endif
153     //FFT inver one channel one scale
154     {
155         ComplexMat in_i1(int(m_height),int(m_width),1);
156         cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
157         fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i1.get_p_data());
158         float *out = reinterpret_cast<float*>(out_i1.data);
159         int rank = 2;
160         int n[] = {int(m_height), int(m_width)};
161         int howmany = 1;
162         int idist = int(m_height*(m_width/2+1)), odist = 1;
163         int istride = 1, ostride = 1;
164         int inembed[] = {int(m_height), int(m_width)/2+1}, *onembed = n;
165
166         FFTW_PLAN_WITH_THREADS();
167         plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany,
168                                              in,  inembed, istride, idist,
169                                              out, onembed, ostride, odist,
170                                              FFTW_PATIENT);
171     }
172 #ifdef BIG_BATCH
173     //FFT inver one channel all scales
174     if (m_num_of_scales > 1 && m_big_batch_mode) {
175         ComplexMat in_i1_all(m_height,m_width,m_num_of_scales);
176         cv::Mat out_i1_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_scales));
177         fftwf_complex *in = reinterpret_cast<fftwf_complex*>(in_i1_all.get_p_data());
178         float *out = reinterpret_cast<float*>(out_i1_all.data);
179         int rank = 2;
180         int n[] = {(int)m_height, (int)m_width};
181         int howmany = m_num_of_scales;
182         int idist = m_height*(m_width/2+1), odist = 1;
183         int istride = 1, ostride = m_num_of_scales;
184         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
185
186         FFTW_PLAN_WITH_THREADS();
187         plan_i_1ch_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany,
188                                                         in,  inembed, istride, idist,
189                                                         out, onembed, ostride, odist,
190                                                         FFTW_PATIENT);
191     }
192 #endif
193 }
194
195 void Fftw::set_window(const cv::Mat &window)
196 {
197     m_window = window;
198 }
199
200 void Fftw::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr, cudaStream_t stream)
201 {
202     (void) real_input_arr;
203     (void) stream;
204
205     if(m_big_batch_mode && real_input.rows == int(m_height*m_num_of_scales)){
206         fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float*>(real_input.data),
207                               reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
208     } else {
209         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float*>(real_input.data),
210                               reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
211     }
212     return;
213 }
214
215 void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr, cudaStream_t stream)
216 {
217     (void) real_input_arr;
218     (void) stream;
219
220     int n_channels = int(patch_feats.size());
221     for (int i = 0; i < n_channels; ++i) {
222         cv::Mat in_roi(fw_all, cv::Rect(0, i*int(m_height), int(m_width), int(m_height)));
223         in_roi = patch_feats[uint(i)].mul(m_window);
224     }
225
226     float *in = reinterpret_cast<float*>(fw_all.data);
227     fftwf_complex *out = reinterpret_cast<fftwf_complex*>(complex_result.get_p_data());
228
229     if (n_channels <= int(m_num_of_feats))
230         fftwf_execute_dft_r2c(plan_fw, in, out);
231     else
232         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
233     return;
234 }
235
236 void Fftw::inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr, cudaStream_t stream)
237 {
238     (void) real_result_arr;
239     (void) stream;
240
241     int n_channels = complex_input.n_channels;
242     fftwf_complex *in = reinterpret_cast<fftwf_complex*>(complex_input.get_p_data());
243     float *out = reinterpret_cast<float*>(real_result.data);
244
245     if(n_channels == 1)
246         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
247     else if(m_big_batch_mode && n_channels == int(m_num_of_scales))
248         fftwf_execute_dft_c2r(plan_i_1ch_all_scales, in, out);
249     else if(m_big_batch_mode && n_channels == int(m_num_of_feats) * int(m_num_of_scales))
250         fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
251     else
252         fftwf_execute_dft_c2r(plan_i_features, in, out);
253
254     real_result = real_result/(m_width*m_height);
255     return;
256 }
257
258 Fftw::~Fftw()
259 {
260     fftwf_destroy_plan(plan_f);
261     fftwf_destroy_plan(plan_fw);
262     fftwf_destroy_plan(plan_i_features);
263     fftwf_destroy_plan(plan_i_1ch);
264     
265     if (m_big_batch_mode) {
266         fftwf_destroy_plan(plan_f_all_scales);
267         fftwf_destroy_plan(plan_i_features_all_scales);
268         fftwf_destroy_plan(plan_fw_all_scales);
269         fftwf_destroy_plan(plan_i_1ch_all_scales);
270     }
271 }