rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_cufft.cpp
KCF_Tracker::init now uses Scale_vars FFT API
[hercules2020/kcf.git] / src / fft_cufft.cpp
1 #include "fft_cufft.h"
2
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
4 {
5     m_width = width;
6     m_height = height;
7     m_num_of_feats = num_of_feats;
8     m_num_of_scales = num_of_scales;
9     m_big_batch_mode = big_batch_mode;
10
11     std::cout << "FFT: cuFFT" << std::endl;
12
13     //FFT forward one scale
14     {
15         CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
16
17        CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
18
19
20     }
21     //FFT forward all scales
22     if(m_num_of_scales > 1 && m_big_batch_mode)
23     {
24         CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
25
26         int rank = 2;
27         int n[] = {(int)m_height, (int)m_width};
28         int howmany = m_num_of_scales;
29         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
30         int istride = 1, ostride = 1;
31         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
32
33         CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n,
34                   inembed, istride, idist,
35                   onembed, ostride, odist,
36                   CUFFT_R2C, howmany));
37     }
38     //FFT forward window one scale
39     {
40         CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
41         CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
42
43         int rank = 2;
44         int n[] = {(int)m_height, (int)m_width};
45         int howmany = m_num_of_feats;
46         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
47         int istride = 1, ostride = 1;
48         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
49
50         CufftErrorCheck(cufftPlanMany(&plan_fw, rank, n,
51                   inembed, istride, idist,
52                   onembed, ostride, odist,
53                   CUFFT_R2C, howmany));
54     }
55     //FFT forward window all scales all feats
56     if(m_num_of_scales > 1 && m_big_batch_mode)
57     {
58         CudaSafeCall(cudaHostAlloc(&data_fw_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
59         CudaSafeCall(cudaHostGetDevicePointer(&data_fw_all_scales_d, data_fw_all_scales, 0));
60
61         int rank = 2;
62         int n[] = {(int)m_height, (int)m_width};
63         int howmany = m_num_of_scales*m_num_of_feats;
64         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
65         int istride = 1, ostride = 1;
66         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
67
68         CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n,
69                   inembed, istride, idist,
70                   onembed, ostride, odist,
71                   CUFFT_R2C, howmany));
72
73
74     }
75     //FFT inverse one scale
76     {
77         int rank = 2;
78         int n[] = {(int)m_height, (int)m_width};
79         int howmany = m_num_of_feats;
80         int idist = m_height*(m_width/2+1), odist = 1;
81         int istride = 1, ostride = m_num_of_feats;
82         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
83
84         CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n,
85                   inembed, istride, idist,
86                   onembed, ostride, odist,
87                   CUFFT_C2R, howmany));
88     }
89     //FFT inverse all scales
90 #ifdef BIG_BATCH
91     if(m_num_of_scales > 1)
92     {
93         CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
94         CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
95
96         int rank = 2;
97         int n[] = {(int)m_height, (int)m_width};
98         int howmany = m_num_of_feats*m_num_of_scales;
99         int idist = m_height*(m_width/2+1), odist = 1;
100         int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
101         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
102
103         CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n,
104                   inembed, istride, idist,
105                   onembed, ostride, odist,
106                   CUFFT_C2R, howmany));
107     }
108 #endif
109     //FFT inverse one channel one scale
110     {
111         int rank = 2;
112         int n[] = {(int)m_height, (int)m_width};
113         int howmany = 1;
114         int idist = m_height*(m_width/2+1), odist = 1;
115         int istride = 1, ostride = 1;
116         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
117
118         CufftErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n,
119                   inembed, istride, idist,
120                   onembed, ostride, odist,
121                   CUFFT_C2R, howmany));
122     }
123 #ifdef BIG_BATCH
124     //FFT inverse one channel all scales
125     if(m_num_of_scales > 1 && m_big_batch_mode)
126     {
127         CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
128         CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
129
130         int rank = 2;
131         int n[] = {(int)m_height, (int)m_width};
132         int howmany = m_num_of_scales;
133         int idist = m_height*(m_width/2+1), odist = 1;
134         int istride = 1, ostride = m_num_of_scales;
135         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
136
137         CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n,
138                   inembed, istride, idist,
139                   onembed, ostride, odist,
140                   CUFFT_C2R, howmany));
141     }
142 #endif
143 }
144
145 void cuFFT::set_window(const cv::Mat & window)
146 {
147      m_window = window;
148 }
149
150 ComplexMat cuFFT::forward(const cv::Mat & input)
151 {
152     ComplexMat complex_result;
153     if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){
154         CudaSafeCall(cudaMemcpy(data_f_all_scales, input.ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
155         complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
156         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
157                                 complex_result.get_p_data()));
158     } else {
159         CudaSafeCall(cudaMemcpy(data_f, input.ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
160         complex_result.create(m_height, m_width/ 2 + 1, 1);
161         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
162                                 complex_result.get_p_data()));
163     }
164
165     return complex_result;
166 }
167
168 void cuFFT::forward(Scale_vars & vars)
169 {
170     ComplexMat *complex_result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_yf_ptr :
171                                                   vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
172     cv::Mat *input = vars.flag & Tracker_flags::TRACKER_INIT ? & vars.rot_labels : & vars.in_all;
173
174     if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){
175         CudaSafeCall(cudaMemcpy(data_f_all_scales, input->ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
176         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
177                                 complex_result->get_p_data()));
178     } else {
179         CudaSafeCall(cudaMemcpy(data_f, input->ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
180         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
181                                 complex_result->get_p_data()));
182     }
183     return;
184 }
185
186 void cuFFT::forward_raw(Scale_vars & vars, bool all_scales)
187 {
188     ComplexMat *result = vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
189     if (all_scales){
190         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
191                                 result->get_p_data()));
192     } else {
193         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
194                                 result->get_p_data()));
195     }
196     return;
197 }
198
199 ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
200 {
201     int n_channels = input.size();
202     ComplexMat result;
203     if(n_channels > (int) m_num_of_feats){
204         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
205         for (int i = 0; i < n_channels; ++i) {
206             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
207             in_roi = input[i].mul(m_window);
208         }
209
210         result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
211
212         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
213     } else {
214         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
215         for (int i = 0; i < n_channels; ++i) {
216             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
217             in_roi = input[i].mul(m_window);
218         }
219
220         result.create(m_height, m_width/2 + 1, n_channels);
221
222         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
223     }
224     return result;
225 }
226
227 void cuFFT::forward_window(Scale_vars & vars)
228 {
229     int n_channels = vars.patch_feats.size();
230
231     ComplexMat *result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_model_xf_ptr :
232                                                   vars.flag & Tracker_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
233
234     if(n_channels > (int) m_num_of_feats){
235         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
236         for (int i = 0; i < n_channels; ++i) {
237             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
238             in_roi = vars.patch_feats[i].mul(m_window);
239         }
240
241         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
242     } else {
243         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
244         for (int i = 0; i < n_channels; ++i) {
245             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
246             in_roi = vars.patch_feats[i].mul(m_window);
247         }
248
249         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result->get_p_data()));
250     }
251     return;
252 }
253
254 void cuFFT::inverse(Scale_vars & vars)
255 {
256     ComplexMat *input = vars.flag & Tracker_flags::RESPONSE ? & vars.kzf : &  vars.xyf;
257     cv::Mat *real_result = vars.flag & Tracker_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
258
259     int n_channels = input->n_channels;
260     cufftComplex *in = reinterpret_cast<cufftComplex*>(input->get_p_data());
261
262     if(n_channels == 1){
263
264         CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_d)));
265         cudaDeviceSynchronize();
266         *real_result = *real_result/(m_width*m_height);
267         return;
268     }
269 #ifdef BIG_BATCH
270     else if(n_channels == (int) m_num_of_scales){
271         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), vars.data_i_1ch_all_scales);
272
273         CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_all_scales_d)));
274         cudaDeviceSynchronize();
275
276         return real_result/(m_width*m_height);
277     } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
278         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
279
280         CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_features_all_scales_d)));
281         cudaDeviceSynchronize();
282
283         return real_result/(m_width*m_height);
284     }
285 #endif
286
287     CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(vars.data_i_features_d)));
288
289     if (vars.cuda_gauss)
290         return;
291     else {
292         cudaDeviceSynchronize();
293         *real_result = *real_result/(m_width*m_height);
294     }
295     return;
296 }
297
298 cuFFT::~cuFFT()
299 {
300   CufftErrorCheck(cufftDestroy(plan_f));
301   CufftErrorCheck(cufftDestroy(plan_fw));
302   CufftErrorCheck(cufftDestroy(plan_i_1ch));
303   CufftErrorCheck(cufftDestroy(plan_i_features));
304
305   CudaSafeCall(cudaFree(data_f));
306   CudaSafeCall(cudaFreeHost(data_fw));
307   CudaSafeCall(cudaFreeHost(data_i_1ch));
308   CudaSafeCall(cudaFreeHost(data_i_features));
309   
310   if (m_big_batch_mode) {
311       CufftErrorCheck(cufftDestroy(plan_f_all_scales));
312       CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
313       CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
314       CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
315       
316       CudaSafeCall(cudaFree(data_f_all_scales));
317       CudaSafeCall(cudaFreeHost(data_fw_all_scales));
318       CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
319       CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
320   }
321 }