]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_cufft.cpp
CuFFT uses only inverse(Scale_vars & vars)
[hercules2020/kcf.git] / src / fft_cufft.cpp
1 #include "fft_cufft.h"
2
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
4 {
5     m_width = width;
6     m_height = height;
7     m_num_of_feats = num_of_feats;
8     m_num_of_scales = num_of_scales;
9     m_big_batch_mode = big_batch_mode;
10
11     std::cout << "FFT: cuFFT" << std::endl;
12
13     //FFT forward one scale
14     {
15         CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
16
17        CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
18
19
20     }
21     //FFT forward all scales
22     if(m_num_of_scales > 1 && m_big_batch_mode)
23     {
24         CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
25
26         int rank = 2;
27         int n[] = {(int)m_height, (int)m_width};
28         int howmany = m_num_of_scales;
29         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
30         int istride = 1, ostride = 1;
31         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
32
33         CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n,
34                   inembed, istride, idist,
35                   onembed, ostride, odist,
36                   CUFFT_R2C, howmany));
37     }
38     //FFT forward window one scale
39     {
40         CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
41         CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
42
43         int rank = 2;
44         int n[] = {(int)m_height, (int)m_width};
45         int howmany = m_num_of_feats;
46         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
47         int istride = 1, ostride = 1;
48         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
49
50         CufftErrorCheck(cufftPlanMany(&plan_fw, rank, n,
51                   inembed, istride, idist,
52                   onembed, ostride, odist,
53                   CUFFT_R2C, howmany));
54     }
55     //FFT forward window all scales all feats
56     if(m_num_of_scales > 1 && m_big_batch_mode)
57     {
58         CudaSafeCall(cudaHostAlloc(&data_fw_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
59         CudaSafeCall(cudaHostGetDevicePointer(&data_fw_all_scales_d, data_fw_all_scales, 0));
60
61         int rank = 2;
62         int n[] = {(int)m_height, (int)m_width};
63         int howmany = m_num_of_scales*m_num_of_feats;
64         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
65         int istride = 1, ostride = 1;
66         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
67
68         CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n,
69                   inembed, istride, idist,
70                   onembed, ostride, odist,
71                   CUFFT_R2C, howmany));
72
73
74     }
75     //FFT inverse one scale
76     {
77         int rank = 2;
78         int n[] = {(int)m_height, (int)m_width};
79         int howmany = m_num_of_feats;
80         int idist = m_height*(m_width/2+1), odist = 1;
81         int istride = 1, ostride = m_num_of_feats;
82         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
83
84         CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n,
85                   inembed, istride, idist,
86                   onembed, ostride, odist,
87                   CUFFT_C2R, howmany));
88     }
89     //FFT inverse all scales
90 #ifdef BIG_BATCH
91     if(m_num_of_scales > 1)
92     {
93         CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
94         CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
95
96         int rank = 2;
97         int n[] = {(int)m_height, (int)m_width};
98         int howmany = m_num_of_feats*m_num_of_scales;
99         int idist = m_height*(m_width/2+1), odist = 1;
100         int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
101         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
102
103         CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n,
104                   inembed, istride, idist,
105                   onembed, ostride, odist,
106                   CUFFT_C2R, howmany));
107     }
108 #endif
109     //FFT inverse one channel one scale
110     {
111         int rank = 2;
112         int n[] = {(int)m_height, (int)m_width};
113         int howmany = 1;
114         int idist = m_height*(m_width/2+1), odist = 1;
115         int istride = 1, ostride = 1;
116         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
117
118         CufftErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n,
119                   inembed, istride, idist,
120                   onembed, ostride, odist,
121                   CUFFT_C2R, howmany));
122     }
123 #ifdef BIG_BATCH
124     //FFT inverse one channel all scales
125     if(m_num_of_scales > 1 && m_big_batch_mode)
126     {
127         CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
128         CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
129
130         int rank = 2;
131         int n[] = {(int)m_height, (int)m_width};
132         int howmany = m_num_of_scales;
133         int idist = m_height*(m_width/2+1), odist = 1;
134         int istride = 1, ostride = m_num_of_scales;
135         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
136
137         CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n,
138                   inembed, istride, idist,
139                   onembed, ostride, odist,
140                   CUFFT_C2R, howmany));
141     }
142 #endif
143 }
144
145 void cuFFT::set_window(const cv::Mat & window)
146 {
147      m_window = window;
148 }
149
150 ComplexMat cuFFT::forward(const cv::Mat & input)
151 {
152     ComplexMat complex_result;
153     if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){
154         CudaSafeCall(cudaMemcpy(data_f_all_scales, input.ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
155         complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
156         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
157                                 complex_result.get_p_data()));
158     } else {
159         CudaSafeCall(cudaMemcpy(data_f, input.ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
160         complex_result.create(m_height, m_width/ 2 + 1, 1);
161         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
162                                 complex_result.get_p_data()));
163     }
164
165     return complex_result;
166 }
167
168 void cuFFT::forward(Scale_vars & vars)
169 {
170     ComplexMat *complex_result = vars.flag & Track_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
171     if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){
172         CudaSafeCall(cudaMemcpy(data_f_all_scales, vars.in_all.ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
173         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
174                                 complex_result->get_p_data()));
175     } else {
176         CudaSafeCall(cudaMemcpy(data_f, vars.in_all.ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
177         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
178                                 complex_result->get_p_data()));
179     }
180     return;
181 }
182
183 void cuFFT::forward_raw(Scale_vars & vars, bool all_scales)
184 {
185     ComplexMat *result = vars.flag & Track_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
186     if (all_scales){
187         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
188                                 result->get_p_data()));
189     } else {
190         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
191                                 result->get_p_data()));
192     }
193     return;
194 }
195
196 ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
197 {
198     int n_channels = input.size();
199     ComplexMat result;
200     if(n_channels > (int) m_num_of_feats){
201         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
202         for (int i = 0; i < n_channels; ++i) {
203             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
204             in_roi = input[i].mul(m_window);
205         }
206
207         result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
208
209         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
210     } else {
211         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
212         for (int i = 0; i < n_channels; ++i) {
213             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
214             in_roi = input[i].mul(m_window);
215         }
216
217         result.create(m_height, m_width/2 + 1, n_channels);
218
219         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
220     }
221     return result;
222 }
223
224 void cuFFT::forward_window(Scale_vars & vars)
225 {
226     int n_channels = vars.patch_feats.size();
227     ComplexMat *result = vars.flag & Track_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
228     if(n_channels > (int) m_num_of_feats){
229         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
230         for (int i = 0; i < n_channels; ++i) {
231             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
232             in_roi = vars.patch_feats[i].mul(m_window);
233         }
234
235         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
236     } else {
237         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
238         for (int i = 0; i < n_channels; ++i) {
239             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
240             in_roi = vars.patch_feats[i].mul(m_window);
241         }
242
243         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result->get_p_data()));
244     }
245     return;
246 }
247
248 void cuFFT::inverse(Scale_vars & vars)
249 {
250     ComplexMat *input = vars.flag & Track_flags::RESPONSE ? & vars.kzf : &  vars.xyf;
251     cv::Mat *real_result = vars.flag & Track_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
252
253     int n_channels = input->n_channels;
254     cufftComplex *in = reinterpret_cast<cufftComplex*>(input->get_p_data());
255
256     if(n_channels == 1){
257
258         CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_d)));
259         cudaDeviceSynchronize();
260         *real_result = *real_result/(m_width*m_height);
261         return;
262     }
263 #ifdef BIG_BATCH
264     else if(n_channels == (int) m_num_of_scales){
265         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), vars.data_i_1ch_all_scales);
266
267         CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_all_scales_d)));
268         cudaDeviceSynchronize();
269
270         return real_result/(m_width*m_height);
271     } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
272         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
273
274         CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_features_all_scales_d)));
275         cudaDeviceSynchronize();
276
277         return real_result/(m_width*m_height);
278     }
279 #endif
280
281     CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(vars.data_i_features_d)));
282
283     if (vars.cuda_gauss)
284         return;
285     else {
286         cudaDeviceSynchronize();
287         *real_result = *real_result/(m_width*m_height);
288     }
289     return;
290 }
291
292 cuFFT::~cuFFT()
293 {
294   CufftErrorCheck(cufftDestroy(plan_f));
295   CufftErrorCheck(cufftDestroy(plan_fw));
296   CufftErrorCheck(cufftDestroy(plan_i_1ch));
297   CufftErrorCheck(cufftDestroy(plan_i_features));
298
299   CudaSafeCall(cudaFree(data_f));
300   CudaSafeCall(cudaFreeHost(data_fw));
301   CudaSafeCall(cudaFreeHost(data_i_1ch));
302   CudaSafeCall(cudaFreeHost(data_i_features));
303   
304   if (m_big_batch_mode) {
305       CufftErrorCheck(cufftDestroy(plan_f_all_scales));
306       CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
307       CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
308       CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
309       
310       CudaSafeCall(cudaFree(data_f_all_scales));
311       CudaSafeCall(cudaFreeHost(data_fw_all_scales));
312       CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
313       CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
314   }
315 }