// Creates the cuFFT plans and the staging buffers used by the forward/inverse methods below.
//
// Parameters:
//   width, height   - transform size (the m_width / m_height members are used throughout;
//                     their assignment is not visible in this excerpt).
//   num_of_feats    - batch size of the per-feature plans.
//   num_of_scales   - batch size of the per-scale plans.
//   big_batch_mode  - when true (and num_of_scales > 1) the "all scales" plans/buffers are created too.
//
// Every CUDA runtime call is wrapped in CudaSafeCall and every cuFFT call in CufftErrorCheck.
// NOTE(review): several lines of the original function (e.g. the declaration of `rank`, some
// `howmany` declarations and the tails of some cufftPlanMany calls) are not visible here.
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
7 m_num_of_feats = num_of_feats;
8 m_num_of_scales = num_of_scales;
9 m_big_batch_mode = big_batch_mode;
11 std::cout << "FFT: cuFFT" << std::endl;
13 //FFT forward one scale
// Device buffer holding one m_height x m_width real image, plus a plain 2D real-to-complex plan.
15 CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
17 CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
21 //FFT forward all scales
22 if(m_num_of_scales > 1 && m_big_batch_mode)
// Device buffer large enough for m_num_of_scales real images.
24 CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
// Batched layout: one transform per scale. Consecutive inputs are m_height*m_width reals apart,
// consecutive outputs m_height*(m_width/2+1) complex values apart; both are unit-stride.
27 int n[] = {(int)m_height, (int)m_width};
28 int howmany = m_num_of_scales;
29 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
30 int istride = 1, ostride = 1;
31 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
// The remaining arguments of this call (transform type, batch count) are not visible in this excerpt.
33 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n,
34 inembed, istride, idist,
35 onembed, ostride, odist,
38 //FFT forward window one scale
// Host buffer allocated with cudaHostAllocMapped; data_fw_d is the device-side pointer to the
// same allocation, so the host fills data_fw and cuFFT reads it through data_fw_d.
40 CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
41 CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
// Same layout as above, batched over the feature channels.
44 int n[] = {(int)m_height, (int)m_width};
45 int howmany = m_num_of_feats;
46 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
47 int istride = 1, ostride = 1;
48 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
50 CufftErrorCheck(cufftPlanMany(&plan_fw, rank, n,
51 inembed, istride, idist,
52 onembed, ostride, odist,
55 //FFT forward window all scales all feats
56 if(m_num_of_scales > 1 && m_big_batch_mode)
58 CudaSafeCall(cudaHostAlloc(&data_fw_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
59 CudaSafeCall(cudaHostGetDevicePointer(&data_fw_all_scales_d, data_fw_all_scales, 0));
// Batched over every (scale, feature) pair.
62 int n[] = {(int)m_height, (int)m_width};
63 int howmany = m_num_of_scales*m_num_of_feats;
64 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
65 int istride = 1, ostride = 1;
66 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
68 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n,
69 inembed, istride, idist,
70 onembed, ostride, odist,
75 //FFT inverse one scale
77 CudaSafeCall(cudaHostAlloc(&data_i_features, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
78 CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_d, data_i_features, 0));
// Inverse layout: the inputs are packed spectra (unit stride, m_height*(m_width/2+1) apart).
// The outputs use ostride = m_num_of_feats and odist = 1, i.e. the channels are interleaved per
// pixel, which is the layout inverse() wraps as a CV_32FC(n_channels) cv::Mat.
81 int n[] = {(int)m_height, (int)m_width};
82 int howmany = m_num_of_feats;
83 int idist = m_height*(m_width/2+1), odist = 1;
84 int istride = 1, ostride = m_num_of_feats;
85 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
87 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n,
88 inembed, istride, idist,
89 onembed, ostride, odist,
92 //FFT inverse all scales
// NOTE(review): this guard does not test m_big_batch_mode, unlike the other "all scales"
// sections in this function, while the cleanup code releases these resources only when
// m_big_batch_mode is set. Confirm which condition is intended.
93 if(m_num_of_scales > 1)
95 CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
96 CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
// Interleaved output over all m_num_of_feats*m_num_of_scales channels.
99 int n[] = {(int)m_height, (int)m_width};
100 int howmany = m_num_of_feats*m_num_of_scales;
101 int idist = m_height*(m_width/2+1), odist = 1;
102 int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
103 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
105 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n,
106 inembed, istride, idist,
107 onembed, ostride, odist,
108 CUFFT_C2R, howmany));
110 //FFT inverse one channel one scale
112 CudaSafeCall(cudaHostAlloc(&data_i_1ch, m_height*m_width*sizeof(cufftReal), cudaHostAllocMapped));
113 CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_d, data_i_1ch, 0));
// Unit-stride output for a single channel. The `howmany` used below is declared on a line
// that is not visible in this excerpt (presumably 1 - TODO confirm).
116 int n[] = {(int)m_height, (int)m_width};
118 int idist = m_height*(m_width/2+1), odist = 1;
119 int istride = 1, ostride = 1;
120 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
122 CufftErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n,
123 inembed, istride, idist,
124 onembed, ostride, odist,
125 CUFFT_C2R, howmany));
127 //FFT inverse one channel all scales
128 if(m_num_of_scales > 1 && m_big_batch_mode)
130 CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
131 CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
// One transform per scale, written interleaved (ostride = m_num_of_scales, odist = 1).
134 int n[] = {(int)m_height, (int)m_width};
135 int howmany = m_num_of_scales;
136 int idist = m_height*(m_width/2+1), odist = 1;
137 int istride = 1, ostride = m_num_of_scales;
138 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
140 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n,
141 inembed, istride, idist,
142 onembed, ostride, odist,
143 CUFFT_C2R, howmany));
// Receives the window matrix. The body is not visible in this excerpt; presumably it stores
// `window` in m_window, which forward_window() multiplies into every channel - TODO confirm.
147 void cuFFT::set_window(const cv::Mat & window)
// Forward real-to-complex FFT of a host image.
//
// Copies the pixels of `input` into a device buffer, executes the matching R2C plan and returns
// a ComplexMat with m_height rows and m_width/2+1 columns whose storage (get_p_data()) is handed
// to cuFFT as the output pointer.
// Assumes `input` is a continuous float matrix with m_width columns - TODO confirm against callers.
152 ComplexMat cuFFT::forward(const cv::Mat & input)
154 ComplexMat complex_result;
// Batched path: big-batch mode and the input has exactly m_height*m_num_of_scales rows;
// the result then has one channel per scale.
155 if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){
156 CudaSafeCall(cudaMemcpy(data_f_all_scales, input.ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
157 complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
158 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
159 complex_result.get_p_data()));
// Single-image path (the line introducing this branch is not visible in this excerpt).
161 CudaSafeCall(cudaMemcpy(data_f, input.ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
162 complex_result.create(m_height, m_width/ 2 + 1, 1);
163 CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
164 complex_result.get_p_data()));
167 return complex_result;
// Overload operating on a Scale_vars object. The body is not visible in this excerpt.
170 void cuFFT::forward(Scale_vars & vars)
// Forward R2C transform of vars.gauss_corr_res, used directly as the cuFFT input pointer
// (no host-to-device copy is made here).
// The output goes to vars.kf when the AUTO_CORRELATION flag is set in vars.flag, otherwise to vars.kzf.
175 void cuFFT::forward_raw(Scale_vars & vars, bool all_scales)
177 ComplexMat *result = vars.flag & Track_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
// Batched plan over all scales. The condition selecting between the two calls is not visible
// in this excerpt (presumably `all_scales` - TODO confirm).
179 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
180 result->get_p_data()));
// Single-image plan.
182 CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
183 result->get_p_data()));
// Multiplies every input channel element-wise by m_window and runs a batched forward R2C FFT
// over all channels.
//
// The channels are staged in a mapped host buffer viewed as one tall CV_32F matrix
// (m_height * n_channels rows, m_width columns); channel i occupies rows [i*m_height, (i+1)*m_height).
// `result` is declared on a line that is not visible in this excerpt.
188 ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
190 int n_channels = input.size();
// More channels than m_num_of_feats: use the "all scales" buffer and plan.
// NOTE(review): data_fw_all_scales / plan_fw_all_scales are only created in init() when
// m_num_of_scales > 1 && m_big_batch_mode; confirm this branch cannot be reached otherwise.
192 if(n_channels > (int) m_num_of_feats){
193 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
194 for (int i = 0; i < n_channels; ++i) {
195 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
// NOTE(review): relies on the product having the ROI's size and type so the assignment fills
// the staging buffer rather than rebinding in_roi to new storage - confirm.
196 in_roi = input[i].mul(m_window);
199 result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
201 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
// Otherwise: stage into the single-scale buffer and use the per-feature plan.
203 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
204 for (int i = 0; i < n_channels; ++i) {
205 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
206 in_roi = input[i].mul(m_window);
209 result.create(m_height, m_width/2 + 1, n_channels);
211 CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
// Scale_vars overload of forward_window: windows vars.patch_feats channel by channel and runs
// the batched forward R2C FFT. The spectrum is written to vars.xf when the TRACKER_UPDATE flag
// is set in vars.flag, otherwise to vars.zf. No create() call on the destination is visible
// here, so it is presumably sized by the caller - TODO confirm.
216 void cuFFT::forward_window(Scale_vars & vars)
218 int n_channels = vars.patch_feats.size();
219 ComplexMat *result = vars.flag & Track_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
// More channels than m_num_of_feats: use the "all scales" staging buffer and plan.
// NOTE(review): those are only created in init() when m_num_of_scales > 1 && m_big_batch_mode.
220 if(n_channels > (int) m_num_of_feats){
221 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
222 for (int i = 0; i < n_channels; ++i) {
// Channel i occupies rows [i*m_height, (i+1)*m_height) of the staging matrix.
223 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
224 in_roi = vars.patch_feats[i].mul(m_window);
227 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
// Otherwise: single-scale staging buffer and per-feature plan.
229 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
230 for (int i = 0; i < n_channels; ++i) {
231 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
232 in_roi = vars.patch_feats[i].mul(m_window);
235 CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result->get_p_data()));
// Inverse complex-to-real FFT returning a host cv::Mat.
//
// The plan and output buffer are chosen from input.n_channels. Each branch wraps the mapped host
// buffer in a cv::Mat header, executes the C2R plan into the device-side pointer of that buffer,
// waits for the device, and returns the result divided by m_width*m_height (cuFFT transforms are
// unnormalised).
// NOTE(review): the return value of cudaDeviceSynchronize() is ignored in every branch, whereas
// other CUDA runtime calls in this file go through CudaSafeCall.
240 cv::Mat cuFFT::inverse(const ComplexMat & input)
242 int n_channels = input.n_channels;
243 cufftComplex *in = reinterpret_cast<cufftComplex*>(input.get_p_data());
// Single-channel case (the condition line is not visible in this excerpt).
246 cv::Mat real_result(m_height, m_width, CV_32FC1, data_i_1ch);
248 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(data_i_1ch_d)));
// Wait for the transform before the host reads the mapped buffer.
249 cudaDeviceSynchronize();
251 return real_result/(m_width*m_height);
// One channel per scale.
// NOTE(review): plan_i_1ch_all_scales is only created in init() in big-batch mode.
252 } else if(n_channels == (int) m_num_of_scales){
253 cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_1ch_all_scales);
255 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(data_i_1ch_all_scales_d)));
256 cudaDeviceSynchronize();
258 return real_result/(m_width*m_height);
// All features of all scales.
259 } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
260 cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
262 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
263 cudaDeviceSynchronize();
265 return real_result/(m_width*m_height);
// Fallback: the per-feature plan. This plan is batched over m_num_of_feats, so n_channels is
// presumably equal to m_num_of_feats here - TODO confirm.
268 cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features);
270 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
271 cudaDeviceSynchronize();
273 return real_result/(m_width*m_height);
// Overload operating on a Scale_vars object. The body is not visible in this excerpt.
276 void cuFFT::inverse(Scale_vars & vars)
// Inverse complex-to-real FFT that returns the device-side pointer of the output buffer instead
// of a cv::Mat. The plan is selected from input.n_channels exactly as in inverse().
// In the visible lines there is no cudaDeviceSynchronize() and no division by m_width*m_height,
// so callers presumably have to synchronise and normalise themselves - TODO confirm.
281 float* cuFFT::inverse_raw(const ComplexMat & input)
283 int n_channels = input.n_channels;
284 cufftComplex *in = reinterpret_cast<cufftComplex*>(input.get_p_data());
// Single-channel case (the condition and this branch's return are not visible in this excerpt).
287 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(data_i_1ch_d)));
// One channel per scale.
290 } else if(n_channels == (int) m_num_of_scales){
291 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(data_i_1ch_all_scales_d)));
293 return data_i_1ch_all_scales_d;
// All features of all scales.
294 } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
295 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
297 return data_i_features_all_scales_d;
// Fallback: the per-feature plan.
300 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
302 return data_i_features_d;
// Cleanup of the resources created in init(). The enclosing function header is not visible in
// this excerpt (presumably the destructor).
// Plans and buffers that init() always creates are released unconditionally.
307 CufftErrorCheck(cufftDestroy(plan_f));
308 CufftErrorCheck(cufftDestroy(plan_fw));
309 CufftErrorCheck(cufftDestroy(plan_i_1ch));
310 CufftErrorCheck(cufftDestroy(plan_i_features));
// data_f comes from cudaMalloc, the others from cudaHostAlloc, hence cudaFree vs cudaFreeHost.
312 CudaSafeCall(cudaFree(data_f));
313 CudaSafeCall(cudaFreeHost(data_fw));
314 CudaSafeCall(cudaFreeHost(data_i_1ch));
315 CudaSafeCall(cudaFreeHost(data_i_features));
// NOTE(review): this guard does not match the creation guards in init(). There the f / fw / i_1ch
// "all scales" resources are created only when m_num_of_scales > 1 && m_big_batch_mode, and the
// i_features "all scales" resources when m_num_of_scales > 1 regardless of big-batch mode.
// With big_batch_mode set and a single scale this destroys plans that were never created; with
// several scales and big_batch_mode unset, plan_i_features_all_scales and
// data_i_features_all_scales are never released.
317 if (m_big_batch_mode) {
318 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
319 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
320 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
321 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
323 CudaSafeCall(cudaFree(data_f_all_scales));
324 CudaSafeCall(cudaFreeHost(data_fw_all_scales));
325 CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
326 CudaSafeCall(cudaFreeHost(data_i_features_all_scales));