// Records the feature/scale counts and the batch-mode flag, then allocates the
// transfer buffers and creates the cuFFT plans used by the transform methods.
// NOTE(review): this listing is an excerpt with gaps. m_width and m_height are
// used throughout but their assignment is not visible here, `rank` is passed to
// every cufftPlanMany call but its declaration is not shown, and the trailing
// arguments of the R2C cufftPlanMany calls are not shown either.
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
7 m_num_of_feats = num_of_feats;
8 m_num_of_scales = num_of_scales;
9 m_big_batch_mode = big_batch_mode;
11 std::cout << "FFT: cuFFT" << std::endl;
13 //FFT forward one scale
// Device buffer holding one real image of m_height*m_width cufftReal values.
15 CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
// Single 2-D real-to-complex plan of size m_height x m_width.
17 CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
21 //FFT forward all scales
// Batched resources are only created for multi-scale operation in big-batch mode.
22 if(m_num_of_scales > 1 && m_big_batch_mode)
24 CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
// n: size of each 2-D transform; howmany: one transform per scale.
27 int n[] = {(int)m_height, (int)m_width};
28 int howmany = m_num_of_scales;
// idist: elements between consecutive real inputs; odist: elements between
// consecutive outputs, which have m_width/2+1 columns.
29 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
30 int istride = 1, ostride = 1;
31 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
33 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n,
34 inembed, istride, idist,
35 onembed, ostride, odist,
38 //FFT forward window one scale
// Host buffer allocated with cudaHostAllocMapped; data_fw_d receives the device
// pointer for the same allocation.
40 CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
41 CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
// One transform per feature channel, inputs stored back to back.
44 int n[] = {(int)m_height, (int)m_width};
45 int howmany = m_num_of_feats;
46 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
47 int istride = 1, ostride = 1;
48 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
50 CufftErrorCheck(cufftPlanMany(&plan_fw, rank, n,
51 inembed, istride, idist,
52 onembed, ostride, odist,
55 //FFT forward window all scales all feats
56 if(m_num_of_scales > 1 && m_big_batch_mode)
58 CudaSafeCall(cudaHostAlloc(&data_fw_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
59 CudaSafeCall(cudaHostGetDevicePointer(&data_fw_all_scales_d, data_fw_all_scales, 0));
// One transform per (scale, feature) pair.
62 int n[] = {(int)m_height, (int)m_width};
63 int howmany = m_num_of_scales*m_num_of_feats;
64 int idist = m_height*m_width, odist = m_height*(m_width/2+1);
65 int istride = 1, ostride = 1;
66 int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
68 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n,
69 inembed, istride, idist,
70 onembed, ostride, odist,
75 //FFT inverse one scale
78 int n[] = {(int)m_height, (int)m_width};
79 int howmany = m_num_of_feats;
// Output layout: element stride m_num_of_feats with a batch distance of 1, so
// the results of the individual channels are interleaved element by element.
80 int idist = m_height*(m_width/2+1), odist = 1;
81 int istride = 1, ostride = m_num_of_feats;
82 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
84 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n,
85 inembed, istride, idist,
86 onembed, ostride, odist,
89 //FFT inverse all scales
// NOTE(review): unlike the other "all scales" sections this condition does not
// test m_big_batch_mode, while the cleanup code releases these resources only
// when m_big_batch_mode is set — confirm which guard is intended.
91 if(m_num_of_scales > 1)
93 CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
94 CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
97 int n[] = {(int)m_height, (int)m_width};
98 int howmany = m_num_of_feats*m_num_of_scales;
// Interleaved output across all (feature, scale) channels.
99 int idist = m_height*(m_width/2+1), odist = 1;
100 int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
101 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
103 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n,
104 inembed, istride, idist,
105 onembed, ostride, odist,
106 CUFFT_C2R, howmany));
109 //FFT inverse one channel one scale
// NOTE(review): `howmany` is passed to the plan below but its declaration for
// this section is not part of the excerpt.
112 int n[] = {(int)m_height, (int)m_width};
114 int idist = m_height*(m_width/2+1), odist = 1;
115 int istride = 1, ostride = 1;
116 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
118 CufftErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n,
119 inembed, istride, idist,
120 onembed, ostride, odist,
121 CUFFT_C2R, howmany));
124 //FFT inverse one channel all scales
125 if(m_num_of_scales > 1 && m_big_batch_mode)
127 CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
128 CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
131 int n[] = {(int)m_height, (int)m_width};
132 int howmany = m_num_of_scales;
// One output channel per scale, interleaved element by element.
133 int idist = m_height*(m_width/2+1), odist = 1;
134 int istride = 1, ostride = m_num_of_scales;
135 int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
137 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n,
138 inembed, istride, idist,
139 onembed, ostride, odist,
140 CUFFT_C2R, howmany));
// Receives the window matrix.
// NOTE(review): the body is not part of this excerpt. forward_window multiplies
// every channel by m_window, so this presumably stores `window` there — confirm.
145 void cuFFT::set_window(const cv::Mat & window)
// Forward real-to-complex FFT of `input`, returned as a new ComplexMat.
// The input is copied to a device buffer and transformed with either the batched
// all-scales plan or the single-image plan.
// NOTE(review): the line separating the two copy/execute groups is not shown in
// this excerpt; the second group is presumably the `else` branch.
150 ComplexMat cuFFT::forward(const cv::Mat & input)
152 ComplexMat complex_result;
// Batched path: big-batch mode and the input stacks m_num_of_scales images
// vertically (rows == m_height*m_num_of_scales).
153 if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){
154 CudaSafeCall(cudaMemcpy(data_f_all_scales, input.ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
// Output has m_width/2+1 columns and one channel per scale.
155 complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
// get_p_data() is handed to cuFFT as the output pointer, so it presumably refers
// to device-accessible storage — confirm against ComplexMat.
156 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
157 complex_result.get_p_data()));
// Single-image path.
159 CudaSafeCall(cudaMemcpy(data_f, input.ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
160 complex_result.create(m_height, m_width/ 2 + 1, 1);
161 CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
162 complex_result.get_p_data()));
165 return complex_result;
// Forward real-to-complex FFT whose input and output are selected from `vars`
// according to vars.flag; the result is written through the chosen ComplexMat
// pointer rather than returned.
168 void cuFFT::forward(Scale_vars & vars)
// Output: vars.p_yf_ptr for TRACKER_INIT, otherwise vars.kf for AUTO_CORRELATION,
// otherwise vars.kzf.
170 ComplexMat *complex_result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_yf_ptr :
171 vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
// Input: vars.rot_labels for TRACKER_INIT, otherwise vars.in_all.
172 cv::Mat *input = vars.flag & Tracker_flags::TRACKER_INIT ? & vars.rot_labels : & vars.in_all;
// NOTE(review): the batch test always inspects vars.in_all.rows, even when
// `input` points at vars.rot_labels — confirm this is intended.
174 if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){
175 CudaSafeCall(cudaMemcpy(data_f_all_scales, input->ptr<cufftReal>(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
176 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
177 complex_result->get_p_data()));
// Single-image path (the separating `else` line is not shown in this excerpt).
179 CudaSafeCall(cudaMemcpy(data_f, input->ptr<cufftReal>(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice));
180 CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
181 complex_result->get_p_data()));
// Forward real-to-complex FFT that reads directly from vars.gauss_corr_res with
// no host-to-device copy in the visible code, so that pointer is presumably
// already device-accessible — confirm.
// NOTE(review): the line choosing between the two executions is not shown in
// this excerpt; presumably it branches on `all_scales`.
186 void cuFFT::forward_raw(Scale_vars & vars, bool all_scales)
// Output: vars.kf for AUTO_CORRELATION, otherwise vars.kzf.
188 ComplexMat *result = vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
// Batched plan over all scales.
190 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
191 result->get_p_data()));
// Single-image plan.
193 CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
194 result->get_p_data()));
// Multiplies every input channel by m_window, stacks the products vertically in
// a host buffer and runs a batched real-to-complex FFT over them.
// NOTE(review): the declaration of `result` and the return statement are not
// part of this excerpt.
199 ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
201 int n_channels = input.size();
// More channels than a single scale provides: use the all-scales buffer and plan.
// NOTE(review): init creates data_fw_all_scales and plan_fw_all_scales only when
// m_num_of_scales > 1 && m_big_batch_mode, whereas this branch is selected by the
// channel count alone — confirm callers cannot reach it otherwise.
203 if(n_channels > (int) m_num_of_feats){
// Header over the host buffer: n_channels images of m_height rows each.
204 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
205 for (int i = 0; i < n_channels; ++i) {
// ROI covering the i-th image slot of in_all.
206 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
// Presumably evaluates the product into the ROI's existing storage, which needs
// matching size and type — confirm.
207 in_roi = input[i].mul(m_window);
210 result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
// cuFFT reads the same allocation through its device pointer.
212 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
// Single-scale path using the per-scale buffer and plan.
214 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
215 for (int i = 0; i < n_channels; ++i) {
216 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
217 in_roi = input[i].mul(m_window);
220 result.create(m_height, m_width/2 + 1, n_channels);
222 CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
// Windowed forward FFT of vars.patch_feats; the output ComplexMat is selected
// from `vars` according to vars.flag and written in place.
227 void cuFFT::forward_window(Scale_vars & vars)
229 int n_channels = vars.patch_feats.size();
// Output: vars.p_model_xf_ptr for TRACKER_INIT, otherwise vars.xf for
// TRACKER_UPDATE, otherwise vars.zf.
231 ComplexMat *result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_model_xf_ptr :
232 vars.flag & Tracker_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
// More channels than a single scale provides: use the all-scales buffer and plan.
// NOTE(review): init creates these only when m_num_of_scales > 1 &&
// m_big_batch_mode, whereas this branch is selected by the channel count alone
// — confirm callers cannot reach it otherwise.
234 if(n_channels > (int) m_num_of_feats){
// Header over the host buffer: n_channels images of m_height rows each.
235 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
236 for (int i = 0; i < n_channels; ++i) {
237 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
// Presumably evaluates the product into the ROI's existing storage, which needs
// matching size and type — confirm.
238 in_roi = vars.patch_feats[i].mul(m_window);
241 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
// Single-scale path (the separating `else` line is not shown in this excerpt).
243 cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
244 for (int i = 0; i < n_channels; ++i) {
245 cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
246 in_roi = vars.patch_feats[i].mul(m_window);
249 CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result->get_p_data()));
// Inverse complex-to-real FFT. Input and output are selected from `vars` by the
// RESPONSE flag, and the result is divided by m_width*m_height.
// NOTE(review): this excerpt omits several lines of the function, including the
// condition guarding the first execution and any code that binds *real_result
// to the buffer cuFFT writes into.
254 void cuFFT::inverse(Scale_vars & vars)
// RESPONSE: transform vars.kzf into vars.response; otherwise vars.xyf into
// vars.ifft2_res.
256 ComplexMat *input = vars.flag & Tracker_flags::RESPONSE ? & vars.kzf : & vars.xyf;
257 cv::Mat *real_result = vars.flag & Tracker_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
259 int n_channels = input->n_channels;
260 cufftComplex *in = reinterpret_cast<cufftComplex*>(input->get_p_data());
// Single-channel plan.
264 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_d)));
// Wait for the transform before the host reads the result.
// NOTE(review): the return status of cudaDeviceSynchronize() is not checked
// here or at the other call sites in this function.
265 cudaDeviceSynchronize();
// Divide by the number of elements per transform (cuFFT C2R output is unnormalized).
266 *real_result = *real_result/(m_width*m_height);
// One channel per scale.
270 else if(n_channels == (int) m_num_of_scales){
// Local header that shadows the `real_result` pointer declared above.
271 cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), vars.data_i_1ch_all_scales);
273 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_all_scales_d)));
274 cudaDeviceSynchronize();
// NOTE(review): this returns a value from a function declared void; the line may
// sit inside a disabled region whose delimiters are not shown here — confirm.
276 return real_result/(m_width*m_height);
// One channel per (feature, scale) pair.
277 } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
// NOTE(review): the header wraps the member data_i_features_all_scales while the
// transform below writes through vars.data_i_features_all_scales_d — confirm
// both refer to the same allocation.
278 cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
280 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(vars.data_i_features_all_scales_d)));
281 cudaDeviceSynchronize();
// NOTE(review): value returned from a void function, as in the branch above.
283 return real_result/(m_width*m_height);
// Per-scale multi-feature plan.
287 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(vars.data_i_features_d)));
292 cudaDeviceSynchronize();
293 *real_result = *real_result/(m_width*m_height);
// Cleanup code: destroys the cuFFT plans and releases the device and host
// buffers.
// NOTE(review): the enclosing function header is not part of this excerpt;
// presumably this is the destructor.
300 CufftErrorCheck(cufftDestroy(plan_f));
301 CufftErrorCheck(cufftDestroy(plan_fw));
302 CufftErrorCheck(cufftDestroy(plan_i_1ch));
303 CufftErrorCheck(cufftDestroy(plan_i_features));
305 CudaSafeCall(cudaFree(data_f));
306 CudaSafeCall(cudaFreeHost(data_fw));
// NOTE(review): no allocation of data_i_1ch or data_i_features is visible in
// the init excerpt — confirm they are allocated, or null, before being freed.
307 CudaSafeCall(cudaFreeHost(data_i_1ch));
308 CudaSafeCall(cudaFreeHost(data_i_features));
// NOTE(review): init creates most of these resources only when
// m_num_of_scales > 1 && m_big_batch_mode, and the inverse all-scales ones when
// m_num_of_scales > 1 regardless of batch mode. This guard tests
// m_big_batch_mode alone, so it can release resources that were never created
// (single scale in big-batch mode) or skip ones that were (multi-scale without
// big-batch mode) — confirm.
310 if (m_big_batch_mode) {
311 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
312 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
313 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
314 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
316 CudaSafeCall(cudaFree(data_f_all_scales));
317 CudaSafeCall(cudaFreeHost(data_fw_all_scales));
318 CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
319 CudaSafeCall(cudaFreeHost(data_i_features_all_scales));