5 #include "complexmat.cuh"
7 #include "complexmat.hpp"
9 // For compatibility between the cuFFT build and the FFTW / OpenCV FFT builds: provides a stand-in cudaStream_t type.
10 typedef int *cudaStream_t;
12 #include "cuda_runtime.h"
// Constructor: sizes and allocates the working buffers of this context from the
// window size, the cell size, the number of feature channels and the number of scales.
//
// NOTE(review): this view of the constructor is fragmentary. Preprocessor guards,
// braces and some statement heads/tails are on lines that are not visible here,
// so the comments below describe only what the visible lines do.
//
// Parameters:
//   windows_size  - window size; its width and height are divided by cell_size below.
//   cell_size     - divisor applied to both window dimensions.
//   num_of_feats  - element/channel count used for the feature buffers.
//   num_of_scales - element/channel count used for the response buffers (default 1).
//   model_xf, yf  - optional ComplexMat objects; create() is called on them below.
//   zero_index    - defaults to false; see the review note on the next statements.
18 ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1,
19 ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false)
// Sets the cudaDeviceMapHost device flag and marks this context as zero_index.
// NOTE(review): the condition guarding these two statements is not visible;
// presumably they run only when the zero_index argument is true - confirm.
24 cudaSetDeviceFlags(cudaDeviceMapHost);
25 this->zero_index = true;
// A stream is created only when ASYNC or OPENMP is defined.
28 #if defined(ASYNC) || defined(OPENMP)
29 CudaSafeCall(cudaStreamCreate(&this->stream));
// Reserve capacity for num_of_feats entries in patch_feats.
32 this->patch_feats.reserve(uint(num_of_feats));
// Byte count, evaluated left to right as
// width / cell_size * height / cell_size * num_of_scales, times sizeof(cufftReal).
// NOTE(review): the start of this statement is not visible; presumably it assigns alloc_size.
35 (uint(windows_size.width) / cell_size * uint(windows_size.height )/ cell_size * num_of_scales) * sizeof(cufftReal);
// Allocate data_i_1ch with cudaHostAlloc (cudaHostAllocMapped) and store the
// corresponding device pointer in data_i_1ch_d.
36 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_1ch), alloc_size, cudaHostAllocMapped));
37 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_1ch_d),
38 reinterpret_cast<void *>(this->data_i_1ch), 0));
// Same computation with num_of_feats in place of num_of_scales (statement start not visible).
41 (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) * sizeof(cufftReal);
// Allocate data_i_features the same way and store its device pointer in data_i_features_d.
42 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_features), alloc_size, cudaHostAllocMapped));
43 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_features_d),
44 reinterpret_cast<void *>(this->data_i_features), 0));
// ifft2_res and response are cv::Mat headers constructed over the buffers allocated
// above: (height / cell_size) rows by (width / cell_size) columns, with
// num_of_feats and num_of_scales float channels respectively.
46 this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
47 CV_32FC(int(num_of_feats)), this->data_i_features);
48 this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
49 CV_32FC(int(num_of_scales)), this->data_i_1ch);
// zf, kzf and kf are created with height / cell_size rows and
// (width / cell_size) / 2 + 1 columns.
// NOTE(review): the trailing arguments of the kzf and kf calls are on lines not visible here.
51 this->zf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats,
52 num_of_scales, this->stream);
53 this->kzf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales,
55 this->kf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales,
// alloc_size now holds an element count (num_of_scales), not a byte count.
58 alloc_size = uint(num_of_scales);
// xf_sqr_norm: num_of_scales floats allocated with cudaHostAlloc; device pointer in xf_sqr_norm_d.
60 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->xf_sqr_norm), alloc_size * sizeof(float),
61 cudaHostAllocMapped));
62 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->xf_sqr_norm_d),
63 reinterpret_cast<void *>(this->xf_sqr_norm), 0));
// yf_sqr_norm: a single float allocated the same way; device pointer in yf_sqr_norm_d.
65 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->yf_sqr_norm), sizeof(float), cudaHostAllocMapped));
66 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->yf_sqr_norm_d),
67 reinterpret_cast<void *>(this->yf_sqr_norm), 0));
// Byte count built from the current alloc_size (statement start not visible).
// NOTE(review): presumably alloc_size still equals num_of_scales at this point - confirm
// against the lines not shown.
70 uint(windows_size.width) / cell_size * uint(windows_size.height)/ cell_size * alloc_size * sizeof(float);
// gauss_corr_res is allocated with cudaHostAlloc, its device pointer stored in
// gauss_corr_res_d, and in_all is constructed over it as a CV_32F matrix with
// (height / cell_size) * num_of_scales rows and width / cell_size columns.
71 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->gauss_corr_res), alloc_size, cudaHostAllocMapped));
72 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->gauss_corr_res_d),
73 reinterpret_cast<void *>(this->gauss_corr_res), 0));
74 this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales), windows_size.width / int(cell_size),
75 CV_32F, this->gauss_corr_res);
// rot_labels_data: width / cell_size * height / cell_size floats; rot_labels is a
// CV_32FC1 matrix constructed over it.
// NOTE(review): the line that opens the call wrapping this cudaHostAlloc is not visible.
// The destructor frees rot_labels_data only when zero_index is set, so this section is
// presumably guarded by zero_index as well - confirm.
78 alloc_size = uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * sizeof(float);
80 cudaHostAlloc(reinterpret_cast<void **>(&this->rot_labels_data), alloc_size, cudaHostAllocMapped));
81 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->rot_labels_data_d),
82 reinterpret_cast<void *>(this->rot_labels_data), 0));
83 this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), CV_32FC1,
84 this->rot_labels_data);
// data_features: sized from num_of_feats (the end of this expression is on a line not
// visible); fw_all is a CV_32F matrix constructed over it with
// (height / cell_size) * num_of_feats rows and width / cell_size columns.
87 alloc_size = (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) *
89 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_features), alloc_size, cudaHostAllocMapped));
90 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_features_d),
91 reinterpret_cast<void *>(this->data_features), 0));
92 this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size),
93 CV_32F, this->data_features);
// malloc-based allocation of xf_sqr_norm (num_of_scales floats) and yf_sqr_norm (one float).
// NOTE(review): these assign the same members that are allocated with cudaHostAlloc above;
// presumably this is the alternative (non-CUDA) preprocessor branch - the guard is not visible.
// NOTE(review): the malloc results are not checked for nullptr in the visible lines.
95 alloc_size = num_of_scales;
97 this->xf_sqr_norm = reinterpret_cast<float *>(malloc(alloc_size * sizeof(float)));
98 this->yf_sqr_norm = reinterpret_cast<float *>(malloc(sizeof(float)));
100 this->patch_feats.reserve(num_of_feats);
// Local matrix dimensions.
// NOTE(review): `width` is declared twice below, once as (width / cell_size) / 2 + 1 and once
// as width / cell_size; presumably the two declarations sit in alternative preprocessor
// branches whose guards are not visible - confirm.
102 uint height = uint(windows_size.height) / cell_size;
104 uint width = (uint(windows_size.width) / cell_size) / 2 + 1;
106 int width = windows_size.width / cell_size;
// Here ifft2_res and response are cv::Mat objects constructed without an external data pointer.
109 this->ifft2_res = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_feats)));
110 this->response = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales)));
// zf, kzf and kf are assigned freshly constructed ComplexMat objects of height x width.
112 this->zf = ComplexMat(height, width, num_of_feats, num_of_scales);
113 this->kzf = ComplexMat(height, width, num_of_scales);
114 this->kf = ComplexMat(height, width, num_of_scales);
// The next two lines are right-hand sides whose assignment targets are on lines not visible:
// CV_32F matrices with (height / cell_size) * num_of_scales and
// (height / cell_size) * num_of_feats rows respectively.
117 cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales), windows_size.width / int(cell_size), CV_32F);
119 cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size), CV_32F);
// in_all as a single (height / cell_size) x (width / cell_size) CV_32F matrix.
121 this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F);
// With FFTW or CUFFT defined, model_xf, yf and xf are created with
// (width / cell_size) / 2 + 1 columns.
// NOTE(review): model_xf and yf default to nullptr and are dereferenced here; the condition
// guarding these calls is not visible - confirm callers always pass valid pointers on this path.
124 #if defined(FFTW) || defined(CUFFT)
126 model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats);
127 yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, 1);
128 // Only scale_vars[0] is used to update the tracker, so xf is allocated for it alone.
// NOTE(review): the first xf.create call below continues on a line not visible here.
130 this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats,
133 this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats);
// When more than one scale is used, reserve num_of_scales entries in the per-scale result vectors.
135 } else if (num_of_scales > 1) {
136 this->max_responses.reserve(uint(num_of_scales));
137 this->max_locs.reserve(uint(num_of_scales));
138 this->response_maps.reserve(uint(num_of_scales));
// Variant of the model_xf / yf / xf creation that uses the full width / cell_size as the
// column count. NOTE(review): presumably the #else side of the FFTW/CUFFT guard above - confirm.
142 model_xf->create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
143 yf->create(windows_size.height / cell_size, windows_size.width / cell_size, 1);
144 this->xf.create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
// Release of the buffers acquired in the constructor.
// NOTE(review): the enclosing function signature and the preprocessor guards are not
// visible in this view; presumably this is the destructor body - confirm.
//
// Buffers obtained with cudaHostAlloc are returned with cudaFreeHost.
152 CudaSafeCall(cudaFreeHost(this->xf_sqr_norm));
153 CudaSafeCall(cudaFreeHost(this->yf_sqr_norm));
154 CudaSafeCall(cudaFreeHost(this->data_i_1ch));
155 CudaSafeCall(cudaFreeHost(this->data_i_features));
156 CudaSafeCall(cudaFreeHost(this->gauss_corr_res));
// rot_labels_data is freed only when this context was marked zero_index.
// NOTE(review): unbraced `if` around CudaSafeCall - verify that CudaSafeCall expands to a
// single statement so the whole call is covered by the condition.
157 if (zero_index) CudaSafeCall(cudaFreeHost(this->rot_labels_data));
158 CudaSafeCall(cudaFreeHost(this->data_features));
// The stream is destroyed under the same ASYNC / OPENMP condition under which it is created.
159 #if defined(ASYNC) || defined(OPENMP)
160 CudaSafeCall(cudaStreamDestroy(this->stream));
// free() counterpart for the malloc-based allocation of the two norm buffers.
// NOTE(review): xf_sqr_norm and yf_sqr_norm are also passed to cudaFreeHost above; presumably
// the two release paths are in alternative preprocessor branches - confirm.
163 free(this->xf_sqr_norm);
164 free(this->yf_sqr_norm);
// Data members.
// Host-side buffers for the norm values: num_of_scales floats and one float respectively
// (allocated in the constructor).
168 float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
// Capacity for num_of_feats entries is reserved in the constructor.
169 std::vector<cv::Mat> patch_feats;
// Matrices set up in the constructor; some of them are constructed over the raw buffers below.
171 cv::Mat in_all, fw_all, ifft2_res, response;
// NOTE(review): xyf is not referenced in the visible constructor lines.
172 ComplexMat zf, kzf, kf, xyf, xf;
// Raw buffers allocated with cudaHostAlloc in the constructor. Each `_d` member receives the
// pointer returned by cudaHostGetDevicePointer for the member of the same name without `_d`.
176 float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr, *gauss_corr_res_d = nullptr,
177 *rot_labels_data = nullptr, *rot_labels_data_d = nullptr, *data_features = nullptr,
178 *data_features_d = nullptr;
// NOTE(review): data_f is not referenced in the visible constructor or destructor lines.
179 float *data_f = nullptr, *data_i_features = nullptr, *data_i_features_d = nullptr, *data_i_1ch = nullptr,
180 *data_i_1ch_d = nullptr;
// Stream handle; created and destroyed only when ASYNC or OPENMP is defined.
182 cudaStream_t stream = nullptr;
183 ComplexMat model_alphaf, model_xf;
185 // Big batch variables
// max_val and max_response have no default initializer and are not set in the visible constructor lines.
187 double max_val, max_response;
// Per-scale results; the constructor reserves num_of_scales entries when num_of_scales > 1.
189 std::vector<double> max_responses;
190 std::vector<cv::Point2i> max_locs;
191 std::vector<cv::Mat> response_maps;
// Set to true in the constructor alongside cudaSetDeviceFlags(cudaDeviceMapHost); also
// controls whether rot_labels_data is freed.
192 bool zero_index = false;
195 #endif // SCALE_VARS_HPP