5 #include "complexmat.cuh"
7 #include "complexmat.hpp"
9 // Stand-in definition of cudaStream_t as a plain int pointer, kept for compatibility between the CuFFT build
// and the FFTW / OpenCV FFT builds, so that members and calls that mention a stream have a type to refer to.
// NOTE(review): presumably guarded by preprocessor conditionals that are not visible in this view
// (cuda_runtime.h is also included nearby) -- confirm against the full file.
10 typedef int *cudaStream_t;
12 #include "cuda_runtime.h"
// Constructs a context and sizes all of its working buffers from the window size and the cell size.
//
// NOTE(review): this view of the file is missing lines (braces, #ifdef/#else/#endif, and the head or tail of a
// few statements), so the statements below belong to several alternative build configurations and are not all
// executed together -- confirm the exact guards against the full file.
//
// Parameters:
//   windows_size  - window dimensions; width and height are divided by cell_size to get the buffer dimensions.
//   cell_size     - divisor applied to both window dimensions.
//   num_of_feats  - channel count of ifft2_res, row multiplier of fw_all, and feature count passed to
//                   zf / xf / model_xf.
//   num_of_scales - channel count of response, row multiplier of in_all, and scale count passed to
//                   zf / kzf / kf (defaults to 1).
//   model_xf, yf  - optional ComplexMat objects on which create() is called below.
//                   NOTE(review): both default to nullptr; the guard that protects the dereference is not
//                   visible here -- confirm.
//   zero_index    - NOTE(review): presumably marks the first context, which additionally owns rot_labels,
//                   model_xf and yf -- confirm.
18 ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1,
19 ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false)
// Request that pinned host allocations can be mapped into the device address space.
23 cudaSetDeviceFlags(cudaDeviceMapHost);
// NOTE(review): stores the literal true rather than the parameter; presumably inside an `if (zero_index)`
// that is not visible in this view -- confirm.
24 this->zero_index = true;
27 #if defined(ASYNC) || defined(OPENMP)
28 CudaSafeCall(cudaStreamCreate(&this->stream));
31 this->patch_feats.reserve(uint(num_of_feats));
32 // sizeof(float) is used for the byte size because cufftReal has the same size as float.
// Tail of a statement whose first line is not visible; presumably the definition of cells_size
// (number of cells in one window times sizeof(float)), which is used by the allocations below.
34 ((uint(windows_size.width) / cell_size) * (uint(windows_size.height) / cell_size)) * sizeof(float);
// Mapped pinned host buffer of cells_size * num_of_scales bytes, plus the device pointer that aliases it.
36 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_1ch), cells_size * num_of_scales,
37 cudaHostAllocMapped));
38 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_1ch_d),
39 reinterpret_cast<void *>(this->data_i_1ch), 0));
// Same pattern for a buffer of cells_size * num_of_feats bytes.
41 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_features), cells_size * num_of_feats,
42 cudaHostAllocMapped));
43 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_features_d),
44 reinterpret_cast<void *>(this->data_i_features), 0));
// ifft2_res and response are cv::Mat headers constructed over the two buffers allocated above
// (data_i_features and data_i_1ch respectively).
46 this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
47 CV_32FC(int(num_of_feats)), this->data_i_features);
48 this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
49 CV_32FC(int(num_of_scales)), this->data_i_1ch);
// Frequency-domain matrices: (height / cell_size) rows by ((width / cell_size) / 2 + 1) columns, created on
// this->stream. NOTE(review): the "/ 2 + 1" width is presumably the half-spectrum of a real-to-complex
// FFT -- confirm.
51 this->zf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
52 num_of_feats, num_of_scales, this->stream);
53 this->kzf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
54 num_of_scales, this->stream);
55 this->kf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
56 num_of_scales, this->stream);
// Mapped pinned storage for one float per scale (xf_sqr_norm) and a single float (yf_sqr_norm),
// each paired with a device pointer.
58 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->xf_sqr_norm), num_of_scales * sizeof(float),
59 cudaHostAllocMapped));
60 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->xf_sqr_norm_d),
61 reinterpret_cast<void *>(this->xf_sqr_norm), 0));
63 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->yf_sqr_norm), sizeof(float), cudaHostAllocMapped));
64 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->yf_sqr_norm_d),
65 reinterpret_cast<void *>(this->yf_sqr_norm), 0));
// gauss_corr_res: mapped pinned buffer of cells_size * num_of_scales bytes; in_all is a single-channel
// cv::Mat header over it with the scales stacked along the rows.
67 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->gauss_corr_res), cells_size * num_of_scales,
68 cudaHostAllocMapped));
69 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->gauss_corr_res_d),
70 reinterpret_cast<void *>(this->gauss_corr_res), 0));
71 this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales),
72 windows_size.width / int(cell_size), CV_32F, this->gauss_corr_res);
// rot_labels_data: one window worth of floats (cells_size bytes), wrapped by rot_labels.
// The first line of this statement is not visible in this view.
// NOTE(review): presumably allocated only when zero_index is set, since the release code frees it only in
// that case -- confirm.
76 cudaHostAlloc(reinterpret_cast<void **>(&this->rot_labels_data), cells_size, cudaHostAllocMapped));
77 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->rot_labels_data_d),
78 reinterpret_cast<void *>(this->rot_labels_data), 0));
79 this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
80 CV_32FC1, this->rot_labels_data);
// data_features: mapped pinned buffer of cells_size * num_of_feats bytes; fw_all is a single-channel cv::Mat
// header over it with the features stacked along the rows.
83 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_features), cells_size*num_of_feats, cudaHostAllocMapped));
84 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_features_d),
85 reinterpret_cast<void *>(this->data_features), 0));
86 this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats),
87 windows_size.width / int(cell_size), CV_32F, this->data_features);
// The statements from here on use plain malloc and cv::Mat-owned storage instead of mapped pinned memory.
// NOTE(review): presumably the non-CuFFT branch of a conditional that is not visible in this view -- confirm.
90 this->xf_sqr_norm = reinterpret_cast<float *>(malloc(num_of_scales * sizeof(float)));
91 this->yf_sqr_norm = reinterpret_cast<float *>(malloc(sizeof(float)));
93 this->patch_feats.reserve(num_of_feats);
95 uint height = uint(windows_size.height) / cell_size;
// NOTE(review): `width` is declared twice below (half width + 1 versus full width); presumably these are
// alternative preprocessor branches whose guards are not visible -- confirm.
97 uint width = (uint(windows_size.width) / cell_size) / 2 + 1;
99 int width = windows_size.width / cell_size;
// Here ifft2_res and response are constructed without an external data pointer.
102 this->ifft2_res = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_feats)));
103 this->response = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales)));
105 this->zf = ComplexMat(height, width, num_of_feats, num_of_scales);
106 this->kzf = ComplexMat(height, width, num_of_scales);
107 this->kf = ComplexMat(height, width, num_of_scales);
// NOTE(review): in_all is assigned twice below (stacked over scales, then a single window); presumably
// alternative build branches whose guards are not visible -- confirm.
109 this->in_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales),
110 windows_size.width / int(cell_size), CV_32F);
111 this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats),
112 windows_size.width / int(cell_size), CV_32F);
114 this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F);
// Model storage for the FFTW / CuFFT builds: model_xf, yf and xf all use the (width / cell_size) / 2 + 1
// column count.
117 #if defined(FFTW) || defined(CUFFT)
119 model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
121 yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, 1);
122 // We use scale_vars[0] for updating the tracker, so we allocate memory only for its xf.
124 this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
125 num_of_feats, this->stream);
127 this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
130 } else if (num_of_scales > 1) {
131 this->max_responses.reserve(uint(num_of_scales));
132 this->max_locs.reserve(uint(num_of_scales));
133 this->response_maps.reserve(uint(num_of_scales));
// Full-width variants of the model storage (no "/ 2 + 1").
// NOTE(review): presumably the branch for the remaining FFT backend -- confirm.
137 model_xf->create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
138 yf->create(windows_size.height / cell_size, windows_size.width / cell_size, 1);
139 this->xf.create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
// Release of the buffers acquired by the constructor.
// NOTE(review): the enclosing function header and the preprocessor guards are not visible in this view;
// presumably this is the destructor body, with the cudaFreeHost group and the free() group in mutually
// exclusive build branches -- confirm.
//
// Buffers obtained with cudaHostAlloc are returned with cudaFreeHost.
147 CudaSafeCall(cudaFreeHost(this->xf_sqr_norm));
148 CudaSafeCall(cudaFreeHost(this->yf_sqr_norm));
149 CudaSafeCall(cudaFreeHost(this->data_i_1ch));
150 CudaSafeCall(cudaFreeHost(this->data_i_features));
151 CudaSafeCall(cudaFreeHost(this->gauss_corr_res));
// rot_labels_data is released only when this context has zero_index set.
152 if (zero_index) CudaSafeCall(cudaFreeHost(this->rot_labels_data));
153 CudaSafeCall(cudaFreeHost(this->data_features));
// The stream is destroyed under the same ASYNC / OPENMP condition under which it is created.
154 #if defined(ASYNC) || defined(OPENMP)
155 CudaSafeCall(cudaStreamDestroy(this->stream));
// Counterpart of the malloc() calls for xf_sqr_norm / yf_sqr_norm in the constructor.
158 free(this->xf_sqr_norm);
159 free(this->yf_sqr_norm);
// Data members.
// NOTE(review): the struct owns raw buffers that are released in its cleanup code, and no copy/move control is
// visible in this view; copying an instance would presumably lead to a double free -- confirm whether copies
// are prevented elsewhere.
//
// Host-side storage for the squared norms: one float per scale for xf_sqr_norm and a single float for
// yf_sqr_norm, as sized in the constructor.
163 float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
164 std::vector<cv::Mat> patch_feats;
// Working matrices; in the mapped-memory configuration these are headers over the raw buffers declared below.
166 cv::Mat in_all, fw_all, ifft2_res, response;
167 ComplexMat zf, kzf, kf, xyf, xf;
// Raw buffers for the mapped-memory configuration. Each *_d pointer is the device pointer that
// cudaHostGetDevicePointer returns for the host buffer of the same name.
171 float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr, *gauss_corr_res_d = nullptr,
172 *rot_labels_data = nullptr, *rot_labels_data_d = nullptr, *data_features = nullptr,
173 *data_features_d = nullptr;
// NOTE(review): data_f is not referenced anywhere in the visible part of the file -- confirm it is still needed.
174 float *data_f = nullptr, *data_i_features = nullptr, *data_i_features_d = nullptr, *data_i_1ch = nullptr,
175 *data_i_1ch_d = nullptr;
// Stream passed to the ComplexMat::create calls; it stays nullptr unless the constructor creates one.
177 cudaStream_t stream = nullptr;
178 ComplexMat model_alphaf, model_xf;
180 // Big batch variables
182 double max_val, max_response;
// Per-scale containers; the constructor reserves num_of_scales entries in each when num_of_scales > 1.
184 std::vector<double> max_responses;
185 std::vector<cv::Point2i> max_locs;
186 std::vector<cv::Mat> response_maps;
// Set to true by the constructor; controls whether rot_labels_data is freed on cleanup.
187 bool zero_index = false;
190 #endif // SCALE_VARS_HPP