]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/threadctx.hpp
1cf13378cb27d34d499a214e11c9d12ad59c5b5e
[hercules2020/kcf.git] / src / threadctx.hpp
1 #ifndef SCALE_VARS_HPP
2 #define SCALE_VARS_HPP
3
4 #ifdef CUFFT
5 #include "complexmat.cuh"
6 #else
7 #include "complexmat.hpp"
8 #ifndef CUFFTW
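9 // Compatibility between the CuFFT build and the FFTW / OpenCV-FFT builds: when no CUDA runtime header is included, provide a stand-in cudaStream_t so code that names the type still compiles.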
10 typedef int *cudaStream_t;
11 #else
12 #include "cuda_runtime.h"
13 #endif
14 #endif
15
16 struct ThreadCtx {
17   public:
18     ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1,
19               ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false)
20     {
21 #ifdef CUFFT
22         if (zero_index) {
23             cudaSetDeviceFlags(cudaDeviceMapHost);
24             this->zero_index = true;
25         }
26
27 #if defined(ASYNC) || defined(OPENMP)
28         CudaSafeCall(cudaStreamCreate(&this->stream));
29 #endif
30
31         this->patch_feats.reserve(uint(num_of_feats));
32 // Size of cufftReal == float
33         uint cells_size =
34             ((uint(windows_size.width) / cell_size) * (uint(windows_size.height) / cell_size)) * sizeof(float);
35
36         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_1ch), cells_size * num_of_scales,
37                                    cudaHostAllocMapped));
38         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_1ch_d),
39                                               reinterpret_cast<void *>(this->data_i_1ch), 0));
40
41         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_features), cells_size * num_of_feats,
42                                    cudaHostAllocMapped));
43         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_features_d),
44                                               reinterpret_cast<void *>(this->data_i_features), 0));
45
46         this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
47                                   CV_32FC(int(num_of_feats)), this->data_i_features);
48         this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
49                                  CV_32FC(int(num_of_scales)), this->data_i_1ch);
50
51         this->zf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
52                         num_of_feats, num_of_scales, this->stream);
53         this->kzf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
54                          num_of_scales, this->stream);
55         this->kf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
56                         num_of_scales, this->stream);
57
58         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->xf_sqr_norm), num_of_scales * sizeof(float),
59                                    cudaHostAllocMapped));
60         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->xf_sqr_norm_d),
61                                               reinterpret_cast<void *>(this->xf_sqr_norm), 0));
62
63         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->yf_sqr_norm), sizeof(float), cudaHostAllocMapped));
64         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->yf_sqr_norm_d),
65                                               reinterpret_cast<void *>(this->yf_sqr_norm), 0));
66
67         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->gauss_corr_res), cells_size * num_of_scales,
68                                    cudaHostAllocMapped));
69         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->gauss_corr_res_d),
70                                               reinterpret_cast<void *>(this->gauss_corr_res), 0));
71         this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales),
72                                windows_size.width / int(cell_size), CV_32F, this->gauss_corr_res);
73
74         if (zero_index) {
75             CudaSafeCall(
76                 cudaHostAlloc(reinterpret_cast<void **>(&this->rot_labels_data), cells_size, cudaHostAllocMapped));
77             CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->rot_labels_data_d),
78                                                   reinterpret_cast<void *>(this->rot_labels_data), 0));
79             this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
80                                        CV_32FC1, this->rot_labels_data);
81         }
82
83         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_features), cells_size*num_of_feats, cudaHostAllocMapped));
84         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_features_d),
85                                               reinterpret_cast<void *>(this->data_features), 0));
86         this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats),
87                                windows_size.width / int(cell_size), CV_32F, this->data_features);
88 #else
89
90         this->xf_sqr_norm = reinterpret_cast<float *>(malloc(num_of_scales * sizeof(float)));
91         this->yf_sqr_norm = reinterpret_cast<float *>(malloc(sizeof(float)));
92
93         this->patch_feats.reserve(num_of_feats);
94
95         uint height = uint(windows_size.height) / cell_size;
96 #ifdef FFTW
97         uint width = (uint(windows_size.width) / cell_size) / 2 + 1;
98 #else
99         int width = windows_size.width / cell_size;
100 #endif
101
102         this->ifft2_res = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_feats)));
103         this->response = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales)));
104
105         this->zf = ComplexMat(height, width, num_of_feats, num_of_scales);
106         this->kzf = ComplexMat(height, width, num_of_scales);
107         this->kf = ComplexMat(height, width, num_of_scales);
108 #ifdef FFTW
109         this->in_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales),
110                                windows_size.width / int(cell_size), CV_32F);
111         this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats),
112                                windows_size.width / int(cell_size), CV_32F);
113 #else
114         this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F);
115 #endif
116 #endif
117 #if defined(FFTW) || defined(CUFFT)
118         if (zero_index) {
119             model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
120                              num_of_feats);
121             yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, 1);
122             // We use scale_vars[0] for updating the tracker, so we only allocate memory for  its xf only.
123 #ifdef CUFFT
124             this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
125                             num_of_feats, this->stream);
126 #else
127             this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1,
128                             num_of_feats);
129 #endif
130         } else if (num_of_scales > 1) {
131             this->max_responses.reserve(uint(num_of_scales));
132             this->max_locs.reserve(uint(num_of_scales));
133             this->response_maps.reserve(uint(num_of_scales));
134         }
135 #else
136         if (zero_index) {
137             model_xf->create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
138             yf->create(windows_size.height / cell_size, windows_size.width / cell_size, 1);
139             this->xf.create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
140         }
141 #endif
142     }
143
144     ~ThreadCtx()
145     {
146 #ifdef CUFFT
147         CudaSafeCall(cudaFreeHost(this->xf_sqr_norm));
148         CudaSafeCall(cudaFreeHost(this->yf_sqr_norm));
149         CudaSafeCall(cudaFreeHost(this->data_i_1ch));
150         CudaSafeCall(cudaFreeHost(this->data_i_features));
151         CudaSafeCall(cudaFreeHost(this->gauss_corr_res));
152         if (zero_index) CudaSafeCall(cudaFreeHost(this->rot_labels_data));
153         CudaSafeCall(cudaFreeHost(this->data_features));
154 #if defined(ASYNC) || defined(OPENMP)
155         CudaSafeCall(cudaStreamDestroy(this->stream));
156 #endif
157 #else
158         free(this->xf_sqr_norm);
159         free(this->yf_sqr_norm);
160 #endif
161     }
162
163     float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
164     std::vector<cv::Mat> patch_feats;
165
166     cv::Mat in_all, fw_all, ifft2_res, response;
167     ComplexMat zf, kzf, kf, xyf, xf;
168
169     // CuFFT variables
170     cv::Mat rot_labels;
171     float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr, *gauss_corr_res_d = nullptr,
172           *rot_labels_data = nullptr, *rot_labels_data_d = nullptr, *data_features = nullptr,
173           *data_features_d = nullptr;
174     float *data_f = nullptr, *data_i_features = nullptr, *data_i_features_d = nullptr, *data_i_1ch = nullptr,
175           *data_i_1ch_d = nullptr;
176
177     cudaStream_t stream = nullptr;
178     ComplexMat model_alphaf, model_xf;
179
180     // Big batch variables
181     cv::Point2i max_loc;
182     double max_val, max_response;
183
184     std::vector<double> max_responses;
185     std::vector<cv::Point2i> max_locs;
186     std::vector<cv::Mat> response_maps;
187     bool zero_index = false;
188 };
189
190 #endif // SCALE_VARS_HPP