rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/threadctx.hpp
Changed variables in ThreadCtx to uint
[hercules2020/kcf.git] / src / threadctx.hpp
1 #ifndef SCALE_VARS_HPP
2 #define SCALE_VARS_HPP
3
4 #ifdef CUFFT
5 #include "complexmat.cuh"
6 #else
7 #include "complexmat.hpp"
8 #ifndef CUFFTW
9 // For compatibility reasons between CuFFT and FFTW, OpenCVfft versions.
10 typedef int *cudaStream_t;
11 #else
12 #include "cuda_runtime.h"
13 #endif
14 #endif
15
16 struct ThreadCtx {
17   public:
18     ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1,
19               ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false)
20     {
21         uint alloc_size;
22 #ifdef CUFFT
23         if (zero_index) {
24             cudaSetDeviceFlags(cudaDeviceMapHost);
25             this->zero_index = true;
26         }
27
28 #if defined(ASYNC) || defined(OPENMP)
29         CudaSafeCall(cudaStreamCreate(&this->stream));
30 #endif
31
32         this->patch_feats.reserve(uint(num_of_feats));
33
34         alloc_size =
35             (uint(windows_size.width) / cell_size * uint(windows_size.height )/ cell_size * num_of_scales) * sizeof(cufftReal);
36         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_1ch), alloc_size, cudaHostAllocMapped));
37         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_1ch_d),
38                                               reinterpret_cast<void *>(this->data_i_1ch), 0));
39
40         alloc_size =
41             (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) * sizeof(cufftReal);
42         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_i_features), alloc_size, cudaHostAllocMapped));
43         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_i_features_d),
44                                               reinterpret_cast<void *>(this->data_i_features), 0));
45
46         this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
47                                   CV_32FC(int(num_of_feats)), this->data_i_features);
48         this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size),
49                                  CV_32FC(int(num_of_scales)), this->data_i_1ch);
50
51         this->zf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats,
52                         num_of_scales, this->stream);
53         this->kzf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales,
54                          this->stream);
55         this->kf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales,
56                         this->stream);
57
58         alloc_size = uint(num_of_scales);
59
60         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->xf_sqr_norm), alloc_size * sizeof(float),
61                                    cudaHostAllocMapped));
62         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->xf_sqr_norm_d),
63                                               reinterpret_cast<void *>(this->xf_sqr_norm), 0));
64
65         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->yf_sqr_norm), sizeof(float), cudaHostAllocMapped));
66         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->yf_sqr_norm_d),
67                                               reinterpret_cast<void *>(this->yf_sqr_norm), 0));
68
69         alloc_size =
70             uint(windows_size.width) / cell_size * uint(windows_size.height)/ cell_size * alloc_size * sizeof(float);
71         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->gauss_corr_res), alloc_size, cudaHostAllocMapped));
72         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->gauss_corr_res_d),
73                                               reinterpret_cast<void *>(this->gauss_corr_res), 0));
74         this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales), windows_size.width / int(cell_size),
75                                CV_32F, this->gauss_corr_res);
76
77         if (zero_index) {
78             alloc_size = uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * sizeof(float);
79             CudaSafeCall(
80                 cudaHostAlloc(reinterpret_cast<void **>(&this->rot_labels_data), alloc_size, cudaHostAllocMapped));
81             CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->rot_labels_data_d),
82                                                   reinterpret_cast<void *>(this->rot_labels_data), 0));
83             this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), CV_32FC1,
84                                        this->rot_labels_data);
85         }
86
87         alloc_size = (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) *
88                      sizeof(cufftReal);
89         CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&this->data_features), alloc_size, cudaHostAllocMapped));
90         CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->data_features_d),
91                                               reinterpret_cast<void *>(this->data_features), 0));
92         this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size),
93                                CV_32F, this->data_features);
94 #else
95         alloc_size = num_of_scales;
96
97         this->xf_sqr_norm = reinterpret_cast<float *>(malloc(alloc_size * sizeof(float)));
98         this->yf_sqr_norm = reinterpret_cast<float *>(malloc(sizeof(float)));
99
100         this->patch_feats.reserve(num_of_feats);
101
102         uint height = uint(windows_size.height) / cell_size;
103 #ifdef FFTW
104         uint width = (uint(windows_size.width) / cell_size) / 2 + 1;
105 #else
106         int width = windows_size.width / cell_size;
107 #endif
108
109         this->ifft2_res = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_feats)));
110         this->response = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales)));
111
112         this->zf = ComplexMat(height, width, num_of_feats, num_of_scales);
113         this->kzf = ComplexMat(height, width, num_of_scales);
114         this->kf = ComplexMat(height, width, num_of_scales);
115 #ifdef FFTW
116         this->in_all =
117             cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales), windows_size.width / int(cell_size), CV_32F);
118         this->fw_all =
119             cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size), CV_32F);
120 #else
121         this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F);
122 #endif
123 #endif
124 #if defined(FFTW) || defined(CUFFT)
125         if (zero_index) {
126             model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats);
127             yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, 1);
128             // We use scale_vars[0] for updating the tracker, so we only allocate memory for  its xf only.
129 #ifdef CUFFT
130             this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats,
131                             this->stream);
132 #else
133             this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats);
134 #endif
135         } else if (num_of_scales > 1) {
136             this->max_responses.reserve(uint(num_of_scales));
137             this->max_locs.reserve(uint(num_of_scales));
138             this->response_maps.reserve(uint(num_of_scales));
139         }
140 #else
141         if (zero_index) {
142             model_xf->create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
143             yf->create(windows_size.height / cell_size, windows_size.width / cell_size, 1);
144             this->xf.create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats);
145         }
146 #endif
147     }
148
149     ~ThreadCtx()
150     {
151 #ifdef CUFFT
152         CudaSafeCall(cudaFreeHost(this->xf_sqr_norm));
153         CudaSafeCall(cudaFreeHost(this->yf_sqr_norm));
154         CudaSafeCall(cudaFreeHost(this->data_i_1ch));
155         CudaSafeCall(cudaFreeHost(this->data_i_features));
156         CudaSafeCall(cudaFreeHost(this->gauss_corr_res));
157         if (zero_index) CudaSafeCall(cudaFreeHost(this->rot_labels_data));
158         CudaSafeCall(cudaFreeHost(this->data_features));
159 #if defined(ASYNC) || defined(OPENMP)
160         CudaSafeCall(cudaStreamDestroy(this->stream));
161 #endif
162 #else
163         free(this->xf_sqr_norm);
164         free(this->yf_sqr_norm);
165 #endif
166     }
167
168     float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
169     std::vector<cv::Mat> patch_feats;
170
171     cv::Mat in_all, fw_all, ifft2_res, response;
172     ComplexMat zf, kzf, kf, xyf, xf;
173
174     // CuFFT variables
175     cv::Mat rot_labels;
176     float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr, *gauss_corr_res_d = nullptr,
177           *rot_labels_data = nullptr, *rot_labels_data_d = nullptr, *data_features = nullptr,
178           *data_features_d = nullptr;
179     float *data_f = nullptr, *data_i_features = nullptr, *data_i_features_d = nullptr, *data_i_1ch = nullptr,
180           *data_i_1ch_d = nullptr;
181
182     cudaStream_t stream = nullptr;
183     ComplexMat model_alphaf, model_xf;
184
185     // Big batch variables
186     cv::Point2i max_loc;
187     double max_val, max_response;
188
189     std::vector<double> max_responses;
190     std::vector<cv::Point2i> max_locs;
191     std::vector<cv::Mat> response_maps;
192     bool zero_index = false;
193 };
194
195 #endif // SCALE_VARS_HPP