// Source: rtime.felk.cvut.cz Git — hercules2020/kcf.git, src/threadctx.hpp
// Commit: "CUDA: Use per-thread default streams rather than explicit streams"
1 #ifndef SCALE_VARS_HPP
2 #define SCALE_VARS_HPP
3
4 #include <future>
5 #include "dynmem.hpp"
6
7 #ifdef CUFFT
8 #include "complexmat.cuh"
9 #else
10 #include "complexmat.hpp"
11 #endif
12
13 struct ThreadCtx {
14   public:
15     ThreadCtx(cv::Size roi, uint num_of_feats, double scale, uint num_of_scales)
16         : scale(scale)
17     {
18         this->xf_sqr_norm = DynMem(num_of_scales * sizeof(float));
19         this->yf_sqr_norm = DynMem(sizeof(float));
20
21         uint cells_size = roi.width * roi.height * sizeof(float);
22
23 #if defined(CUFFT) || defined(FFTW)
24         this->gauss_corr_res = DynMem(cells_size * num_of_scales);
25         this->data_features = DynMem(cells_size * num_of_feats);
26
27         uint width_freq = roi.width / 2 + 1;
28
29         this->in_all = cv::Mat(roi.height * num_of_scales, roi.width, CV_32F, this->gauss_corr_res.hostMem());
30         this->fw_all = cv::Mat(roi.height * num_of_feats, roi.width, CV_32F, this->data_features.hostMem());
31 #else
32         uint width_freq = roi.width;
33
34         this->in_all = cv::Mat(roi, CV_32F);
35 #endif
36
37         this->data_i_features = DynMem(cells_size * num_of_feats);
38         this->data_i_1ch = DynMem(cells_size * num_of_scales);
39
40         this->ifft2_res = cv::Mat(roi, CV_32FC(num_of_feats), this->data_i_features.hostMem());
41         this->response = cv::Mat(roi, CV_32FC(num_of_scales), this->data_i_1ch.hostMem());
42
43 #ifdef CUFFT
44         this->zf.create(roi.height, width_freq, num_of_feats, num_of_scales);
45         this->kzf.create(roi.height, width_freq, num_of_scales);
46         this->kf.create(roi.height, width_freq, num_of_scales);
47 #else
48         this->zf.create(roi.height, width_freq, num_of_feats, num_of_scales);
49         this->kzf.create(roi.height, width_freq, num_of_scales);
50         this->kf.create(roi.height, width_freq, num_of_scales);
51 #endif
52
53 #ifdef BIG_BATCH
54         if (num_of_scales > 1) {
55             this->max_responses.reserve(num_of_scales);
56             this->max_locs.reserve(num_of_scales);
57             this->response_maps.reserve(num_of_scales);
58         }
59 #endif
60     }
61     ThreadCtx(ThreadCtx &&) = default;
62
63     const double scale;
64 #ifdef ASYNC
65     std::future<void> async_res;
66 #endif
67
68     DynMem xf_sqr_norm, yf_sqr_norm;
69
70     cv::Mat in_all, fw_all, ifft2_res, response;
71     ComplexMat zf, kzf, kf, xyf;
72
73     DynMem data_i_features, data_i_1ch;
74     // CuFFT and FFTW variables
75     DynMem gauss_corr_res, data_features;
76
77     // CuFFT variables
78     ComplexMat model_alphaf, model_xf;
79
80     // Variables used during non big batch mode and in big batch mode with ThreadCtx in p_threadctxs in kcf  on zero index.
81     cv::Point2i max_loc;
82     double max_val, max_response;
83
84 #ifdef BIG_BATCH
85     // Stores value of responses, location of maximal response and response maps for each scale
86     std::vector<double> max_responses;
87     std::vector<cv::Point2i> max_locs;
88     std::vector<cv::Mat> response_maps;
89 #endif
90 };
91
92 #endif // SCALE_VARS_HPP