From: Shanigen Date: Wed, 5 Sep 2018 12:52:09 +0000 (+0200) Subject: Made dynamic memory allocation more readable X-Git-Url: https://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/commitdiff_plain/09118e41a8e8bb9748065a0d4ceeef1c17961c2a Made dynamic memory allocation more readable --- diff --git a/src/threadctx.hpp b/src/threadctx.hpp index 2df2b05..1cf1337 100644 --- a/src/threadctx.hpp +++ b/src/threadctx.hpp @@ -18,7 +18,6 @@ struct ThreadCtx { ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1, ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false) { - uint alloc_size; #ifdef CUFFT if (zero_index) { cudaSetDeviceFlags(cudaDeviceMapHost); @@ -30,16 +29,17 @@ struct ThreadCtx { #endif this->patch_feats.reserve(uint(num_of_feats)); +// Size of cufftReal == float + uint cells_size = + ((uint(windows_size.width) / cell_size) * (uint(windows_size.height) / cell_size)) * sizeof(float); - alloc_size = - (uint(windows_size.width) / cell_size * uint(windows_size.height )/ cell_size * num_of_scales) * sizeof(cufftReal); - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_i_1ch), alloc_size, cudaHostAllocMapped)); + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_i_1ch), cells_size * num_of_scales, + cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->data_i_1ch_d), reinterpret_cast(this->data_i_1ch), 0)); - alloc_size = - (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) * sizeof(cufftReal); - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_i_features), alloc_size, cudaHostAllocMapped)); + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_i_features), cells_size * num_of_feats, + cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->data_i_features_d), reinterpret_cast(this->data_i_features), 0)); @@ -48,16 +48,14 @@ struct ThreadCtx { this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales)), this->data_i_1ch); - this->zf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats, + this->zf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, + num_of_feats, num_of_scales, this->stream); + this->kzf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, + num_of_scales, this->stream); + this->kf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, num_of_scales, this->stream); - this->kzf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales, - this->stream); - this->kf.create(uint(windows_size.height)/ cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_scales, - this->stream); - alloc_size = uint(num_of_scales); - - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->xf_sqr_norm), alloc_size * sizeof(float), + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->xf_sqr_norm), num_of_scales * sizeof(float), cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->xf_sqr_norm_d), reinterpret_cast(this->xf_sqr_norm), 0)); @@ -66,35 +64,30 @@ struct ThreadCtx { CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->yf_sqr_norm_d), reinterpret_cast(this->yf_sqr_norm), 0)); - alloc_size = - uint(windows_size.width) / cell_size * uint(windows_size.height)/ cell_size * alloc_size * sizeof(float); - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->gauss_corr_res), alloc_size, cudaHostAllocMapped)); + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->gauss_corr_res), cells_size * num_of_scales, + cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->gauss_corr_res_d), reinterpret_cast(this->gauss_corr_res), 0)); - this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales), windows_size.width / int(cell_size), - CV_32F, this->gauss_corr_res); + this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales), + windows_size.width / int(cell_size), CV_32F, this->gauss_corr_res); if (zero_index) { - alloc_size = uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * sizeof(float); CudaSafeCall( - cudaHostAlloc(reinterpret_cast(&this->rot_labels_data), alloc_size, cudaHostAllocMapped)); + cudaHostAlloc(reinterpret_cast(&this->rot_labels_data), cells_size, cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->rot_labels_data_d), reinterpret_cast(this->rot_labels_data), 0)); - this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), CV_32FC1, - this->rot_labels_data); + this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), + CV_32FC1, this->rot_labels_data); } - alloc_size = (uint(windows_size.width) / cell_size * uint(windows_size.height) / cell_size * num_of_feats) * - sizeof(cufftReal); - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_features), alloc_size, cudaHostAllocMapped)); + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&this->data_features), cells_size*num_of_feats, cudaHostAllocMapped)); CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&this->data_features_d), reinterpret_cast(this->data_features), 0)); - this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size), - CV_32F, this->data_features); + this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), + windows_size.width / int(cell_size), CV_32F, this->data_features); #else - alloc_size = num_of_scales; - this->xf_sqr_norm = reinterpret_cast(malloc(alloc_size * sizeof(float))); + this->xf_sqr_norm = reinterpret_cast(malloc(num_of_scales * sizeof(float))); this->yf_sqr_norm = reinterpret_cast(malloc(sizeof(float))); this->patch_feats.reserve(num_of_feats); @@ -113,24 +106,26 @@ struct ThreadCtx { this->kzf = ComplexMat(height, width, num_of_scales); this->kf = ComplexMat(height, width, num_of_scales); #ifdef FFTW - this->in_all = - cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales), windows_size.width / int(cell_size), CV_32F); - this->fw_all = - cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size), CV_32F); + this->in_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales), + windows_size.width / int(cell_size), CV_32F); + this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), + windows_size.width / int(cell_size), CV_32F); #else this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F); #endif #endif #if defined(FFTW) || defined(CUFFT) if (zero_index) { - model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats); - yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, 1); + model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, + num_of_feats); + yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, 1); // We use scale_vars[0] for updating the tracker, so we only allocate memory for its xf only. #ifdef CUFFT - this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats, - this->stream); + this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, + num_of_feats, this->stream); #else - this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width)/ cell_size) / 2 + 1, num_of_feats); + this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, + num_of_feats); #endif } else if (num_of_scales > 1) { this->max_responses.reserve(uint(num_of_scales));