From 9e4e614741e6c4386cd7e17c03d81658cf66ec73 Mon Sep 17 00:00:00 2001 From: Shanigen Date: Fri, 7 Sep 2018 12:06:51 +0200 Subject: [PATCH] Streamlined ThreadCtx Removed obsolete variables and variables that were only used in zero index and moved them to kcf. --- src/CMakeLists.txt | 13 +-- src/complexmat.cpp | 231 --------------------------------------------- src/complexmat.hpp | 88 ----------------- src/kcf.cpp | 54 +++++++---- src/kcf.h | 5 + src/threadctx.hpp | 117 +++++++---------------- 6 files changed, 78 insertions(+), 430 deletions(-) delete mode 100644 src/complexmat.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f0b0bd1..7d3eab5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,7 +12,6 @@ option(OPENMP "Use OpenMP library. Works with FFTW and OpenCV implementation." O option(ASYNC "Works only if OPENCV_CUFFT is not ON. Will enable C++ async directive." OFF) option(CUDA_DEBUG "Enables error cheking for cuda and cufft. " OFF) option(BIG_BATCH "Enable transforming all features from all scales together." OFF) -option(TEMPLATE_COMPLEXMAT "Use ComplexMat as abstract class." ON) IF(PROFILING) add_definitions(-DPROFILING ) @@ -24,11 +23,6 @@ IF(BIG_BATCH) MESSAGE(STATUS "Big_batch mode") ENDIF() -IF(TEMPLATE_COMPLEXMAT) - add_definitions(-DTEMPLATE_COMPLEXMAT ) - MESSAGE(STATUS "ComplexMat abstract class version.") -ENDIF() - SET(use_cuda OFF) IF(FFT STREQUAL "OpenCV") @@ -92,12 +86,7 @@ add_subdirectory(piotr_fhog) add_subdirectory(cn) add_library(kcf STATIC ${KCF_LIB_SRC}) -IF(NOT TEMPLATE_COMPLEXMAT) - add_library(complexmat complexmat.cpp) - target_link_libraries(kcf fhog cndata complexmat ${OpenCV_LIBS}) -ELSE() - target_link_libraries(kcf fhog cndata ${OpenCV_LIBS}) -ENDIF() +target_link_libraries(kcf fhog cndata ${OpenCV_LIBS}) set_target_properties(kcf PROPERTIES VERSION 1.0.0 SOVERSION 1) IF(FFT STREQUAL "fftw") diff --git a/src/complexmat.cpp b/src/complexmat.cpp deleted file mode 100644 index 72a2a3f..0000000 --- a/src/complexmat.cpp +++ /dev/null @@ -1,231 +0,0 @@ -#include "complexmat.hpp" - -ComplexMat::ComplexMat() : cols(0), rows(0), n_channels(0) {} -ComplexMat::ComplexMat(int _rows, int _cols, int _n_channels) : cols(_cols), rows(_rows), n_channels(_n_channels) -{ - p_data.resize(n_channels*cols*rows); -} - - -//assuming that mat has 2 channels (real, img) -ComplexMat::ComplexMat(const cv::Mat & mat) : cols(mat.cols), rows(mat.rows), n_channels(1) -{ - p_data = convert(mat); -} - -void ComplexMat::create(int _rows, int _cols, int _n_channels) -{ - rows = _rows; - cols = _cols; - n_channels = _n_channels; - p_data.resize(n_channels*cols*rows); -} - -void ComplexMat::create(int _rows, int _cols, int _n_channels, int _n_scales) -{ - rows = _rows; - cols = _cols; - n_channels = _n_channels; - n_scales = _n_scales; - p_data.resize(n_channels*cols*rows); -} -// cv::Mat API compatibility -cv::Size ComplexMat::size() { return cv::Size(cols, rows); } -int ComplexMat::channels() { return n_channels; } -int ComplexMat::channels() const { return n_channels; } - -//assuming that mat has 2 channels (real, imag) -void ComplexMat::set_channel(int idx, const cv::Mat & mat) -{ - assert(idx >= 0 && idx < n_channels); - for (int i = 0; i < rows; ++i){ - const std::complex *row = mat.ptr>(i); - for (int j = 0; j < cols; ++j) - p_data[idx*rows*cols+i*cols+j]=row[j]; - } -} - - -void ComplexMat::sqr_norm(float *sums_sqr_norms) const -{ - int n_channels_per_scale = n_channels/n_scales; - int scale_offset = n_channels_per_scale*rows*cols; - float sum_sqr_norm; - for (int scale = 0; scale < n_scales; ++scale) { - sum_sqr_norm = 0; - for (int i = 0; i < n_channels_per_scale; ++i) - for (auto lhs = p_data.begin()+i*rows*cols+scale*scale_offset; lhs != p_data.begin()+(i+1)*rows*cols+scale*scale_offset; ++lhs) - sum_sqr_norm += lhs->real()*lhs->real() + lhs->imag()*lhs->imag(); - sums_sqr_norms[scale] = sum_sqr_norm/static_cast(cols*rows); - } - return; -} - -ComplexMat ComplexMat::sqr_mag() const -{ - return mat_const_operator( [](std::complex & c) { c = c.real()*c.real() + c.imag()*c.imag(); } ); -} - -ComplexMat ComplexMat::conj() const -{ - return mat_const_operator( [](std::complex & c) { c = std::complex(c.real(), -c.imag()); } ); -} - -ComplexMat ComplexMat::sum_over_channels() const -{ - assert(p_data.size() > 1); - - int n_channels_per_scale = n_channels/n_scales; - int scale_offset = n_channels_per_scale*rows*cols; - - ComplexMat result(this->rows, this->cols, n_scales); - for (int scale = 0; scale < n_scales; ++scale) { - std::copy(p_data.begin()+scale*scale_offset,p_data.begin()+rows*cols+scale*scale_offset, result.p_data.begin()+scale*rows*cols); - for (int i = 1; i < n_channels_per_scale; ++i) { - std::transform(result.p_data.begin()+scale*rows*cols, result.p_data.begin()+(scale+1)*rows*cols, p_data.begin()+i*rows*cols+scale*scale_offset, - result.p_data.begin()+scale*rows*cols, std::plus>()); - } - } - return result; -} - -//return 2 channels (real, imag) for first complex channel -cv::Mat ComplexMat::to_cv_mat() const -{ - assert(p_data.size() >= 1); - return channel_to_cv_mat(0); -} -// return a vector of 2 channels (real, imag) per one complex channel -std::vector ComplexMat::to_cv_mat_vector() const -{ - std::vector result; - result.reserve(n_channels); - - for (int i = 0; i < n_channels; ++i) - result.push_back(channel_to_cv_mat(i)); - - return result; -} - -std::complex* ComplexMat::get_p_data() const -{ - return p_data.data(); -} - -//element-wise per channel multiplication, division and addition -ComplexMat ComplexMat::operator*(const ComplexMat & rhs) const -{ - return ComplexMat::mat_mat_operator( [](std::complex & c_lhs, const std::complex & c_rhs) { c_lhs *= c_rhs; }, rhs); -} -ComplexMat ComplexMat::operator/(const ComplexMat & rhs) const -{ - return ComplexMat::mat_mat_operator( [](std::complex & c_lhs, const std::complex & c_rhs) { c_lhs /= c_rhs; }, rhs); -} -ComplexMat ComplexMat::operator+(const ComplexMat & rhs) const -{ - return ComplexMat::mat_mat_operator( [](std::complex & c_lhs, const std::complex & c_rhs) { c_lhs += c_rhs; }, rhs); -} - -//multiplying or adding constant -ComplexMat ComplexMat::operator*(const float & rhs) const -{ - return ComplexMat::mat_const_operator( [&rhs](std::complex & c) { c *= rhs; }); -} -ComplexMat ComplexMat::operator+(const float & rhs) const -{ - return ComplexMat::mat_const_operator( [&rhs](std::complex & c) { c += rhs; }); -} - -//multiplying element-wise multichannel by one channel mats (rhs mat is with one channel) -ComplexMat ComplexMat::mul(const ComplexMat & rhs) const -{ - return ComplexMat::matn_mat1_operator( [](std::complex & c_lhs, const std::complex & c_rhs) { c_lhs *= c_rhs; }, rhs); -} - -//multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel) -ComplexMat ComplexMat::mul2(const ComplexMat & rhs) const -{ - return ComplexMat::matn_mat2_operator( [](std::complex & c_lhs, const std::complex & c_rhs) { c_lhs *= c_rhs; }, rhs); -} - - -//convert 2 channel mat (real, imag) to vector row-by-row -std::vector> ComplexMat::convert(const cv::Mat & mat) -{ - std::vector> result; - result.reserve(mat.cols*mat.rows); - for (int y = 0; y < mat.rows; ++y) { - const float * row_ptr = mat.ptr(y); - for (int x = 0; x < 2*mat.cols; x += 2){ - result.push_back(std::complex(row_ptr[x], row_ptr[x+1])); - } - } - return result; -} - -ComplexMat ComplexMat::mat_mat_operator(void (*op)(std::complex & c_lhs, const std::complex & c_rhs), const ComplexMat & mat_rhs) const -{ - assert(mat_rhs.n_channels == n_channels && mat_rhs.cols == cols && mat_rhs.rows == rows); - - ComplexMat result = *this; - for (int i = 0; i < n_channels; ++i) { - auto lhs = result.p_data.begin()+i*rows*cols; - auto rhs = mat_rhs.p_data.begin()+i*rows*cols; - for ( ; lhs != result.p_data.begin()+(i+1)*rows*cols; ++lhs, ++rhs) - op(*lhs, *rhs); - } - - return result; -} -ComplexMat ComplexMat::matn_mat1_operator(void (*op)(std::complex & c_lhs, const std::complex & c_rhs), const ComplexMat & mat_rhs) const -{ - assert(mat_rhs.n_channels == 1 && mat_rhs.cols == cols && mat_rhs.rows == rows); - - ComplexMat result = *this; - for (int i = 0; i < n_channels; ++i) { - auto lhs = result.p_data.begin()+i*rows*cols; - auto rhs = mat_rhs.p_data.begin(); - for ( ; lhs != result.p_data.begin()+(i+1)*rows*cols; ++lhs, ++rhs) - op(*lhs, *rhs); - } - - return result; -} -ComplexMat ComplexMat::matn_mat2_operator(void (*op)(std::complex & c_lhs, const std::complex & c_rhs), const ComplexMat & mat_rhs) const -{ - assert(mat_rhs.n_channels == n_channels/n_scales && mat_rhs.cols == cols && mat_rhs.rows == rows); - - int n_channels_per_scale = n_channels/n_scales; - int scale_offset = n_channels_per_scale*rows*cols; - ComplexMat result = *this; - for (int i = 0; i < n_scales; ++i) { - for (int j = 0; j < n_channels_per_scale; ++j) { - auto lhs = result.p_data.begin()+(j*rows*cols)+(i*scale_offset); - auto rhs = mat_rhs.p_data.begin()+(j*rows*cols); - for ( ; lhs != result.p_data.begin()+((j+1)*rows*cols)+(i*scale_offset); ++lhs, ++rhs) - op(*lhs, *rhs); - } - } - - return result; -} -ComplexMat ComplexMat::mat_const_operator(const std::function & c_rhs)> & op) const -{ - ComplexMat result = *this; - for (int i = 0; i < n_channels; ++i) - for (auto lhs = result.p_data.begin()+i*rows*cols; lhs != result.p_data.begin()+(i+1)*rows*cols; ++lhs) - op(*lhs); - return result; -} - -cv::Mat ComplexMat::channel_to_cv_mat(int channel_id) const -{ - cv::Mat result(rows, cols, CV_32FC2); - for (int y = 0; y < rows; ++y) { - std::complex * row_ptr = result.ptr>(y); - for (int x = 0; x < cols; ++x){ - row_ptr[x] = p_data[channel_id*rows*cols+y*cols+x]; - } - } - return result; -} diff --git a/src/complexmat.hpp b/src/complexmat.hpp index 7ba3f60..29628cd 100644 --- a/src/complexmat.hpp +++ b/src/complexmat.hpp @@ -6,7 +6,6 @@ #include #include -#ifdef TEMPLATE_COMPLEXMAT template class ComplexMat_ { public: uint cols; @@ -284,92 +283,5 @@ template class ComplexMat_ { }; typedef ComplexMat_ ComplexMat; -#else -class ComplexMat { - public: - int cols; - int rows; - int n_channels; - int n_scales = 1; - - ComplexMat(); - ComplexMat(int _rows, int _cols, int _n_channels); - ComplexMat(int _rows, int _cols, int _n_channels, int _n_scales); - ComplexMat(const cv::Mat &mat); - - void create(int _rows, int _cols, int _n_channels); - - void create(int _rows, int _cols, int _n_channels, int _n_scales); - // cv::Mat API compatibility - cv::Size size(); - int channels(); - int channels() const; - - // assuming that mat has 2 channels (real, imag) - void set_channel(int idx, const cv::Mat &mat); - - float sqr_norm(); - void sqr_norm(float *sums_sqr_norms) const; - - ComplexMat sqr_mag() const; - - ComplexMat conj() const; - - ComplexMat sum_over_channels() const; - - // return 2 channels (real, imag) for first complex channel - cv::Mat to_cv_mat() const; - // return a vector of 2 channels (real, imag) per one complex channel - std::vector to_cv_mat_vector() const; - - std::complex *get_p_data() const; - - // element-wise per channel multiplication, division and addition - ComplexMat operator*(const ComplexMat &rhs) const; - ComplexMat operator/(const ComplexMat &rhs) const; - ComplexMat operator+(const ComplexMat &rhs) const; - - // multiplying or adding constant - ComplexMat operator*(const float &rhs) const; - ComplexMat operator+(const float &rhs) const; - - // multiplying element-wise multichannel by one channel mats (rhs mat is with one channel) - ComplexMat mul(const ComplexMat &rhs) const; - - // multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel) - ComplexMat mul2(const ComplexMat &rhs) const; - - // text output - friend std::ostream &operator<<(std::ostream &os, const ComplexMat &mat) - { - // for (int i = 0; i < mat.n_channels; ++i){ - for (int i = 0; i < 1; ++i) { - os << "Channel " << i << std::endl; - for (int j = 0; j < mat.rows; ++j) { - for (int k = 0; k < mat.cols - 1; ++k) - os << mat.p_data[j * mat.cols + k] << ", "; - os << mat.p_data[j * mat.cols + mat.cols - 1] << std::endl; - } - } - return os; - } - - private: - mutable std::vector> p_data; - - // convert 2 channel mat (real, imag) to vector row-by-row - std::vector> convert(const cv::Mat &mat); - - ComplexMat mat_mat_operator(void (*op)(std::complex &c_lhs, const std::complex &c_rhs), - const ComplexMat &mat_rhs) const; - ComplexMat matn_mat1_operator(void (*op)(std::complex &c_lhs, const std::complex &c_rhs), - const ComplexMat &mat_rhs) const; - ComplexMat matn_mat2_operator(void (*op)(std::complex &c_lhs, const std::complex &c_rhs), - const ComplexMat &mat_rhs) const; - ComplexMat mat_const_operator(const std::function &c_rhs)> &op) const; - - cv::Mat channel_to_cv_mat(int channel_id) const; -}; -#endif #endif // COMPLEX_MAT_HPP_213123048309482094 diff --git a/src/kcf.cpp b/src/kcf.cpp index 663623c..9c6554b 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -127,6 +127,12 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f p_windows_size.width = int(round(p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size); p_windows_size.height = int(round(p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size); + p_num_of_feats = 31; + if (m_use_color) p_num_of_feats += 3; + if (m_use_cnfeat) p_num_of_feats += 10; + p_roi_width = p_windows_size.width / p_cell_size; + p_roi_height = p_windows_size.height / p_cell_size; + p_scales.clear(); if (m_use_scale) for (int i = -p_num_scales / 2; i <= p_num_scales / 2; ++i) @@ -148,20 +154,33 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f std::cerr << "cuFFT supports only Gaussian kernel." << std::endl; std::exit(EXIT_FAILURE); } + CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost)); + p_xf.create(uint(p_windows_size.height) / p_cell_size, (uint(p_windows_size.width) / p_cell_size) / 2 + 1, p_num_of_feats, this->stream); + p_rot_labels_data = DynMem( + ((uint(p_windows_size.width) / p_cell_size) * (uint(p_windows_size.height) / p_cell_size)) * sizeof(float)); + p_rot_labels = cv::Mat(p_windows_size.height / int(p_cell_size), windows_size.width / int(p_cell_size), CV_32FC1, + p_rot_labels_data.hostMem()); +#else + p_xf.create(uint(p_windows_size.height / p_cell_size), (uint(p_windows_size.height / p_cell_size)) / 2 + 1, + p_num_of_feats); #endif - p_num_of_feats = 31; - if (m_use_color) p_num_of_feats += 3; - if (m_use_cnfeat) p_num_of_feats += 10; - p_roi_width = p_windows_size.width / p_cell_size; - p_roi_height = p_windows_size.height / p_cell_size; +#if defined(CUFFT) || defined(FFTW) + p_model_xf.create(uint(p_windows_size.height / p_cell_size), (uint(p_windows_size.width / p_cell_size)) / 2 + 1, + uint(p_num_of_feats)); + p_yf.create(uint(p_windows_size.height / p_cell_size), (uint(p_windows_size.width / p_cell_size)) / 2 + 1, 1); + p_xf.create(uint(p_windows_size.height) / p_cell_size, (uint(p_windows_size.width) / p_cell_size) / 2 + 1, + p_num_of_feats); +#else + p_model_xf.create(uint(p_windows_size.height / p_cell_size), (uint(p_windows_size.width / p_cell_size)), + uint(p_num_of_feats)); + p_yf.create(uint(p_windows_size.height / p_cell_size), (uint(p_windows_size.width / p_cell_size)), 1); + p_xf.create(uint(p_windows_size.height) / p_cell_size, (uint(p_windows_size.width) / p_cell_size), p_num_of_feats); +#endif int max = m_use_big_batch ? 2 : p_num_scales; for (int i = 0; i < max; ++i) { - if (i == 0) { - p_scale_vars.emplace_back( - new ThreadCtx(p_windows_size, p_cell_size, p_num_of_feats, 1, &p_model_xf, &p_yf, true)); - } else if (m_use_big_batch) { + if (m_use_big_batch && i == 1) { p_scale_vars.emplace_back( new ThreadCtx(p_windows_size, p_cell_size, p_num_of_feats * p_num_scales, p_num_scales)); } else { @@ -191,7 +210,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f // window weights, i.e. labels fft.forward( gaussian_shaped_labels(p_output_sigma, p_windows_size.width / p_cell_size, p_windows_size.height / p_cell_size), p_yf, - m_use_cuda ? p_scale_vars.front()->rot_labels_data.deviceMem() : nullptr, p_scale_vars.front()->stream); + m_use_cuda ? p_rot_labels_data.deviceMem() : nullptr, p_scale_vars.front()->stream); DEBUG_PRINTM(p_yf); // obtain a sub-window for training initial model @@ -206,6 +225,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f p_scale_vars.front()->model_xf.set_stream(p_scale_vars.front()->stream); p_yf.set_stream(p_scale_vars.front()->stream); p_model_xf.set_stream(p_scale_vars.front()->stream); + p_xf.set_stream(p_scale_vars.front()->stream); #endif if (m_use_linearkernel) { @@ -401,21 +421,21 @@ void KCF_Tracker::track(cv::Mat &img) p_scale_vars.front()->patch_feats.clear(); get_features(input_rgb, input_gray, int(p_pose.cx), int(p_pose.cy), p_windows_size.width, p_windows_size.height, *p_scale_vars.front(), p_current_scale); - fft.forward_window(p_scale_vars.front()->patch_feats, p_scale_vars.front()->xf, p_scale_vars.front()->fw_all, + fft.forward_window(p_scale_vars.front()->patch_feats, p_xf, p_scale_vars.front()->fw_all, m_use_cuda ? p_scale_vars.front()->data_features.deviceMem() : nullptr, p_scale_vars.front()->stream); // subsequent frames, interpolate model - p_model_xf = p_model_xf * float((1. - p_interp_factor)) + p_scale_vars.front()->xf * float(p_interp_factor); + p_model_xf = p_model_xf * float((1. - p_interp_factor)) + p_xf * float(p_interp_factor); ComplexMat alphaf_num, alphaf_den; if (m_use_linearkernel) { - ComplexMat xfconj = p_scale_vars.front()->xf.conj(); + ComplexMat xfconj = p_xf.conj(); alphaf_num = xfconj.mul(p_yf); - alphaf_den = (p_scale_vars.front()->xf * xfconj); + alphaf_den = (p_xf * xfconj); } else { // Kernel Ridge Regression, calculate alphas (in Fourier domain) - gaussian_correlation(*p_scale_vars.front(), p_scale_vars.front()->xf, p_scale_vars.front()->xf, p_kernel_sigma, + gaussian_correlation(*p_scale_vars.front(), p_xf, p_xf, p_kernel_sigma, true); // ComplexMat alphaf = p_yf / (kf + p_lambda); //equation for fast training // p_model_alphaf = p_model_alphaf * (1. - p_interp_factor) + alphaf * p_interp_factor; @@ -580,9 +600,9 @@ cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) // rotate so that 1 is at top-left corner (see KCF paper for explanation) #ifdef CUFFT cv::Mat tmp = circshift(labels, range_x[0], range_y[0]); - tmp.copyTo(p_scale_vars.front()->rot_labels); + tmp.copyTo(p_rot_labels); - assert(p_scale_vars[0].rot_labels.at(0, 0) >= 1.f - 1e-10f); + assert(p_rot_labels.at(0, 0) >= 1.f - 1e-10f); return tmp; #else cv::Mat rot_labels = circshift(labels, range_x[0], range_y[0]); diff --git a/src/kcf.h b/src/kcf.h index e0743b5..9bbd94a 100644 --- a/src/kcf.h +++ b/src/kcf.h @@ -132,12 +132,17 @@ private: std::list> p_scale_vars; + //CUDA compability + cv::Mat p_rot_labels; + DynMem p_rot_labels_data; + //model ComplexMat p_yf; ComplexMat p_model_alphaf; ComplexMat p_model_alphaf_num; ComplexMat p_model_alphaf_den; ComplexMat p_model_xf; + ComplexMat p_xf; //helping functions void scale_track(ThreadCtx & vars, cv::Mat & input_rgb, cv::Mat & input_gray, double scale); cv::Mat get_subwindow(const cv::Mat & input, int cx, int cy, int size_x, int size_y); diff --git a/src/threadctx.hpp b/src/threadctx.hpp index 2316d9b..6fcbadd 100644 --- a/src/threadctx.hpp +++ b/src/threadctx.hpp @@ -15,109 +15,62 @@ typedef int *cudaStream_t; struct ThreadCtx { public: - ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1, - ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr, bool zero_index = false) + ThreadCtx(cv::Size windows_size, uint cell_size, uint num_of_feats, uint num_of_scales = 1) { -#ifdef CUFFT - if (zero_index) { - cudaSetDeviceFlags(cudaDeviceMapHost); - this->zero_index = true; - } - -#if defined(ASYNC) || defined(OPENMP) - CudaSafeCall(cudaStreamCreate(&this->stream)); -#endif - + this->xf_sqr_norm = DynMem(num_of_scales * sizeof(float)); + this->yf_sqr_norm = DynMem(sizeof(float)); this->patch_feats.reserve(uint(num_of_feats)); - // Size of cufftReal == float + uint cells_size = ((uint(windows_size.width) / cell_size) * (uint(windows_size.height) / cell_size)) * sizeof(float); - this->data_i_1ch = DynMem(cells_size * num_of_scales); - this->data_i_features = DynMem(cells_size * num_of_feats); - - this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), - CV_32FC(int(num_of_feats)), this->data_i_features.hostMem()); - this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), - CV_32FC(int(num_of_scales)), this->data_i_1ch.hostMem()); +#if defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) + CudaSafeCall(cudaStreamCreate(&this->stream)); +#endif - this->zf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_feats, num_of_scales, this->stream); - this->kzf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_scales, this->stream); - this->kf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_scales, this->stream); +#if defined(CUFFT) || defined(FFTW) + this->gauss_corr_res = DynMem(cells_size * num_of_scales); + this->data_features = DynMem(cells_size * num_of_feats); - this->xf_sqr_norm = DynMem(num_of_scales * sizeof(float)); - this->yf_sqr_norm = DynMem(sizeof(float)); + uint width_freq = (uint(windows_size.width) / cell_size) / 2 + 1; - this->gauss_corr_res = DynMem(cells_size * num_of_scales); this->in_all = cv::Mat(windows_size.height / int(cell_size) * int(num_of_scales), windows_size.width / int(cell_size), CV_32F, this->gauss_corr_res.hostMem()); - if (zero_index) { - this->rot_labels_data = DynMem(cells_size); - this->rot_labels = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), - CV_32FC1, this->rot_labels_data.hostMem()); - } - - this->data_features = DynMem(cells_size * num_of_feats); this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), windows_size.width / int(cell_size), CV_32F, this->data_features.hostMem()); #else + uint width_freq = uint(windows_size.width) / cell_size; - this->xf_sqr_norm = DynMem(num_of_scales * sizeof(float)); - this->yf_sqr_norm = DynMem(sizeof (float)); + this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F); +#endif - this->patch_feats.reserve(num_of_feats); + this->data_i_features = DynMem(cells_size * num_of_feats); + this->data_i_1ch = DynMem(cells_size * num_of_scales); - uint height = uint(windows_size.height) / cell_size; -#ifdef FFTW - uint width = (uint(windows_size.width) / cell_size) / 2 + 1; -#else - int width = windows_size.width / cell_size; -#endif + this->ifft2_res = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), + CV_32FC(int(num_of_feats)), this->data_i_features.hostMem()); + + this->response = cv::Mat(windows_size.height / int(cell_size), windows_size.width / int(cell_size), + CV_32FC(int(num_of_scales)), this->data_i_1ch.hostMem()); - this->ifft2_res = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_feats))); - this->response = cv::Mat(int(height), windows_size.width / int(cell_size), CV_32FC(int(num_of_scales))); + this->patch_feats.reserve(num_of_feats); - this->zf = ComplexMat(height, width, num_of_feats, num_of_scales); - this->kzf = ComplexMat(height, width, num_of_scales); - this->kf = ComplexMat(height, width, num_of_scales); -#ifdef FFTW - this->in_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_scales), - windows_size.width / int(cell_size), CV_32F); - this->fw_all = cv::Mat((windows_size.height / int(cell_size)) * int(num_of_feats), - windows_size.width / int(cell_size), CV_32F); -#else - this->in_all = cv::Mat((windows_size.height / int(cell_size)), windows_size.width / int(cell_size), CV_32F); -#endif -#endif -#if defined(FFTW) || defined(CUFFT) - if (zero_index) { - model_xf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_feats); - yf->create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, 1); - // We use scale_vars[0] for updating the tracker, so we only allocate memory for its xf only. #ifdef CUFFT - this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_feats, this->stream); + this->zf.create(uint(windows_size.height) / cell_size, width_freq, num_of_feats, num_of_scales, this->stream); + this->kzf.create(uint(windows_size.height) / cell_size, width_freq, num_of_scales, this->stream); + this->kf.create(uint(windows_size.height) / cell_size, width_freq, num_of_scales, this->stream); #else - this->xf.create(uint(windows_size.height) / cell_size, (uint(windows_size.width) / cell_size) / 2 + 1, - num_of_feats); + this->zf.create(uint(windows_size.height) / cell_size, width_freq, num_of_feats, num_of_scales); + this->kzf.create(uint(windows_size.height) / cell_size, width_freq, num_of_scales); + this->kf.create(uint(windows_size.height) / cell_size, width_freq, num_of_scales); #endif - } else if (num_of_scales > 1) { + + if (num_of_scales > 1) { this->max_responses.reserve(uint(num_of_scales)); this->max_locs.reserve(uint(num_of_scales)); this->response_maps.reserve(uint(num_of_scales)); } -#else - if (zero_index) { - model_xf->create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats); - yf->create(windows_size.height / cell_size, windows_size.width / cell_size, 1); - this->xf.create(windows_size.height / cell_size, windows_size.width / cell_size, num_of_feats); - } -#endif } ~ThreadCtx() @@ -131,12 +84,13 @@ struct ThreadCtx { std::vector patch_feats; cv::Mat in_all, fw_all, ifft2_res, response; - ComplexMat zf, kzf, kf, xyf, xf; + ComplexMat zf, kzf, kf, xyf; - // CuFFT variables - cv::Mat rot_labels; - DynMem gauss_corr_res, rot_labels_data, data_features, data_f, data_i_features, data_i_1ch; + DynMem data_i_features, data_i_1ch; + // CuFFT and FFTW variables + DynMem gauss_corr_res, data_features; + // CuFFT variables cudaStream_t stream = nullptr; ComplexMat model_alphaf, model_xf; @@ -147,7 +101,6 @@ struct ThreadCtx { std::vector max_responses; std::vector max_locs; std::vector response_maps; - bool zero_index = false; }; #endif // SCALE_VARS_HPP -- 2.39.2