From b4530b895ff0af4ea6ba71645b98973988d9d81c Mon Sep 17 00:00:00 2001 From: Shanigen Date: Mon, 30 Jul 2018 17:32:25 +0200 Subject: [PATCH] Work done so far on making Scale_var struct This is the work done so far on making the tracker workflow easier to follow; it is also a step towards adding support for CUDA streams. --- src/CMakeLists.txt | 2 +- src/fft.h | 16 ++- src/fft_cufft.cpp | 47 ++++--- src/fft_cufft.h | 15 +- src/fft_fftw.cpp | 19 ++- src/fft_fftw.h | 15 +- src/fft_opencv.cpp | 51 ++++++- src/fft_opencv.h | 15 +- src/kcf.cpp | 295 ++++++++++++++++------------------------ src/kcf.h | 18 +-- src/piotr_fhog/fhog.hpp | 13 +- src/scale_vars.hpp | 35 +++++ 12 files changed, 301 insertions(+), 240 deletions(-) create mode 100644 src/scale_vars.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bb31ccb..a7adb53 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -61,7 +61,7 @@ IF((FFT STREQUAL "OpenCV") AND BIG_BATCH) message(SEND_ERROR "OpenCV version does not support big batch mode.") ENDIF() -IF((FFT STREQUAL "cuFFT") AND (ASYNC)) +IF((FFT STREQUAL "cuFFT") AND (ASYNC OR (OPENMP AND NOT BIG_BATCH))) message(SEND_ERROR "cuFFT version does not support ASYNC and OpenMP only if used with big batch mode.") ENDIF() diff --git a/src/fft.h b/src/fft.h index c8ce998..a2b9730 100644 --- a/src/fft.h +++ b/src/fft.h @@ -4,6 +4,7 @@ #include #include +#include "scale_vars.hpp" #ifdef CUFFT #include "complexmat.cuh" @@ -11,16 +12,21 @@ #include "complexmat.hpp" #endif +struct Scale_vars; + class Fft { public: virtual void init(unsigned width, unsigned height,unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) = 0; - virtual void set_window(const cv::Mat &window) = 0; - virtual ComplexMat forward(const cv::Mat &input) = 0; + virtual void set_window(const cv::Mat & window) = 0; + virtual ComplexMat forward(const cv::Mat & input) = 0; + virtual void forward(Scale_vars & vars) = 0; virtual ComplexMat forward_raw(float *input, bool all_scales) = 0; -
virtual ComplexMat forward_window(const std::vector &input) = 0; - virtual cv::Mat inverse(const ComplexMat &input) = 0; - virtual float* inverse_raw(const ComplexMat &input) = 0; + virtual ComplexMat forward_window(const std::vector & input) = 0; + virtual void forward_window(Scale_vars & vars) = 0; + virtual cv::Mat inverse(const ComplexMat & input) = 0; + virtual void inverse(Scale_vars & vars) = 0; + virtual float* inverse_raw(const ComplexMat & input) = 0; virtual ~Fft() = 0; }; diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp index f220015..cfd3dee 100644 --- a/src/fft_cufft.cpp +++ b/src/fft_cufft.cpp @@ -23,17 +23,17 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne { CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal))); - int rank = 2; - int n[] = {(int)m_height, (int)m_width}; - int howmany = m_num_of_scales; - int idist = m_height*m_width, odist = m_height*(m_width/2+1); - int istride = 1, ostride = 1; - int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1}; - - CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, - inembed, istride, idist, - onembed, ostride, odist, - CUFFT_R2C, howmany)); + int rank = 2; + int n[] = {(int)m_height, (int)m_width}; + int howmany = m_num_of_scales; + int idist = m_height*m_width, odist = m_height*(m_width/2+1); + int istride = 1, ostride = 1; + int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1}; + + CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, + inembed, istride, idist, + onembed, ostride, odist, + CUFFT_R2C, howmany)); } //FFT forward window one scale { @@ -144,12 +144,12 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne } } -void cuFFT::set_window(const cv::Mat &window) +void cuFFT::set_window(const cv::Mat & window) { m_window = window; } -ComplexMat cuFFT::forward(const cv::Mat &input) +ComplexMat cuFFT::forward(const cv::Mat & input) { ComplexMat complex_result; 
if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){ @@ -167,6 +167,11 @@ ComplexMat cuFFT::forward(const cv::Mat &input) return complex_result; } +void cuFFT::forward(Scale_var & vars) +{ + return; +} + ComplexMat cuFFT::forward_raw(float *input, bool all_scales) { ComplexMat complex_result; @@ -182,7 +187,7 @@ ComplexMat cuFFT::forward_raw(float *input, bool all_scales) return complex_result; } -ComplexMat cuFFT::forward_window(const std::vector &input) +ComplexMat cuFFT::forward_window(const std::vector & input) { int n_channels = input.size(); ComplexMat result; @@ -210,7 +215,12 @@ ComplexMat cuFFT::forward_window(const std::vector &input) return result; } -cv::Mat cuFFT::inverse(const ComplexMat &input) +void cuFFT::forward_window(Scale_var & vars) +{ + return; +} + +cv::Mat cuFFT::inverse(const ComplexMat & input) { int n_channels = input.n_channels; cufftComplex *in = reinterpret_cast(input.get_p_data()); @@ -246,7 +256,12 @@ cv::Mat cuFFT::inverse(const ComplexMat &input) return real_result/(m_width*m_height); } -float* cuFFT::inverse_raw(const ComplexMat &input) +void cuFFT::inverse(Scale_var & vars) +{ + return; +} + +float* cuFFT::inverse_raw(const ComplexMat & input) { int n_channels = input.n_channels; cufftComplex *in = reinterpret_cast(input.get_p_data()); diff --git a/src/fft_cufft.h b/src/fft_cufft.h index a71bf34..6d4aba4 100644 --- a/src/fft_cufft.h +++ b/src/fft_cufft.h @@ -16,16 +16,21 @@ #include #include +struct Scale_vars; + class cuFFT : public Fft { public: void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override; - void set_window(const cv::Mat &window) override; - ComplexMat forward(const cv::Mat &input) override; + void set_window(const cv::Mat & window) override; + ComplexMat forward(const cv::Mat & input) override; + void forward(Scale_vars & vars) override; ComplexMat forward_raw(float *input, bool all_scales) override; - ComplexMat forward_window(const 
std::vector &input) override; - cv::Mat inverse(const ComplexMat &input) override; - float* inverse_raw(const ComplexMat &input) override; + ComplexMat forward_window(const std::vector & input) override; + void forward_window(Scale_vars & vars) override; + cv::Mat inverse(const ComplexMat & input) override; + void inverse(Scale_vars & vars) override; + float* inverse_raw(const ComplexMat & input) override; ~cuFFT() override; private: cv::Mat m_window; diff --git a/src/fft_fftw.cpp b/src/fft_fftw.cpp index ffeadbe..9258644 100644 --- a/src/fft_fftw.cpp +++ b/src/fft_fftw.cpp @@ -188,7 +188,7 @@ void Fftw::set_window(const cv::Mat &window) m_window = window; } -ComplexMat Fftw::forward(const cv::Mat &input) +ComplexMat Fftw::forward(const cv::Mat & input) { ComplexMat complex_result; if(m_big_batch_mode && input.rows == (int)(m_height*m_num_of_scales)){ @@ -203,13 +203,18 @@ ComplexMat Fftw::forward(const cv::Mat &input) return complex_result; } +void Fftw::forward(Scale_var & vars) +{ + return; +} + ComplexMat Fftw::forward_raw(float *input, bool all_scales) { ComplexMat dummy; return dummy; } -ComplexMat Fftw::forward_window(const std::vector &input) +ComplexMat Fftw::forward_window(const std::vector & input) { int n_channels = input.size(); cv::Mat in_all(m_height * n_channels, m_width, CV_32F); @@ -234,6 +239,11 @@ ComplexMat Fftw::forward_window(const std::vector &input) return result; } +void Fftw::forward_window(Scale_var & vars) +{ + return; +} + cv::Mat Fftw::inverse(const ComplexMat &input) { int n_channels = input.n_channels; @@ -253,6 +263,11 @@ cv::Mat Fftw::inverse(const ComplexMat &input) return real_result/(m_width*m_height); } +void Fftw::inverse(Scale_var & vars) +{ + return; +} + float* Fftw::inverse_raw(const ComplexMat &input) { return nullptr; diff --git a/src/fft_fftw.h b/src/fft_fftw.h index 8c23cda..058dd2f 100644 --- a/src/fft_fftw.h +++ b/src/fft_fftw.h @@ -14,18 +14,23 @@ #include #endif //CUFFTW +struct Scale_vars; + class Fftw : public 
Fft { public: Fftw(); Fftw(int num_of_threads); void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override; - void set_window(const cv::Mat &window) override; - ComplexMat forward(const cv::Mat &input) override; + void set_window(const cv::Mat & window) override; + ComplexMat forward(const cv::Mat & input) override; + void forward(Scale_vars & vars) override; ComplexMat forward_raw(float *input, bool all_scales) override; - ComplexMat forward_window(const std::vector &input) override; - cv::Mat inverse(const ComplexMat &input) override; - float* inverse_raw(const ComplexMat &input) override; + ComplexMat forward_window(const std::vector & input) override; + void forward_window(Scale_vars & vars) override; + cv::Mat inverse(const ComplexMat & input) override; + void inverse(Scale_vars & vars) override; + float* inverse_raw(const ComplexMat & input) override; ~Fftw() override; private: unsigned m_num_threads = 6; diff --git a/src/fft_opencv.cpp b/src/fft_opencv.cpp index e8046eb..b69b84e 100644 --- a/src/fft_opencv.cpp +++ b/src/fft_opencv.cpp @@ -10,25 +10,36 @@ void FftOpencv::init(unsigned width, unsigned height, unsigned num_of_feats, uns std::cout << "FFT: OpenCV" << std::endl; } -void FftOpencv::set_window(const cv::Mat &window) +void FftOpencv::set_window(const cv::Mat & window) { m_window = window; } -ComplexMat FftOpencv::forward(const cv::Mat &input) +ComplexMat FftOpencv::forward(const cv::Mat & input) { cv::Mat complex_result; cv::dft(input, complex_result, cv::DFT_COMPLEX_OUTPUT); return ComplexMat(complex_result); } +void FftOpencv::forward(Scale_vars & vars) +{ + cv::Mat complex_result; + cv::dft(vars.in_all, complex_result, cv::DFT_COMPLEX_OUTPUT); + if (vars.flag & Track_flags::AUTO_CORRELATION) + vars.kf = ComplexMat(complex_result); + else + vars.kzf = ComplexMat(complex_result); + return; +} + ComplexMat FftOpencv::forward_raw(float *input, bool all_scales) { ComplexMat dummy; return 
dummy; } -ComplexMat FftOpencv::forward_window(const std::vector &input) +ComplexMat FftOpencv::forward_window(const std::vector & input) { int n_channels = input.size(); ComplexMat result(input[0].rows, input[0].cols, n_channels); @@ -41,7 +52,19 @@ ComplexMat FftOpencv::forward_window(const std::vector &input) return result; } -cv::Mat FftOpencv::inverse(const ComplexMat &input) +void FftOpencv::forward_window(Scale_vars & vars) +{ + int n_channels = vars.patch_feats.size(); + + for (int i = 0; i < n_channels; ++i) { + cv::Mat complex_result; + cv::dft(vars.patch_feats[i].mul(m_window), complex_result, cv::DFT_COMPLEX_OUTPUT); + vars.zf.set_channel(i, complex_result); + } + return; +} + +cv::Mat FftOpencv::inverse(const ComplexMat & input) { cv::Mat real_result; if (input.n_channels == 1) { @@ -57,7 +80,25 @@ cv::Mat FftOpencv::inverse(const ComplexMat &input) return real_result; } -float* FftOpencv::inverse_raw(const ComplexMat &input) +void FftOpencv::inverse(Scale_vars & vars) +{ + ComplexMat *input = vars.flag & Track_flags::RESPONSE ? & vars.kzf : & vars.xyf; + cv::Mat *result = vars.flag & Track_flags::RESPONSE ? 
& vars.response : & vars.ifft2_res; + + if (input->n_channels == 1) { + cv::dft(input->to_cv_mat(), *result, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE); + } else { + std::vector mat_channels = input->to_cv_mat_vector(); + std::vector ifft_mats(input->n_channels); + for (int i = 0; i < input->n_channels; ++i) { + cv::dft(mat_channels[i], ifft_mats[i], cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE); + } + cv::merge(ifft_mats, *result); + } + return; +} + +float* FftOpencv::inverse_raw(const ComplexMat & input) { return nullptr; } diff --git a/src/fft_opencv.h b/src/fft_opencv.h index 7050b2e..92648c3 100644 --- a/src/fft_opencv.h +++ b/src/fft_opencv.h @@ -4,16 +4,21 @@ #include "fft.h" +struct Scale_vars; + class FftOpencv : public Fft { public: void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override; - void set_window(const cv::Mat &window) override; - ComplexMat forward(const cv::Mat &input) override; + void set_window(const cv::Mat & window) override; + ComplexMat forward(const cv::Mat & input) override; + void forward(Scale_vars & vars) override; ComplexMat forward_raw(float *input, bool all_scales) override; - ComplexMat forward_window(const std::vector &input) override; - cv::Mat inverse(const ComplexMat &input) override; - float* inverse_raw(const ComplexMat &input) override; + ComplexMat forward_window(const std::vector & input) override; + void forward_window(Scale_vars & vars) override; + cv::Mat inverse(const ComplexMat & input) override; + void inverse(Scale_vars & vars) override; + float* inverse_raw(const ComplexMat & input) override; ~FftOpencv() override; private: cv::Mat m_window; diff --git a/src/kcf.cpp b/src/kcf.cpp index 2c74972..d1776ee 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -19,8 +19,8 @@ #include #endif //OPENMP -#define DEBUG_PRINT(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl;} -#define DEBUG_PRINTM(obj) 
if (m_debug) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;} +#define DEBUG_PRINT(obj) if (m_debug) {std::cout << #obj << " @" /*<< __LINE__*/ << std::endl << (obj) << std::endl;} +#define DEBUG_PRINTM(obj) if (m_debug) {std::cout << #obj << " @" /*<< __LINE__ */<< " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;} KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) : fft(*new FFT()), @@ -137,9 +137,20 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int p_scales.push_back(1.); for (int i = 0;i 1024) { @@ -157,18 +168,22 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int cudaSetDeviceFlags(cudaDeviceMapHost); for (int i = 0;i(p_cell_size); //window weights, i.e. labels - p_num_of_feats = 31; - if(m_use_color) p_num_of_feats += 3; - if(m_use_cnfeat) p_num_of_feats += 10; - p_roi_width = p_windows_size[0]/p_cell_size; - p_roi_height = p_windows_size[1]/p_cell_size; - fft.init(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size, p_num_of_feats, p_num_scales, m_use_big_batch); p_yf = fft.forward(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size)); fft.set_window(cosine_window_function(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size)); //obtain a sub-window for training initial model - std::vector path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]); - p_model_xf = fft.forward_window(path_feat); + get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], scale_vars[0]); + p_model_xf = fft.forward_window(scale_vars[0].patch_feats); DEBUG_PRINTM(p_model_xf); + scale_vars[0].flag = Track_flags::AUTO_CORRELATION; if (m_use_linearkernel) { 
ComplexMat xfconj = p_model_xf.conj(); @@ -207,11 +217,11 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int p_model_alphaf_den = (p_model_xf * xfconj); } else { //Kernel Ridge Regression, calculate alphas (in Fourier domain) - ComplexMat kf = gaussian_correlation(scale_vars[0], p_model_xf, p_model_xf, p_kernel_sigma, true); - DEBUG_PRINTM(kf); - p_model_alphaf_num = p_yf * kf; + gaussian_correlation(scale_vars[0], p_model_xf, p_model_xf, p_kernel_sigma, true); + DEBUG_PRINTM(scale_vars[0].kf); + p_model_alphaf_num = p_yf * scale_vars[0].kf; DEBUG_PRINTM(p_model_alphaf_num); - p_model_alphaf_den = kf * (kf + p_lambda); + p_model_alphaf_den = scale_vars[0].kf * (scale_vars[0].kf + p_lambda); DEBUG_PRINTM(p_model_alphaf_den); } p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; @@ -284,147 +294,36 @@ void KCF_Tracker::track(cv::Mat &img) } } - - std::vector patch_feat; double max_response = -1.; - cv::Mat max_response_map; - cv::Point2i max_response_pt; int scale_index = 0; - std::vector scale_responses; + cv::Point2i *max_response_pt = nullptr; + cv::Mat *max_response_map = nullptr; - if (m_use_multithreading){ - std::vector> async_res(p_scales.size()); - for (size_t i = 0; i < p_scales.size(); ++i) { - async_res[i] = std::async(std::launch::async, - [this, &input_gray, &input_rgb, i]() -> cv::Mat - { - std::vector patch_feat_async = get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size[0], - this->p_windows_size[1], this->p_current_scale * this->p_scales[i]); - ComplexMat zf = fft.forward_window(patch_feat_async); - if (m_use_linearkernel) - return fft.inverse((p_model_alphaf * zf).sum_over_channels()); - else { - ComplexMat kzf = gaussian_correlation(this->scale_vars[i], zf, this->p_model_xf, this->p_kernel_sigma); - return fft.inverse(this->p_model_alphaf * kzf); - } - }); - } + for (size_t i = 0; i < p_scales.size(); ++i) { + scale_track(this->scale_vars[i], input_rgb, input_gray, 
this->p_current_scale * this->p_scales[i]); - for (size_t i = 0; i < p_scales.size(); ++i) { - // wait for result - async_res[i].wait(); - cv::Mat response = async_res[i].get(); - - double min_val, max_val; - cv::Point2i min_loc, max_loc; - cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc); - - double weight = p_scales[i] < 1. ? p_scales[i] : 1./p_scales[i]; - if (max_val*weight > max_response) { - max_response = max_val*weight; - max_response_map = response; - max_response_pt = max_loc; - scale_index = i; - } - scale_responses.push_back(max_val*weight); - } - } else if (m_use_big_batch){ -#pragma omp parallel for ordered - for (size_t i = 0; i < p_scales.size(); ++i) { - std::vector tmp = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]); -#pragma omp ordered - patch_feat.insert(std::end(patch_feat), std::begin(tmp), std::end(tmp)); - } - ComplexMat zf = fft.forward_window(patch_feat); - DEBUG_PRINTM(zf); - cv::Mat response; - - if (m_use_linearkernel) - response = fft.inverse((zf.mul2(p_model_alphaf)).sum_over_channels()); - else { - ComplexMat kzf = gaussian_correlation(scale_vars[0], zf, p_model_xf, p_kernel_sigma); - DEBUG_PRINTM(p_model_alphaf); - DEBUG_PRINTM(kzf); - response = fft.inverse(kzf.mul(p_model_alphaf)); - } - DEBUG_PRINTM(response); - std::vector scales; - cv::split(response,scales); - - /* target location is at the maximum response. we must take into - account the fact that, if the target doesn't move, the peak - will appear at the top-left corner, not at the center (this is - discussed in the paper). the responses wrap around cyclically. */ - for (size_t i = 0; i < p_scales.size(); ++i) { - double min_val, max_val; - cv::Point2i min_loc, max_loc; - cv::minMaxLoc(scales[i], &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(max_loc); - - double weight = p_scales[i] < 1. ? 
p_scales[i] : 1./p_scales[i]; - - if (max_val*weight > max_response) { - max_response = max_val*weight; - max_response_map = scales[i]; - max_response_pt = max_loc; - scale_index = i; - } - scale_responses.push_back(max_val*weight); - } - } else { -#pragma omp parallel for ordered private(patch_feat) schedule(dynamic) - for (size_t i = 0; i < p_scales.size(); ++i) { - patch_feat = get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size[0], this->p_windows_size[1], this->p_current_scale * this->p_scales[i]); - ComplexMat zf = fft.forward_window(patch_feat); - DEBUG_PRINTM(zf); - cv::Mat response; - if (m_use_linearkernel) - response = fft.inverse((p_model_alphaf * zf).sum_over_channels()); - else { - ComplexMat kzf = gaussian_correlation(this->scale_vars[i], zf, this->p_model_xf, this->p_kernel_sigma); - DEBUG_PRINTM(p_model_alphaf); - DEBUG_PRINTM(kzf); - DEBUG_PRINTM(p_model_alphaf * kzf); - response = fft.inverse(this->p_model_alphaf * kzf); - } - DEBUG_PRINTM(response); - - /* target location is at the maximum response. we must take into - account the fact that, if the target doesn't move, the peak - will appear at the top-left corner, not at the center (this is - discussed in the paper). the responses wrap around cyclically. */ - double min_val, max_val; - cv::Point2i min_loc, max_loc; - cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(max_loc); - - double weight = this->p_scales[i] < 1. ? 
this->p_scales[i] : 1./this->p_scales[i]; -#pragma omp critical - { - if (max_val*weight > max_response) { - max_response = max_val*weight; - max_response_map = response; - max_response_pt = max_loc; - scale_index = i; - } - } -#pragma omp ordered - scale_responses.push_back(max_val*weight); + if (this->scale_vars[i].max_response > max_response) { + max_response = this->scale_vars[i].max_response; + max_response_pt = & this->scale_vars[i].max_loc; + max_response_map = & this->scale_vars[i].response; + scale_index = i; } } - DEBUG_PRINTM(max_response_map); - DEBUG_PRINT(max_response_pt); + + DEBUG_PRINTM(*max_response_map); + DEBUG_PRINT(*max_response_pt); + //sub pixel quadratic interpolation from neighbours - if (max_response_pt.y > max_response_map.rows / 2) //wrap around to negative half-space of vertical axis - max_response_pt.y = max_response_pt.y - max_response_map.rows; - if (max_response_pt.x > max_response_map.cols / 2) //same for horizontal axis - max_response_pt.x = max_response_pt.x - max_response_map.cols; + if (max_response_pt->y > max_response_map->rows / 2) //wrap around to negative half-space of vertical axis + max_response_pt->y = max_response_pt->y - max_response_map->rows; + if (max_response_pt->x > max_response_map->cols / 2) //same for horizontal axis + max_response_pt->x = max_response_pt->x - max_response_map->cols; - cv::Point2f new_location(max_response_pt.x, max_response_pt.y); + cv::Point2f new_location(max_response_pt->x, max_response_pt->y); DEBUG_PRINT(new_location); if (m_use_subpixel_localization) - new_location = sub_pixel_peak(max_response_pt, max_response_map); + new_location = sub_pixel_peak(*max_response_pt, *max_response_map); DEBUG_PRINT(new_location); p_pose.cx += p_current_scale*p_cell_size*new_location.x; @@ -444,7 +343,7 @@ void KCF_Tracker::track(cv::Mat &img) //sub grid scale interpolation double new_scale = p_scales[scale_index]; if (m_use_subgrid_scale) - new_scale = sub_grid_scale(scale_responses, scale_index); + 
new_scale = sub_grid_scale(scale_index); p_current_scale *= new_scale; @@ -453,13 +352,14 @@ void KCF_Tracker::track(cv::Mat &img) if (p_current_scale > p_min_max_scale[1]) p_current_scale = p_min_max_scale[1]; //obtain a subwindow for training at newly estimated target position - patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale); - ComplexMat xf = fft.forward_window(patch_feat); + get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], scale_vars[0], p_current_scale); + ComplexMat xf = fft.forward_window(scale_vars[0].patch_feats); //subsequent frames, interpolate model p_model_xf = p_model_xf * (1. - p_interp_factor) + xf * p_interp_factor; ComplexMat alphaf_num, alphaf_den; + scale_vars[0].flag = Track_flags::AUTO_CORRELATION; if (m_use_linearkernel) { ComplexMat xfconj = xf.conj(); @@ -467,11 +367,11 @@ void KCF_Tracker::track(cv::Mat &img) alphaf_den = (xf * xfconj); } else { //Kernel Ridge Regression, calculate alphas (in Fourier domain) - ComplexMat kf = gaussian_correlation(scale_vars[0], xf, xf, p_kernel_sigma, true); + gaussian_correlation(scale_vars[0], xf, xf, p_kernel_sigma, true); // ComplexMat alphaf = p_yf / (kf + p_lambda); //equation for fast training // p_model_alphaf = p_model_alphaf * (1. - p_interp_factor) + alphaf * p_interp_factor; - alphaf_num = p_yf * kf; - alphaf_den = kf * (kf + p_lambda); + alphaf_num = p_yf * scale_vars[0].kf; + alphaf_den = scale_vars[0].kf * (scale_vars[0].kf + p_lambda); } p_model_alphaf_num = p_model_alphaf_num * (1. 
- p_interp_factor) + alphaf_num * p_interp_factor; @@ -481,7 +381,45 @@ void KCF_Tracker::track(cv::Mat &img) // **************************************************************************** -std::vector KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale) +void KCF_Tracker::scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat & input_gray, double scale) +{ + get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size[0], this->p_windows_size[1], + vars, scale); + for (size_t i = 0; ip_model_xf, this->p_kernel_sigma); + + DEBUG_PRINTM(p_model_alphaf); + DEBUG_PRINTM(vars.kzf); + DEBUG_PRINTM(p_model_alphaf * vars.kzf); + + vars.flag = Track_flags::RESPONSE; + vars.kzf = p_model_alphaf * vars.kzf; + fft.inverse(vars); + + DEBUG_PRINTM(vars.response); + + /* target location is at the maximum response. we must take into + account the fact that, if the target doesn't move, the peak + will appear at the top-left corner, not at the center (this is + discussed in the paper). the responses wrap around cyclically. */ + double min_val; + cv::Point2i min_loc; + cv::minMaxLoc(vars.response, &min_val, &vars.max_val, &min_loc, &vars.max_loc); + + DEBUG_PRINT(vars.max_loc); + + double weight = scale < 1. ? 
scale : 1./scale; + vars.max_response = vars.max_val*weight; +} + +void KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, Scale_vars &vars, double scale) { int size_x_scaled = floor(size_x*scale); int size_y_scaled = floor(size_y*scale); @@ -498,7 +436,7 @@ std::vector KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & in } // get hog(Histogram of Oriented Gradients) features - std::vector hog_feat = FHoG::extract(patch_gray, 2, p_cell_size, 9); + FHoG::extract(patch_gray, vars, 2, p_cell_size, 9); //get color rgb features (simple r,g,b channels) std::vector color_feat; @@ -529,8 +467,8 @@ std::vector KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & in color_feat.insert(color_feat.end(), cn_feat.begin(), cn_feat.end()); } - hog_feat.insert(hog_feat.end(), color_feat.begin(), color_feat.end()); - return hog_feat; + vars.patch_feats.insert(vars.patch_feats.end(), color_feat.begin(), color_feat.end()); + return; } cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) @@ -638,7 +576,7 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) // Returns sub-window of image input centered at [cx, cy] coordinates), // with size [width, height]. If any pixels are outside of the image, // they will replicate the values at the borders. 
-cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height) +cv::Mat KCF_Tracker::get_subwindow(const cv::Mat & input, int cx, int cy, int width, int height) { cv::Mat patch; @@ -692,7 +630,7 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid return patch; } -ComplexMat KCF_Tracker::gaussian_correlation(struct Scale_var &vars, const ComplexMat &xf, const ComplexMat &yf, double sigma, bool auto_correlation) +void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMat & xf, const ComplexMat & yf, double sigma, bool auto_correlation) { #ifdef CUFFT xf.sqr_norm(vars.xf_sqr_norm_d); @@ -706,9 +644,8 @@ ComplexMat KCF_Tracker::gaussian_correlation(struct Scale_var &vars, const Compl yf.sqr_norm(vars.yf_sqr_norm); } #endif - ComplexMat xyf; - xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj()); - DEBUG_PRINTM(xyf); + vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj()); + DEBUG_PRINTM(vars.xyf); #ifdef CUFFT if(auto_correlation) cuda_gaussian_correlation(fft.inverse_raw(xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width); @@ -718,20 +655,21 @@ ComplexMat KCF_Tracker::gaussian_correlation(struct Scale_var &vars, const Compl return fft.forward_raw(vars.gauss_corr_res, xf.n_scales==p_num_scales); #else //ifft2 and sum over 3rd dimension, we dont care about individual channels - cv::Mat ifft2_res = fft.inverse(xyf); - DEBUG_PRINTM(ifft2_res); + fft.inverse(vars); + DEBUG_PRINTM(vars.ifft2_res); cv::Mat xy_sum; if (xf.channels() != p_num_scales*p_num_of_feats) - xy_sum.create(ifft2_res.size(), CV_32FC1); + xy_sum.create(vars.ifft2_res.size(), CV_32FC1); else - xy_sum.create(ifft2_res.size(), CV_32FC(p_scales.size())); + xy_sum.create(vars.ifft2_res.size(), CV_32FC(p_scales.size())); xy_sum.setTo(0); - for (int y = 0; y < ifft2_res.rows; ++y) { - float * row_ptr = ifft2_res.ptr(y); + for (int y 
= 0; y < vars.ifft2_res.rows; ++y) { + float * row_ptr = vars.ifft2_res.ptr(y); float * row_ptr_sum = xy_sum.ptr(y); - for (int x = 0; x < ifft2_res.cols; ++x) { + for (int x = 0; x < vars.ifft2_res.cols; ++x) { for (int sum_ch = 0; sum_ch < xy_sum.channels(); ++sum_ch) { - row_ptr_sum[(x*xy_sum.channels())+sum_ch] += std::accumulate(row_ptr + x*ifft2_res.channels() + sum_ch*(ifft2_res.channels()/xy_sum.channels()), (row_ptr + x*ifft2_res.channels() + (sum_ch+1)*(ifft2_res.channels()/xy_sum.channels())), 0.f); + row_ptr_sum[(x*xy_sum.channels())+sum_ch] += std::accumulate(row_ptr + x*vars.ifft2_res.channels() + sum_ch*(vars.ifft2_res.channels()/xy_sum.channels()), + (row_ptr + x*vars.ifft2_res.channels() + (sum_ch+1)*(vars.ifft2_res.channels()/xy_sum.channels())), 0.f); } } } @@ -739,17 +677,18 @@ ComplexMat KCF_Tracker::gaussian_correlation(struct Scale_var &vars, const Compl std::vector scales; cv::split(xy_sum,scales); - cv::Mat in_all(scales[0].rows * xf.n_scales, scales[0].cols, CV_32F); + vars.in_all = cv::Mat(scales[0].rows * xf.n_scales, scales[0].cols, CV_32F); float numel_xf_inv = 1.f/(xf.cols * xf.rows * (xf.channels()/xf.n_scales)); for (int i = 0; i < xf.n_scales; ++i){ - cv::Mat in_roi(in_all, cv::Rect(0, i*scales[0].rows, scales[0].cols, scales[0].rows)); + cv::Mat in_roi(vars.in_all, cv::Rect(0, i*scales[0].rows, scales[0].cols, scales[0].rows)); cv::exp(- 1.f / (sigma * sigma) * cv::max((vars.xf_sqr_norm[i] + vars.yf_sqr_norm[0] - 2 * scales[i]) * numel_xf_inv, 0), in_roi); DEBUG_PRINTM(in_roi); } - DEBUG_PRINTM(in_all); - return fft.forward(in_all); + DEBUG_PRINTM(vars.in_all ); + fft.forward(vars); + return; #endif } @@ -817,7 +756,7 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point & max_loc, cv::Mat & response) return sub_peak; } -double KCF_Tracker::sub_grid_scale(std::vector & responses, int index) +double KCF_Tracker::sub_grid_scale(int index) { cv::Mat A, fval; if (index < 0 || index > (int)p_scales.size()-1) { @@ -829,7 +768,7 @@ double 
KCF_Tracker::sub_grid_scale(std::vector & responses, int index) A.at(i, 0) = p_scales[i] * p_scales[i]; A.at(i, 1) = p_scales[i]; A.at(i, 2) = 1; - fval.at(i) = responses[i]; + fval.at(i) = scale_vars[i].max_response; } } else { //only from neighbours @@ -840,7 +779,7 @@ double KCF_Tracker::sub_grid_scale(std::vector & responses, int index) p_scales[index-1] * p_scales[index-1], p_scales[index-1], 1, p_scales[index] * p_scales[index], p_scales[index], 1, p_scales[index+1] * p_scales[index+1], p_scales[index+1], 1); - fval = (cv::Mat_(3, 1) << responses[index-1], responses[index], responses[index+1]); + fval = (cv::Mat_(3, 1) << scale_vars[index-1].max_response, scale_vars[index].max_response, scale_vars[index+1].max_response); } cv::Mat x; diff --git a/src/kcf.h b/src/kcf.h index e652b96..b689152 100644 --- a/src/kcf.h +++ b/src/kcf.h @@ -16,6 +16,7 @@ #include "cnfeat.hpp" #include "fft.h" +#include "scale_vars.hpp" struct BBox_c { @@ -48,14 +49,6 @@ struct BBox_c }; -struct Scale_var -{ - float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr; -#ifdef CUFFT - float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr; -#endif -}; - class KCF_Tracker { public: @@ -134,7 +127,7 @@ private: int p_num_of_feats; int p_roi_height, p_roi_width; - std::vector scale_vars; + std::vector scale_vars; //model ComplexMat p_yf; @@ -143,14 +136,15 @@ private: ComplexMat p_model_alphaf_den; ComplexMat p_model_xf; //helping functions + void scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat & input_gray, double scale); cv::Mat get_subwindow(const cv::Mat & input, int cx, int cy, int size_x, int size_y); cv::Mat gaussian_shaped_labels(double sigma, int dim1, int dim2); - ComplexMat gaussian_correlation(struct Scale_var &vars, const ComplexMat & xf, const ComplexMat & yf, double sigma, bool auto_correlation = false); + void gaussian_correlation(struct Scale_vars &vars, const ComplexMat & xf, const ComplexMat & yf, double sigma, bool auto_correlation = 
false); cv::Mat circshift(const cv::Mat & patch, int x_rot, int y_rot); cv::Mat cosine_window_function(int dim1, int dim2); - std::vector get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.); + void get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, Scale_vars & vars, double scale = 1.); cv::Point2f sub_pixel_peak(cv::Point & max_loc, cv::Mat & response); - double sub_grid_scale(std::vector & responses, int index = -1); + double sub_grid_scale(int index = -1); }; diff --git a/src/piotr_fhog/fhog.hpp b/src/piotr_fhog/fhog.hpp index d3b9d16..291d03b 100644 --- a/src/piotr_fhog/fhog.hpp +++ b/src/piotr_fhog/fhog.hpp @@ -11,7 +11,9 @@ #include #include "gradientMex.h" +#include "scale_vars.hpp" +struct Scale_vars; class FHoG { @@ -19,7 +21,7 @@ public: //description: extract hist. of gradients(use_hog == 0), hog(use_hog == 1) or fhog(use_hog == 2) //input: float one channel image as input, hog type //return: computed descriptor - static std::vector extract(const cv::Mat & img, int use_hog = 2, int bin_size = 4, int n_orients = 9, int soft_bin = -1, float clip = 0.2) + static void extract(const cv::Mat & img, Scale_vars & vars,int use_hog = 2, int bin_size = 4, int n_orients = 9, int soft_bin = -1, float clip = 0.2) { // d image dimension -> gray image d = 1 // h, w -> height, width of image @@ -29,7 +31,7 @@ public: bool full = true; if (h < 2 || w < 2) { std::cerr << "I must be at least 2x2." << std::endl; - return std::vector(); + return; } // //image rows-by-rows @@ -69,9 +71,8 @@ public: } //convert, assuming row-by-row-by-channel storage - std::vector res; int n_res_channels = (use_hog == 2) ? 
n_chns-1 : n_chns; //last channel all zeros for fhog - res.reserve(n_res_channels); + vars.patch_feats.clear(); for (int i = 0; i < n_res_channels; ++i) { //output rows-by-rows // cv::Mat desc(hb, wb, CV_32F, (H+hb*wb*i)); @@ -84,7 +85,7 @@ public: } } - res.push_back(desc.clone()); + vars.patch_feats.push_back(desc.clone()); } //clean @@ -93,7 +94,7 @@ public: delete [] O; delete [] H; - return res; + return; } }; diff --git a/src/scale_vars.hpp b/src/scale_vars.hpp new file mode 100644 index 0000000..458ed43 --- /dev/null +++ b/src/scale_vars.hpp @@ -0,0 +1,35 @@ +#ifndef SCALE_VARS_HPP +#define SCALE_VARS_HPP + +#ifdef CUFFT + #include "complexmat.cuh" +#else + #include "complexmat.hpp" +#endif + +enum Track_flags +{ + RESPONSE = 1 << 0, // binary 0001 + AUTO_CORRELATION = 1 << 1, // binary 0010 + CROSS_CORRELATION = 1 << 2, // binary 0100 +}; + +struct Scale_vars +{ + float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr; +#ifdef CUFFT + float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr; +#endif + + std::vector patch_feats; + + cv::Mat in_all, ifft2_res, response; + ComplexMat zf, kzf, kf, xyf; + + Track_flags flag; + + cv::Point2i max_loc; + double max_val, max_response; +}; + +#endif // SCALE_VARS_HPP -- 2.39.2