X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/blobdiff_plain/c61d560c5012596085648e41b7319121bc1be51b..4321d083e1d533cbfa0a1a88599a52f7edd685d9:/src/kcf.cpp

diff --git a/src/kcf.cpp b/src/kcf.cpp
index ef1c1a8..60db47b 100644
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -1,531 +1,496 @@
 #include "kcf.h"
 #include
 #include
-#include
 #include
+#include "threadctx.hpp"
+#include "debug.h"
+#include

 #ifdef FFTW
-  #include "fft_fftw.h"
-  #define FFT Fftw
+#include "fft_fftw.h"
+#define FFT Fftw
 #elif defined(CUFFT)
-  #include "fft_cufft.h"
-  #define FFT cuFFT
+#include "fft_cufft.h"
+#define FFT cuFFT
 #else
-  #include "fft_opencv.h"
-  #define FFT FftOpencv
+#include "fft_opencv.h"
+#define FFT FftOpencv
 #endif

 #ifdef OPENMP
 #include
-#endif //OPENMP
+#endif // OPENMP

-#define DEBUG_PRINT(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl;}
-#define DEBUG_PRINTM(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;}
+DbgTracer __dbgTracer;

-KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) :
-    fft(*new FFT()),
-    p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma),
-    p_lambda(lambda), p_interp_factor(interp_factor), p_cell_size(cell_size) {}
+template <typename T>
+T clamp(const T& n, const T& lower, const T& upper)
+{
+    return std::max(lower, std::min(n, upper));
+}
+
+template <typename T>
+void clamp2(T& n, const T& lower, const T& upper)
+{
+    n = std::max(lower, std::min(n, upper));
+}
+
+#if CV_MAJOR_VERSION < 3
+template <typename _Tp> static inline
+cv::Size_<_Tp> operator / (const cv::Size_<_Tp>& a, _Tp b)
+{
+    return cv::Size_<_Tp>(a.width / b, a.height / b);
+}
+#endif

-KCF_Tracker::KCF_Tracker()
-    : fft(*new FFT()) {}
+class Kcf_Tracker_Private {
+    friend KCF_Tracker;
+    std::vector<ThreadCtx> threadctxs;
+};
+
+KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor,
+                         double output_sigma_factor, int cell_size)
+    : p_cell_size(cell_size), fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma),
+      p_lambda(lambda), p_interp_factor(interp_factor), d(*new Kcf_Tracker_Private)
+{
+}
+
+KCF_Tracker::KCF_Tracker() : fft(*new FFT()), d(*new Kcf_Tracker_Private) {}

 KCF_Tracker::~KCF_Tracker()
 {
     delete &fft;
+    delete &d;
 }

-void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int fit_size_y)
+void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_factor)
 {
-    //check boundary, enforce min size
+    TRACE("");
+
+    // obtain a sub-window for training
+    // TODO: Move Mats outside from here
+    MatScaleFeats patch_feats(1, p_num_of_feats, feature_size);
+    MatScaleFeats temp(1, p_num_of_feats, feature_size);
+    get_features(input_rgb, input_gray, p_current_center.x, p_current_center.y,
+                 p_windows_size.width, p_windows_size.height,
+                 p_current_scale).copyTo(patch_feats.scale(0));
+    DEBUG_PRINT(patch_feats);
+    fft.forward_window(patch_feats, model->xf, temp);
+    DEBUG_PRINTM(model->xf);
+    model->model_xf = model->model_xf * (1.
- interp_factor) + model->xf * interp_factor; + DEBUG_PRINTM(model->model_xf); + + if (m_use_linearkernel) { + ComplexMat xfconj = model->xf.conj(); + model->model_alphaf_num = xfconj.mul(model->yf); + model->model_alphaf_den = (model->xf * xfconj); + } else { + // Kernel Ridge Regression, calculate alphas (in Fourier domain) + cv::Size sz(Fft::freq_size(feature_size)); + ComplexMat kf(sz.height, sz.width, 1); + (*gaussian_correlation)(kf, model->model_xf, model->model_xf, p_kernel_sigma, true, *this); + DEBUG_PRINTM(kf); + model->model_alphaf_num = model->yf * kf; + model->model_alphaf_den = kf * (kf + p_lambda); + } + model->model_alphaf = model->model_alphaf_num / model->model_alphaf_den; + DEBUG_PRINTM(model->model_alphaf); + // p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training +} + +static int round_pw2_down(int x) +{ + for (int i = 1; i < 32; i <<= 1) + x |= x >> i; + x++; + return x >> 1; +} + + +void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int fit_size_y) +{ + __dbgTracer.debug = m_debug; + TRACE(""); + + // check boundary, enforce min size double x1 = bbox.x, x2 = bbox.x + bbox.width, y1 = bbox.y, y2 = bbox.y + bbox.height; if (x1 < 0) x1 = 0.; - if (x2 > img.cols-1) x2 = img.cols - 1; + if (x2 > img.cols - 1) x2 = img.cols - 1; if (y1 < 0) y1 = 0; - if (y2 > img.rows-1) y2 = img.rows - 1; + if (y2 > img.rows - 1) y2 = img.rows - 1; - if (x2-x1 < 2*p_cell_size) { - double diff = (2*p_cell_size -x2+x1)/2.; - if (x1 - diff >= 0 && x2 + diff < img.cols){ + if (x2 - x1 < 2 * p_cell_size) { + double diff = (2 * p_cell_size - x2 + x1) / 2.; + if (x1 - diff >= 0 && x2 + diff < img.cols) { x1 -= diff; x2 += diff; - } else if (x1 - 2*diff >= 0) { - x1 -= 2*diff; + } else if (x1 - 2 * diff >= 0) { + x1 -= 2 * diff; } else { - x2 += 2*diff; + x2 += 2 * diff; } } - if (y2-y1 < 2*p_cell_size) { - double diff = (2*p_cell_size -y2+y1)/2.; - if (y1 - diff >= 0 && y2 + diff < img.rows){ + if (y2 - y1 < 2 * p_cell_size) { + double diff = (2 * p_cell_size - y2 + y1) / 2.; + if (y1 - diff >= 0 && y2 + diff < img.rows) { y1 -= diff; y2 += diff; - } else if (y1 - 2*diff >= 0) { - y1 -= 2*diff; + } else if (y1 - 2 * diff >= 0) { + y1 -= 2 * diff; } else { - y2 += 2*diff; + y2 += 2 * diff; } } - p_pose.w = x2-x1; - p_pose.h = y2-y1; - p_pose.cx = x1 + p_pose.w/2.; - p_pose.cy = y1 + p_pose.h /2.; - + p_init_pose.w = x2 - x1; + p_init_pose.h = y2 - y1; + p_init_pose.cx = x1 + p_init_pose.w / 2.; + p_init_pose.cy = y1 + p_init_pose.h / 2.; cv::Mat input_gray, input_rgb = img.clone(); - if (img.channels() == 3){ + if (img.channels() == 3) { cv::cvtColor(img, input_gray, CV_BGR2GRAY); input_gray.convertTo(input_gray, CV_32FC1); - }else + } else img.convertTo(input_gray, CV_32FC1); // don't need too large image - if (p_pose.w * p_pose.h > 100.*100. && (fit_size_x == -1 || fit_size_y == -1)) { - std::cout << "resizing image by factor of " << 1/p_downscale_factor << std::endl; + if (p_init_pose.w * p_init_pose.h > 100. * 100.) { + std::cout << "resizing image by factor of " << 1 / p_downscale_factor << std::endl; p_resize_image = true; - p_pose.scale(p_downscale_factor); - cv::resize(input_gray, input_gray, cv::Size(0,0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0,0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (!(fit_size_x == -1 && fit_size_y == -1)) { - if (fit_size_x%p_cell_size != 0 || fit_size_y%p_cell_size != 0) { - std::cerr << "Fit size does not fit to hog cell size. 
The dimensions have to be divisible by HOG cell size, which is: " << p_cell_size << std::endl;; - std::exit(EXIT_FAILURE); - } - double tmp = (p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size ; - if (fabs(tmp-fit_size_x) > p_floating_error) - p_scale_factor_x = fit_size_x/tmp; - tmp = (p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; - if (fabs(tmp-fit_size_y) > p_floating_error) - p_scale_factor_y = fit_size_y/tmp; - std::cout << "resizing image horizontaly by factor of " << p_scale_factor_x - << " and verticaly by factor of " << p_scale_factor_y << std::endl; - p_fit_to_pw2 = true; - p_pose.scale_x(p_scale_factor_x); - p_pose.scale_y(p_scale_factor_y); - if (fabs(p_scale_factor_x-1) > p_floating_error && fabs(p_scale_factor_y-1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } - } + p_init_pose.scale(p_downscale_factor); + cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); + cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); } - //compute win size + fit to fhog cell size - p_windows_size[0] = int(round(p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size); - p_windows_size[1] = int(round(p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size); + // compute win size + fit to fhog cell size + p_windows_size.width = round(p_init_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size; + p_windows_size.height = round(p_init_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; + + if (fit_size_x == 0 || fit_size_y == 0) { + // Round down to the next highest power of 2 + fit_size = cv::Size(round_pw2_down(p_windows_size.width), + round_pw2_down(p_windows_size.height)); + } else if (fit_size_x == -1 || fit_size_y == -1) { + fit_size = p_windows_size; + } else { + fit_size = cv::Size(fit_size_x, fit_size_y); + } + + feature_size = fit_size / p_cell_size; p_scales.clear(); - if (m_use_scale) - for (int i = -p_num_scales/2; i <= p_num_scales/2; ++i) - p_scales.push_back(std::pow(p_scale_step, i)); - else - p_scales.push_back(1.); + for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i) + p_scales.push_back(std::pow(p_scale_step, i)); #ifdef CUFFT - if (p_windows_size[1]/p_cell_size*(p_windows_size[0]/p_cell_size/2+1) > 1024) { + if (Fft::freq_size(feature_size).area() > 1024) { std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set " - "the window dimensions so its size is less or equal to " << 1024*p_cell_size*p_cell_size*2+1 << - " pixels . Currently the size of the window is: " << p_windows_size[0] << "x" << p_windows_size[1] << - " which is " << p_windows_size[0]*p_windows_size[1] << " pixels. " << std::endl; + "the window dimensions so its size is less or equal to " + << 1024 * p_cell_size * p_cell_size * 2 + 1 + << " pixels. Currently the size of the window is: " << fit_size + << " which is " << fit_size.area() << " pixels. 
" << std::endl; std::exit(EXIT_FAILURE); } - if (m_use_linearkernel){ + if (m_use_linearkernel) { std::cerr << "cuFFT supports only Gaussian kernel." << std::endl; std::exit(EXIT_FAILURE); } #endif - p_num_of_feats = 31; - if(m_use_color) p_num_of_feats += 3; - if(m_use_cnfeat) p_num_of_feats += 10; - p_roi_width = p_windows_size[0]/p_cell_size; - p_roi_height = p_windows_size[1]/p_cell_size; + model.reset(new Model(Fft::freq_size(feature_size), p_num_of_feats)); - int max =m_use_big_batch ? 2: p_num_scales; - for (int i = 0;i(p_cell_size); - - fft.init(uint(p_windows_size[0]/p_cell_size), uint(p_windows_size[1]/p_cell_size), uint(p_num_of_feats), uint(p_num_scales), m_use_big_batch); - fft.set_window(cosine_window_function(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size)); - - //window weights, i.e. labels - fft.forward(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size), p_yf, - m_use_cuda ? p_scale_vars.front()->rot_labels_data_d: nullptr, p_scale_vars.front()->stream); - DEBUG_PRINTM(p_yf); - - //obtain a sub-window for training initial model - p_scale_vars.front()->patch_feats.clear(); - get_features(input_rgb, input_gray, int(p_pose.cx), int(p_pose.cy), p_windows_size[0], p_windows_size[1], *p_scale_vars.front()); - fft.forward_window(p_scale_vars.front()->patch_feats, p_model_xf, p_scale_vars.front()->fw_all, - m_use_cuda ? p_scale_vars.front()->data_features_d : nullptr, p_scale_vars.front()->stream); - DEBUG_PRINTM(p_model_xf); -#if defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - p_scale_vars.front()->model_xf = p_model_xf; - p_scale_vars.front()->model_xf.set_stream(p_scale_vars.front()->stream); - p_yf.set_stream(p_scale_vars.front()->stream); - p_model_xf.set_stream(p_scale_vars.front()->stream); -#endif + p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h * double(fit_size.area()) / p_windows_size.area()) + * p_output_sigma_factor / p_cell_size; - if (m_use_linearkernel) { - ComplexMat xfconj = p_model_xf.conj(); - p_model_alphaf_num = xfconj.mul(p_yf); - p_model_alphaf_den = (p_model_xf * xfconj); - } else { - //Kernel Ridge Regression, calculate alphas (in Fourier domain) -#if defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - gaussian_correlation(*p_scale_vars.front(), p_scale_vars.front()->model_xf, p_scale_vars.front()->model_xf, p_kernel_sigma, true); -#else - gaussian_correlation(*p_scale_vars.front(), p_model_xf, p_model_xf, p_kernel_sigma, true); -#endif - DEBUG_PRINTM(p_scale_vars.front()->kf); - p_model_alphaf_num = p_yf * p_scale_vars.front()->kf; - DEBUG_PRINTM(p_model_alphaf_num); - p_model_alphaf_den = p_scale_vars.front()->kf * (p_scale_vars.front()->kf + float(p_lambda)); - DEBUG_PRINTM(p_model_alphaf_den); - } - p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; - DEBUG_PRINTM(p_model_alphaf); -// p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training - -#if defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - for (auto it = p_scale_vars.begin();it != p_scale_vars.end();++it) { - (*it)->model_xf = p_model_xf; - (*it)->model_xf.set_stream((*it)->stream); - (*it)->model_alphaf = p_model_alphaf; - (*it)->model_alphaf.set_stream((*it)->stream); - } -#endif + fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales); + fft.set_window(MatDynMem(cosine_window_function(feature_size.width, feature_size.height))); + + // window weights, i.e. 
labels + MatScales gsl(1, feature_size); + gaussian_shaped_labels(p_output_sigma, feature_size.width, feature_size.height).copyTo(gsl.plane(0)); + fft.forward(gsl, model->yf); + DEBUG_PRINTM(model->yf); + + // train initial model + train(input_rgb, input_gray, 1.0); } -void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat & img, int fit_size_x, int fit_size_y) +void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat &img, int fit_size_x, int fit_size_y) { init(img, bbox.get_rect(), fit_size_x, fit_size_y); } void KCF_Tracker::updateTrackerPosition(BBox_c &bbox) { + BBox_c tmp = bbox; if (p_resize_image) { - BBox_c tmp = bbox; tmp.scale(p_downscale_factor); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else if (p_fit_to_pw2) { - BBox_c tmp = bbox; - tmp.scale_x(p_scale_factor_x); - tmp.scale_y(p_scale_factor_y); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else { - p_pose.cx = bbox.cx; - p_pose.cy = bbox.cy; } + p_current_center = tmp.center(); } BBox_c KCF_Tracker::getBBox() { - BBox_c tmp = p_pose; - tmp.w *= p_current_scale; - tmp.h *= p_current_scale; + BBox_c tmp; + tmp.cx = p_current_center.x; + tmp.cy = p_current_center.y; + tmp.w = p_init_pose.w * p_current_scale; + tmp.h = p_init_pose.h * p_current_scale; if (p_resize_image) - tmp.scale(1/p_downscale_factor); - if (p_fit_to_pw2) { - tmp.scale_x(1/p_scale_factor_x); - tmp.scale_y(1/p_scale_factor_y); - } + tmp.scale(1 / p_downscale_factor); return tmp; } -void KCF_Tracker::track(cv::Mat &img) +double KCF_Tracker::getFilterResponse() const { - if (m_debug) - std::cout << "NEW FRAME" << '\n'; - cv::Mat input_gray, input_rgb = img.clone(); - if (img.channels() == 3){ - cv::cvtColor(img, input_gray, CV_BGR2GRAY); - input_gray.convertTo(input_gray, CV_32FC1); - }else - img.convertTo(input_gray, CV_32FC1); + return this->max_response; +} - // don't need too large image +void KCF_Tracker::resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray) +{ if (p_resize_image) { cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (p_fit_to_pw2 && fabs(p_scale_factor_x-1) > p_floating_error && fabs(p_scale_factor_y-1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } } +} - double max_response = -1.; - int scale_index = 0; - cv::Point2i *max_response_pt = nullptr; - cv::Mat *max_response_map = nullptr; - - if(m_use_multithreading) { - std::vector> async_res(p_scales.size()); - for (auto it = p_scale_vars.begin();it != p_scale_vars.end();++it) { - uint index = uint(std::distance(p_scale_vars.begin(), it)); - async_res[index] = std::async(std::launch::async, - [this, &input_gray, &input_rgb, index, it]() -> void - {return scale_track(*(*it), input_rgb, input_gray, this->p_scales[index]);}); +double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) const +{ + double max = -1.; + max_idx = std::numeric_limits::max(); + +#ifndef BIG_BATCH + for (uint j = 0; j < d.threadctxs.size(); ++j) { + if 
(d.threadctxs[j].max.response > max) { + max = d.threadctxs[j].max.response; + max_idx = j; } - for (auto it = p_scale_vars.begin();it != p_scale_vars.end();++it) { - uint index = uint(std::distance(p_scale_vars.begin(), it)); - async_res[index].wait(); - if ((*it)->max_response > max_response) { - max_response = (*it)->max_response; - max_response_pt = & (*it)->max_loc; - max_response_map = & (*it)->response; - scale_index = int(index); - } + } +#else + for (uint j = 0; j < p_scales.size(); ++j) { + if (d.threadctxs[0].max[j].response > max) { + max = d.threadctxs[0].max[j].response; + max_idx = j; } - } else { - uint start = m_use_big_batch ? 1 : 0; - uint end = m_use_big_batch ? 2 : uint(p_num_scales); - NORMAL_OMP_PARALLEL_FOR - for (uint i = start; i < end; ++i) { - auto it = p_scale_vars.begin(); - std::advance(it, i); - scale_track(*(*it), input_rgb, input_gray, this->p_scales[i]); - - if (m_use_big_batch) { - for (size_t j = 0;jmax_responses[j] > max_response) { - max_response = (*it)->max_responses[j]; - max_response_pt = & (*it)->max_locs[j]; - max_response_map = & (*it)->response_maps[j]; - scale_index = int(j); - } - } - } else { - NORMAL_OMP_CRITICAL - { - if ((*it)->max_response > max_response) { - max_response = (*it)->max_response; - max_response_pt = & (*it)->max_loc; - max_response_map = & (*it)->response; - scale_index = int(i); - } - } + } +#endif + assert(max_idx < IF_BIG_BATCH(p_scales.size(), d.threadctxs.size())); + + if (m_visual_debug) { + int w = 100; //feature_size.width; + int h = 100; //feature_size.height; + cv::Mat all_responses(h * p_num_scales, w * p_num_angles, + d.threadctxs[0].response.type(), cv::Scalar::all(0)); + for (size_t i = 0; i < p_num_scales; ++i) { + for (size_t j = 0; j < p_num_angles; ++j) { + cv::Mat tmp = d.threadctxs[IF_BIG_BATCH(0, p_num_angles * i + j)].response.plane(IF_BIG_BATCH(p_num_angles * i + j, 0)); + tmp = circshift(tmp, -tmp.cols/2, -tmp.rows/2); + cv::resize(tmp, tmp, cv::Size(w, h)); + cv::Mat resp_roi(all_responses, cv::Rect(j * w, i * h, w, h)); + tmp.copyTo(resp_roi); } } + cv::namedWindow("All responses", CV_WINDOW_AUTOSIZE); + cv::imshow("All responses", all_responses); } - DEBUG_PRINTM(*max_response_map); - DEBUG_PRINT(*max_response_pt); + cv::Point2i &max_response_pt = IF_BIG_BATCH(d.threadctxs[0].max[max_idx].loc, d.threadctxs[max_idx].max.loc); + cv::Mat max_response_map = IF_BIG_BATCH(d.threadctxs[0].response.plane(max_idx), d.threadctxs[max_idx].response.plane(0)); - //sub pixel quadratic interpolation from neighbours - if (max_response_pt->y > max_response_map->rows / 2) //wrap around to negative half-space of vertical axis - max_response_pt->y = max_response_pt->y - max_response_map->rows; - if (max_response_pt->x > max_response_map->cols / 2) //same for horizontal axis - max_response_pt->x = max_response_pt->x - max_response_map->cols; + DEBUG_PRINTM(max_response_map); + DEBUG_PRINT(max_response_pt); - cv::Point2f new_location(max_response_pt->x, max_response_pt->y); - DEBUG_PRINT(new_location); + // sub pixel quadratic interpolation from neighbours + if (max_response_pt.y > max_response_map.rows / 2) // wrap around to negative half-space of vertical axis + max_response_pt.y = max_response_pt.y - max_response_map.rows; + if (max_response_pt.x > max_response_map.cols / 2) // same for horizontal axis + max_response_pt.x = max_response_pt.x - max_response_map.cols; - if (m_use_subpixel_localization) - new_location = sub_pixel_peak(*max_response_pt, *max_response_map); - DEBUG_PRINT(new_location); - p_pose.cx += 
p_current_scale*p_cell_size*double(new_location.x); - p_pose.cy += p_current_scale*p_cell_size*double(new_location.y); - if (p_fit_to_pw2) { - if (p_pose.cx < 0) p_pose.cx = 0; - if (p_pose.cx > (img.cols*p_scale_factor_x)-1) p_pose.cx = (img.cols*p_scale_factor_x)-1; - if (p_pose.cy < 0) p_pose.cy = 0; - if (p_pose.cy > (img.rows*p_scale_factor_y)-1) p_pose.cy = (img.rows*p_scale_factor_y)-1; + if (m_use_subpixel_localization) { + new_location = sub_pixel_peak(max_response_pt, max_response_map); } else { - if (p_pose.cx < 0) p_pose.cx = 0; - if (p_pose.cx > img.cols-1) p_pose.cx = img.cols-1; - if (p_pose.cy < 0) p_pose.cy = 0; - if (p_pose.cy > img.rows-1) p_pose.cy = img.rows-1; + new_location = max_response_pt; } + DEBUG_PRINT(new_location); + return max; +} - //sub grid scale interpolation - double new_scale = p_scales[uint(scale_index)]; - if (m_use_subgrid_scale) - new_scale = sub_grid_scale(scale_index); +void KCF_Tracker::track(cv::Mat &img) +{ + __dbgTracer.debug = m_debug; + TRACE(""); + + cv::Mat input_gray, input_rgb = img.clone(); + if (img.channels() == 3) { + cv::cvtColor(img, input_gray, CV_BGR2GRAY); + input_gray.convertTo(input_gray, CV_32FC1); + } else + img.convertTo(input_gray, CV_32FC1); - p_current_scale *= new_scale; + // don't need too large image + resizeImgs(input_rgb, input_gray); + +#ifdef ASYNC + for (auto &it : d.threadctxs) + it.async_res = std::async(std::launch::async, [this, &input_gray, &input_rgb, &it]() -> void { + it.track(*this, input_rgb, input_gray); + }); + for (auto const &it : d.threadctxs) + it.async_res.wait(); + +#else // !ASYNC + NORMAL_OMP_PARALLEL_FOR + for (uint i = 0; i < d.threadctxs.size(); ++i) + d.threadctxs[i].track(*this, input_rgb, input_gray); +#endif - if (p_current_scale < p_min_max_scale[0]) - p_current_scale = p_min_max_scale[0]; - if (p_current_scale > p_min_max_scale[1]) - p_current_scale = p_min_max_scale[1]; + cv::Point2d new_location; + uint max_idx; + max_response = findMaxReponse(max_idx, new_location); - //obtain a subwindow for training at newly estimated target position - p_scale_vars.front()->patch_feats.clear(); - get_features(input_rgb, input_gray, int(p_pose.cx), int(p_pose.cy), p_windows_size[0], p_windows_size[1], - *p_scale_vars.front(), p_current_scale); - fft.forward_window(p_scale_vars.front()->patch_feats, p_scale_vars.front()->xf, p_scale_vars.front()->fw_all, - m_use_cuda ? p_scale_vars.front()->data_features_d : nullptr, p_scale_vars.front()->stream); + new_location.x *= double(p_windows_size.width) / fit_size.width; + new_location.y *= double(p_windows_size.height) / fit_size.height; - //subsequent frames, interpolate model - p_model_xf = p_model_xf *float((1. 
- p_interp_factor)) + p_scale_vars.front()->xf * float(p_interp_factor); + p_current_center += p_current_scale * p_cell_size * new_location; - ComplexMat alphaf_num, alphaf_den; + clamp2(p_current_center.x, 0.0, img.cols - 1.0); + clamp2(p_current_center.y, 0.0, img.rows - 1.0); - if (m_use_linearkernel) { - ComplexMat xfconj = p_scale_vars.front()->xf.conj(); - alphaf_num = xfconj.mul(p_yf); - alphaf_den = (p_scale_vars.front()->xf * xfconj); + // sub grid scale interpolation + if (m_use_subgrid_scale) { + p_current_scale *= sub_grid_scale(max_idx); } else { - //Kernel Ridge Regression, calculate alphas (in Fourier domain) - gaussian_correlation(*p_scale_vars.front(), p_scale_vars.front()->xf, p_scale_vars.front()->xf, p_kernel_sigma, true); -// ComplexMat alphaf = p_yf / (kf + p_lambda); //equation for fast training -// p_model_alphaf = p_model_alphaf * (1. - p_interp_factor) + alphaf * p_interp_factor; - alphaf_num = p_yf * p_scale_vars.front()->kf; - alphaf_den = p_scale_vars.front()->kf * (p_scale_vars.front()->kf + float(p_lambda)); + p_current_scale *= p_scales[max_idx]; } - p_model_alphaf_num = p_model_alphaf_num * float((1. - p_interp_factor)) + alphaf_num * float(p_interp_factor); - p_model_alphaf_den = p_model_alphaf_den * float((1. - p_interp_factor)) + alphaf_den * float(p_interp_factor); - p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; + clamp2(p_current_scale, p_min_max_scale[0], p_min_max_scale[1]); -#if defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - for (auto it = p_scale_vars.begin(); it != p_scale_vars.end(); ++it) { - (*it)->model_xf = p_model_xf; - (*it)->model_xf.set_stream((*it)->stream); - (*it)->model_alphaf = p_model_alphaf; - (*it)->model_alphaf.set_stream((*it)->stream); - } -#endif + // train at newly estimated target position + train(input_rgb, input_gray, p_interp_factor); } -void KCF_Tracker::scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat & input_gray, double scale) +void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input_gray) { - if (m_use_big_batch) { - vars.patch_feats.clear(); - BIG_BATCH_OMP_PARALLEL_FOR - for (uint i = 0; i < uint(p_num_scales); ++i) { - get_features(input_rgb, input_gray, int(this->p_pose.cx), int(this->p_pose.cy), this->p_windows_size[0], - this->p_windows_size[1], vars, this->p_current_scale * this->p_scales[i]); - } - } else { - vars.patch_feats.clear(); - get_features(input_rgb, input_gray, int(this->p_pose.cx), int(this->p_pose.cy), this->p_windows_size[0], - this->p_windows_size[1], vars, this->p_current_scale *scale); + TRACE(""); + + BIG_BATCH_OMP_PARALLEL_FOR + for (uint i = 0; i < IF_BIG_BATCH(kcf.p_num_scales, 1); ++i) + { + kcf.get_features(input_rgb, input_gray, kcf.p_current_center.x, kcf.p_current_center.y, + kcf.p_windows_size.width, kcf.p_windows_size.height, + kcf.p_current_scale * IF_BIG_BATCH(kcf.p_scales[i], scale)) + .copyTo(patch_feats.scale(i)); + DEBUG_PRINT(patch_feats.scale(i)); } - fft.forward_window(vars.patch_feats, vars.zf, vars.fw_all, m_use_cuda ? vars.data_features_d : nullptr, vars.stream); - DEBUG_PRINTM(vars.zf); + kcf.fft.forward_window(patch_feats, zf, temp); + DEBUG_PRINTM(zf); - if (m_use_linearkernel) { - vars.kzf = m_use_big_batch ? (vars.zf.mul2(this->p_model_alphaf)).sum_over_channels() - : (p_model_alphaf * vars.zf).sum_over_channels(); - fft.inverse(vars.kzf, vars.response, m_use_cuda ? 
vars.data_i_1ch_d : nullptr, vars.stream); + if (kcf.m_use_linearkernel) { + kzf = zf.mul(kcf.model->model_alphaf).sum_over_channels(); } else { -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - gaussian_correlation(vars, vars.zf, vars.model_xf, this->p_kernel_sigma); - vars.kzf = vars.model_alphaf * vars.kzf; -#else - gaussian_correlation(vars, vars.zf, this->p_model_xf, this->p_kernel_sigma); - DEBUG_PRINTM(this->p_model_alphaf); - DEBUG_PRINTM(vars.kzf); - vars.kzf = m_use_big_batch ? vars.kzf.mul(this->p_model_alphaf) : this->p_model_alphaf * vars.kzf; -#endif - fft.inverse(vars.kzf, vars.response, m_use_cuda ? vars.data_i_1ch_d : nullptr, vars.stream); + gaussian_correlation(kzf, zf, kcf.model->model_xf, kcf.p_kernel_sigma, false, kcf); + DEBUG_PRINTM(kzf); + kzf = kzf.mul(kcf.model->model_alphaf); } + kcf.fft.inverse(kzf, response); - DEBUG_PRINTM(vars.response); + DEBUG_PRINTM(response); /* target location is at the maximum response. we must take into account the fact that, if the target doesn't move, the peak will appear at the top-left corner, not at the center (this is discussed in the paper). the responses wrap around cyclically. */ - if (m_use_big_batch) { - cv::split(vars.response,vars.response_maps); - - for (size_t i = 0; i < p_scales.size(); ++i) { - double min_val, max_val; - cv::Point2i min_loc, max_loc; - cv::minMaxLoc(vars.response_maps[i], &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(max_loc); - double weight = p_scales[i] < 1. ? p_scales[i] : 1./p_scales[i]; - vars.max_responses[i] = max_val*weight; - vars.max_locs[i] = max_loc; - } - } else { - double min_val; - cv::Point2i min_loc; - cv::minMaxLoc(vars.response, &min_val, &vars.max_val, &min_loc, &vars.max_loc); + double min_val, max_val; + cv::Point2i min_loc, max_loc; +#ifdef BIG_BATCH + for (size_t i = 0; i < kcf.p_scales.size(); ++i) { + cv::minMaxLoc(response.plane(i), &min_val, &max_val, &min_loc, &max_loc); + DEBUG_PRINT(max_loc); + double weight = kcf.p_scales[i] < 1. ? kcf.p_scales[i] : 1. / kcf.p_scales[i]; + max[i].response = max_val * weight; + max[i].loc = max_loc; + } +#else + cv::minMaxLoc(response.plane(0), &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(vars.max_loc); + DEBUG_PRINT(max_loc); + DEBUG_PRINT(max_val); - double weight = scale < 1. ? scale : 1./scale; - vars.max_response = vars.max_val*weight; - } - return; + double weight = scale < 1. ? scale : 1. 
/ scale; + max.response = max_val * weight; + max.loc = max_loc; +#endif } // **************************************************************************** -void KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, Scale_vars &vars, double scale) +cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, int cx, int cy, + int size_x, int size_y, double scale) const { - int size_x_scaled = int(floor(size_x*scale)); - int size_y_scaled = int(floor(size_y*scale)); + cv::Size scaled = cv::Size(floor(size_x * scale), floor(size_y * scale)); - cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled); - cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled); + cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height); + cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height); - //resize to default size - if (scale > 1.){ - //if we downsample use INTER_AREA interpolation - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA); - }else { - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR); + // resize to default size + if (scaled.area() > fit_size.area()) { + // if we downsample use INTER_AREA interpolation + // note: this is just a guess - we may downsample in X and upsample in Y (or vice versa) + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_AREA); + } else { + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_LINEAR); } // get hog(Histogram of Oriented Gradients) features - FHoG::extract(patch_gray, vars, 2, p_cell_size, 9); + std::vector hog_feat = FHoG::extract(patch_gray, 2, p_cell_size, 9); - //get color rgb features (simple r,g,b channels) + // get color rgb features (simple r,g,b channels) std::vector color_feat; if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) { - //resize to default size - if (scale > 1.){ - //if we downsample use INTER_AREA interpolation - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x/p_cell_size, size_y/p_cell_size), 0., 0., cv::INTER_AREA); - }else { - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x/p_cell_size, size_y/p_cell_size), 0., 0., cv::INTER_LINEAR); + // resize to default size + if (scaled.area() > (fit_size / p_cell_size).area()) { + // if we downsample use INTER_AREA interpolation + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_AREA); + } else { + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_LINEAR); } } if (m_use_color && input_rgb.channels() == 3) { - //use rgb color space + // use rgb color space cv::Mat patch_rgb_norm; patch_rgb.convertTo(patch_rgb_norm, CV_32F, 1. 
/ 255., -0.5); cv::Mat ch1(patch_rgb_norm.size(), CV_32FC1); @@ -540,9 +505,15 @@ void KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx std::vector cn_feat = CNFeat::extract(patch_rgb); color_feat.insert(color_feat.end(), cn_feat.begin(), cn_feat.end()); } - BIG_BATCH_OMP_ORDERED - vars.patch_feats.insert(vars.patch_feats.end(), color_feat.begin(), color_feat.end()); - return; + + hog_feat.insert(hog_feat.end(), color_feat.begin(), color_feat.end()); + + int size[] = {p_num_of_feats, feature_size.height, feature_size.width}; + cv::Mat result(3, size, CV_32F); + for (uint i = 0; i < hog_feat.size(); ++i) + hog_feat[i].copyTo(cv::Mat(size[1], size[2], CV_32FC1, result.ptr(i))); + + return result; } cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) @@ -551,88 +522,80 @@ cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) int range_y[2] = {-dim2 / 2, dim2 - dim2 / 2}; int range_x[2] = {-dim1 / 2, dim1 - dim1 / 2}; - double sigma_s = sigma*sigma; + double sigma_s = sigma * sigma; - for (int y = range_y[0], j = 0; y < range_y[1]; ++y, ++j){ - float * row_ptr = labels.ptr(j); - double y_s = y*y; - for (int x = range_x[0], i = 0; x < range_x[1]; ++x, ++i){ - row_ptr[i] = float(std::exp(-0.5 * (y_s + x*x) / sigma_s));//-1/2*e^((y^2+x^2)/sigma^2) + for (int y = range_y[0], j = 0; y < range_y[1]; ++y, ++j) { + float *row_ptr = labels.ptr(j); + double y_s = y * y; + for (int x = range_x[0], i = 0; x < range_x[1]; ++x, ++i) { + row_ptr[i] = std::exp(-0.5 * (y_s + x * x) / sigma_s); //-1/2*e^((y^2+x^2)/sigma^2) } } - //rotate so that 1 is at top-left corner (see KCF paper for explanation) -#ifdef CUFFT - cv::Mat tmp = circshift(labels, range_x[0], range_y[0]); - tmp.copyTo(p_scale_vars.front()->rot_labels); - - assert(p_scale_vars[0].rot_labels.at(0,0) >= 1.f - 1e-10f); - return tmp; -#else - cv::Mat rot_labels = circshift(labels, range_x[0], range_y[0]); - //sanity check, 1 at top left corner - assert(rot_labels.at(0,0) >= 1.f - 1e-10f); + // rotate so that 1 is at top-left corner (see KCF paper for explanation) + MatDynMem rot_labels = circshift(labels, range_x[0], range_y[0]); + // sanity check, 1 at top left corner + assert(rot_labels.at(0, 0) >= 1.f - 1e-10f); return rot_labels; -#endif } -cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) +cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) const { cv::Mat rot_patch(patch.size(), CV_32FC1); cv::Mat tmp_x_rot(patch.size(), CV_32FC1); - //circular rotate x-axis + // circular rotate x-axis if (x_rot < 0) { - //move part that does not rotate over the edge + // move part that does not rotate over the edge cv::Range orig_range(-x_rot, patch.cols); cv::Range rot_range(0, patch.cols - (-x_rot)); patch(cv::Range::all(), orig_range).copyTo(tmp_x_rot(cv::Range::all(), rot_range)); - //rotated part + // rotated part orig_range = cv::Range(0, -x_rot); rot_range = cv::Range(patch.cols - (-x_rot), patch.cols); patch(cv::Range::all(), orig_range).copyTo(tmp_x_rot(cv::Range::all(), rot_range)); - }else if (x_rot > 0){ - //move part that does not rotate over the edge + } else if (x_rot > 0) { + // move part that does not rotate over the edge cv::Range orig_range(0, patch.cols - x_rot); cv::Range rot_range(x_rot, patch.cols); patch(cv::Range::all(), orig_range).copyTo(tmp_x_rot(cv::Range::all(), rot_range)); - //rotated part + // rotated part orig_range = cv::Range(patch.cols - x_rot, patch.cols); rot_range = cv::Range(0, x_rot); 
patch(cv::Range::all(), orig_range).copyTo(tmp_x_rot(cv::Range::all(), rot_range)); - }else { //zero rotation - //move part that does not rotate over the edge + } else { // zero rotation + // move part that does not rotate over the edge cv::Range orig_range(0, patch.cols); cv::Range rot_range(0, patch.cols); patch(cv::Range::all(), orig_range).copyTo(tmp_x_rot(cv::Range::all(), rot_range)); } - //circular rotate y-axis + // circular rotate y-axis if (y_rot < 0) { - //move part that does not rotate over the edge + // move part that does not rotate over the edge cv::Range orig_range(-y_rot, patch.rows); cv::Range rot_range(0, patch.rows - (-y_rot)); tmp_x_rot(orig_range, cv::Range::all()).copyTo(rot_patch(rot_range, cv::Range::all())); - //rotated part + // rotated part orig_range = cv::Range(0, -y_rot); rot_range = cv::Range(patch.rows - (-y_rot), patch.rows); tmp_x_rot(orig_range, cv::Range::all()).copyTo(rot_patch(rot_range, cv::Range::all())); - }else if (y_rot > 0){ - //move part that does not rotate over the edge + } else if (y_rot > 0) { + // move part that does not rotate over the edge cv::Range orig_range(0, patch.rows - y_rot); cv::Range rot_range(y_rot, patch.rows); tmp_x_rot(orig_range, cv::Range::all()).copyTo(rot_patch(rot_range, cv::Range::all())); - //rotated part + // rotated part orig_range = cv::Range(patch.rows - y_rot, patch.rows); rot_range = cv::Range(0, y_rot); tmp_x_rot(orig_range, cv::Range::all()).copyTo(rot_patch(rot_range, cv::Range::all())); - }else { //zero rotation - //move part that does not rotate over the edge + } else { // zero rotation + // move part that does not rotate over the edge cv::Range orig_range(0, patch.rows); cv::Range rot_range(0, patch.rows); tmp_x_rot(orig_range, cv::Range::all()).copyTo(rot_patch(rot_range, cv::Range::all())); @@ -641,33 +604,33 @@ cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) return rot_patch; } -//hann window actually (Power-of-cosine windows) +// hann window actually (Power-of-cosine windows) cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) { cv::Mat m1(1, dim1, CV_32FC1), m2(dim2, 1, CV_32FC1); - double N_inv = 1./(static_cast(dim1)-1.); + double N_inv = 1. / (static_cast(dim1) - 1.); for (int i = 0; i < dim1; ++i) - m1.at(i) = float(0.5*(1. - std::cos(2. * CV_PI * static_cast(i) * N_inv))); - N_inv = 1./(static_cast(dim2)-1.); + m1.at(i) = float(0.5 * (1. - std::cos(2. * CV_PI * static_cast(i) * N_inv))); + N_inv = 1. / (static_cast(dim2) - 1.); for (int i = 0; i < dim2; ++i) - m2.at(i) = float(0.5*(1. - std::cos(2. * CV_PI * static_cast(i) * N_inv))); - cv::Mat ret = m2*m1; + m2.at(i) = float(0.5 * (1. - std::cos(2. * CV_PI * static_cast(i) * N_inv))); + cv::Mat ret = m2 * m1; return ret; } // Returns sub-window of image input centered at [cx, cy] coordinates), // with size [width, height]. If any pixels are outside of the image, // they will replicate the values at the borders. 
-cv::Mat KCF_Tracker::get_subwindow(const cv::Mat & input, int cx, int cy, int width, int height) +cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height) const { cv::Mat patch; - int x1 = cx - width/2; - int y1 = cy - height/2; - int x2 = cx + width/2; - int y2 = cy + height/2; + int x1 = cx - width / 2; + int y1 = cy - height / 2; + int x2 = cx + width / 2; + int y2 = cy + height / 2; - //out of image + // out of image if (x1 >= input.cols || y1 >= input.rows || x2 < 0 || y2 < 0) { patch.create(height, width, input.type()); patch.setTo(double(0.f)); @@ -676,7 +639,7 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat & input, int cx, int cy, int wi int top = 0, bottom = 0, left = 0, right = 0; - //fit to image coordinates, set border extensions; + // fit to image coordinates, set border extensions; if (x1 < 0) { left = -x1; x1 = 0; @@ -699,111 +662,78 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat & input, int cx, int cy, int wi if (x2 - x1 == 0 || y2 - y1 == 0) patch = cv::Mat::zeros(height, width, CV_32FC1); - else - { - cv::copyMakeBorder(input(cv::Range(y1, y2), cv::Range(x1, x2)), patch, top, bottom, left, right, cv::BORDER_REPLICATE); -// imshow( "copyMakeBorder", patch); -// cv::waitKey(); - } + else { + cv::copyMakeBorder(input(cv::Range(y1, y2), cv::Range(x1, x2)), patch, top, bottom, left, right, + cv::BORDER_REPLICATE); + // imshow( "copyMakeBorder", patch); + // cv::waitKey(); + } - //sanity check + // sanity check assert(patch.cols == width && patch.rows == height); return patch; } -void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMat & xf, const ComplexMat & yf, double sigma, bool auto_correlation) +void KCF_Tracker::GaussianCorrelation::operator()(ComplexMat &result, const ComplexMat &xf, const ComplexMat &yf, + double sigma, bool auto_correlation, const KCF_Tracker &kcf) { -#ifdef CUFFT - xf.sqr_norm(vars.xf_sqr_norm_d); - if (!auto_correlation) - yf.sqr_norm(vars.yf_sqr_norm_d); -#else - xf.sqr_norm(vars.xf_sqr_norm); - if (auto_correlation){ - vars.yf_sqr_norm[0] = vars.xf_sqr_norm[0]; + TRACE(""); + xf.sqr_norm(xf_sqr_norm); + if (auto_correlation) { + yf_sqr_norm = xf_sqr_norm; } else { - yf.sqr_norm(vars.yf_sqr_norm); - } -#endif - vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj()); - DEBUG_PRINTM(vars.xyf); - fft.inverse(vars.xyf, vars.ifft2_res, m_use_cuda ? vars.data_i_features_d : nullptr, vars.stream); -#ifdef CUFFT - if (auto_correlation) - cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res_d, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, - sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width, vars.stream); - else - cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res_d, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, - sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width, vars.stream); + yf.sqr_norm(yf_sqr_norm); + } + xyf = auto_correlation ? xf.sqr_mag() : xf * yf.conj(); // xf.muln(yf.conj()); + DEBUG_PRINTM(xyf); + + // ifft2 and sum over 3rd dimension, we dont care about individual channels + ComplexMat xyf_sum = xyf.sum_over_channels(); + DEBUG_PRINTM(xyf_sum); + kcf.fft.inverse(xyf_sum, ifft_res); + DEBUG_PRINTM(ifft_res); +#if 0 && defined(CUFFT) + // FIXME + cuda_gaussian_correlation(ifft_res.deviceMem(), k.deviceMem(), xf_sqr_norm.deviceMem(), + auto_correlation ? 
xf_sqr_norm.deviceMem() : yf_sqr_norm.deviceMem(), sigma, + xf.n_channels, xf.n_scales, kcf.feature_size.height, kcf.feature_size.width); #else - //ifft2 and sum over 3rd dimension, we dont care about individual channels - DEBUG_PRINTM(vars.ifft2_res); - cv::Mat xy_sum; - if (xf.channels() != p_num_scales * p_num_of_feats) - xy_sum.create(vars.ifft2_res.size(), CV_32FC1); - else - xy_sum.create(vars.ifft2_res.size(), CV_32FC(int(p_scales.size()))); - xy_sum.setTo(0); - for (int y = 0; y < vars.ifft2_res.rows; ++y) { - float *row_ptr = vars.ifft2_res.ptr(y); - float *row_ptr_sum = xy_sum.ptr(y); - for (int x = 0; x < vars.ifft2_res.cols; ++x) { - for (int sum_ch = 0; sum_ch < xy_sum.channels(); ++sum_ch) { - row_ptr_sum[(x * xy_sum.channels()) + sum_ch] += std::accumulate( - row_ptr + x * vars.ifft2_res.channels() + sum_ch * (vars.ifft2_res.channels() / xy_sum.channels()), - (row_ptr + x * vars.ifft2_res.channels() + - (sum_ch + 1) * (vars.ifft2_res.channels() / xy_sum.channels())), - 0.f); - } - } - } - DEBUG_PRINTM(xy_sum); - - std::vector scales; - cv::split(xy_sum,scales); float numel_xf_inv = 1.f / (xf.cols * xf.rows * (xf.channels() / xf.n_scales)); - for (uint i = 0; i < uint(xf.n_scales); ++i) { - cv::Mat in_roi(vars.in_all, cv::Rect(0, int(i) * scales[0].rows, scales[0].cols, scales[0].rows)); - cv::exp( - -1. / (sigma * sigma) * - cv::max((double(vars.xf_sqr_norm[i] + vars.yf_sqr_norm[0]) - 2 * scales[i]) * double(numel_xf_inv), 0), - in_roi); - DEBUG_PRINTM(in_roi); + for (uint i = 0; i < xf.n_scales; ++i) { + cv::Mat plane = ifft_res.plane(i); + DEBUG_PRINT(ifft_res.plane(i)); + cv::exp(-1. / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * ifft_res.plane(i)) + * numel_xf_inv, 0), plane); + DEBUG_PRINTM(plane); } #endif - DEBUG_PRINTM(vars.in_all); - fft.forward(vars.in_all, auto_correlation ? vars.kf : vars.kzf, m_use_cuda ? 
vars.gauss_corr_res_d : nullptr, - vars.stream); - return; + kcf.fft.forward(ifft_res, result); } -float get_response_circular(cv::Point2i & pt, cv::Mat & response) +float get_response_circular(cv::Point2i &pt, cv::Mat &response) { int x = pt.x; int y = pt.y; - if (x < 0) - x = response.cols + x; - if (y < 0) - y = response.rows + y; - if (x >= response.cols) - x = x - response.cols; - if (y >= response.rows) - y = y - response.rows; - - return response.at(y,x); + assert(response.dims == 2); // ensure .cols and .rows are valid + if (x < 0) x = response.cols + x; + if (y < 0) y = response.rows + y; + if (x >= response.cols) x = x - response.cols; + if (y >= response.rows) y = y - response.rows; + + return response.at(y, x); } -cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point & max_loc, cv::Mat & response) +cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) const { - //find neighbourhood of max_loc (response is circular) + // find neighbourhood of max_loc (response is circular) // 1 2 3 // 4 5 // 6 7 8 - cv::Point2i p1(max_loc.x-1, max_loc.y-1), p2(max_loc.x, max_loc.y-1), p3(max_loc.x+1, max_loc.y-1); - cv::Point2i p4(max_loc.x-1, max_loc.y), p5(max_loc.x+1, max_loc.y); - cv::Point2i p6(max_loc.x-1, max_loc.y+1), p7(max_loc.x, max_loc.y+1), p8(max_loc.x+1, max_loc.y+1); + cv::Point2i p1(max_loc.x - 1, max_loc.y - 1), p2(max_loc.x, max_loc.y - 1), p3(max_loc.x + 1, max_loc.y - 1); + cv::Point2i p4(max_loc.x - 1, max_loc.y), p5(max_loc.x + 1, max_loc.y); + cv::Point2i p6(max_loc.x - 1, max_loc.y + 1), p7(max_loc.x, max_loc.y + 1), p8(max_loc.x + 1, max_loc.y + 1); // clang-format off // fit 2d quadratic function f(x, y) = a*x^2 + b*x*y + c*y^2 + d*x + e*y + f @@ -831,8 +761,7 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point & max_loc, cv::Mat & response) cv::Mat x; cv::solve(A, fval, x, cv::DECOMP_SVD); - float a = x.at(0), b = x.at(1), c = x.at(2), - d = x.at(3), e = x.at(4); + float a = x.at(0), b = x.at(1), c = x.at(2), d = x.at(3), e = x.at(4); cv::Point2f sub_peak(max_loc.x, max_loc.y); if (b > 0 || b < 0) { @@ -843,48 +772,47 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point & max_loc, cv::Mat & response) return sub_peak; } -double KCF_Tracker::sub_grid_scale(int index) +double KCF_Tracker::sub_grid_scale(uint index) { cv::Mat A, fval; - if (index < 0 || index > int(p_scales.size())-1) { + if (index >= p_scales.size()) { // interpolate from all values // fit 1d quadratic function f(x) = a*x^2 + b*x + c - A.create(int(p_scales.size()), 3, CV_32FC1); - fval.create(int(p_scales.size()), 1, CV_32FC1); - for (auto it = p_scale_vars.begin(); it != p_scale_vars.end(); ++it) { - uint i = uint(std::distance(p_scale_vars.begin(), it)); - int j = int(i); - A.at(j, 0) = float(p_scales[i] * p_scales[i]); - A.at(j, 1) = float(p_scales[i]); - A.at(j, 2) = 1; - fval.at(j) = m_use_big_batch ? 
float(p_scale_vars.back()->max_responses[i]) : float((*it)->max_response); + A.create(p_scales.size(), 3, CV_32FC1); + fval.create(p_scales.size(), 1, CV_32FC1); + for (size_t i = 0; i < p_scales.size(); ++i) { + A.at(i, 0) = float(p_scales[i] * p_scales[i]); + A.at(i, 1) = float(p_scales[i]); + A.at(i, 2) = 1; + fval.at(i) = d.threadctxs.back().IF_BIG_BATCH(max[i].response, max.response); } } else { - //only from neighbours - if (index == 0 || index == int(p_scales.size())-1) - return p_scales[uint(index)]; + // only from neighbours + if (index == 0 || index == p_scales.size() - 1) + return p_scales[index]; A = (cv::Mat_(3, 3) << - p_scales[uint(index)-1] * p_scales[uint(index)-1], p_scales[uint(index)-1], 1, - p_scales[uint(index)] * p_scales[uint(index)], p_scales[uint(index)], 1, - p_scales[uint(index)+1] * p_scales[uint(index)+1], p_scales[uint(index)+1], 1); - auto it1 = p_scale_vars.begin(); - std::advance(it1, index-1); - auto it2 = p_scale_vars.begin(); - std::advance(it2, index); - auto it3 = p_scale_vars.begin(); - std::advance(it3, index+1); - fval = (cv::Mat_(3, 1) << (m_use_big_batch ? p_scale_vars.back()->max_responses[uint(index) - 1] - : (*it1)->max_response), - (m_use_big_batch ? p_scale_vars.back()->max_responses[uint(index)] : (*it2)->max_response), - (m_use_big_batch ? p_scale_vars.back()->max_responses[uint(index) + 1] : (*it3)->max_response)); + p_scales[index - 1] * p_scales[index - 1], p_scales[index - 1], 1, + p_scales[index + 0] * p_scales[index + 0], p_scales[index + 0], 1, + p_scales[index + 1] * p_scales[index + 1], p_scales[index + 1], 1); +#ifdef BIG_BATCH + fval = (cv::Mat_(3, 1) << + d.threadctxs.back().max[index - 1].response, + d.threadctxs.back().max[index + 0].response, + d.threadctxs.back().max[index + 1].response); +#else + fval = (cv::Mat_(3, 1) << + d.threadctxs[index - 1].max.response, + d.threadctxs[index + 0].max.response, + d.threadctxs[index + 1].max.response); +#endif } cv::Mat x; cv::solve(A, fval, x, cv::DECOMP_SVD); float a = x.at(0), b = x.at(1); - double scale = p_scales[uint(index)]; + double scale = p_scales[index]; if (a > 0 || a < 0) - scale = double(-b / (2 * a)); + scale = -b / (2 * a); return scale; }
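
The notes below are editor-added illustrations of the algorithms this patch touches; they are not part of the diff, and the values and helper names they introduce are made up. First, the new round_pw2_down() helper that init() uses when fit_size_x == 0 || fit_size_y == 0: it smears the most significant set bit of x into all lower bits (yielding 2^(m+1) - 1), increments to 2^(m+1), then shifts right once, so the result is the largest power of two not exceeding x. A minimal check of that behaviour:

// Illustration only -- what round_pw2_down() from this patch computes.
#include <cassert>

static int round_pw2_down(int x)
{
    // after the loop, every bit below the MSB of x is set, i.e. x == 2^(m+1) - 1
    for (int i = 1; i < 32; i <<= 1)
        x |= x >> i;
    x++;            // x == 2^(m+1)
    return x >> 1;  // largest power of two <= original x
}

int main()
{
    assert(round_pw2_down(1) == 1);
    assert(round_pw2_down(96) == 64);    // e.g. a padded window width that is not a power of two
    assert(round_pw2_down(128) == 128);  // powers of two map to themselves
    assert(round_pw2_down(255) == 128);
    return 0;
}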
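
init() builds the regression target with gaussian_shaped_labels() and then circshift()s it so the peak lands at the top-left corner, which is where a cyclic correlation places the zero-shift response (the patch keeps an assert for exactly this). A sketch of the same construction for a hypothetical 8x8 window, using modular indexing instead of the Range-based circshift():

// Illustration only: gaussian regression target with its peak circularly
// shifted to (0, 0), as gaussian_shaped_labels() + circshift() do in init().
// Window size and sigma below are arbitrary.
#include <opencv2/core/core.hpp>
#include <cmath>
#include <cstdio>

int main()
{
    const int w = 8, h = 8;
    const double sigma = 1.5;

    cv::Mat labels(h, w, CV_32FC1);
    for (int y = -h / 2, j = 0; y < h - h / 2; ++y, ++j)
        for (int x = -w / 2, i = 0; x < w - w / 2; ++x, ++i)
            labels.at<float>(j, i) = float(std::exp(-0.5 * (x * x + y * y) / (sigma * sigma)));

    // circshift by (-w/2, -h/2): move the peak from the centre to the top-left corner
    cv::Mat shifted(h, w, CV_32FC1);
    for (int j = 0; j < h; ++j)
        for (int i = 0; i < w; ++i)
            shifted.at<float>(j, i) = labels.at<float>((j + h / 2) % h, (i + w / 2) % w);

    std::printf("peak at top-left: %f\n", shifted.at<float>(0, 0)); // prints 1.000000
    return 0;
}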
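
train() keeps the reference equation from the KCF paper in a comment, p_model_alphaf = p_yf / (kf + p_lambda), and computes the same quotient through a separately stored numerator (yf * kf) and denominator (kf * (kf + lambda)). The per-frequency-bin identity behind that split, checked on a single made-up complex bin:

// Illustration only: alphaf = yf / (kf + lambda) equals the numerator/denominator
// formulation used in train(); the bin values are invented.
#include <cassert>
#include <complex>

int main()
{
    const double lambda = 1e-4;                  // plays the role of p_lambda
    std::complex<double> yf(0.8, 0.1), kf(2.0, -0.3);

    std::complex<double> direct = yf / (kf + lambda);
    std::complex<double> num    = yf * kf;                 // model_alphaf_num
    std::complex<double> den    = kf * (kf + lambda);      // model_alphaf_den
    std::complex<double> split  = num / den;               // model_alphaf

    assert(std::abs(direct - split) < 1e-12);
    return 0;
}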
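
GaussianCorrelation::operator() evaluates the KCF gaussian kernel in the Fourier domain: per scale, k = exp(-1/sigma^2 * max(0, (||x||^2 + ||y||^2 - 2 * ifft(sum_c xf_c .* conj(yf_c))) / numel)). A single-channel, single-scale sketch of the same formula written with cv::dft and cv::mulSpectrums instead of the project's ComplexMat/FFT wrappers (so the channel summation done by sum_over_channels() is omitted); the 64x64 random inputs are arbitrary:

// Illustration only: single-channel gaussian kernel correlation in the Fourier
// domain, mirroring the formula in GaussianCorrelation::operator().
#include <opencv2/core/core.hpp>

static cv::Mat gaussian_correlation_single(const cv::Mat &x, const cv::Mat &y, double sigma)
{
    CV_Assert(x.type() == CV_32FC1 && y.type() == CV_32FC1 && x.size() == y.size());

    // squared L2 norms (the patch obtains these via ComplexMat::sqr_norm)
    double x_sqr_norm = cv::norm(x, cv::NORM_L2SQR);
    double y_sqr_norm = cv::norm(y, cv::NORM_L2SQR);

    // circular cross-correlation via the Fourier domain: ifft(fft(x) .* conj(fft(y)))
    cv::Mat xf, yf, xyf, xy;
    cv::dft(x, xf, cv::DFT_COMPLEX_OUTPUT);
    cv::dft(y, yf, cv::DFT_COMPLEX_OUTPUT);
    cv::mulSpectrums(xf, yf, xyf, 0, /*conjB=*/true);
    cv::dft(xyf, xy, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);

    // k = exp(-1/sigma^2 * max(0, (||x||^2 + ||y||^2 - 2*xy) / numel))
    double numel = double(x.rows) * x.cols;
    cv::Mat arg = cv::max((x_sqr_norm + y_sqr_norm - 2. * xy) / numel, 0.);
    cv::Mat k;
    cv::exp(-1. / (sigma * sigma) * arg, k);
    return k;
}

int main()
{
    cv::Mat x(64, 64, CV_32FC1), y(64, 64, CV_32FC1);
    cv::randu(x, cv::Scalar(0.f), cv::Scalar(1.f));
    cv::randu(y, cv::Scalar(0.f), cv::Scalar(1.f));
    cv::Mat k = gaussian_correlation_single(x, y, 0.5);
    CV_Assert(k.size() == x.size());
    return 0;
}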
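
sub_pixel_peak() fits the quadratic f(x, y) = a*x^2 + b*x*y + c*y^2 + d*x + e*y + f to the 3x3 neighbourhood of the response maximum (via cv::solve with cv::DECOMP_SVD) and moves the peak to the stationary point of that surface. The closed form of that stationary point, checked on a made-up paraboloid with a known maximum (the constant term is renamed g here so it does not clash with the function name f):

// Illustration only: the stationary point used by sub_pixel_peak().
// For f(x, y) = a*x^2 + b*x*y + c*y^2 + d*x + e*y + g, setting grad f = 0 gives
//   x = (b*e - 2*c*d) / (4*a*c - b*b),  y = (b*d - 2*a*e) / (4*a*c - b*b)
#include <cassert>
#include <cmath>

struct Peak { double x, y; };

static Peak quadratic_peak(double a, double b, double c, double d, double e)
{
    double det = 4 * a * c - b * b;   // assumed non-zero, i.e. a proper extremum
    return { (b * e - 2 * c * d) / det, (b * d - 2 * a * e) / det };
}

int main()
{
    // f(x, y) = -(x - 0.3)^2 - (y + 0.2)^2 peaks at (0.3, -0.2);
    // expanded coefficients: a = -1, b = 0, c = -1, d = 0.6, e = -0.4
    Peak p = quadratic_peak(-1, 0, -1, 0.6, -0.4);
    assert(std::fabs(p.x - 0.3) < 1e-12);
    assert(std::fabs(p.y + 0.2) < 1e-12);
    return 0;
}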
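
sub_grid_scale() is the scale-axis analogue: it fits a 1-D parabola r(s) = a*s^2 + b*s + c through the weighted maximum responses of neighbouring scales and returns the vertex -b / (2*a). A sketch with three hypothetical scale/response pairs, using the same cv::solve / cv::DECOMP_SVD call as the patch:

// Illustration only: 1-D parabola fit over neighbouring scales, as in
// sub_grid_scale(); the scale/response values are invented.
#include <opencv2/core/core.hpp>
#include <cstdio>

int main()
{
    const double scales[3]    = {0.95, 1.00, 1.05};  // a hypothetical p_scale_step neighbourhood
    const double responses[3] = {0.41, 0.52, 0.45};  // hypothetical weighted max responses

    cv::Mat A(3, 3, CV_32FC1), fval(3, 1, CV_32FC1);
    for (int i = 0; i < 3; ++i) {
        A.at<float>(i, 0) = float(scales[i] * scales[i]);
        A.at<float>(i, 1) = float(scales[i]);
        A.at<float>(i, 2) = 1.f;
        fval.at<float>(i) = float(responses[i]);
    }

    cv::Mat x;
    cv::solve(A, fval, x, cv::DECOMP_SVD);

    float a = x.at<float>(0), b = x.at<float>(1);
    double scale = (a != 0.f) ? -b / (2 * a) : scales[1];
    std::printf("interpolated scale: %f\n", scale);  // ~1.0056, between the two best scales
    return 0;
}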