X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/blobdiff_plain/169236cb75d0206b75dafe0498756c3f003e55d2..fe1f105e94b12100d225e50354e1bd054fa7cf61:/src/kcf.cpp diff --git a/src/kcf.cpp b/src/kcf.cpp index 7b1421c..6668925 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -2,6 +2,9 @@ #include #include #include +#include "threadctx.hpp" +#include "debug.h" +#include #ifdef FFTW #include "fft_fftw.h" @@ -18,15 +21,7 @@ #include #endif // OPENMP -#define DEBUG_PRINT(obj) \ - if (m_debug) { \ - std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl; \ - } -#define DEBUG_PRINTM(obj) \ - if (m_debug) { \ - std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl \ - << (obj) << std::endl; \ - } +DbgTracer __dbgTracer; template T clamp(const T& n, const T& lower, const T& upper) @@ -40,10 +35,38 @@ void clamp2(T& n, const T& lower, const T& upper) n = std::max(lower, std::min(n, upper)); } +#if CV_MAJOR_VERSION < 3 +template static inline +cv::Size_<_Tp> operator / (const cv::Size_<_Tp>& a, _Tp b) +{ + return cv::Size_<_Tp>(a.width / b, a.height / b); +} + +template static inline +cv::Point_<_Tp> operator / (const cv::Point_<_Tp>& a, double b) +{ + return cv::Point_<_Tp>(a.x / b, a.y / b); +} + +#endif + +class Kcf_Tracker_Private { + friend KCF_Tracker; + + Kcf_Tracker_Private(const KCF_Tracker &kcf) : kcf(kcf) {} + + const KCF_Tracker &kcf; +#ifdef BIG_BATCH + std::vector threadctxs; +#else + ScaleRotVector threadctxs{kcf.p_scales, kcf.p_angles}; +#endif +}; + KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) - : fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma), - p_lambda(lambda), p_interp_factor(interp_factor), p_cell_size(cell_size) + : p_cell_size(cell_size), fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma), + p_lambda(lambda), p_interp_factor(interp_factor) { } @@ -54,8 +77,52 @@ KCF_Tracker::~KCF_Tracker() delete &fft; } +void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_factor) +{ + TRACE(""); + + // obtain a sub-window for training + get_features(input_rgb, input_gray, nullptr, p_current_center.x, p_current_center.y, + p_windows_size.width, p_windows_size.height, + p_current_scale, p_current_angle).copyTo(model->patch_feats.scale(0)); + DEBUG_PRINT(model->patch_feats); + fft.forward_window(model->patch_feats, model->xf, model->temp); + DEBUG_PRINTM(model->xf); + model->model_xf = model->model_xf * (1. - interp_factor) + model->xf * interp_factor; + DEBUG_PRINTM(model->model_xf); + + if (m_use_linearkernel) { + ComplexMat xfconj = model->xf.conj(); + model->model_alphaf_num = xfconj.mul(model->yf); + model->model_alphaf_den = (model->xf * xfconj); + } else { + // Kernel Ridge Regression, calculate alphas (in Fourier domain) + cv::Size sz(Fft::freq_size(feature_size)); + ComplexMat kf(sz.height, sz.width, 1); + (*gaussian_correlation)(kf, model->model_xf, model->model_xf, p_kernel_sigma, true, *this); + DEBUG_PRINTM(kf); + model->model_alphaf_num = model->yf * kf; + model->model_alphaf_den = kf * (kf + p_lambda); + } + model->model_alphaf = model->model_alphaf_num / model->model_alphaf_den; + DEBUG_PRINTM(model->model_alphaf); + // p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training +} + +static int round_pw2_down(int x) +{ + for (int i = 1; i < 32; i <<= 1) + x |= x >> i; + x++; + return x >> 1; +} + + void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int fit_size_y) { + __dbgTracer.debug = m_debug; + TRACE(""); + // check boundary, enforce min size double x1 = bbox.x, x2 = bbox.x + bbox.width, y1 = bbox.y, y2 = bbox.y + bbox.height; if (x1 < 0) x1 = 0.; @@ -86,10 +153,10 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f } } - p_pose.w = x2 - x1; - p_pose.h = y2 - y1; - p_pose.cx = x1 + p_pose.w / 2.; - p_pose.cy = y1 + p_pose.h / 2.; + p_init_pose.w = x2 - x1; + p_init_pose.h = y2 - y1; + p_init_pose.cx = x1 + p_init_pose.w / 2.; + p_init_pose.cy = y1 + p_init_pose.h / 2.; cv::Mat input_gray, input_rgb = img.clone(); if (img.channels() == 3) { @@ -99,90 +166,59 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f img.convertTo(input_gray, CV_32FC1); // don't need too large image - if (p_pose.w * p_pose.h > 100. * 100. && (fit_size_x == -1 || fit_size_y == -1)) { + if (p_init_pose.w * p_init_pose.h > 100. * 100.) { std::cout << "resizing image by factor of " << 1 / p_downscale_factor << std::endl; p_resize_image = true; - p_pose.scale(p_downscale_factor); + p_init_pose.scale(p_downscale_factor); cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (!(fit_size_x == -1 && fit_size_y == -1)) { - if (fit_size_x % p_cell_size != 0 || fit_size_y % p_cell_size != 0) { - std::cerr << "Error: Fit size is not multiple of HOG cell size (" << p_cell_size << ")" << std::endl; - std::exit(EXIT_FAILURE); - } - p_scale_factor_x = (double)fit_size_x / round(p_pose.w * (1. + p_padding)); - p_scale_factor_y = (double)fit_size_y / round(p_pose.h * (1. + p_padding)); - std::cout << "resizing image horizontaly by factor of " << p_scale_factor_x << " and verticaly by factor of " - << p_scale_factor_y << std::endl; - p_fit_to_pw2 = true; - p_pose.scale_x(p_scale_factor_x); - p_pose.scale_y(p_scale_factor_y); - if (fabs(p_scale_factor_x - 1) > p_floating_error || fabs(p_scale_factor_y - 1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } - } } // compute win size + fit to fhog cell size - p_windows_size.width = round(p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size; - p_windows_size.height = round(p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; - p_roi.width = p_windows_size.width / p_cell_size; - p_roi.height = p_windows_size.height / p_cell_size; + p_windows_size.width = round(p_init_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size; + p_windows_size.height = round(p_init_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; + + if (fit_size_x == 0 || fit_size_y == 0) { + // Round down to the next highest power of 2 + fit_size = cv::Size(round_pw2_down(p_windows_size.width), + round_pw2_down(p_windows_size.height)); + } else if (fit_size_x == -1 || fit_size_y == -1) { + fit_size = p_windows_size; + } else { + fit_size = cv::Size(fit_size_x, fit_size_y); + } - p_num_of_feats = 31; - if (m_use_color) p_num_of_feats += 3; - if (m_use_cnfeat) p_num_of_feats += 10; + feature_size = fit_size / p_cell_size; p_scales.clear(); - if (m_use_scale) - for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i) - p_scales.push_back(std::pow(p_scale_step, i)); - else - p_scales.push_back(1.); + for (int i = -int(p_num_scales - 1) / 2; i <= int(p_num_scales) / 2; ++i) + p_scales.push_back(std::pow(p_scale_step, i)); -#ifdef CUFFT - if (p_roi.height * (p_roi.width / 2 + 1) > 1024) { - std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set " - "the window dimensions so its size is less or equal to " - << 1024 * p_cell_size * p_cell_size * 2 + 1 - << " pixels . Currently the size of the window is: " << p_windows_size.width << "x" << p_windows_size.height - << " which is " << p_windows_size.width * p_windows_size.height << " pixels. " << std::endl; - std::exit(EXIT_FAILURE); - } + p_angles.clear(); + for (int i = -int(p_num_angles - 1) / 2; i <= int(p_num_angles) / 2; ++i) + p_angles.push_back(i * p_angle_step); +#ifdef CUFFT if (m_use_linearkernel) { std::cerr << "cuFFT supports only Gaussian kernel." << std::endl; std::exit(EXIT_FAILURE); } - CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost)); - p_rot_labels_data = DynMem(p_roi.width * p_roi.height * sizeof(float)); - p_rot_labels = cv::Mat(p_roi, CV_32FC1, p_rot_labels_data.hostMem()); -#else - p_xf.create(p_roi.height, p_roi.height / 2 + 1, p_num_of_feats); #endif -#if defined(CUFFT) || defined(FFTW) - uint width = p_roi.width / 2 + 1; + model.reset(new Model(feature_size, p_num_of_feats)); + d.reset(new Kcf_Tracker_Private(*this)); + +#ifndef BIG_BATCH + for (auto scale: p_scales) + for (auto angle : p_angles) + d->threadctxs.emplace_back(feature_size, p_num_of_feats, scale, angle); #else - uint width = p_roi.width; + d->threadctxs.emplace_back(feature_size, p_num_of_feats, p_scales, p_angles); #endif - p_model_xf.create(p_roi.height, width, p_num_of_feats); - p_yf.create(p_roi.height, width, 1); - p_xf.create(p_roi.height, width, p_num_of_feats); - - int max = m_use_big_batch ? 2 : p_num_scales; - for (int i = 0; i < max; ++i) { - if (m_use_big_batch && i == 1) - p_threadctxs.emplace_back(p_roi, p_num_of_feats * p_num_scales, 1, p_num_scales); - else - p_threadctxs.emplace_back(p_roi, p_num_of_feats, p_scales[i], 1); - } + gaussian_correlation.reset(new GaussianCorrelation(1, p_num_of_feats, feature_size)); + + p_current_center = p_init_pose.center(); p_current_scale = 1.; double min_size_ratio = std::max(5. * p_cell_size / p_windows_size.width, 5. * p_cell_size / p_windows_size.height); @@ -192,68 +228,28 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f p_min_max_scale[0] = std::pow(p_scale_step, std::ceil(std::log(min_size_ratio) / log(p_scale_step))); p_min_max_scale[1] = std::pow(p_scale_step, std::floor(std::log(max_size_ratio) / log(p_scale_step))); - std::cout << "init: img size " << img.cols << "x" << img.rows << std::endl; - std::cout << "init: win size " << p_windows_size.width << "x" << p_windows_size.height << std::endl; - std::cout << "init: FFT size " << p_roi.width << "x" << p_roi.height << std::endl; + std::cout << "init: img size " << img.size() << std::endl; + std::cout << "init: win size " << p_windows_size; + if (p_windows_size != fit_size) + std::cout << " resized to " << fit_size; + std::cout << std::endl; + std::cout << "init: FFT size " << feature_size << std::endl; std::cout << "init: min max scales factors: " << p_min_max_scale[0] << " " << p_min_max_scale[1] << std::endl; - p_output_sigma = std::sqrt(p_pose.w * p_pose.h) * p_output_sigma_factor / p_cell_size; + p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h * double(fit_size.area()) / p_windows_size.area()) + * p_output_sigma_factor / p_cell_size; - fft.init(p_roi.width, p_roi.height, p_num_of_feats, p_num_scales, m_use_big_batch); - fft.set_window(cosine_window_function(p_roi.width, p_roi.height)); + fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales * p_num_angles); + fft.set_window(MatDynMem(cosine_window_function(feature_size.width, feature_size.height))); // window weights, i.e. labels - fft.forward( - gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf, - m_use_cuda ? p_rot_labels_data.deviceMem() : nullptr, p_threadctxs.front().stream); - DEBUG_PRINTM(p_yf); - - // obtain a sub-window for training initial model - p_threadctxs.front().patch_feats.clear(); - get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size.width, p_windows_size.height, - p_threadctxs.front()); - fft.forward_window(p_threadctxs.front().patch_feats, p_model_xf, p_threadctxs.front().fw_all, - m_use_cuda ? p_threadctxs.front().data_features.deviceMem() : nullptr, - p_threadctxs.front().stream); - DEBUG_PRINTM(p_model_xf); -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - p_threadctxs.front().model_xf = p_model_xf; - p_threadctxs.front().model_xf.set_stream(p_threadctxs.front().stream); - p_yf.set_stream(p_threadctxs.front().stream); - p_model_xf.set_stream(p_threadctxs.front().stream); - p_xf.set_stream(p_threadctxs.front().stream); -#endif - - if (m_use_linearkernel) { - ComplexMat xfconj = p_model_xf.conj(); - p_model_alphaf_num = xfconj.mul(p_yf); - p_model_alphaf_den = (p_model_xf * xfconj); - } else { - // Kernel Ridge Regression, calculate alphas (in Fourier domain) -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - gaussian_correlation(p_threadctxs.front(), p_threadctxs.front().model_xf, p_threadctxs.front().model_xf, - p_kernel_sigma, true); -#else - gaussian_correlation(p_threadctxs.front(), p_model_xf, p_model_xf, p_kernel_sigma, true); -#endif - DEBUG_PRINTM(p_threadctxs.front().kf); - p_model_alphaf_num = p_yf * p_threadctxs.front().kf; - DEBUG_PRINTM(p_model_alphaf_num); - p_model_alphaf_den = p_threadctxs.front().kf * (p_threadctxs.front().kf + float(p_lambda)); - DEBUG_PRINTM(p_model_alphaf_den); - } - p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; - DEBUG_PRINTM(p_model_alphaf); - // p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training + MatScales gsl(1, feature_size); + gaussian_shaped_labels(p_output_sigma, feature_size.width, feature_size.height).copyTo(gsl.plane(0)); + fft.forward(gsl, model->yf); + DEBUG_PRINTM(model->yf); -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - for (auto it = p_threadctxs.begin(); it != p_threadctxs.end(); ++it) { - it->model_xf = p_model_xf; - it->model_xf.set_stream(it->stream); - it->model_alphaf = p_model_alphaf; - it->model_alphaf.set_stream(it->stream); - } -#endif + // train initial model + train(input_rgb, input_gray, 1.0); } void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat &img, int fit_size_x, int fit_size_y) @@ -263,34 +259,24 @@ void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat &img, int fit_size_x, int void KCF_Tracker::updateTrackerPosition(BBox_c &bbox) { + BBox_c tmp = bbox; if (p_resize_image) { - BBox_c tmp = bbox; tmp.scale(p_downscale_factor); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else if (p_fit_to_pw2) { - BBox_c tmp = bbox; - tmp.scale_x(p_scale_factor_x); - tmp.scale_y(p_scale_factor_y); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else { - p_pose.cx = bbox.cx; - p_pose.cy = bbox.cy; } + p_current_center = tmp.center(); } BBox_c KCF_Tracker::getBBox() { - BBox_c tmp = p_pose; - tmp.w *= p_current_scale; - tmp.h *= p_current_scale; - - if (p_resize_image) tmp.scale(1 / p_downscale_factor); - if (p_fit_to_pw2) { - tmp.scale_x(1 / p_scale_factor_x); - tmp.scale_y(1 / p_scale_factor_y); - } + BBox_c tmp; + tmp.cx = p_current_center.x; + tmp.cy = p_current_center.y; + tmp.w = p_init_pose.w * p_current_scale; + tmp.h = p_init_pose.h * p_current_scale; + tmp.a = p_current_angle; + + if (p_resize_image) + tmp.scale(1 / p_downscale_factor); return tmp; } @@ -300,9 +286,116 @@ double KCF_Tracker::getFilterResponse() const return this->max_response; } +void KCF_Tracker::resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray) +{ + if (p_resize_image) { + cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); + cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); + } +} + +static void drawCross(cv::Mat &img, cv::Point center, bool green) +{ + cv::Scalar col = green ? cv::Scalar(0, 1, 0) : cv::Scalar(0, 0, 1); + cv::line(img, cv::Point(center.x, 0), cv::Point(center.x, img.size().height), col); + cv::line(img, cv::Point(0, center.y), cv::Point(img.size().height, center.y), col); +} + +static cv::Point2d wrapAroundFreq(cv::Point2d pt, cv::Mat &resp_map) +{ + if (pt.y > resp_map.rows / 2) // wrap around to negative half-space of vertical axis + pt.y = pt.y - resp_map.rows; + if (pt.x > resp_map.cols / 2) // same for horizontal axis + pt.x = pt.x - resp_map.cols; + return pt; +} + +double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) const +{ + double max; + const auto &vec = IF_BIG_BATCH(d->threadctxs[0].max, d->threadctxs); + +#ifndef BIG_BATCH + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx &a, const ThreadCtx &b) + { return a.max.response < b.max.response; }); +#else + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx::Max &a, const ThreadCtx::Max &b) + { return a.response < b.response; }); +#endif + assert(max_it != vec.end()); + max = max_it->IF_BIG_BATCH(response, max.response); + + max_idx = std::distance(vec.begin(), max_it); + + cv::Point2i max_response_pt = IF_BIG_BATCH(max_it->loc, max_it->max.loc); + cv::Mat max_response_map = IF_BIG_BATCH(d->threadctxs[0].response.plane(max_idx), + max_it->response.plane(0)); + + DEBUG_PRINTM(max_response_map); + DEBUG_PRINT(max_response_pt); + + max_response_pt = wrapAroundFreq(max_response_pt, max_response_map); + + // sub pixel quadratic interpolation from neighbours + if (m_use_subpixel_localization) { + new_location = sub_pixel_peak(max_response_pt, max_response_map); + } else { + new_location = max_response_pt; + } + DEBUG_PRINT(new_location); + + if (m_visual_debug != vd::NONE) { + const bool fit = 1; + int w = fit ? 100 : (m_visual_debug == vd::PATCH ? fit_size.width : feature_size.width); + int h = fit ? 100 : (m_visual_debug == vd::PATCH ? fit_size.height : feature_size.height); + cv::Mat all_responses((h + 1) * p_num_scales - 1, + (w + 1) * p_num_angles - 1, CV_32FC3, cv::Scalar::all(0)); + for (size_t i = 0; i < p_num_scales; ++i) { + for (size_t j = 0; j < p_num_angles; ++j) { + auto &threadctx = d->IF_BIG_BATCH(threadctxs[0], threadctxs(i, j)); + cv::Mat tmp; + cv::Point2d cross = threadctx.IF_BIG_BATCH(max(i, j), max).loc; + cross = wrapAroundFreq(cross, max_response_map); + if (m_visual_debug == vd::PATCH ) { + threadctx.dbg_patch IF_BIG_BATCH((i, j),) + .convertTo(tmp, all_responses.type(), 1.0 / 255); + cross.x = cross.x / fit_size.width * tmp.cols + tmp.cols / 2; + cross.y = cross.y / fit_size.height * tmp.rows + tmp.rows / 2; + } else { + cv::cvtColor(threadctx.response.plane(IF_BIG_BATCH(threadctx.max.getIdx(i, j), 0)), + tmp, cv::COLOR_GRAY2BGR); + tmp /= max; // Normalize to 1 + cross += cv::Point2d(tmp.size())/2; + tmp = circshift(tmp, -tmp.cols/2, -tmp.rows/2); + } + bool green = false; + if (&*max_it == &IF_BIG_BATCH(threadctx.max(i, j), threadctx)) { + // Show the green cross at position of sub-pixel interpolation (if enabled) + cross = new_location + cv::Point2d(tmp.size())/2; + green = true; + } + cross.x *= double(w)/tmp.cols; + cross.y *= double(h)/tmp.rows; + cv::resize(tmp, tmp, cv::Size(w, h)); + drawCross(tmp, cross, green); + cv::Mat resp_roi(all_responses, cv::Rect(j * (w+1), i * (h+1), w, h)); + tmp.copyTo(resp_roi); + } + } + cv::namedWindow("KCF visual debug", CV_WINDOW_AUTOSIZE); + cv::imshow("KCF visual debug", all_responses); + } + + return max; +} + void KCF_Tracker::track(cv::Mat &img) { - if (m_debug) std::cout << "NEW FRAME" << '\n'; + __dbgTracer.debug = m_debug; + TRACE(""); + cv::Mat input_gray, input_rgb = img.clone(); if (img.channels() == 3) { cv::cvtColor(img, input_gray, CV_BGR2GRAY); @@ -311,237 +404,144 @@ void KCF_Tracker::track(cv::Mat &img) img.convertTo(input_gray, CV_32FC1); // don't need too large image - if (p_resize_image) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (p_fit_to_pw2 && fabs(p_scale_factor_x - 1) > p_floating_error && - fabs(p_scale_factor_y - 1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } - } - - max_response = -1.; - ThreadCtx *max = nullptr; - cv::Point2i *max_response_pt = nullptr; - cv::Mat *max_response_map = nullptr; + resizeImgs(input_rgb, input_gray); #ifdef ASYNC - for (auto &it : p_threadctxs) + for (auto &it : d->threadctxs) it.async_res = std::async(std::launch::async, [this, &input_gray, &input_rgb, &it]() -> void { - scale_track(it, input_rgb, input_gray); + it.track(*this, input_rgb, input_gray); }); - for (auto const &it : p_threadctxs) + for (auto const &it : d->threadctxs) it.async_res.wait(); #else // !ASYNC - // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list NORMAL_OMP_PARALLEL_FOR - for (uint i = 0; i < p_threadctxs.size(); ++i) - scale_track(p_threadctxs[i], input_rgb, input_gray); + for (uint i = 0; i < d->threadctxs.size(); ++i) + d->threadctxs[i].track(*this, input_rgb, input_gray); #endif -#ifndef BIG_BATCH - for (auto &it : p_threadctxs) { - if (it.max_response > max_response) { - max_response = it.max_response; - max_response_pt = &it.max_loc; - max_response_map = &it.response; - max = ⁢ - } - } -#else - // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list - for (uint j = 0; j < p_scales.size(); ++j) { - if (p_threadctxs[0].max_responses[j] > max_response) { - max_response = p_threadctxs[0].max_responses[j]; - max_response_pt = &p_threadctxs[0].max_locs[j]; - max_response_map = &p_threadctxs[0].response_maps[j]; - max = &p_threadctxs[0]; - } - } -#endif + cv::Point2d new_location; + uint max_idx; + max_response = findMaxReponse(max_idx, new_location); - DEBUG_PRINTM(*max_response_map); - DEBUG_PRINT(*max_response_pt); + double angle_change = d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).angle(max_idx); + p_current_angle += angle_change; - // sub pixel quadratic interpolation from neighbours - if (max_response_pt->y > max_response_map->rows / 2) // wrap around to negative half-space of vertical axis - max_response_pt->y = max_response_pt->y - max_response_map->rows; - if (max_response_pt->x > max_response_map->cols / 2) // same for horizontal axis - max_response_pt->x = max_response_pt->x - max_response_map->cols; + new_location.x = new_location.x * cos(-p_current_angle/180*M_PI) + new_location.y * sin(-p_current_angle/180*M_PI); + new_location.y = new_location.y * cos(-p_current_angle/180*M_PI) - new_location.x * sin(-p_current_angle/180*M_PI); - cv::Point2f new_location(max_response_pt->x, max_response_pt->y); - DEBUG_PRINT(new_location); + new_location.x *= double(p_windows_size.width) / fit_size.width; + new_location.y *= double(p_windows_size.height) / fit_size.height; - if (m_use_subpixel_localization) - new_location = sub_pixel_peak(*max_response_pt, *max_response_map); - DEBUG_PRINT(new_location); + p_current_center += p_current_scale * p_cell_size * new_location; - p_pose.cx += p_current_scale * p_cell_size * double(new_location.x); - p_pose.cy += p_current_scale * p_cell_size * double(new_location.y); - if (p_fit_to_pw2) { - clamp2(p_pose.cx, 0.0, (img.cols * p_scale_factor_x) - 1); - clamp2(p_pose.cy, 0.0, (img.rows * p_scale_factor_y) - 1); - } else { - clamp2(p_pose.cx, 0.0, img.cols - 1.0); - clamp2(p_pose.cy, 0.0, img.rows - 1.0); - } + clamp2(p_current_center.x, 0.0, img.cols - 1.0); + clamp2(p_current_center.y, 0.0, img.rows - 1.0); // sub grid scale interpolation if (m_use_subgrid_scale) { - auto it = std::find_if(p_threadctxs.begin(), p_threadctxs.end(), [max](ThreadCtx &ctx) { return &ctx == max; }); - p_current_scale *= sub_grid_scale(std::distance(p_threadctxs.begin(), it)); + p_current_scale *= sub_grid_scale(max_idx); } else { - p_current_scale *= max->scale; + p_current_scale *= d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).scale(max_idx); } clamp2(p_current_scale, p_min_max_scale[0], p_min_max_scale[1]); - ThreadCtx &ctx = p_threadctxs.front(); - // obtain a subwindow for training at newly estimated target position - ctx.patch_feats.clear(); - get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size.width, p_windows_size.height, - ctx, p_current_scale); - fft.forward_window(ctx.patch_feats, p_xf, ctx.fw_all, - m_use_cuda ? ctx.data_features.deviceMem() : nullptr, ctx.stream); - - // subsequent frames, interpolate model - p_model_xf = p_model_xf * float((1. - p_interp_factor)) + p_xf * float(p_interp_factor); - - ComplexMat alphaf_num, alphaf_den; - - if (m_use_linearkernel) { - ComplexMat xfconj = p_xf.conj(); - alphaf_num = xfconj.mul(p_yf); - alphaf_den = (p_xf * xfconj); - } else { - // Kernel Ridge Regression, calculate alphas (in Fourier domain) - gaussian_correlation(ctx, p_xf, p_xf, p_kernel_sigma, - true); - // ComplexMat alphaf = p_yf / (kf + p_lambda); //equation for fast training - // p_model_alphaf = p_model_alphaf * (1. - p_interp_factor) + alphaf * p_interp_factor; - alphaf_num = p_yf * ctx.kf; - alphaf_den = ctx.kf * (ctx.kf + float(p_lambda)); - } - - p_model_alphaf_num = p_model_alphaf_num * float((1. - p_interp_factor)) + alphaf_num * float(p_interp_factor); - p_model_alphaf_den = p_model_alphaf_den * float((1. - p_interp_factor)) + alphaf_den * float(p_interp_factor); - p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - for (auto it = p_threadctxs.begin(); it != p_threadctxs.end(); ++it) { - it->model_xf = p_model_xf; - it->model_xf.set_stream(it->stream); - it->model_alphaf = p_model_alphaf; - it->model_alphaf.set_stream(it->stream); - } -#endif + // train at newly estimated target position + train(input_rgb, input_gray, p_interp_factor); } -void KCF_Tracker::scale_track(ThreadCtx &vars, cv::Mat &input_rgb, cv::Mat &input_gray) +void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input_gray) { - if (m_use_big_batch) { - vars.patch_feats.clear(); - BIG_BATCH_OMP_PARALLEL_FOR - for (uint i = 0; i < p_num_scales; ++i) { - get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size.width, - this->p_windows_size.height, vars, this->p_current_scale * this->p_scales[i]); - } - } else { - get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size.width, - this->p_windows_size.height, vars, this->p_current_scale * vars.scale); + TRACE(""); + + BIG_BATCH_OMP_PARALLEL_FOR + for (uint i = 0; i < IF_BIG_BATCH(max.size(), 1); ++i) + { + kcf.get_features(input_rgb, input_gray, &dbg_patch IF_BIG_BATCH([i],), + kcf.p_current_center.x, kcf.p_current_center.y, + kcf.p_windows_size.width, kcf.p_windows_size.height, + kcf.p_current_scale * IF_BIG_BATCH(max.scale(i), scale), + kcf.p_current_angle + IF_BIG_BATCH(max.angle(i), angle)) + .copyTo(patch_feats.scale(i)); + DEBUG_PRINT(patch_feats.scale(i)); } - fft.forward_window(vars.patch_feats, vars.zf, vars.fw_all, m_use_cuda ? vars.data_features.deviceMem() : nullptr, - vars.stream); - DEBUG_PRINTM(vars.zf); + kcf.fft.forward_window(patch_feats, zf, temp); + DEBUG_PRINTM(zf); - if (m_use_linearkernel) { - vars.kzf = m_use_big_batch ? (vars.zf.mul2(this->p_model_alphaf)).sum_over_channels() - : (p_model_alphaf * vars.zf).sum_over_channels(); - fft.inverse(vars.kzf, vars.response, m_use_cuda ? vars.data_i_1ch.deviceMem() : nullptr, vars.stream); + if (kcf.m_use_linearkernel) { + kzf = zf.mul(kcf.model->model_alphaf).sum_over_channels(); } else { -#if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP)) - gaussian_correlation(vars, vars.zf, vars.model_xf, this->p_kernel_sigma); - vars.kzf = vars.model_alphaf * vars.kzf; -#else - gaussian_correlation(vars, vars.zf, this->p_model_xf, this->p_kernel_sigma); - DEBUG_PRINTM(this->p_model_alphaf); - DEBUG_PRINTM(vars.kzf); - vars.kzf = m_use_big_batch ? vars.kzf.mul(this->p_model_alphaf) : this->p_model_alphaf * vars.kzf; -#endif - fft.inverse(vars.kzf, vars.response, m_use_cuda ? vars.data_i_1ch.deviceMem() : nullptr, vars.stream); + gaussian_correlation(kzf, zf, kcf.model->model_xf, kcf.p_kernel_sigma, false, kcf); + DEBUG_PRINTM(kzf); + kzf = kzf.mul(kcf.model->model_alphaf); } + kcf.fft.inverse(kzf, response); - DEBUG_PRINTM(vars.response); + DEBUG_PRINTM(response); /* target location is at the maximum response. we must take into account the fact that, if the target doesn't move, the peak will appear at the top-left corner, not at the center (this is discussed in the paper). the responses wrap around cyclically. */ - if (m_use_big_batch) { - cv::split(vars.response, vars.response_maps); - - for (size_t i = 0; i < p_scales.size(); ++i) { - double min_val, max_val; - cv::Point2i min_loc, max_loc; - cv::minMaxLoc(vars.response_maps[i], &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(max_loc); - double weight = p_scales[i] < 1. ? p_scales[i] : 1. / p_scales[i]; - vars.max_responses[i] = max_val * weight; - vars.max_locs[i] = max_loc; - } - } else { - double min_val; - cv::Point2i min_loc; - cv::minMaxLoc(vars.response, &min_val, &vars.max_val, &min_loc, &vars.max_loc); + double min_val, max_val; + cv::Point2i min_loc, max_loc; +#ifdef BIG_BATCH + for (size_t i = 0; i < max.size(); ++i) { + cv::minMaxLoc(response.plane(i), &min_val, &max_val, &min_loc, &max_loc); + DEBUG_PRINT(max_loc); + double weight = kcf.p_scales[i] < 1. ? kcf.p_scales[i] : 1. / kcf.p_scales[i]; + max[i].response = max_val * weight; + max[i].loc = max_loc; + } +#else + cv::minMaxLoc(response.plane(0), &min_val, &max_val, &min_loc, &max_loc); - DEBUG_PRINT(vars.max_loc); + DEBUG_PRINT(max_loc); + DEBUG_PRINT(max_val); - double weight = vars.scale < 1. ? vars.scale : 1. / vars.scale; - vars.max_response = vars.max_val * weight; - } - return; + double weight = scale < 1. ? scale : 1. / scale; + max.response = max_val * weight; + max.loc = max_loc; +#endif } // **************************************************************************** -void KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, int cx, int cy, int size_x, int size_y, - ThreadCtx &vars, double scale) +cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, cv::Mat *dbg_patch, + int cx, int cy, int size_x, int size_y, double scale, double angle) const { - int size_x_scaled = floor(size_x * scale); - int size_y_scaled = floor(size_y * scale); + cv::Size scaled = cv::Size(floor(size_x * scale), floor(size_y * scale)); - cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled); - cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled); + cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height, angle); + cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height, angle); + + if (dbg_patch) + patch_rgb.copyTo(*dbg_patch); // resize to default size - if (scale > 1.) { + if (scaled.area() > fit_size.area()) { // if we downsample use INTER_AREA interpolation - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA); + // note: this is just a guess - we may downsample in X and upsample in Y (or vice versa) + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_AREA); } else { - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR); + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_LINEAR); } // get hog(Histogram of Oriented Gradients) features - vars.patch_feats = FHoG::extract(patch_gray, 2, p_cell_size, 9); + std::vector hog_feat = FHoG::extract(patch_gray, 2, p_cell_size, 9); // get color rgb features (simple r,g,b channels) std::vector color_feat; if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) { // resize to default size - if (scale > 1.) { + if (scaled.area() > (fit_size / p_cell_size).area()) { // if we downsample use INTER_AREA interpolation - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., - cv::INTER_AREA); + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_AREA); } else { - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., - cv::INTER_LINEAR); + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_LINEAR); } } @@ -561,9 +561,15 @@ void KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, int cx, std::vector cn_feat = CNFeat::extract(patch_rgb); color_feat.insert(color_feat.end(), cn_feat.begin(), cn_feat.end()); } - BIG_BATCH_OMP_ORDERED - vars.patch_feats.insert(vars.patch_feats.end(), color_feat.begin(), color_feat.end()); - return; + + hog_feat.insert(hog_feat.end(), color_feat.begin(), color_feat.end()); + + int size[] = {p_num_of_feats, feature_size.height, feature_size.width}; + cv::Mat result(3, size, CV_32F); + for (uint i = 0; i < hog_feat.size(); ++i) + hog_feat[i].copyTo(cv::Mat(size[1], size[2], CV_32FC1, result.ptr(i))); + + return result; } cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) @@ -583,25 +589,17 @@ cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) } // rotate so that 1 is at top-left corner (see KCF paper for explanation) -#ifdef CUFFT - cv::Mat tmp = circshift(labels, range_x[0], range_y[0]); - tmp.copyTo(p_rot_labels); - - assert(p_rot_labels.at(0, 0) >= 1.f - 1e-10f); - return tmp; -#else - cv::Mat rot_labels = circshift(labels, range_x[0], range_y[0]); + MatDynMem rot_labels = circshift(labels, range_x[0], range_y[0]); // sanity check, 1 at top left corner assert(rot_labels.at(0, 0) >= 1.f - 1e-10f); return rot_labels; -#endif } -cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) +cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) const { - cv::Mat rot_patch(patch.size(), CV_32FC1); - cv::Mat tmp_x_rot(patch.size(), CV_32FC1); + cv::Mat rot_patch(patch.size(), patch.type()); + cv::Mat tmp_x_rot(patch.size(), patch.type()); // circular rotate x-axis if (x_rot < 0) { @@ -679,14 +677,18 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) // Returns sub-window of image input centered at [cx, cy] coordinates), // with size [width, height]. If any pixels are outside of the image, // they will replicate the values at the borders. -cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height) +cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height, double angle) const { cv::Mat patch; - int x1 = cx - width / 2; - int y1 = cy - height / 2; - int x2 = cx + width / 2; - int y2 = cy + height / 2; + cv::Size sz(width, height); + cv::RotatedRect rr(cv::Point2f(cx, cy), sz, angle); + cv::Rect bb = rr.boundingRect(); + + int x1 = bb.tl().x; + int y1 = bb.tl().y; + int x2 = bb.br().x; + int y2 = bb.br().y; // out of image if (x1 >= input.cols || y1 >= input.rows || x2 < 0 || y2 < 0) { @@ -727,75 +729,61 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid // cv::waitKey(); } + cv::Point2f src_pts[4]; + cv::RotatedRect(cv::Point2f(patch.size()) / 2.0, sz, angle).points(src_pts); + cv::Point2f dst_pts[3] = { cv::Point2f(0, height), cv::Point2f(0, 0), cv::Point2f(width, 0)}; + auto rot = cv::getAffineTransform(src_pts, dst_pts); + cv::warpAffine(patch, patch, rot, sz); + // sanity check assert(patch.cols == width && patch.rows == height); return patch; } -void KCF_Tracker::gaussian_correlation(struct ThreadCtx &vars, const ComplexMat &xf, const ComplexMat &yf, - double sigma, bool auto_correlation) +void KCF_Tracker::GaussianCorrelation::operator()(ComplexMat &result, const ComplexMat &xf, const ComplexMat &yf, + double sigma, bool auto_correlation, const KCF_Tracker &kcf) { - xf.sqr_norm(vars.xf_sqr_norm); + TRACE(""); + DEBUG_PRINTM(xf); + DEBUG_PRINT(xf_sqr_norm.num_elem); + xf.sqr_norm(xf_sqr_norm); + for (uint s = 0; s < xf.n_scales; ++s) + DEBUG_PRINT(xf_sqr_norm[s]); if (auto_correlation) { - vars.yf_sqr_norm.hostMem()[0] = vars.xf_sqr_norm.hostMem()[0]; + yf_sqr_norm = xf_sqr_norm; } else { - yf.sqr_norm(vars.yf_sqr_norm); + DEBUG_PRINTM(yf); + yf.sqr_norm(yf_sqr_norm); } - vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj()); - DEBUG_PRINTM(vars.xyf); - fft.inverse(vars.xyf, vars.ifft2_res, m_use_cuda ? vars.data_i_features.deviceMem() : nullptr, vars.stream); -#ifdef CUFFT - cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(), - vars.xf_sqr_norm.deviceMem(), vars.xf_sqr_norm.deviceMem(), sigma, xf.n_channels, - xf.n_scales, p_roi.height, p_roi.width, vars.stream); -#else - // ifft2 and sum over 3rd dimension, we dont care about individual channels - DEBUG_PRINTM(vars.ifft2_res); - cv::Mat xy_sum; - if (xf.channels() != p_num_scales * p_num_of_feats) - xy_sum.create(vars.ifft2_res.size(), CV_32FC1); - else - xy_sum.create(vars.ifft2_res.size(), CV_32FC(p_scales.size())); - xy_sum.setTo(0); - for (int y = 0; y < vars.ifft2_res.rows; ++y) { - float *row_ptr = vars.ifft2_res.ptr(y); - float *row_ptr_sum = xy_sum.ptr(y); - for (int x = 0; x < vars.ifft2_res.cols; ++x) { - for (int sum_ch = 0; sum_ch < xy_sum.channels(); ++sum_ch) { - row_ptr_sum[(x * xy_sum.channels()) + sum_ch] += std::accumulate( - row_ptr + x * vars.ifft2_res.channels() + sum_ch * (vars.ifft2_res.channels() / xy_sum.channels()), - (row_ptr + x * vars.ifft2_res.channels() + - (sum_ch + 1) * (vars.ifft2_res.channels() / xy_sum.channels())), - 0.f); - } - } - } - DEBUG_PRINTM(xy_sum); + for (uint s = 0; s < yf.n_scales; ++s) + DEBUG_PRINTM(yf_sqr_norm[s]); + xyf = auto_correlation ? xf.sqr_mag() : xf * yf.conj(); // xf.muln(yf.conj()); + DEBUG_PRINTM(xyf); - std::vector scales; - cv::split(xy_sum, scales); + // ifft2 and sum over 3rd dimension, we dont care about individual channels + ComplexMat xyf_sum = xyf.sum_over_channels(); + DEBUG_PRINTM(xyf_sum); + kcf.fft.inverse(xyf_sum, ifft_res); + DEBUG_PRINTM(ifft_res); float numel_xf_inv = 1.f / (xf.cols * xf.rows * (xf.channels() / xf.n_scales)); for (uint i = 0; i < xf.n_scales; ++i) { - cv::Mat in_roi(vars.in_all, cv::Rect(0, i * scales[0].rows, scales[0].cols, scales[0].rows)); - cv::exp( - -1. / (sigma * sigma) * - cv::max((double(vars.xf_sqr_norm.hostMem()[i] + vars.yf_sqr_norm.hostMem()[0]) - 2 * scales[i]) * double(numel_xf_inv), 0), - in_roi); - DEBUG_PRINTM(in_roi); + cv::Mat plane = ifft_res.plane(i); + DEBUG_PRINT(ifft_res.plane(i)); + cv::exp(-1. / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * ifft_res.plane(i)) + * numel_xf_inv, 0), plane); + DEBUG_PRINTM(plane); } -#endif - DEBUG_PRINTM(vars.in_all); - fft.forward(vars.in_all, auto_correlation ? vars.kf : vars.kzf, m_use_cuda ? vars.gauss_corr_res.deviceMem() : nullptr, - vars.stream); - return; + + kcf.fft.forward(ifft_res, result); } float get_response_circular(cv::Point2i &pt, cv::Mat &response) { int x = pt.x; int y = pt.y; + assert(response.dims == 2); // ensure .cols and .rows are valid if (x < 0) x = response.cols + x; if (y < 0) y = response.rows + y; if (x >= response.cols) x = x - response.cols; @@ -804,7 +792,7 @@ float get_response_circular(cv::Point2i &pt, cv::Mat &response) return response.at(y, x); } -cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) +cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) const { // find neighbourhood of max_loc (response is circular) // 1 2 3 @@ -851,10 +839,14 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) return sub_peak; } -double KCF_Tracker::sub_grid_scale(uint index) +double KCF_Tracker::sub_grid_scale(uint max_index) { cv::Mat A, fval; - if (index >= p_scales.size()) { + const auto &vec = d->IF_BIG_BATCH(threadctxs[0].max, threadctxs); + uint index = vec.getScaleIdx(max_index); + uint angle_idx = vec.getAngleIdx(max_index); + + if (index >= vec.size()) { // interpolate from all values // fit 1d quadratic function f(x) = a*x^2 + b*x + c A.create(p_scales.size(), 3, CV_32FC1); @@ -863,7 +855,7 @@ double KCF_Tracker::sub_grid_scale(uint index) A.at(i, 0) = float(p_scales[i] * p_scales[i]); A.at(i, 1) = float(p_scales[i]); A.at(i, 2) = 1; - fval.at(i) = m_use_big_batch ? p_threadctxs.back().max_responses[i] : p_threadctxs[i].max_response; + fval.at(i) = d->IF_BIG_BATCH(threadctxs[0].max[i].response, threadctxs(i, angle_idx).max.response); } } else { // only from neighbours @@ -874,10 +866,17 @@ double KCF_Tracker::sub_grid_scale(uint index) p_scales[index - 1] * p_scales[index - 1], p_scales[index - 1], 1, p_scales[index + 0] * p_scales[index + 0], p_scales[index + 0], 1, p_scales[index + 1] * p_scales[index + 1], p_scales[index + 1], 1); +#ifdef BIG_BATCH + fval = (cv::Mat_(3, 1) << + d->threadctxs[0].max(index - 1, angle_idx).response, + d->threadctxs[0].max(index + 0, angle_idx).response, + d->threadctxs[0].max(index + 1, angle_idx).response); +#else fval = (cv::Mat_(3, 1) << - (m_use_big_batch ? p_threadctxs.back().max_responses[index - 1] : p_threadctxs[index - 1].max_response), - (m_use_big_batch ? p_threadctxs.back().max_responses[index + 0] : p_threadctxs[index + 0].max_response), - (m_use_big_batch ? p_threadctxs.back().max_responses[index + 1] : p_threadctxs[index + 1].max_response)); + d->threadctxs(index - 1, angle_idx).max.response, + d->threadctxs(index + 0, angle_idx).max.response, + d->threadctxs(index + 1, angle_idx).max.response); +#endif } cv::Mat x;