X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/blobdiff_plain/ae108a35213e52b8bf283b860eb86f0f2e7d009c..863408c50c11bb5dd5ab91e365c13c5ea818678b:/src/kcf.cpp diff --git a/src/kcf.cpp b/src/kcf.cpp index 9071fc4..4cccea5 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -3,6 +3,8 @@ #include #include #include "threadctx.hpp" +#include "debug.h" +#include #ifdef FFTW #include "fft_fftw.h" @@ -19,17 +21,7 @@ #include #endif // OPENMP -static bool kcf_debug = false; - -#define DEBUG_PRINT(obj) \ - if (kcf_debug) { \ - std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl; \ - } -#define DEBUG_PRINTM(obj) \ - if (kcf_debug) { \ - std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl \ - << (obj) << std::endl; \ - } +DbgTracer __dbgTracer; template T clamp(const T& n, const T& lower, const T& upper) @@ -43,65 +35,94 @@ void clamp2(T& n, const T& lower, const T& upper) n = std::max(lower, std::min(n, upper)); } +#if CV_MAJOR_VERSION < 3 +template static inline +cv::Size_<_Tp> operator / (const cv::Size_<_Tp>& a, _Tp b) +{ + return cv::Size_<_Tp>(a.width / b, a.height / b); +} + +template static inline +cv::Point_<_Tp> operator / (const cv::Point_<_Tp>& a, double b) +{ + return cv::Point_<_Tp>(a.x / b, a.y / b); +} + +#endif + class Kcf_Tracker_Private { friend KCF_Tracker; + + Kcf_Tracker_Private(const KCF_Tracker &kcf) : kcf(kcf) {} + + const KCF_Tracker &kcf; +#ifdef BIG_BATCH std::vector threadctxs; +#else + ScaleRotVector threadctxs{kcf.p_scales, kcf.p_angles}; +#endif }; KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) - : fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma), - p_lambda(lambda), p_interp_factor(interp_factor), p_cell_size(cell_size), d(*new Kcf_Tracker_Private) + : p_cell_size(cell_size), fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma), + p_lambda(lambda), p_interp_factor(interp_factor) { } -KCF_Tracker::KCF_Tracker() : fft(*new FFT()), d(*new Kcf_Tracker_Private) {} +KCF_Tracker::KCF_Tracker() : fft(*new FFT()) {} KCF_Tracker::~KCF_Tracker() { delete &fft; - delete &d; } -void KCF_Tracker::train(cv::Mat input_gray, cv::Mat input_rgb, double interp_factor) +void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_factor) { + TRACE(""); + // obtain a sub-window for training - int sizes[3] = {p_num_of_feats, p_roi.height, p_roi.width}; - MatDynMem patch_feats(3, sizes, CV_32FC1); - MatDynMem temp(3, sizes, CV_32FC1); - get_features(patch_feats, input_rgb, input_gray, p_pose.cx, p_pose.cy, + get_features(input_rgb, input_gray, nullptr, p_current_center.x, p_current_center.y, p_windows_size.width, p_windows_size.height, - p_current_scale); - fft.forward_window(patch_feats, p_xf, temp); - p_model_xf = p_model_xf * (1. - interp_factor) + p_xf * interp_factor; - DEBUG_PRINTM(p_model_xf); - - ComplexMat alphaf_num, alphaf_den; + p_current_scale, p_current_angle).copyTo(model->patch_feats.scale(0)); + DEBUG_PRINT(model->patch_feats); + fft.forward_window(model->patch_feats, model->xf, model->temp); + DEBUG_PRINTM(model->xf); + model->model_xf = model->model_xf * (1. - interp_factor) + model->xf * interp_factor; + DEBUG_PRINTM(model->model_xf); if (m_use_linearkernel) { - ComplexMat xfconj = p_xf.conj(); - alphaf_num = xfconj.mul(p_yf); - alphaf_den = (p_xf * xfconj); + ComplexMat xfconj = model->xf.conj(); + model->model_alphaf_num = xfconj.mul(model->yf); + model->model_alphaf_den = (model->xf * xfconj); } else { // Kernel Ridge Regression, calculate alphas (in Fourier domain) - const uint num_scales = BIG_BATCH_MODE ? p_num_scales : 1; - cv::Size sz(Fft::freq_size(p_roi)); - ComplexMat kf(sz.height, sz.width, num_scales); - (*gaussian_correlation)(*this, kf, p_model_xf, p_model_xf, p_kernel_sigma, true); + cv::Size sz(Fft::freq_size(feature_size)); + ComplexMat kf(sz.height, sz.width, 1); + (*gaussian_correlation)(kf, model->model_xf, model->model_xf, p_kernel_sigma, true, *this); DEBUG_PRINTM(kf); - p_model_alphaf_num = p_yf * kf; - DEBUG_PRINTM(p_model_alphaf_num); - p_model_alphaf_den = kf * (kf + p_lambda); - DEBUG_PRINTM(p_model_alphaf_den); + model->model_alphaf_num = model->yf * kf; + model->model_alphaf_den = kf * (kf + p_lambda); } - p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den; - DEBUG_PRINTM(p_model_alphaf); + model->model_alphaf = model->model_alphaf_num / model->model_alphaf_den; + DEBUG_PRINTM(model->model_alphaf); // p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training } +static int round_pw2_down(int x) +{ + for (int i = 1; i < 32; i <<= 1) + x |= x >> i; + x++; + return x >> 1; +} + + void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int fit_size_y) { - kcf_debug = m_debug; + __dbgTracer.debug = m_debug; + TRACE(""); + // check boundary, enforce min size double x1 = bbox.x, x2 = bbox.x + bbox.width, y1 = bbox.y, y2 = bbox.y + bbox.height; if (x1 < 0) x1 = 0.; @@ -132,10 +153,10 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f } } - p_pose.w = x2 - x1; - p_pose.h = y2 - y1; - p_pose.cx = x1 + p_pose.w / 2.; - p_pose.cy = y1 + p_pose.h / 2.; + p_init_pose.w = x2 - x1; + p_init_pose.h = y2 - y1; + p_init_pose.cx = x1 + p_init_pose.w / 2.; + p_init_pose.cy = y1 + p_init_pose.h / 2.; cv::Mat input_gray, input_rgb = img.clone(); if (img.channels() == 3) { @@ -145,86 +166,59 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f img.convertTo(input_gray, CV_32FC1); // don't need too large image - if (p_pose.w * p_pose.h > 100. * 100. && (fit_size_x == -1 || fit_size_y == -1)) { + if (p_init_pose.w * p_init_pose.h > 100. * 100.) { std::cout << "resizing image by factor of " << 1 / p_downscale_factor << std::endl; p_resize_image = true; - p_pose.scale(p_downscale_factor); + p_init_pose.scale(p_downscale_factor); cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (!(fit_size_x == -1 && fit_size_y == -1)) { - if (fit_size_x % p_cell_size != 0 || fit_size_y % p_cell_size != 0) { - std::cerr << "Error: Fit size is not multiple of HOG cell size (" << p_cell_size << ")" << std::endl; - std::exit(EXIT_FAILURE); - } - p_scale_factor_x = (double)fit_size_x / round(p_pose.w * (1. + p_padding)); - p_scale_factor_y = (double)fit_size_y / round(p_pose.h * (1. + p_padding)); - std::cout << "resizing image horizontaly by factor of " << p_scale_factor_x << " and verticaly by factor of " - << p_scale_factor_y << std::endl; - p_fit_to_pw2 = true; - p_pose.scale_x(p_scale_factor_x); - p_pose.scale_y(p_scale_factor_y); - if (fabs(p_scale_factor_x - 1) > p_floating_error || fabs(p_scale_factor_y - 1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } - } } // compute win size + fit to fhog cell size - p_windows_size.width = round(p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size; - p_windows_size.height = round(p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; - p_roi.width = p_windows_size.width / p_cell_size; - p_roi.height = p_windows_size.height / p_cell_size; + p_windows_size.width = round(p_init_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size; + p_windows_size.height = round(p_init_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size; + + if (fit_size_x == 0 || fit_size_y == 0) { + // Round down to the next highest power of 2 + fit_size = cv::Size(round_pw2_down(p_windows_size.width), + round_pw2_down(p_windows_size.height)); + } else if (fit_size_x == -1 || fit_size_y == -1) { + fit_size = p_windows_size; + } else { + fit_size = cv::Size(fit_size_x, fit_size_y); + } + + feature_size = fit_size / p_cell_size; p_scales.clear(); - if (m_use_scale) - for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i) - p_scales.push_back(std::pow(p_scale_step, i)); - else - p_scales.push_back(1.); + for (int i = -int(p_num_scales - 1) / 2; i <= int(p_num_scales) / 2; ++i) + p_scales.push_back(std::pow(p_scale_step, i)); -#ifdef CUFFT - if (p_roi.height * (p_roi.width / 2 + 1) > 1024) { - std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set " - "the window dimensions so its size is less or equal to " - << 1024 * p_cell_size * p_cell_size * 2 + 1 - << " pixels . Currently the size of the window is: " << p_windows_size.width << "x" << p_windows_size.height - << " which is " << p_windows_size.width * p_windows_size.height << " pixels. " << std::endl; - std::exit(EXIT_FAILURE); - } + p_angles.clear(); + for (int i = -int(p_num_angles - 1) / 2; i <= int(p_num_angles) / 2; ++i) + p_angles.push_back(i * p_angle_step); +#ifdef CUFFT if (m_use_linearkernel) { std::cerr << "cuFFT supports only Gaussian kernel." << std::endl; std::exit(EXIT_FAILURE); } - CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost)); -#else - p_xf.create(p_roi.height, p_roi.height / 2 + 1, p_num_of_feats); #endif -#if defined(CUFFT) || defined(FFTW) - uint width = p_roi.width / 2 + 1; -#else - uint width = p_roi.width; -#endif - p_model_xf.create(p_roi.height, width, p_num_of_feats); - p_yf.create(p_roi.height, width, 1); - p_xf.create(p_roi.height, width, p_num_of_feats); + model.reset(new Model(feature_size, p_num_of_feats)); + d.reset(new Kcf_Tracker_Private(*this)); #ifndef BIG_BATCH for (auto scale: p_scales) - d.threadctxs.emplace_back(p_roi, p_num_of_feats, 1, scale); + for (auto angle : p_angles) + d->threadctxs.emplace_back(feature_size, p_num_of_feats, scale, angle); #else - d.threadctxs.emplace_back(p_roi, p_num_of_feats * p_num_scales, p_num_scales); + d->threadctxs.emplace_back(feature_size, p_num_of_feats, p_scales, p_angles); #endif - gaussian_correlation.reset(new GaussianCorrelation(p_roi, IF_BIG_BATCH(p_num_scales, 1), - p_num_of_feats * IF_BIG_BATCH(p_num_scales, 1))); + gaussian_correlation.reset(new GaussianCorrelation(1, p_num_of_feats, feature_size)); + p_current_center = p_init_pose.center(); p_current_scale = 1.; double min_size_ratio = std::max(5. * p_cell_size / p_windows_size.width, 5. * p_cell_size / p_windows_size.height); @@ -234,22 +228,28 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f p_min_max_scale[0] = std::pow(p_scale_step, std::ceil(std::log(min_size_ratio) / log(p_scale_step))); p_min_max_scale[1] = std::pow(p_scale_step, std::floor(std::log(max_size_ratio) / log(p_scale_step))); - std::cout << "init: img size " << img.cols << "x" << img.rows << std::endl; - std::cout << "init: win size " << p_windows_size.width << "x" << p_windows_size.height << std::endl; - std::cout << "init: FFT size " << p_roi.width << "x" << p_roi.height << std::endl; + std::cout << "init: img size " << img.size() << std::endl; + std::cout << "init: win size " << p_windows_size; + if (p_windows_size != fit_size) + std::cout << " resized to " << fit_size; + std::cout << std::endl; + std::cout << "init: FFT size " << feature_size << std::endl; std::cout << "init: min max scales factors: " << p_min_max_scale[0] << " " << p_min_max_scale[1] << std::endl; - p_output_sigma = std::sqrt(p_pose.w * p_pose.h) * p_output_sigma_factor / p_cell_size; + p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h * double(fit_size.area()) / p_windows_size.area()) + * p_output_sigma_factor / p_cell_size; - fft.init(p_roi.width, p_roi.height, p_num_of_feats, p_num_scales); - fft.set_window(MatDynMem(cosine_window_function(p_roi.width, p_roi.height))); + fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales * p_num_angles); + fft.set_window(MatDynMem(cosine_window_function(feature_size.width, feature_size.height))); // window weights, i.e. labels - fft.forward(gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf); - DEBUG_PRINTM(p_yf); + MatScales gsl(1, feature_size); + gaussian_shaped_labels(p_output_sigma, feature_size.width, feature_size.height).copyTo(gsl.plane(0)); + fft.forward(gsl, model->yf); + DEBUG_PRINTM(model->yf); // train initial model - train(input_gray, input_rgb, 1.0); + train(input_rgb, input_gray, 1.0); } void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat &img, int fit_size_x, int fit_size_y) @@ -259,34 +259,24 @@ void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat &img, int fit_size_x, int void KCF_Tracker::updateTrackerPosition(BBox_c &bbox) { + BBox_c tmp = bbox; if (p_resize_image) { - BBox_c tmp = bbox; tmp.scale(p_downscale_factor); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else if (p_fit_to_pw2) { - BBox_c tmp = bbox; - tmp.scale_x(p_scale_factor_x); - tmp.scale_y(p_scale_factor_y); - p_pose.cx = tmp.cx; - p_pose.cy = tmp.cy; - } else { - p_pose.cx = bbox.cx; - p_pose.cy = bbox.cy; } + p_current_center = tmp.center(); } BBox_c KCF_Tracker::getBBox() { - BBox_c tmp = p_pose; - tmp.w *= p_current_scale; - tmp.h *= p_current_scale; - - if (p_resize_image) tmp.scale(1 / p_downscale_factor); - if (p_fit_to_pw2) { - tmp.scale_x(1 / p_scale_factor_x); - tmp.scale_y(1 / p_scale_factor_y); - } + BBox_c tmp; + tmp.cx = p_current_center.x; + tmp.cy = p_current_center.y; + tmp.w = p_init_pose.w * p_current_scale; + tmp.h = p_init_pose.h * p_current_scale; + tmp.a = p_current_angle; + + if (p_resize_image) + tmp.scale(1 / p_downscale_factor); return tmp; } @@ -301,62 +291,112 @@ void KCF_Tracker::resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray) if (p_resize_image) { cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA); - } else if (p_fit_to_pw2 && fabs(p_scale_factor_x - 1) > p_floating_error && - fabs(p_scale_factor_y - 1) > p_floating_error) { - if (p_scale_factor_x < 1 && p_scale_factor_y < 1) { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_AREA); - } else { - cv::resize(input_gray, input_gray, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR); - } } } -void KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2f &new_location) const +static void drawCross(cv::Mat &img, cv::Point center, bool green) { - double max = -1.; + cv::Scalar col = green ? cv::Scalar(0, 1, 0) : cv::Scalar(0, 0, 1); + cv::line(img, cv::Point(center.x, 0), cv::Point(center.x, img.size().height), col); + cv::line(img, cv::Point(0, center.y), cv::Point(img.size().height, center.y), col); +} + +static cv::Point2d wrapAroundFreq(cv::Point2d pt, cv::Mat &resp_map) +{ + if (pt.y > resp_map.rows / 2) // wrap around to negative half-space of vertical axis + pt.y = pt.y - resp_map.rows; + if (pt.x > resp_map.cols / 2) // same for horizontal axis + pt.x = pt.x - resp_map.cols; + return pt; +} + +double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) const +{ + double max; + const auto &vec = IF_BIG_BATCH(d->threadctxs[0].max, d->threadctxs); + #ifndef BIG_BATCH - for (uint j = 0; j < d.threadctxs.size(); ++j) { - if (d.threadctxs[j].max.response > max) { - max = d.threadctxs[j].max.response; - max_idx = j; - } - } + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx &a, const ThreadCtx &b) + { return a.max.response < b.max.response; }); #else - // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list - for (uint j = 0; j < p_scales.size(); ++j) { - if (d.threadctxs[0].max[j].response > max) { - max = d.threadctxs[0].max[j].response; - max_idx = j; - } - } + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx::Max &a, const ThreadCtx::Max &b) + { return a.response < b.response; }); #endif - cv::Point2i &max_response_pt = IF_BIG_BATCH(d.threadctxs[0].max[max_idx].loc, d.threadctxs[max_idx].max.loc); - cv::Mat max_response_map = IF_BIG_BATCH(d.threadctxs[0].response.plane(max_idx), d.threadctxs[max_idx].response); + assert(max_it != vec.end()); + max = max_it->IF_BIG_BATCH(response, max.response); + + max_idx = std::distance(vec.begin(), max_it); + + cv::Point2i max_response_pt = IF_BIG_BATCH(max_it->loc, max_it->max.loc); + cv::Mat max_response_map = IF_BIG_BATCH(d->threadctxs[0].response.plane(max_idx), + max_it->response.plane(0)); DEBUG_PRINTM(max_response_map); DEBUG_PRINT(max_response_pt); - // sub pixel quadratic interpolation from neighbours - if (max_response_pt.y > max_response_map.rows / 2) // wrap around to negative half-space of vertical axis - max_response_pt.y = max_response_pt.y - max_response_map.rows; - if (max_response_pt.x > max_response_map.cols / 2) // same for horizontal axis - max_response_pt.x = max_response_pt.x - max_response_map.cols; - + max_response_pt = wrapAroundFreq(max_response_pt, max_response_map); + // sub pixel quadratic interpolation from neighbours if (m_use_subpixel_localization) { new_location = sub_pixel_peak(max_response_pt, max_response_map); } else { new_location = max_response_pt; } DEBUG_PRINT(new_location); + + if (m_visual_debug != vd::NONE) { + const bool fit = 1; + int w = fit ? 100 : (m_visual_debug == vd::PATCH ? fit_size.width : feature_size.width); + int h = fit ? 100 : (m_visual_debug == vd::PATCH ? fit_size.height : feature_size.height); + cv::Mat all_responses((h + 1) * p_num_scales - 1, + (w + 1) * p_num_angles - 1, CV_32FC3, cv::Scalar::all(0)); + for (size_t i = 0; i < p_num_scales; ++i) { + for (size_t j = 0; j < p_num_angles; ++j) { + auto &threadctx = d->IF_BIG_BATCH(threadctxs[0], threadctxs(i, j)); + cv::Mat tmp; + cv::Point2d cross = threadctx.IF_BIG_BATCH(max(i, j), max).loc; + cross = wrapAroundFreq(cross, max_response_map); + if (m_visual_debug == vd::PATCH ) { + threadctx.dbg_patch IF_BIG_BATCH((i, j),) + .convertTo(tmp, all_responses.type(), 1.0 / 255); + cross.x = cross.x / fit_size.width * tmp.cols + tmp.cols / 2; + cross.y = cross.y / fit_size.height * tmp.rows + tmp.rows / 2; + } else { + cv::cvtColor(threadctx.response.plane(IF_BIG_BATCH(threadctx.max.getIdx(i, j), 0)), + tmp, cv::COLOR_GRAY2BGR); + tmp /= max; // Normalize to 1 + cross += cv::Point2d(tmp.size())/2; + tmp = circshift(tmp, -tmp.cols/2, -tmp.rows/2); + //drawCross(tmp, cross, false); + } + bool green = false; + if (&*max_it == &IF_BIG_BATCH(threadctx.max(i, j), threadctx)) { + // Show the green cross at position of sub-pixel interpolation (if enabled) + cross = new_location + cv::Point2d(tmp.size())/2; + green = true; + } + // Move to the center of pixes (if scaling up) and scale + cross.x = (cross.x + 0.5) * double(w)/tmp.cols; + cross.y = (cross.y + 0.5) * double(h)/tmp.rows; + cv::resize(tmp, tmp, cv::Size(w, h)); //, 0, 0, cv::INTER_NEAREST); + drawCross(tmp, cross, green); + cv::Mat resp_roi(all_responses, cv::Rect(j * (w+1), i * (h+1), w, h)); + tmp.copyTo(resp_roi); + } + } + cv::namedWindow("KCF visual debug", CV_WINDOW_AUTOSIZE); + cv::imshow("KCF visual debug", all_responses); + } + + return max; } void KCF_Tracker::track(cv::Mat &img) { - kcf_debug = m_debug; - if (m_debug) std::cout << "NEW FRAME" << '\n'; + __dbgTracer.debug = m_debug; + TRACE(""); cv::Mat input_gray, input_rgb = img.clone(); if (img.channels() == 3) { @@ -369,74 +409,76 @@ void KCF_Tracker::track(cv::Mat &img) resizeImgs(input_rgb, input_gray); #ifdef ASYNC - for (auto &it : d.threadctxs) + for (auto &it : d->threadctxs) it.async_res = std::async(std::launch::async, [this, &input_gray, &input_rgb, &it]() -> void { it.track(*this, input_rgb, input_gray); }); - for (auto const &it : d.threadctxs) + for (auto const &it : d->threadctxs) it.async_res.wait(); #else // !ASYNC - // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list NORMAL_OMP_PARALLEL_FOR - for (uint i = 0; i < d.threadctxs.size(); ++i) - d.threadctxs[i].track(*this, input_rgb, input_gray); + for (uint i = 0; i < d->threadctxs.size(); ++i) + d->threadctxs[i].track(*this, input_rgb, input_gray); #endif - cv::Point2f new_location; + cv::Point2d new_location; uint max_idx; - findMaxReponse(max_idx, new_location); + max_response = findMaxReponse(max_idx, new_location); - p_pose.cx += p_current_scale * p_cell_size * double(new_location.x); - p_pose.cy += p_current_scale * p_cell_size * double(new_location.y); - if (p_fit_to_pw2) { - clamp2(p_pose.cx, 0.0, (img.cols * p_scale_factor_x) - 1); - clamp2(p_pose.cy, 0.0, (img.rows * p_scale_factor_y) - 1); - } else { - clamp2(p_pose.cx, 0.0, img.cols - 1.0); - clamp2(p_pose.cy, 0.0, img.rows - 1.0); - } + double angle_change = d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).angle(max_idx); + p_current_angle += angle_change; + + new_location.x = new_location.x * cos(-p_current_angle/180*M_PI) + new_location.y * sin(-p_current_angle/180*M_PI); + new_location.y = new_location.y * cos(-p_current_angle/180*M_PI) - new_location.x * sin(-p_current_angle/180*M_PI); + + new_location.x *= double(p_windows_size.width) / fit_size.width; + new_location.y *= double(p_windows_size.height) / fit_size.height; + + p_current_center += p_current_scale * p_cell_size * new_location; + + clamp2(p_current_center.x, 0.0, img.cols - 1.0); + clamp2(p_current_center.y, 0.0, img.rows - 1.0); // sub grid scale interpolation if (m_use_subgrid_scale) { p_current_scale *= sub_grid_scale(max_idx); } else { - p_current_scale *= p_scales[max_idx]; + p_current_scale *= d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).scale(max_idx); } clamp2(p_current_scale, p_min_max_scale[0], p_min_max_scale[1]); + // train at newly estimated target position train(input_rgb, input_gray, p_interp_factor); } void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input_gray) { - // TODO: Move matrices to thread ctx - int sizes[3] = {kcf.p_num_of_feats, kcf.p_windows_size.height, kcf.p_windows_size.width}; - MatDynMem patch_feats(3, sizes, CV_32FC1); - MatDynMem temp(3, sizes, CV_32FC1); + TRACE(""); -#ifdef BIG_BATCH BIG_BATCH_OMP_PARALLEL_FOR - for (uint i = 0; i < kcf.p_num_scales; ++i) -#endif + for (uint i = 0; i < IF_BIG_BATCH(max.size(), 1); ++i) { - kcf.get_features(patch_feats, input_rgb, input_gray, kcf.p_pose.cx, kcf.p_pose.cy, + kcf.get_features(input_rgb, input_gray, &dbg_patch IF_BIG_BATCH([i],), + kcf.p_current_center.x, kcf.p_current_center.y, kcf.p_windows_size.width, kcf.p_windows_size.height, - kcf.p_current_scale * IF_BIG_BATCH(kcf.p_scales[i], scale)); + kcf.p_current_scale * IF_BIG_BATCH(max.scale(i), scale), + kcf.p_current_angle + IF_BIG_BATCH(max.angle(i), angle)) + .copyTo(patch_feats.scale(i)); + DEBUG_PRINT(patch_feats.scale(i)); } kcf.fft.forward_window(patch_feats, zf, temp); DEBUG_PRINTM(zf); if (kcf.m_use_linearkernel) { - kzf = zf.mul(kcf.p_model_alphaf).sum_over_channels(); + kzf = zf.mul(kcf.model->model_alphaf).sum_over_channels(); } else { - gaussian_correlation(kcf, kzf, zf, kcf.p_model_xf, kcf.p_kernel_sigma); - DEBUG_PRINTM(kcf.p_model_alphaf); + gaussian_correlation(kzf, zf, kcf.model->model_xf, kcf.p_kernel_sigma, false, kcf); DEBUG_PRINTM(kzf); - kzf = kzf.mul(kcf.p_model_alphaf); + kzf = kzf.mul(kcf.model->model_alphaf); } kcf.fft.inverse(kzf, response); @@ -449,7 +491,7 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input double min_val, max_val; cv::Point2i min_loc, max_loc; #ifdef BIG_BATCH - for (size_t i = 0; i < kcf.p_scales.size(); ++i) { + for (size_t i = 0; i < max.size(); ++i) { cv::minMaxLoc(response.plane(i), &min_val, &max_val, &min_loc, &max_loc); DEBUG_PRINT(max_loc); double weight = kcf.p_scales[i] < 1. ? kcf.p_scales[i] : 1. / kcf.p_scales[i]; @@ -457,9 +499,10 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input max[i].loc = max_loc; } #else - cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc); + cv::minMaxLoc(response.plane(0), &min_val, &max_val, &min_loc, &max_loc); DEBUG_PRINT(max_loc); + DEBUG_PRINT(max_val); double weight = scale < 1. ? scale : 1. / scale; max.response = max_val * weight; @@ -469,25 +512,24 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input // **************************************************************************** -void KCF_Tracker::get_features(MatDynMem &result_3d, cv::Mat &input_rgb, cv::Mat &input_gray, int cx, int cy, - int size_x, int size_y, double scale) const +cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, cv::Mat *dbg_patch, + int cx, int cy, int size_x, int size_y, double scale, double angle) const { - assert(result_3d.size[0] == p_num_of_feats); - assert(result_3d.size[1] == size_y / p_cell_size); - assert(result_3d.size[2] == size_x / p_cell_size); + cv::Size scaled = cv::Size(floor(size_x * scale), floor(size_y * scale)); - int size_x_scaled = floor(size_x * scale); - int size_y_scaled = floor(size_y * scale); + cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height, angle); + cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height, angle); - cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled); - cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled); + if (dbg_patch) + patch_rgb.copyTo(*dbg_patch); // resize to default size - if (scale > 1.) { + if (scaled.area() > fit_size.area()) { // if we downsample use INTER_AREA interpolation - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA); + // note: this is just a guess - we may downsample in X and upsample in Y (or vice versa) + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_AREA); } else { - cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR); + cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_LINEAR); } // get hog(Histogram of Oriented Gradients) features @@ -497,11 +539,11 @@ void KCF_Tracker::get_features(MatDynMem &result_3d, cv::Mat &input_rgb, cv::Mat std::vector color_feat; if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) { // resize to default size - if (scale > 1.) { + if (scaled.area() > (fit_size / p_cell_size).area()) { // if we downsample use INTER_AREA interpolation - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_AREA); + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_AREA); } else { - cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_LINEAR); + cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_LINEAR); } } @@ -524,15 +566,17 @@ void KCF_Tracker::get_features(MatDynMem &result_3d, cv::Mat &input_rgb, cv::Mat hog_feat.insert(hog_feat.end(), color_feat.begin(), color_feat.end()); - for (uint i = 0; i < hog_feat.size(); ++i) { - cv::Mat result_plane(result_3d.dims - 1, result_3d.size + 1, result_3d.cv::Mat::type(), result_3d.ptr(i)); - hog_feat[i].copyTo(result_plane); - } + int size[] = {p_num_of_feats, feature_size.height, feature_size.width}; + cv::Mat result(3, size, CV_32F); + for (uint i = 0; i < hog_feat.size(); ++i) + hog_feat[i].copyTo(cv::Mat(size[1], size[2], CV_32FC1, result.ptr(i))); + + return result; } -MatDynMem KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) +cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) { - MatDynMem labels(dim2, dim1, CV_32FC1); + cv::Mat labels(dim2, dim1, CV_32FC1); int range_y[2] = {-dim2 / 2, dim2 - dim2 / 2}; int range_x[2] = {-dim1 / 2, dim1 - dim1 / 2}; @@ -554,10 +598,10 @@ MatDynMem KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2) return rot_labels; } -MatDynMem KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) +cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) const { - MatDynMem rot_patch(patch.size(), CV_32FC1); - cv::Mat tmp_x_rot(patch.size(), CV_32FC1); + cv::Mat rot_patch(patch.size(), patch.type()); + cv::Mat tmp_x_rot(patch.size(), patch.type()); // circular rotate x-axis if (x_rot < 0) { @@ -635,14 +679,18 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) // Returns sub-window of image input centered at [cx, cy] coordinates), // with size [width, height]. If any pixels are outside of the image, // they will replicate the values at the borders. -cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height) const +cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height, double angle) const { cv::Mat patch; - int x1 = cx - width / 2; - int y1 = cy - height / 2; - int x2 = cx + width / 2; - int y2 = cy + height / 2; + cv::Size sz(width, height); + cv::RotatedRect rr(cv::Point2f(cx, cy), sz, angle); + cv::Rect bb = rr.boundingRect(); + + int x1 = bb.tl().x; + int y1 = bb.tl().y; + int x2 = bb.br().x; + int y2 = bb.br().y; // out of image if (x1 >= input.cols || y1 >= input.rows || x2 < 0 || y2 < 0) { @@ -683,70 +731,61 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid // cv::waitKey(); } + cv::Point2f src_pts[4]; + cv::RotatedRect(cv::Point2f(patch.size()) / 2.0, sz, angle).points(src_pts); + cv::Point2f dst_pts[3] = { cv::Point2f(0, height), cv::Point2f(0, 0), cv::Point2f(width, 0)}; + auto rot = cv::getAffineTransform(src_pts, dst_pts); + cv::warpAffine(patch, patch, rot, sz); + // sanity check assert(patch.cols == width && patch.rows == height); return patch; } -void KCF_Tracker::GaussianCorrelation::operator()(const KCF_Tracker &kcf, ComplexMat &result, const ComplexMat &xf, - const ComplexMat &yf, double sigma, bool auto_correlation) +void KCF_Tracker::GaussianCorrelation::operator()(ComplexMat &result, const ComplexMat &xf, const ComplexMat &yf, + double sigma, bool auto_correlation, const KCF_Tracker &kcf) { + TRACE(""); + DEBUG_PRINTM(xf); + DEBUG_PRINT(xf_sqr_norm.num_elem); xf.sqr_norm(xf_sqr_norm); + for (uint s = 0; s < xf.n_scales; ++s) + DEBUG_PRINT(xf_sqr_norm[s]); if (auto_correlation) { yf_sqr_norm = xf_sqr_norm; } else { + DEBUG_PRINTM(yf); yf.sqr_norm(yf_sqr_norm); } - xyf = auto_correlation ? xf.sqr_mag() : xf.mul(yf.conj()); - //DEBUG_PRINTM(xyf); - kcf.fft.inverse(xyf, ifft_res); -#ifdef CUFFT - cuda_gaussian_correlation(ifft_res.deviceMem(), k.deviceMem(), xf_sqr_norm.deviceMem(), - auto_correlation ? xf_sqr_norm.deviceMem() : yf_sqr_norm.deviceMem(), sigma, - xf.n_channels, xf.n_scales, kcf.p_roi.height, kcf.p_roi.width); -#else - // ifft2 and sum over 3rd dimension, we dont care about individual channels - //DEBUG_PRINTM(ifft_res); - cv::Mat xy_sum; - if (xf.channels() != kcf.p_num_scales * kcf.p_num_of_feats) - xy_sum.create(ifft_res.size(), CV_32FC1); - else - xy_sum.create(ifft_res.size(), CV_32FC(kcf.p_scales.size())); - xy_sum.setTo(0); - for (int y = 0; y < ifft_res.rows; ++y) { - float *row_ptr = ifft_res.ptr(y); - float *row_ptr_sum = xy_sum.ptr(y); - for (int x = 0; x < ifft_res.cols; ++x) { - for (int sum_ch = 0; sum_ch < xy_sum.channels(); ++sum_ch) { - row_ptr_sum[(x * xy_sum.channels()) + sum_ch] += std::accumulate( - row_ptr + x * ifft_res.channels() + sum_ch * (ifft_res.channels() / xy_sum.channels()), - (row_ptr + x * ifft_res.channels() + - (sum_ch + 1) * (ifft_res.channels() / xy_sum.channels())), - 0.f); - } - } - } - DEBUG_PRINTM(xy_sum); + for (uint s = 0; s < yf.n_scales; ++s) + DEBUG_PRINTM(yf_sqr_norm[s]); + xyf = auto_correlation ? xf.sqr_mag() : xf * yf.conj(); // xf.muln(yf.conj()); + DEBUG_PRINTM(xyf); - std::vector scales; - cv::split(xy_sum, scales); + // ifft2 and sum over 3rd dimension, we dont care about individual channels + ComplexMat xyf_sum = xyf.sum_over_channels(); + DEBUG_PRINTM(xyf_sum); + kcf.fft.inverse(xyf_sum, ifft_res); + DEBUG_PRINTM(ifft_res); float numel_xf_inv = 1.f / (xf.cols * xf.rows * (xf.channels() / xf.n_scales)); for (uint i = 0; i < xf.n_scales; ++i) { - cv::Mat k_roi(k, cv::Rect(0, i * scales[0].rows, scales[0].cols, scales[0].rows)); - cv::exp(-1. / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * scales[i]) * numel_xf_inv, 0), - k_roi); - DEBUG_PRINTM(k_roi); + cv::Mat plane = ifft_res.plane(i); + DEBUG_PRINT(ifft_res.plane(i)); + cv::exp(-1. / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * ifft_res.plane(i)) + * numel_xf_inv, 0), plane); + DEBUG_PRINTM(plane); } -#endif - kcf.fft.forward(k, result); + + kcf.fft.forward(ifft_res, result); } float get_response_circular(cv::Point2i &pt, cv::Mat &response) { int x = pt.x; int y = pt.y; + assert(response.dims == 2); // ensure .cols and .rows are valid if (x < 0) x = response.cols + x; if (y < 0) y = response.rows + y; if (x >= response.cols) x = x - response.cols; @@ -794,18 +833,25 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) c float a = x.at(0), b = x.at(1), c = x.at(2), d = x.at(3), e = x.at(4); cv::Point2f sub_peak(max_loc.x, max_loc.y); - if (b > 0 || b < 0) { + if (4 * a * c - b * b > p_floating_error) { sub_peak.y = ((2.f * a * e) / b - d) / (b - (4 * a * c) / b); sub_peak.x = (-2 * c * sub_peak.y - e) / b; + if (fabs(sub_peak.x - max_loc.x) > 1 || + fabs(sub_peak.y - max_loc.y) > 1) + sub_peak = max_loc; } return sub_peak; } -double KCF_Tracker::sub_grid_scale(uint index) +double KCF_Tracker::sub_grid_scale(uint max_index) { cv::Mat A, fval; - if (index >= p_scales.size()) { + const auto &vec = d->IF_BIG_BATCH(threadctxs[0].max, threadctxs); + uint index = vec.getScaleIdx(max_index); + uint angle_idx = vec.getAngleIdx(max_index); + + if (index >= vec.size()) { // interpolate from all values // fit 1d quadratic function f(x) = a*x^2 + b*x + c A.create(p_scales.size(), 3, CV_32FC1); @@ -814,7 +860,7 @@ double KCF_Tracker::sub_grid_scale(uint index) A.at(i, 0) = float(p_scales[i] * p_scales[i]); A.at(i, 1) = float(p_scales[i]); A.at(i, 2) = 1; - fval.at(i) = d.threadctxs.back().IF_BIG_BATCH(max[i].response, max.response); + fval.at(i) = d->IF_BIG_BATCH(threadctxs[0].max[i].response, threadctxs(i, angle_idx).max.response); } } else { // only from neighbours @@ -827,14 +873,14 @@ double KCF_Tracker::sub_grid_scale(uint index) p_scales[index + 1] * p_scales[index + 1], p_scales[index + 1], 1); #ifdef BIG_BATCH fval = (cv::Mat_(3, 1) << - d.threadctxs.back().max[index - 1].response, - d.threadctxs.back().max[index + 0].response, - d.threadctxs.back().max[index + 1].response); + d->threadctxs[0].max(index - 1, angle_idx).response, + d->threadctxs[0].max(index + 0, angle_idx).response, + d->threadctxs[0].max(index + 1, angle_idx).response); #else fval = (cv::Mat_(3, 1) << - d.threadctxs[index - 1].max.response, - d.threadctxs[index + 0].max.response, - d.threadctxs[index + 1].max.response); + d->threadctxs(index - 1, angle_idx).max.response, + d->threadctxs(index + 0, angle_idx).max.response, + d->threadctxs(index + 1, angle_idx).max.response); #endif }