From 571f4d8d757c1c80ca19da30f7bd2937b60a7f56 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Sat, 13 Oct 2018 09:47:48 +0200 Subject: [PATCH] Extend to support not only multiple scales but also multiple angles So far, we neither rotate the patches nor use multiple angles. --- src/kcf.cpp | 121 +++++++++++++++++++++++++++------------------- src/kcf.h | 8 +-- src/threadctx.hpp | 62 ++++++++++++++++++------ 3 files changed, 124 insertions(+), 67 deletions(-) diff --git a/src/kcf.cpp b/src/kcf.cpp index a637ec7..d58ee8a 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -41,26 +41,40 @@ cv::Size_<_Tp> operator / (const cv::Size_<_Tp>& a, _Tp b) { return cv::Size_<_Tp>(a.width / b, a.height / b); } + +template static inline +cv::Point_<_Tp> operator / (const cv::Point_<_Tp>& a, double b) +{ + return cv::Point_<_Tp>(a.x / b, a.y / b); +} + #endif class Kcf_Tracker_Private { friend KCF_Tracker; + + Kcf_Tracker_Private(const KCF_Tracker &kcf) : kcf(kcf) {} + + const KCF_Tracker &kcf; +#ifdef BIG_BATCH std::vector threadctxs; +#else + ScaleRotVector threadctxs{kcf.p_scales, kcf.p_angles}; +#endif }; KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) : p_cell_size(cell_size), fft(*new FFT()), p_padding(padding), p_output_sigma_factor(output_sigma_factor), p_kernel_sigma(kernel_sigma), - p_lambda(lambda), p_interp_factor(interp_factor), d(*new Kcf_Tracker_Private) + p_lambda(lambda), p_interp_factor(interp_factor) { } -KCF_Tracker::KCF_Tracker() : fft(*new FFT()), d(*new Kcf_Tracker_Private) {} +KCF_Tracker::KCF_Tracker() : fft(*new FFT()) {} KCF_Tracker::~KCF_Tracker() { delete &fft; - delete &d; } void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_factor) @@ -70,7 +84,7 @@ void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_fac // obtain a sub-window for training get_features(input_rgb, input_gray, nullptr, p_current_center.x, p_current_center.y, p_windows_size.width, p_windows_size.height, - p_current_scale).copyTo(model->patch_feats.scale(0)); + p_current_scale, p_current_angle).copyTo(model->patch_feats.scale(0)); DEBUG_PRINT(model->patch_feats); fft.forward_window(model->patch_feats, model->xf, model->temp); DEBUG_PRINTM(model->xf); @@ -192,12 +206,14 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f #endif model.reset(new Model(feature_size, p_num_of_feats)); + d.reset(new Kcf_Tracker_Private(*this)); #ifndef BIG_BATCH for (auto scale: p_scales) - d.threadctxs.emplace_back(feature_size, p_num_of_feats, scale); + for (auto angle : p_angles) + d->threadctxs.emplace_back(feature_size, p_num_of_feats, scale, angle); #else - d.threadctxs.emplace_back(feature_size, p_num_of_feats, p_num_scales); + d->threadctxs.emplace_back(feature_size, p_num_of_feats, p_scales, p_angles); #endif gaussian_correlation.reset(new GaussianCorrelation(1, p_num_of_feats, feature_size)); @@ -223,7 +239,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h * double(fit_size.area()) / p_windows_size.area()) * p_output_sigma_factor / p_cell_size; - fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales); + fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales * p_num_angles); fft.set_window(MatDynMem(cosine_window_function(feature_size.width, feature_size.height))); // window weights, i.e. labels @@ -280,29 +296,25 @@ void KCF_Tracker::resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray) double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) const { - double max = -1.; - max_idx = std::numeric_limits::max(); + double max; + const auto &vec = IF_BIG_BATCH(d->threadctxs[0].max, d->threadctxs); #ifndef BIG_BATCH - for (uint j = 0; j < d.threadctxs.size(); ++j) { - if (d.threadctxs[j].max.response > max) { - max = d.threadctxs[j].max.response; - max_idx = j; - } - } + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx &a, const ThreadCtx &b) + { return a.max.response < b.max.response; }); #else - for (uint j = 0; j < p_scales.size(); ++j) { - if (d.threadctxs[0].max[j].response > max) { - max = d.threadctxs[0].max[j].response; - max_idx = j; - } - } + auto max_it = std::max_element(vec.begin(), vec.end(), + [](const ThreadCtx::Max &a, const ThreadCtx::Max &b) + { return a.response < b.response; }); #endif - assert(max_idx < IF_BIG_BATCH(p_scales.size(), d.threadctxs.size())); + assert(max_it != vec.end()); + max = max_it->IF_BIG_BATCH(response, max.response); if (m_visual_debug) { const bool rgb = true; - int type = rgb ? d.threadctxs[0].dbg_patch[0].type() : d.threadctxs[0].response.type(); + int type = rgb ? d->threadctxs[0].IF_BIG_BATCH(dbg_patch[0], dbg_patch).type() + : d->threadctxs[0].response.type(); int w = true ? 100 : (rgb ? fit_size.width : feature_size.width); int h = true ? 100 : (rgb ? fit_size.height : feature_size.height); cv::Mat all_responses((h + 1) * p_num_scales - 1, @@ -311,9 +323,9 @@ double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) con for (size_t j = 0; j < p_num_angles; ++j) { cv::Mat tmp; if (rgb) { - tmp = d.threadctxs[IF_BIG_BATCH(0, p_num_angles * i + j)].dbg_patch[IF_BIG_BATCH(p_num_angles * i + j, 0)]; + tmp = d->IF_BIG_BATCH(threadctxs[0], threadctxs(i, j)).dbg_patch IF_BIG_BATCH((i, j),); } else { - tmp = d.threadctxs[IF_BIG_BATCH(0, p_num_angles * i + j)].response.plane(IF_BIG_BATCH(p_num_angles * i + j, 0)); + tmp = d->IF_BIG_BATCH(threadctxs[0], threadctxs(i, j)).response.plane(IF_BIG_BATCH(d->threadctxs[0].max.getIdx(i, j), 0)); tmp = circshift(tmp, -tmp.cols/2, -tmp.rows/2); } cv::resize(tmp, tmp, cv::Size(w, h)); @@ -325,8 +337,11 @@ double KCF_Tracker::findMaxReponse(uint &max_idx, cv::Point2d &new_location) con cv::imshow("KCF visual debug", all_responses); } - cv::Point2i &max_response_pt = IF_BIG_BATCH(d.threadctxs[0].max[max_idx].loc, d.threadctxs[max_idx].max.loc); - cv::Mat max_response_map = IF_BIG_BATCH(d.threadctxs[0].response.plane(max_idx), d.threadctxs[max_idx].response.plane(0)); + max_idx = std::distance(vec.begin(), max_it); + + cv::Point2i max_response_pt = IF_BIG_BATCH(max_it->loc, max_it->max.loc); + cv::Mat max_response_map = IF_BIG_BATCH(d->threadctxs[0].response.plane(max_idx), + max_it->response.plane(0)); DEBUG_PRINTM(max_response_map); DEBUG_PRINT(max_response_pt); @@ -363,17 +378,17 @@ void KCF_Tracker::track(cv::Mat &img) resizeImgs(input_rgb, input_gray); #ifdef ASYNC - for (auto &it : d.threadctxs) + for (auto &it : d->threadctxs) it.async_res = std::async(std::launch::async, [this, &input_gray, &input_rgb, &it]() -> void { it.track(*this, input_rgb, input_gray); }); - for (auto const &it : d.threadctxs) + for (auto const &it : d->threadctxs) it.async_res.wait(); #else // !ASYNC NORMAL_OMP_PARALLEL_FOR - for (uint i = 0; i < d.threadctxs.size(); ++i) - d.threadctxs[i].track(*this, input_rgb, input_gray); + for (uint i = 0; i < d->threadctxs.size(); ++i) + d->threadctxs[i].track(*this, input_rgb, input_gray); #endif cv::Point2d new_location; @@ -392,11 +407,13 @@ void KCF_Tracker::track(cv::Mat &img) if (m_use_subgrid_scale) { p_current_scale *= sub_grid_scale(max_idx); } else { - p_current_scale *= p_scales[max_idx]; + p_current_scale *= d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).scale(max_idx); } clamp2(p_current_scale, p_min_max_scale[0], p_min_max_scale[1]); + p_current_angle += d->IF_BIG_BATCH(threadctxs[0].max, threadctxs).angle(max_idx); + // train at newly estimated target position train(input_rgb, input_gray, p_interp_factor); } @@ -406,12 +423,13 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input TRACE(""); BIG_BATCH_OMP_PARALLEL_FOR - for (uint i = 0; i < IF_BIG_BATCH(kcf.p_num_scales, 1); ++i) + for (uint i = 0; i < IF_BIG_BATCH(max.size(), 1); ++i) { - kcf.get_features(input_rgb, input_gray, &dbg_patch[i], + kcf.get_features(input_rgb, input_gray, &dbg_patch IF_BIG_BATCH([i],), kcf.p_current_center.x, kcf.p_current_center.y, kcf.p_windows_size.width, kcf.p_windows_size.height, - kcf.p_current_scale * IF_BIG_BATCH(kcf.p_scales[i], scale)) + kcf.p_current_scale * IF_BIG_BATCH(max.scale(i), scale), + kcf.p_current_angle + IF_BIG_BATCH(max.angle(i), angle)) .copyTo(patch_feats.scale(i)); DEBUG_PRINT(patch_feats.scale(i)); } @@ -437,7 +455,7 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input double min_val, max_val; cv::Point2i min_loc, max_loc; #ifdef BIG_BATCH - for (size_t i = 0; i < kcf.p_scales.size(); ++i) { + for (size_t i = 0; i < max.size(); ++i) { cv::minMaxLoc(response.plane(i), &min_val, &max_val, &min_loc, &max_loc); DEBUG_PRINT(max_loc); double weight = kcf.p_scales[i] < 1. ? kcf.p_scales[i] : 1. / kcf.p_scales[i]; @@ -459,12 +477,12 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input // **************************************************************************** cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, cv::Mat *dbg_patch, - int cx, int cy, int size_x, int size_y, double scale) const + int cx, int cy, int size_x, int size_y, double scale, double angle) const { cv::Size scaled = cv::Size(floor(size_x * scale), floor(size_y * scale)); - cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height); - cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height); + cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height, angle); + cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height, angle); if (dbg_patch) patch_rgb.copyTo(*dbg_patch); @@ -625,9 +643,10 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) // Returns sub-window of image input centered at [cx, cy] coordinates), // with size [width, height]. If any pixels are outside of the image, // they will replicate the values at the borders. -cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height) const +cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int width, int height, double angle) const { cv::Mat patch; + (void)angle; int x1 = cx - width / 2; int y1 = cy - height / 2; @@ -777,10 +796,14 @@ cv::Point2f KCF_Tracker::sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) c return sub_peak; } -double KCF_Tracker::sub_grid_scale(uint index) +double KCF_Tracker::sub_grid_scale(uint max_index) { cv::Mat A, fval; - if (index >= p_scales.size()) { + const auto &vec = d->IF_BIG_BATCH(threadctxs[0].max, threadctxs); + uint index = vec.getScaleIdx(max_index); + uint angle_idx = vec.getAngleIdx(index); + + if (index >= vec.size()) { // interpolate from all values // fit 1d quadratic function f(x) = a*x^2 + b*x + c A.create(p_scales.size(), 3, CV_32FC1); @@ -789,7 +812,7 @@ double KCF_Tracker::sub_grid_scale(uint index) A.at(i, 0) = float(p_scales[i] * p_scales[i]); A.at(i, 1) = float(p_scales[i]); A.at(i, 2) = 1; - fval.at(i) = d.threadctxs.back().IF_BIG_BATCH(max[i].response, max.response); + fval.at(i) = d->IF_BIG_BATCH(threadctxs[0].max[i].response, threadctxs(i, angle_idx).max.response); } } else { // only from neighbours @@ -802,14 +825,14 @@ double KCF_Tracker::sub_grid_scale(uint index) p_scales[index + 1] * p_scales[index + 1], p_scales[index + 1], 1); #ifdef BIG_BATCH fval = (cv::Mat_(3, 1) << - d.threadctxs.back().max[index - 1].response, - d.threadctxs.back().max[index + 0].response, - d.threadctxs.back().max[index + 1].response); + d->threadctxs[0].max(index - 1, angle_idx).response, + d->threadctxs[0].max(index + 0, angle_idx).response, + d->threadctxs[0].max(index + 1, angle_idx).response); #else fval = (cv::Mat_(3, 1) << - d.threadctxs[index - 1].max.response, - d.threadctxs[index + 0].max.response, - d.threadctxs[index + 1].max.response); + d->threadctxs(index - 1, angle_idx).max.response, + d->threadctxs(index + 0, angle_idx).max.response, + d->threadctxs(index + 1, angle_idx).max.response); #endif } diff --git a/src/kcf.h b/src/kcf.h index 70662d7..76abc5e 100644 --- a/src/kcf.h +++ b/src/kcf.h @@ -43,6 +43,7 @@ struct BBox_c class KCF_Tracker { friend ThreadCtx; + friend Kcf_Tracker_Private; public: bool m_debug {false}; bool m_visual_debug {false}; @@ -86,6 +87,7 @@ private: // Information to calculate current pose of the tracked object cv::Point2d p_current_center; double p_current_scale = 1.; + double p_current_angle = 0.; double max_response = -1.; @@ -115,7 +117,7 @@ private: const int p_num_of_feats = 31 + (m_use_color ? 3 : 0) + (m_use_cnfeat ? 10 : 0); cv::Size feature_size; - Kcf_Tracker_Private &d; + std::unique_ptr d; class Model { cv::Size feature_size; @@ -163,12 +165,12 @@ private: //helping functions void scale_track(ThreadCtx &vars, cv::Mat &input_rgb, cv::Mat &input_gray); - cv::Mat get_subwindow(const cv::Mat &input, int cx, int cy, int size_x, int size_y) const; + cv::Mat get_subwindow(const cv::Mat &input, int cx, int cy, int size_x, int size_y, double angle) const; cv::Mat gaussian_shaped_labels(double sigma, int dim1, int dim2); std::unique_ptr gaussian_correlation; cv::Mat circshift(const cv::Mat &patch, int x_rot, int y_rot) const; cv::Mat cosine_window_function(int dim1, int dim2); - cv::Mat get_features(cv::Mat &input_rgb, cv::Mat &input_gray, cv::Mat *dbg_patch, int cx, int cy, int size_x, int size_y, double scale) const; + cv::Mat get_features(cv::Mat &input_rgb, cv::Mat &input_gray, cv::Mat *dbg_patch, int cx, int cy, int size_x, int size_y, double scale, double angle) const; cv::Point2f sub_pixel_peak(cv::Point &max_loc, cv::Mat &response) const; double sub_grid_scale(uint index); void resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray); diff --git a/src/threadctx.hpp b/src/threadctx.hpp index 0d084c7..0b23707 100644 --- a/src/threadctx.hpp +++ b/src/threadctx.hpp @@ -5,25 +5,56 @@ #include "dynmem.hpp" #include "kcf.h" #include "complexmat.hpp" +#include class KCF_Tracker; +template +class ScaleRotVector : public std::vector { +public: + ScaleRotVector(const std::vector &scales, const std::vector &angles) + : scales(scales) + , angles(angles) + {} + + uint getIdx(uint scale_idx, uint angle_idx) const { return angles.size() * scale_idx + angle_idx; } + uint getScaleIdx(uint idx) const { return idx / angles.size(); } + uint getAngleIdx(uint idx) const { return idx % angles.size(); } + T& operator()(uint scale_idx, uint angle_idx) { return std::vector::at(getIdx(scale_idx, angle_idx)); } + double scale(uint idx) const { return scales[getScaleIdx(idx)]; } + double angle(uint idx) const { return angles[getAngleIdx(idx)]; } +private: + const std::vector scales, angles; +}; + struct ThreadCtx { public: ThreadCtx(cv::Size roi, uint num_features #ifdef BIG_BATCH - , uint num_scales + , const std::vector &scales + , const std::vector &angles #else , double scale + , double angle #endif ) : roi(roi) , num_features(num_features) - , num_scales(IF_BIG_BATCH(num_scales, 1)) -#ifndef BIG_BATCH + , num_scales(IF_BIG_BATCH(scales.size(), 1)) + , num_angles(IF_BIG_BATCH(angles.size(), 1)) +#ifdef BIG_BATCH + , max(scales, angles) + , dbg_patch(scales, angles) + { + max.resize(scales.size() * angles.size()); + dbg_patch.resize(scales.size() * angles.size()); + } +#else , scale(scale) + , angle(angle) + {} #endif - {} + ThreadCtx(ThreadCtx &&) = default; @@ -32,26 +63,25 @@ private: cv::Size roi; uint num_features; uint num_scales; + uint num_angles; cv::Size freq_size = Fft::freq_size(roi); - MatScaleFeats patch_feats{num_scales, num_features, roi}; - MatScaleFeats temp{num_scales, num_features, roi}; + MatScaleFeats patch_feats{num_scales * num_angles, num_features, roi}; + MatScaleFeats temp{num_scales * num_angles, num_features, roi}; - KCF_Tracker::GaussianCorrelation gaussian_correlation{num_scales, num_features, roi}; + KCF_Tracker::GaussianCorrelation gaussian_correlation{num_scales * num_angles, num_features, roi}; - MatScales ifft2_res{num_scales, roi}; + MatScales ifft2_res{num_scales * num_angles, roi}; - ComplexMat zf{uint(freq_size.height), uint(freq_size.width), num_features, num_scales}; - ComplexMat kzf{uint(freq_size.height), uint(freq_size.width), 1, num_scales}; + ComplexMat zf{uint(freq_size.height), uint(freq_size.width), num_features, num_scales * num_angles}; + ComplexMat kzf{uint(freq_size.height), uint(freq_size.width), 1, num_scales * num_angles}; public: #ifdef ASYNC std::future async_res; #endif - std::vector dbg_patch{num_scales}; // images for visual debugging - - MatScales response{num_scales, roi}; + MatScales response{num_scales * num_angles, roi}; struct Max { cv::Point2i loc; @@ -59,10 +89,12 @@ public: }; #ifdef BIG_BATCH - std::vector max = std::vector(num_scales); + ScaleRotVector max; + ScaleRotVector dbg_patch; // images for visual debugging #else Max max; - const double scale; + const double scale, angle; + cv::Mat dbg_patch; // image for visual debugging #endif }; -- 2.39.2