From 06aa7e894427f06b04820aec314474ed816ac6fc Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 24 Sep 2018 00:56:16 +0200 Subject: [PATCH] Attempt to fix big_batch mode --- src/complexmat.hpp | 56 ++++++++++++++++++-------------------- src/debug.cpp | 13 ++++++--- src/fft.cpp | 15 ++++++++++- src/fft_fftw.cpp | 67 +++++++++++++++++----------------------------- src/fft_fftw.h | 2 +- src/kcf.cpp | 7 +++-- src/kcf.h | 2 +- src/threadctx.hpp | 2 +- 8 files changed, 79 insertions(+), 85 deletions(-) diff --git a/src/complexmat.hpp b/src/complexmat.hpp index c3877a8..ce93935 100644 --- a/src/complexmat.hpp +++ b/src/complexmat.hpp @@ -12,27 +12,22 @@ template class ComplexMat_ { uint cols; uint rows; uint n_channels; - uint n_scales = 1; + uint n_scales; - ComplexMat_() : cols(0), rows(0), n_channels(0) {} - ComplexMat_(uint _rows, uint _cols, uint _n_channels) : cols(_cols), rows(_rows), n_channels(_n_channels) + ComplexMat_() : cols(0), rows(0), n_channels(0), n_scales(0) {} + ComplexMat_(uint _rows, uint _cols, uint _n_channels, uint _n_scales = 1) + : cols(_cols), rows(_rows), n_channels(_n_channels * _n_scales), n_scales(_n_scales) { p_data.resize(n_channels * cols * rows); } - - ComplexMat_(uint _rows, uint _cols, uint _n_channels, uint _n_scales) - : cols(_cols), rows(_rows), n_channels(_n_channels), n_scales(_n_scales) - { - p_data.resize(n_channels * cols * rows); - } - ComplexMat_(cv::Size size, uint _n_channels) - : cols(size.width), rows(size.height), n_channels(_n_channels) + ComplexMat_(cv::Size size, uint _n_channels, uint _n_scales = 1) + : cols(size.width), rows(size.height), n_channels(_n_channels * _n_scales), n_scales(_n_scales) { p_data.resize(n_channels * cols * rows); } // assuming that mat has 2 channels (real, img) - ComplexMat_(const cv::Mat &mat) : cols(uint(mat.cols)), rows(uint(mat.rows)), n_channels(1) + ComplexMat_(const cv::Mat &mat) : cols(uint(mat.cols)), rows(uint(mat.rows)), n_channels(1), n_scales(1) { p_data = convert(mat); } 
@@ -42,6 +37,7 @@ template class ComplexMat_ { rows = _rows; cols = _cols; n_channels = _n_channels; + n_scales = 1; p_data.resize(n_channels * cols * rows); } @@ -49,7 +45,7 @@ template class ComplexMat_ { { rows = _rows; cols = _cols; - n_channels = _n_channels; + n_channels = _n_channels * _n_scales; n_scales = _n_scales; p_data.resize(n_channels * cols * rows); } @@ -70,6 +66,8 @@ template class ComplexMat_ { T sqr_norm() const { + assert(n_scales == 1); + int n_channels_per_scale = n_channels / n_scales; T sum_sqr_norm = 0; for (int i = 0; i < n_channels_per_scale; ++i) { @@ -107,20 +105,18 @@ template class ComplexMat_ { ComplexMat_ sum_over_channels() const { - assert(p_data.size() > 1); + assert(p_data.size() == n_channels * rows * cols); - int n_channels_per_scale = n_channels / n_scales; - int scale_offset = n_channels_per_scale * rows * cols; + uint n_channels_per_scale = n_channels / n_scales; + uint scale_offset = n_channels_per_scale * rows * cols; - ComplexMat_ result(this->rows, this->cols, n_scales); + ComplexMat_ result(this->rows, this->cols, 1, n_scales); for (uint scale = 0; scale < n_scales; ++scale) { - std::copy(p_data.begin() + scale * scale_offset, p_data.begin() + rows * cols + scale * scale_offset, - result.p_data.begin() + scale * rows * cols); - for (int i = 1; i < n_channels_per_scale; ++i) { - std::transform(result.p_data.begin() + scale * rows * cols, - result.p_data.begin() + (scale + 1) * rows * cols, - p_data.begin() + i * rows * cols + scale * scale_offset, - result.p_data.begin() + scale * rows * cols, std::plus>()); + for (uint i = 0; i < rows * cols; ++i) { + std::complex acc = 0; + for (uint ch = 0; ch < n_channels_per_scale; ++ch) + acc += p_data[scale * scale_offset + i + ch * rows * cols]; + result.p_data[scale * rows * cols + i] = acc; } } return result; @@ -218,14 +214,14 @@ template class ComplexMat_ { ComplexMat_ mat_mat_operator(void (*op)(std::complex &c_lhs, const std::complex &c_rhs), const ComplexMat_ &mat_rhs) 
const { - assert(mat_rhs.n_channels == n_channels && mat_rhs.cols == cols && mat_rhs.rows == rows); + assert(mat_rhs.n_channels == n_channels/n_scales && mat_rhs.cols == cols && mat_rhs.rows == rows); ComplexMat_ result = *this; - for (uint i = 0; i < n_channels; ++i) { - auto lhs = result.p_data.begin() + i * rows * cols; - auto rhs = mat_rhs.p_data.begin() + i * rows * cols; - for (; lhs != result.p_data.begin() + (i + 1) * rows * cols; ++lhs, ++rhs) - op(*lhs, *rhs); + for (uint s = 0; s < n_scales; ++s) { + auto lhs = result.p_data.begin() + (s * n_channels/n_scales * rows * cols); + auto rhs = mat_rhs.p_data.begin(); + for (uint i = 0; i < n_channels/n_scales * rows * cols; ++i) + op(*(lhs + i), *(rhs + i)); } return result; diff --git a/src/debug.cpp b/src/debug.cpp index 4200348..19d5a8c 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -1,4 +1,5 @@ #include "debug.h" +#include std::ostream &operator<<(std::ostream &os, const DbgTracer::Printer &p) { @@ -18,10 +19,14 @@ std::ostream &operator<<(std::ostream &os, const DbgTracer::Printer IOSave s(os); os << std::setprecision(3); os << " " << p.obj.size() << " " << p.obj.channels() << "ch "; // << p.obj.get_p_data(); - os << " = [ "; constexpr int num = 10; - for (int i = 0; i < std::min(num, p.obj.size().area()); ++i) - os << p.obj.get_p_data()[i] << ", "; - os << (num < p.obj.size().area() ? "... ]" : "]"); + for (uint s = 0; s < p.obj.n_scales; ++s) { + uint ofs = s * p.obj.rows * p.obj.cols * p.obj.n_channels / p.obj.n_scales; + os << " = [ "; + for (int i = 0; i < std::min(num, p.obj.size().area()); ++i) + os << p.obj.get_p_data()[ofs + i] << ", "; + os << (num < p.obj.size().area() ? "... 
]" : "]"); + os << std::endl << std::string(20, ' '); + } return os; } diff --git a/src/fft.cpp b/src/fft.cpp index 76e1fab..412618d 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -1,6 +1,7 @@ #include "fft.h" #include +#include "debug.h" Fft::~Fft() { @@ -29,6 +30,8 @@ void Fft::set_window(const MatDynMem &window) void Fft::forward(const MatScales &real_input, ComplexMat &complex_result) { + TRACE(""); + DEBUG_PRINT(real_input); assert(real_input.dims == 3); #ifdef BIG_BATCH assert(real_input.size[0] == 1 || real_input.size[0] == int(m_num_of_scales)); @@ -38,6 +41,10 @@ void Fft::forward(const MatScales &real_input, ComplexMat &complex_result) assert(real_input.size[1] == int(m_height)); assert(real_input.size[2] == int(m_width)); + assert(int(complex_result.cols) == freq_size(cv::Size(m_width, m_height)).width); + assert(int(complex_result.rows) == freq_size(cv::Size(m_width, m_height)).height); + assert(complex_result.channels() == uint(real_input.size[0])); + (void)real_input; (void)complex_result; } @@ -67,8 +74,14 @@ void Fft::forward_window(MatScaleFeats &patch_feats, ComplexMat &complex_result, void Fft::inverse(ComplexMat &complex_input, MatScales &real_result) { + TRACE(""); + DEBUG_PRINT(complex_input); assert(real_result.dims == 3); - assert(real_result.size[0] == IF_BIG_BATCH(int(m_num_of_scales), 1)); +#ifdef BIG_BATCH + assert(real_result.size[0] == 1 || real_result.size[0] == int(m_num_of_scales)); +#else + assert(real_result.size[0] == 1); +#endif assert(real_result.size[1] == int(m_height)); assert(real_result.size[2] == int(m_width)); diff --git a/src/fft_fftw.cpp b/src/fft_fftw.cpp index 4df7e84..6afdeb7 100644 --- a/src/fft_fftw.cpp +++ b/src/fft_fftw.cpp @@ -90,40 +90,23 @@ void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned ostride, odist, FFTW_PATIENT); } #endif - // FFT inverse one scale - { - ComplexMat in_i(m_height, m_width, m_num_of_feats); - cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), 
CV_32FC(int(m_num_of_feats))); - fftwf_complex *in = reinterpret_cast(in_i.get_p_data()); - float *out = reinterpret_cast(out_i.data); - int rank = 2; - int n[] = {int(m_height), int(m_width)}; - int howmany = 1; - int idist = int(m_height * (m_width / 2 + 1)), odist = 1; - int istride = 1, ostride = int(m_num_of_feats); - int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n; - - FFTW_PLAN_WITH_THREADS(); - plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, - odist, FFTW_PATIENT); - } - // FFT inverse all scales #ifdef BIG_BATCH - if (m_num_of_scales > 1) { + // FFT inverse all scales + { ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales); cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales)); fftwf_complex *in = reinterpret_cast(in_i_all.get_p_data()); float *out = reinterpret_cast(out_i_all.data); int rank = 2; int n[] = {(int)m_height, (int)m_width}; - int howmany = m_num_of_feats * m_num_of_scales; + int howmany = m_num_of_scales; int idist = m_height * (m_width / 2 + 1), odist = 1; - int istride = 1, ostride = m_num_of_feats * m_num_of_scales; + int istride = 1, ostride = 1; int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n; FFTW_PLAN_WITH_THREADS(); - plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, - onembed, ostride, odist, FFTW_PATIENT); + plan_i_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, + onembed, ostride, odist, FFTW_PATIENT); } #endif // FFT inverse one channel @@ -134,7 +117,7 @@ void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned float *out = reinterpret_cast(out_i1.data); int rank = 2; int n[] = {int(m_height), int(m_width)}; - int howmany = IF_BIG_BATCH(m_num_of_scales, 1); + int howmany = 1; int idist = m_height * (m_width / 2 + 1), odist = 1; int istride = 1, 
ostride = 1; int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n; @@ -155,25 +138,25 @@ void Fftw::forward(const MatScales &real_input, ComplexMat &complex_result) { Fft::forward(real_input, complex_result); - if (BIG_BATCH_MODE && real_input.rows == int(m_height * IF_BIG_BATCH(m_num_of_scales, 1))) { - fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast(real_input.data), - reinterpret_cast(complex_result.get_p_data())); - } else { + if (real_input.size[0] == 1) fftwf_execute_dft_r2c(plan_f, reinterpret_cast(real_input.data), reinterpret_cast(complex_result.get_p_data())); - } - return; +#ifdef BIG_BATCH + else + fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast(real_input.data), + reinterpret_cast(complex_result.get_p_data())); +#endif } void Fftw::forward_window(MatScaleFeats &feat, ComplexMat & complex_result, MatScaleFeats &temp) { Fft::forward_window(feat, complex_result, temp); - uint n_channels = feat.size[0]; - for (uint i = 0; i < n_channels; ++i) { - for (uint j = 0; j < uint(feat.size[1]); ++j) { - cv::Mat feat_plane = feat.plane(i, j); - cv::Mat temp_plane = temp.plane(i, j); + uint n_scales = feat.size[0]; + for (uint s = 0; s < n_scales; ++s) { + for (uint ch = 0; ch < uint(feat.size[1]); ++ch) { + cv::Mat feat_plane = feat.plane(s, ch); + cv::Mat temp_plane = temp.plane(s, ch); temp_plane = feat_plane.mul(m_window); } } @@ -181,7 +164,7 @@ void Fftw::forward_window(MatScaleFeats &feat, ComplexMat & complex_result, Mat float *in = temp.ptr(); fftwf_complex *out = reinterpret_cast(complex_result.get_p_data()); - if (n_channels <= m_num_of_feats) + if (n_scales == 1) fftwf_execute_dft_r2c(plan_fw, in, out); else fftwf_execute_dft_r2c(plan_fw_all_scales, in, out); @@ -196,13 +179,12 @@ void Fftw::inverse(ComplexMat &complex_input, MatScales &real_result) fftwf_complex *in = reinterpret_cast(complex_input.get_p_data()); float *out = real_result.ptr(); - if (n_channels == 1|| (BIG_BATCH_MODE && n_channels == 
int(IF_BIG_BATCH(m_num_of_scales, 1)))) + if (n_channels == 1) fftwf_execute_dft_c2r(plan_i_1ch, in, out); - else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(IF_BIG_BATCH(m_num_of_scales, 1))) - fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out); +#ifdef BIG_BATCH else - fftwf_execute_dft_c2r(plan_i_features, in, out); - + fftwf_execute_dft_c2r(plan_i_all_scales, in, out); +#endif real_result *= 1.0 / (m_width * m_height); } @@ -210,12 +192,11 @@ Fftw::~Fftw() { fftwf_destroy_plan(plan_f); fftwf_destroy_plan(plan_fw); - fftwf_destroy_plan(plan_i_features); fftwf_destroy_plan(plan_i_1ch); if (BIG_BATCH_MODE) { fftwf_destroy_plan(plan_f_all_scales); - fftwf_destroy_plan(plan_i_features_all_scales); + fftwf_destroy_plan(plan_i_all_scales); fftwf_destroy_plan(plan_fw_all_scales); } } diff --git a/src/fft_fftw.h b/src/fft_fftw.h index 918789c..6afc679 100644 --- a/src/fft_fftw.h +++ b/src/fft_fftw.h @@ -27,7 +27,7 @@ class Fftw : public Fft private: cv::Mat m_window; - fftwf_plan plan_f, plan_f_all_scales, plan_fw, plan_fw_all_scales, plan_i_features, plan_i_features_all_scales, + fftwf_plan plan_f, plan_f_all_scales, plan_fw, plan_fw_all_scales, plan_i_all_scales, plan_i_1ch; }; diff --git a/src/kcf.cpp b/src/kcf.cpp index 51dc5c9..71da51e 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -80,9 +80,8 @@ void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_fac alphaf_den = (p_xf * xfconj); } else { // Kernel Ridge Regression, calculate alphas (in Fourier domain) - const uint num_scales = BIG_BATCH_MODE ? 
p_num_scales : 1; cv::Size sz(Fft::freq_size(p_roi)); - ComplexMat kf(sz.height, sz.width, num_scales); + ComplexMat kf(sz.height, sz.width, 1); (*gaussian_correlation)(kf, p_model_xf, p_model_xf, p_kernel_sigma, true, *this); DEBUG_PRINTM(kf); p_model_alphaf_num = p_yf * kf; @@ -218,8 +217,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f d.threadctxs.emplace_back(p_roi, p_num_of_feats, p_num_scales); #endif - gaussian_correlation.reset( - new GaussianCorrelation(IF_BIG_BATCH(p_num_scales, 1), p_roi)); + gaussian_correlation.reset(new GaussianCorrelation(1, p_roi)); p_current_scale = 1.; @@ -711,6 +709,7 @@ void KCF_Tracker::GaussianCorrelation::operator()(ComplexMat &result, const Comp float numel_xf_inv = 1.f / (xf.cols * xf.rows * (xf.channels() / xf.n_scales)); for (uint i = 0; i < xf.n_scales; ++i) { cv::Mat plane = ifft_res.plane(i); + DEBUG_PRINT(ifft_res.plane(i)); cv::exp(-1. / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * ifft_res.plane(i)) * numel_xf_inv, 0), plane); DEBUG_PRINTM(plane); diff --git a/src/kcf.h b/src/kcf.h index 5427603..390466e 100644 --- a/src/kcf.h +++ b/src/kcf.h @@ -132,7 +132,7 @@ private: public: GaussianCorrelation(uint num_scales, cv::Size size) : xf_sqr_norm(num_scales) - , xyf(Fft::freq_size(size), num_scales) + , xyf(Fft::freq_size(size), 1, num_scales) , ifft_res(num_scales, size) , k(num_scales, size) {} diff --git a/src/threadctx.hpp b/src/threadctx.hpp index 06a0a4e..317f9f4 100644 --- a/src/threadctx.hpp +++ b/src/threadctx.hpp @@ -47,7 +47,7 @@ private: MatScales ifft2_res{num_scales, roi}; ComplexMat zf{uint(freq_size.height), uint(freq_size.width), num_features, num_scales}; - ComplexMat kzf{uint(freq_size.height), uint(freq_size.width), num_scales}; + ComplexMat kzf{uint(freq_size.height), uint(freq_size.width), 1, num_scales}; public: #ifdef ASYNC -- 2.39.2