Changes that should correct some race conditions

author Shanigen <vkaraf@gmail.com>

Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)

committer Shanigen <vkaraf@gmail.com>

Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)
author Shanigen <vkaraf@gmail.com>
Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)
committer Shanigen <vkaraf@gmail.com>
Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)
diff --git a/src/complexmat.hpp b/src/complexmat.hpp

index 22560c8839248f7d4cbbd91d55435a511b578e64..bf7bab46187b8544f251c1a2247c22462a139b0c 100644 (file)
--- a/src/complexmat.hpp
+++ b/src/complexmat.hpp
@@ -60,6 +60,17 @@ public:
          }
      }
  
+    T sqr_norm() const // Returns the sum of re^2 + im^2 over the first n_channels/n_scales channels of p_data, divided by cols*rows.
+    {
+        int n_channels_per_scale = n_channels/n_scales; // integer division
+        T sum_sqr_norm = 0;
+         for (int i = 0; i < n_channels_per_scale; ++i) { // visits channels 0..n_channels_per_scale-1 only; NOTE(review): presumably meant for single-scale data - confirm
+             for (auto lhs = p_data.begin()+i*rows*cols; lhs != p_data.begin()+(i+1)*rows*cols; ++lhs) // channel i spans rows*cols consecutive elements of p_data
+                 sum_sqr_norm += lhs->real()*lhs->real() + lhs->imag()*lhs->imag(); // squared magnitude of one complex element
+         }
+        sum_sqr_norm = sum_sqr_norm/static_cast<T>(cols*rows); // scale the accumulated sum by the element count of one channel
+        return sum_sqr_norm;
+    }
  
      void sqr_norm(T *sums_sqr_norms) const
      {
@@ -290,7 +301,7 @@ public:
      //assuming that mat has 2 channels (real, imag)
      void set_channel(int idx, const cv::Mat & mat);
  
-
+    float sqr_norm();
      void sqr_norm(float *sums_sqr_norms) const;
  
      ComplexMat sqr_mag() const;
diff --git a/src/fft_fftw.cpp b/src/fft_fftw.cpp

index 399a8d96a933f550f46423b9bc0622ed3fe043e0..ffeadbe8c5b29ba02f1b42007724c6ebcf7811da 100644 (file)
--- a/src/fft_fftw.cpp
+++ b/src/fft_fftw.cpp
@@ -8,8 +8,6 @@
  
  #if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
  #define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(m_num_threads);
-#elif defined(OPENMP) && defined(BIG_BATCH)&& !defined(CUFFTW)
-#define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(omp_get_max_threads());
  #else
  #define FFTW_PLAN_WITH_THREADS()
  #endif
diff --git a/src/kcf.cpp b/src/kcf.cpp

index 42bfbda9853cac510d6ea45c6e11677f86fbf3d9..aeaf5079636740d5780afef56f18c312809b403d 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -33,6 +33,7 @@ KCF_Tracker::KCF_Tracker()
  KCF_Tracker::~KCF_Tracker()
  {
      delete &fft;
+#ifdef BIG_BATCH
  #ifdef CUFFT
      CudaSafeCall(cudaFreeHost(xf_sqr_norm));
      CudaSafeCall(cudaFreeHost(yf_sqr_norm));
@@ -41,6 +42,7 @@ KCF_Tracker::~KCF_Tracker()
      free(xf_sqr_norm);
      free(yf_sqr_norm);
  #endif
+#endif
  }
  
  void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int fit_size_y)
@@ -132,6 +134,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
      else
          p_scales.push_back(1.);
  
+#ifdef BIG_BATCH
  #ifdef CUFFT
      if (p_windows_size[1]/p_cell_size*(p_windows_size[0]/p_cell_size/2+1) > 1024) {
          std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set "
@@ -155,7 +158,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
      xf_sqr_norm = (float*) malloc(p_num_scales*sizeof(float));
      yf_sqr_norm = (float*) malloc(sizeof(float));
  #endif
-
+#endif
      p_current_scale = 1.;
  
      double min_size_ratio = std::max(5.*p_cell_size/p_windows_size[0], 5.*p_cell_size/p_windows_size[1]);
@@ -361,18 +364,18 @@ void KCF_Tracker::track(cv::Mat &img)
      } else {
  #pragma omp parallel for ordered  private(patch_feat) schedule(dynamic)
          for (size_t i = 0; i < p_scales.size(); ++i) {
-            patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]);
+            patch_feat = get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size[0], this->p_windows_size[1], this->p_current_scale * this->p_scales[i]);
              ComplexMat zf = fft.forward_window(patch_feat);
              DEBUG_PRINTM(zf);
              cv::Mat response;
              if (m_use_linearkernel)
                  response = fft.inverse((p_model_alphaf * zf).sum_over_channels());
              else {
-                ComplexMat kzf = gaussian_correlation(zf, p_model_xf, p_kernel_sigma);
+                ComplexMat kzf = gaussian_correlation(zf, this->p_model_xf, this->p_kernel_sigma);
                  DEBUG_PRINTM(p_model_alphaf);
                  DEBUG_PRINTM(kzf);
                  DEBUG_PRINTM(p_model_alphaf * kzf);
-                response = fft.inverse(p_model_alphaf * kzf);
+                response = fft.inverse(this->p_model_alphaf * kzf);
              }
              DEBUG_PRINTM(response);
  
@@ -385,7 +388,7 @@ void KCF_Tracker::track(cv::Mat &img)
              cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc);
              DEBUG_PRINT(max_loc);
  
-            double weight = p_scales[i] < 1. ? p_scales[i] : 1./p_scales[i];
+            double weight = this->p_scales[i] < 1. ? this->p_scales[i] : 1./this->p_scales[i];
  #pragma omp critical
              {
                  if (max_val*weight > max_response) {
@@ -681,6 +684,7 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid
  
  ComplexMat KCF_Tracker::gaussian_correlation(const ComplexMat &xf, const ComplexMat &yf, double sigma, bool auto_correlation)
  {
+#ifdef BIG_BATCH
  #ifdef CUFFT
      xf.sqr_norm(xf_sqr_norm_d);
      if (!auto_correlation)
@@ -692,6 +696,10 @@ ComplexMat KCF_Tracker::gaussian_correlation(const ComplexMat &xf, const Complex
      } else {
         yf.sqr_norm(yf_sqr_norm);
      }
+#endif
+#else
+    float xf_sqr_norm = xf.sqr_norm();
+    float yf_sqr_norm =auto_correlation ? xf_sqr_norm : yf.sqr_norm();
  #endif
      ComplexMat xyf;
      xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj());
@@ -729,11 +737,15 @@ ComplexMat KCF_Tracker::gaussian_correlation(const ComplexMat &xf, const Complex
      cv::Mat in_all(scales[0].rows * xf.n_scales, scales[0].cols, CV_32F);
  
      float numel_xf_inv = 1.f/(xf.cols * xf.rows * (xf.channels()/xf.n_scales));
+#ifdef BIG_BATCH
      for (int i = 0; i < xf.n_scales; ++i){
          cv::Mat in_roi(in_all, cv::Rect(0, i*scales[0].rows, scales[0].cols, scales[0].rows));
          cv::exp(- 1.f / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * scales[i]) * numel_xf_inv, 0), in_roi);
          DEBUG_PRINTM(in_roi);
      }
+#else
+    cv::exp(- 1.f / (sigma * sigma) * cv::max((xf_sqr_norm + yf_sqr_norm - 2 * xy_sum) * numel_xf_inv, 0), in_all);
+#endif
  
      DEBUG_PRINTM(in_all);
      return fft.forward(in_all);
diff --git a/src/kcf.h b/src/kcf.h

index fd2f86d47b6c31b086aca3eeace8952c2a162182..ade257fe6b134daf9a1cd2e5a41597d011d61edd 100644 (file)
--- a/src/kcf.h
+++ b/src/kcf.h
@@ -125,9 +125,11 @@ private:
      //for big batch
      int p_num_of_feats;
      int p_roi_height, p_roi_width;
+#ifdef BIG_BATCH
      float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
  #ifdef CUFFT
      float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr;
+#endif
  #endif
  
      //model
author	Shanigen <vkaraf@gmail.com>
	Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)
committer	Shanigen <vkaraf@gmail.com>
	Wed, 25 Jul 2018 12:10:16 +0000 (14:10 +0200)
src/complexmat.hpp		patch \| blob \| history
src/fft_fftw.cpp		patch \| blob \| history
src/kcf.cpp		patch \| blob \| history
src/kcf.h		patch \| blob \| history