Do not require ifdefs when using sqr_norm()

author Michal Sojka <michal.sojka@cvut.cz>

Thu, 13 Sep 2018 04:40:35 +0000 (06:40 +0200)

committer Michal Sojka <michal.sojka@cvut.cz>

Thu, 13 Sep 2018 04:40:44 +0000 (06:40 +0200)
author Michal Sojka <michal.sojka@cvut.cz>
Thu, 13 Sep 2018 04:40:35 +0000 (06:40 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Thu, 13 Sep 2018 04:40:44 +0000 (06:40 +0200)
diff --git a/src/complexmat.cu b/src/complexmat.cu

index 7d4a43dbceece696d6dd03fd923f6f9d035ff6f9..cbf9076f6b09c7cc7014962a26498d324b346586 100644 (file)
--- a/src/complexmat.cu
+++ b/src/complexmat.cu
@@ -27,15 +27,15 @@ __global__ void sqr_norm_kernel(int n, float *out, float *data, float rows, floa
      }
  }
  
-void ComplexMat::sqr_norm(float *result) const
+void ComplexMat::sqr_norm(DynMem &result) const
  {
-    CudaSafeCall(cudaMemsetAsync(result, 0, n_scales * sizeof(float), this->stream));
+    CudaSafeCall(cudaMemsetAsync(result.deviceMem(), 0, n_scales * sizeof(float), this->stream));
  
      dim3 threadsPerBlock(rows, cols);
      dim3 numBlocks(n_channels / n_scales, n_scales);
  
      sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows * cols * sizeof(float), this->stream>>>(
-        n_channels / n_scales, result, this->p_data, rows, cols);
+        n_channels / n_scales, result.deviceMem(), this->p_data, rows, cols);
      CudaCheckError();
  
      return;
diff --git a/src/complexmat.cuh b/src/complexmat.cuh

index 57ccee3f937dc20b71f46a60c0570b76a55139b5..7f19771203e199b63941d73858f9ea5dd1926b80 100644 (file)
--- a/src/complexmat.cuh
+++ b/src/complexmat.cuh
@@ -3,6 +3,7 @@
  
  #include <opencv2/opencv.hpp>
  
+#include "dynmem.hpp"
  #include "cuda_runtime.h"
  #include "cufft.h"
  
@@ -79,7 +80,7 @@ class ComplexMat {
          return;
      }
  
-    void sqr_norm(float *result) const;
+    void sqr_norm(DynMem &result) const;
  
      ComplexMat sqr_mag() const;
  
diff --git a/src/complexmat.hpp b/src/complexmat.hpp

index 2d1e939c01da8f278b68da0b4340596feaaad8a1..724ffaa61ef1faddb9d89c929e58bf46ddbf3f29 100644 (file)
--- a/src/complexmat.hpp
+++ b/src/complexmat.hpp
@@ -5,6 +5,7 @@
  #include <vector>
  #include <algorithm>
  #include <functional>
+#include "dynmem.hpp"
  
  template <typename T> class ComplexMat_ {
    public:
@@ -74,18 +75,17 @@ template <typename T> class ComplexMat_ {
          return sum_sqr_norm;
      }
  
-    void sqr_norm(T *sums_sqr_norms) const
+    void sqr_norm(DynMem_<T> &result) const
      {
          int n_channels_per_scale = n_channels / n_scales;
          int scale_offset = n_channels_per_scale * rows * cols;
-        T sum_sqr_norm;
          for (uint scale = 0; scale < n_scales; ++scale) {
-            sum_sqr_norm = 0;
+            T sum_sqr_norm = 0;
              for (int i = 0; i < n_channels_per_scale; ++i)
                  for (auto lhs = p_data.begin() + i * rows * cols + scale * scale_offset;
                       lhs != p_data.begin() + (i + 1) * rows * cols + scale * scale_offset; ++lhs)
                      sum_sqr_norm += lhs->real() * lhs->real() + lhs->imag() * lhs->imag();
-            sums_sqr_norms[scale] = sum_sqr_norm / static_cast<T>(cols * rows);
+            result.hostMem()[scale] = sum_sqr_norm / static_cast<T>(cols * rows);
          }
          return;
      }
diff --git a/src/kcf.cpp b/src/kcf.cpp

index c6b5f649932b552879909366597a77b8723400e1..4c9f66f572d98f6531859b4408ea38063a062e9d 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -728,27 +728,19 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid
  void KCF_Tracker::gaussian_correlation(struct ThreadCtx &vars, const ComplexMat &xf, const ComplexMat &yf,
                                         double sigma, bool auto_correlation)
  {
-#ifdef CUFFT
-    xf.sqr_norm(vars.xf_sqr_norm.deviceMem());
-    if (!auto_correlation) yf.sqr_norm(vars.yf_sqr_norm.deviceMem());
-#else
-    xf.sqr_norm(vars.xf_sqr_norm.hostMem());
+    xf.sqr_norm(vars.xf_sqr_norm);
      if (auto_correlation) {
          vars.yf_sqr_norm.hostMem()[0] = vars.xf_sqr_norm.hostMem()[0];
      } else {
-        yf.sqr_norm(vars.yf_sqr_norm.hostMem());
+        yf.sqr_norm(vars.yf_sqr_norm);
      }
-#endif
      vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj());
      DEBUG_PRINTM(vars.xyf);
      fft.inverse(vars.xyf, vars.ifft2_res, m_use_cuda ? vars.data_i_features.deviceMem() : nullptr, vars.stream);
  #ifdef CUFFT
-    if (auto_correlation)
-        cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(), vars.xf_sqr_norm.deviceMem(), vars.xf_sqr_norm.deviceMem(),
-                                  sigma, xf.n_channels, xf.n_scales, p_roi.height, p_roi.width, vars.stream);
-    else
-        cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(), vars.xf_sqr_norm.deviceMem(), vars.yf_sqr_norm.deviceMem(),
-                                  sigma, xf.n_channels, xf.n_scales, p_roi.height, p_roi.width, vars.stream);
+    cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(),
+                              vars.xf_sqr_norm.deviceMem(), vars.xf_sqr_norm.deviceMem(), sigma, xf.n_channels,
+                              xf.n_scales, p_roi.height, p_roi.width, vars.stream);
  #else
      // ifft2 and sum over 3rd dimension, we dont care about individual channels
      DEBUG_PRINTM(vars.ifft2_res);
author	Michal Sojka <michal.sojka@cvut.cz>
	Thu, 13 Sep 2018 04:40:35 +0000 (06:40 +0200)
committer	Michal Sojka <michal.sojka@cvut.cz>
	Thu, 13 Sep 2018 04:40:44 +0000 (06:40 +0200)
src/complexmat.cu		patch | blob | history
src/complexmat.cuh		patch | blob | history
src/complexmat.hpp		patch | blob | history
src/kcf.cpp		patch | blob | history