rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Do not require ifdefs when using sqr_norm()
author Michal Sojka <michal.sojka@cvut.cz>
Thu, 13 Sep 2018 04:40:35 +0000 (06:40 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Thu, 13 Sep 2018 04:40:44 +0000 (06:40 +0200)
src/complexmat.cu
src/complexmat.cuh
src/complexmat.hpp
src/kcf.cpp

index 7d4a43dbceece696d6dd03fd923f6f9d035ff6f9..cbf9076f6b09c7cc7014962a26498d324b346586 100644 (file)
@@ -27,15 +27,15 @@ __global__ void sqr_norm_kernel(int n, float *out, float *data, float rows, floa
     }
 }
 
-void ComplexMat::sqr_norm(float *result) const
+void ComplexMat::sqr_norm(DynMem &result) const
 {
-    CudaSafeCall(cudaMemsetAsync(result, 0, n_scales * sizeof(float), this->stream));
+    CudaSafeCall(cudaMemsetAsync(result.deviceMem(), 0, n_scales * sizeof(float), this->stream));
 
     dim3 threadsPerBlock(rows, cols);
     dim3 numBlocks(n_channels / n_scales, n_scales);
 
     sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows * cols * sizeof(float), this->stream>>>(
-        n_channels / n_scales, result, this->p_data, rows, cols);
+        n_channels / n_scales, result.deviceMem(), this->p_data, rows, cols);
     CudaCheckError();
 
     return;
index 57ccee3f937dc20b71f46a60c0570b76a55139b5..7f19771203e199b63941d73858f9ea5dd1926b80 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <opencv2/opencv.hpp>
 
+#include "dynmem.hpp"
 #include "cuda_runtime.h"
 #include "cufft.h"
 
@@ -79,7 +80,7 @@ class ComplexMat {
         return;
     }
 
-    void sqr_norm(float *result) const;
+    void sqr_norm(DynMem &result) const;
 
     ComplexMat sqr_mag() const;
 
index 2d1e939c01da8f278b68da0b4340596feaaad8a1..724ffaa61ef1faddb9d89c929e58bf46ddbf3f29 100644 (file)
@@ -5,6 +5,7 @@
 #include <vector>
 #include <algorithm>
 #include <functional>
+#include "dynmem.hpp"
 
 template <typename T> class ComplexMat_ {
   public:
@@ -74,18 +75,17 @@ template <typename T> class ComplexMat_ {
         return sum_sqr_norm;
     }
 
-    void sqr_norm(T *sums_sqr_norms) const
+    void sqr_norm(DynMem_<T> &result) const
     {
         int n_channels_per_scale = n_channels / n_scales;
         int scale_offset = n_channels_per_scale * rows * cols;
-        T sum_sqr_norm;
         for (uint scale = 0; scale < n_scales; ++scale) {
-            sum_sqr_norm = 0;
+            T sum_sqr_norm = 0;
             for (int i = 0; i < n_channels_per_scale; ++i)
                 for (auto lhs = p_data.begin() + i * rows * cols + scale * scale_offset;
                      lhs != p_data.begin() + (i + 1) * rows * cols + scale * scale_offset; ++lhs)
                     sum_sqr_norm += lhs->real() * lhs->real() + lhs->imag() * lhs->imag();
-            sums_sqr_norms[scale] = sum_sqr_norm / static_cast<T>(cols * rows);
+            result.hostMem()[scale] = sum_sqr_norm / static_cast<T>(cols * rows);
         }
         return;
     }
index c6b5f649932b552879909366597a77b8723400e1..4c9f66f572d98f6531859b4408ea38063a062e9d 100644 (file)
@@ -728,27 +728,19 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid
 void KCF_Tracker::gaussian_correlation(struct ThreadCtx &vars, const ComplexMat &xf, const ComplexMat &yf,
                                        double sigma, bool auto_correlation)
 {
-#ifdef CUFFT
-    xf.sqr_norm(vars.xf_sqr_norm.deviceMem());
-    if (!auto_correlation) yf.sqr_norm(vars.yf_sqr_norm.deviceMem());
-#else
-    xf.sqr_norm(vars.xf_sqr_norm.hostMem());
+    xf.sqr_norm(vars.xf_sqr_norm);
     if (auto_correlation) {
         vars.yf_sqr_norm.hostMem()[0] = vars.xf_sqr_norm.hostMem()[0];
     } else {
-        yf.sqr_norm(vars.yf_sqr_norm.hostMem());
+        yf.sqr_norm(vars.yf_sqr_norm);
     }
-#endif
     vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj());
     DEBUG_PRINTM(vars.xyf);
     fft.inverse(vars.xyf, vars.ifft2_res, m_use_cuda ? vars.data_i_features.deviceMem() : nullptr, vars.stream);
 #ifdef CUFFT
-    if (auto_correlation)
-        cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(), vars.xf_sqr_norm.deviceMem(), vars.xf_sqr_norm.deviceMem(),
-                                  sigma, xf.n_channels, xf.n_scales, p_roi.height, p_roi.width, vars.stream);
-    else
-        cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(), vars.xf_sqr_norm.deviceMem(), vars.yf_sqr_norm.deviceMem(),
-                                  sigma, xf.n_channels, xf.n_scales, p_roi.height, p_roi.width, vars.stream);
+    cuda_gaussian_correlation(vars.data_i_features.deviceMem(), vars.gauss_corr_res.deviceMem(),
+                              vars.xf_sqr_norm.deviceMem(), vars.xf_sqr_norm.deviceMem(), sigma, xf.n_channels,
+                              xf.n_scales, p_roi.height, p_roi.width, vars.stream);
 #else
     // ifft2 and sum over 3rd dimension, we dont care about individual channels
     DEBUG_PRINTM(vars.ifft2_res);