rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Added CUFFT support
author Shanigen <vkaraf@gmail.com>
Wed, 1 Aug 2018 14:02:35 +0000 (16:02 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Wed, 5 Sep 2018 06:38:33 +0000 (08:38 +0200)
CUFFT now works with the Scale_vars struct, but the priority from now on
is for all versions of the tracker to use the same API in the FFT class.

src/fft.h
src/fft_cufft.cpp
src/fft_cufft.h
src/fft_fftw.cpp
src/fft_fftw.h
src/kcf.cpp

index a2b97300e6357af57bb09c49a3b3c0988fda1b58..1e6d03ce8b96d1c222198f9ce2af6ee42ab988a1 100644 (file)
--- a/src/fft.h
+++ b/src/fft.h
@@ -21,7 +21,7 @@ public:
     virtual void set_window(const cv::Mat & window) = 0;
     virtual ComplexMat forward(const cv::Mat & input) = 0;
     virtual void forward(Scale_vars & vars) = 0;
-    virtual ComplexMat forward_raw(float *input, bool all_scales) = 0;
+    virtual void forward_raw(Scale_vars & vars, bool all_scales) = 0;
     virtual ComplexMat forward_window(const std::vector<cv::Mat> & input) = 0;
     virtual void forward_window(Scale_vars & vars) = 0;
     virtual cv::Mat inverse(const ComplexMat & input) = 0;
index 1f1b9a78946831d72b1694ae5c5340897a4390ed..bc0d5254173c122a172f4f056e0f298fc4c259e9 100644 (file)
--- a/src/fft_cufft.cpp
+++ b/src/fft_cufft.cpp
@@ -167,24 +167,22 @@ ComplexMat cuFFT::forward(const cv::Mat & input)
     return complex_result;
 }
 
-void cuFFT::forward(Scale_var & vars)
+void cuFFT::forward(Scale_vars & vars)
 {
     return;
 }
 
-ComplexMat cuFFT::forward_raw(float *input, bool all_scales)
+void cuFFT::forward_raw(Scale_vars & vars, bool all_scales)
 {
-    ComplexMat complex_result;
+    ComplexMat *result = vars.flag & Track_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
     if (all_scales){
-        complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
-        CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(input),
-                                complex_result.get_p_data()));
+        CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
+                                result->get_p_data()));
     } else {
-        complex_result.create(m_height, m_width/ 2 + 1, 1);
-        CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(input),
-                                complex_result.get_p_data()));
+        CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(vars.gauss_corr_res),
+                                result->get_p_data()));
     }
-    return complex_result;
+    return;
 }
 
 ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
@@ -215,8 +213,27 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> & input)
     return result;
 }
 
-void cuFFT::forward_window(Scale_var & vars)
+void cuFFT::forward_window(Scale_vars & vars)
 {
+    int n_channels = vars.patch_feats.size();
+    ComplexMat *result = vars.flag & Track_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
+    if(n_channels > (int) m_num_of_feats){
+        cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
+        for (int i = 0; i < n_channels; ++i) {
+            cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
+            in_roi = vars.patch_feats[i].mul(m_window);
+        }
+
+        CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
+    } else {
+        cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
+        for (int i = 0; i < n_channels; ++i) {
+            cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
+            in_roi = vars.patch_feats[i].mul(m_window);
+        }
+
+        CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result->get_p_data()));
+    }
     return;
 }
 
@@ -256,7 +273,7 @@ cv::Mat cuFFT::inverse(const ComplexMat & input)
     return real_result/(m_width*m_height);
 }
 
-void cuFFT::inverse(Scale_var & vars)
+void cuFFT::inverse(Scale_vars & vars)
 {
     return;
 }
index 6d4aba491dfeb42a6b5790755a229e29c56aed02..44c3556625adbdc68eb52a5d17cfd7868314a84b 100644 (file)
--- a/src/fft_cufft.h
+++ b/src/fft_cufft.h
@@ -25,7 +25,7 @@ public:
     void set_window(const cv::Mat & window) override;
     ComplexMat forward(const cv::Mat & input) override;
     void forward(Scale_vars & vars) override;
-    ComplexMat forward_raw(float *input, bool all_scales) override;
+    void forward_raw(Scale_vars & vars, bool all_scales) override;
     ComplexMat forward_window(const std::vector<cv::Mat> & input) override;
     void forward_window(Scale_vars & vars) override;
     cv::Mat inverse(const ComplexMat & input) override;
index cdfab999a61e4696431dff54b0610c308dab3120..d11001a9dc1ee137b25d23b845e3b7dd601c796d 100644 (file)
--- a/src/fft_fftw.cpp
+++ b/src/fft_fftw.cpp
@@ -216,10 +216,9 @@ void Fftw::forward(Scale_vars & vars)
     return;
 }
 
-ComplexMat Fftw::forward_raw(float *input, bool all_scales)
+void Fftw::forward_raw(Scale_vars & vars, bool all_scales)
 {
-    ComplexMat dummy;
-    return dummy;
+    return;
 }
 
 ComplexMat Fftw::forward_window(const std::vector<cv::Mat> & input)
index 058dd2fe969c2ce4bdfc81f40595012ec60e2600..dfc6f8c7d4318c81af42d4adbad3de2a3f2e91e5 100644 (file)
--- a/src/fft_fftw.h
+++ b/src/fft_fftw.h
@@ -25,7 +25,7 @@ public:
     void set_window(const cv::Mat & window) override;
     ComplexMat forward(const cv::Mat & input) override;
     void forward(Scale_vars & vars) override;
-    ComplexMat forward_raw(float *input, bool all_scales) override;
+    void forward_raw(Scale_vars & vars, bool all_scales) override;
     ComplexMat forward_window(const std::vector<cv::Mat> & input) override;
     void forward_window(Scale_vars & vars) override;
     cv::Mat inverse(const ComplexMat & input) override;
index 6a5f55d122c67b7d9ee884761a9be46449acaf8e..01728b875afe4179412b709bed8216e4665532da 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -164,6 +164,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
     //window weights, i.e. labels
     fft.init(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size, p_num_of_feats, p_num_scales, m_use_big_batch);
     p_yf = fft.forward(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));
+    DEBUG_PRINTM(p_yf);
     fft.set_window(cosine_window_function(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));
 
     //obtain a sub-window for training initial model
@@ -172,6 +173,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
     DEBUG_PRINTM(p_model_xf);
     scale_vars[0].flag = Track_flags::AUTO_CORRELATION;
 
+
     if (m_use_linearkernel) {
         ComplexMat xfconj = p_model_xf.conj();
         p_model_alphaf_num = xfconj.mul(p_yf);
@@ -214,6 +216,16 @@ void KCF_Tracker::init_scale_vars()
     cudaSetDeviceFlags(cudaDeviceMapHost);
 
     for (int i = 0;i<p_num_scales;++i) {
+        scale_vars[i].ifft2_res = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC(p_num_of_feats));
+        scale_vars[i].response = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC1);
+
+        scale_vars[i].zf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
+        scale_vars[i].kzf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
+        scale_vars[i].kf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
+
+        if (i==0)
+            scale_vars[i].xf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
+
         CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].xf_sqr_norm, alloc_size*sizeof(float), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].xf_sqr_norm_d, (void*)scale_vars[i].xf_sqr_norm, 0));
 
@@ -233,8 +245,8 @@ void KCF_Tracker::init_scale_vars()
         scale_vars[i].response = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC1);
 
         scale_vars[i].zf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
-        scale_vars[i].kzf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
-        scale_vars[i].kf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
+        scale_vars[i].kzf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
+        scale_vars[i].kf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
         //We use scale_vars[0] for updating the tracker, so we only allocate memory for  its xf only.
         if (i==0)
             scale_vars[i].xf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
@@ -442,7 +454,12 @@ void KCF_Tracker::scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat &
         DEBUG_PRINTM(this->p_model_alphaf * vars.kzf);
         vars.flag = Track_flags::RESPONSE;
         vars.kzf = this->p_model_alphaf * vars.kzf;
+        //TODO Add support for fft.inverse(vars) for CUFFT
+#ifdef CUFFT
+        vars.response = fft.inverse(vars.kzf);
+#else
         fft.inverse(vars);
+#endif
     }
 
     DEBUG_PRINTM(vars.response);
@@ -690,11 +707,12 @@ void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMa
     DEBUG_PRINTM(vars.xyf);
 #ifdef CUFFT
     if(auto_correlation)
-        cuda_gaussian_correlation(fft.inverse_raw(xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
+        cuda_gaussian_correlation(fft.inverse_raw(vars.xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
     else
-        cuda_gaussian_correlation(fft.inverse_raw(xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
+        cuda_gaussian_correlation(fft.inverse_raw(vars.xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
 
-    return fft.forward_raw(vars.gauss_corr_res, xf.n_scales==p_num_scales);
+    fft.forward_raw(vars, xf.n_scales==p_num_scales);
+    return;
 #else
     //ifft2 and sum over 3rd dimension, we dont care about individual channels
     fft.inverse(vars);