rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
FFT does not take whole Scale_vars struct
authorShanigen <vkaraf@gmail.com>
Tue, 7 Aug 2018 08:26:35 +0000 (10:26 +0200)
committerMichal Sojka <michal.sojka@cvut.cz>
Wed, 5 Sep 2018 06:38:52 +0000 (08:38 +0200)
The FFT class no longer needs to know anything about the tracker. It now receives
as input parameters only the objects and variables on which to perform the FFT.
Also, a Scale_vars constructor was added, which allocates memory for its variables.

src/fft.h
src/fft_cufft.cpp
src/fft_cufft.h
src/fft_fftw.cpp
src/fft_fftw.h
src/fft_opencv.cpp
src/fft_opencv.h
src/kcf.cpp
src/kcf.h
src/scale_vars.hpp

index 895f31edb7b6def0012e3ceca93f23ae318381dd..6f91413265ee41c94c7a5a122b001e6d2289fb84 100644 (file)
--- a/src/fft.h
+++ b/src/fft.h
@@ -19,9 +19,9 @@ class Fft
 public:
     virtual void init(unsigned width, unsigned height,unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) = 0;
     virtual void set_window(const cv::Mat & window) = 0;
-    virtual void forward(Scale_vars & vars) = 0;
-    virtual void forward_window(Scale_vars & vars) = 0;
-    virtual void inverse(Scale_vars & vars) = 0;
+    virtual void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) = 0;
+    virtual void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) = 0;
+    virtual void inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr) = 0;
     virtual ~Fft() = 0;
 };
 
index 66399d255f07960ae5e46b81aa95238ffbcf9429..8d5dff04902404cbff23d9221a9ac2eca9bd1f71 100644 (file)
@@ -142,61 +142,52 @@ void cuFFT::set_window(const cv::Mat & window)
      m_window = window;
 }
 
-void cuFFT::forward(Scale_vars & vars)
+void cuFFT::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr)
 {
-    ComplexMat *complex_result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_yf_ptr :
-                                                  vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
-    cufftReal *input = vars.flag & Tracker_flags::TRACKER_INIT ? vars.rot_labels_data : vars.gauss_corr_res;
-
-    if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){
+    //TODO WRONG real_input.data
+    if(m_big_batch_mode && real_input.rows == (int)(m_height*m_num_of_scales)){
         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(data_f_all_scales),
-                                complex_result->get_p_data()));
+                                complex_result.get_p_data()));
     } else {
-                CufftErrorCheck(cufftExecR2C(plan_f, input,
-                                complex_result->get_p_data()));
+                CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(real_input_arr),
+                                complex_result.get_p_data()));
     }
     return;
 }
 
-void cuFFT::forward_window(Scale_vars & vars)
+void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr)
 {
-    int n_channels = vars.patch_feats.size();
-
-    ComplexMat *result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_model_xf_ptr :
-                                                  vars.flag & Tracker_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
+    int n_channels = patch_feats.size();
 
     if(n_channels > (int) m_num_of_feats){
         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw_all_scales);
         for (int i = 0; i < n_channels; ++i) {
             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
-            in_roi = vars.patch_feats[i].mul(m_window);
+            in_roi = patch_feats[i].mul(m_window);
         }
 
-        CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result->get_p_data()));
+        CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), complex_result.get_p_data()));
     } else {
         for (int i = 0; i < n_channels; ++i) {
-            cv::Mat in_roi(vars.fw_all, cv::Rect(0, i*m_height, m_width, m_height));
-            in_roi = vars.patch_feats[i].mul(m_window);
+            cv::Mat in_roi(fw_all, cv::Rect(0, i*m_height, m_width, m_height));
+            in_roi = patch_feats[i].mul(m_window);
         }
-
-        CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(vars.data_features_d), result->get_p_data()));
+//TODO WRONG
+        CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
     }
     return;
 }
 
-void cuFFT::inverse(Scale_vars & vars)
+void cuFFT::inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr)
 {
-    ComplexMat *input = vars.flag & Tracker_flags::RESPONSE ? & vars.kzf : &  vars.xyf;
-    cv::Mat *real_result = vars.flag & Tracker_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
-
-    int n_channels = input->n_channels;
-    cufftComplex *in = reinterpret_cast<cufftComplex*>(input->get_p_data());
+    int n_channels = complex_input.n_channels;
+    cufftComplex *in = reinterpret_cast<cufftComplex*>(complex_input.get_p_data());
 
     if(n_channels == 1){
 
-        CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(vars.data_i_1ch_d)));
+        CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(real_result_arr)));
         cudaDeviceSynchronize();
-        *real_result = *real_result/(m_width*m_height);
+        real_result = real_result/(m_width*m_height);
         return;
     }
 #ifdef BIG_BATCH
@@ -217,13 +208,13 @@ void cuFFT::inverse(Scale_vars & vars)
     }
 #endif
 
-    CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(vars.data_i_features_d)));
+    CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(real_result_arr)));
 
-    if (vars.cuda_gauss)
+    if (true)
         return;
     else {
         cudaDeviceSynchronize();
-        *real_result = *real_result/(m_width*m_height);
+        real_result = real_result/(m_width*m_height);
     }
     return;
 }
index 5c20762d9e0d921320ba9e8aded59cf41b4d17b8..48a29800504b57e4fce662b4c35bfe63e1f7ae4c 100644 (file)
@@ -23,9 +23,9 @@ class cuFFT : public Fft
 public:
     void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
     void set_window(const cv::Mat & window) override;
-    void forward(Scale_vars & vars) override;
-    void forward_window(Scale_vars & vars) override;
-    void inverse(Scale_vars & vars) override;
+    void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
+    void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
+    void inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr) override;
     ~cuFFT() override;
 private:
     cv::Mat m_window;
index 964986723de4993192fefdb2ac8788b70f55dfb5..b57095d7103a3d66ee590e470d2d4c4843c2dad6 100644 (file)
@@ -196,36 +196,33 @@ void Fftw::set_window(const cv::Mat &window)
     m_window = window;
 }
 
-void Fftw::forward(Scale_vars & vars)
+void Fftw::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr)
 {
-    ComplexMat *complex_result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_yf_ptr :
-                                                  vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
-    cv::Mat *input = vars.flag & Tracker_flags::TRACKER_INIT ? & vars.rot_labels : & vars.in_all;
+    (void) real_input_arr;
 
-    if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){
-        fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float*>(vars.in_all.data),
-                              reinterpret_cast<fftwf_complex*>(complex_result->get_p_data()));
+    if(m_big_batch_mode && real_input.rows == (int)(m_height*m_num_of_scales)){
+        fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float*>(real_input.data),
+                              reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
     } else {
-        fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float*>(input->data),
-                              reinterpret_cast<fftwf_complex*>(complex_result->get_p_data()));
+        fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float*>(real_input.data),
+                              reinterpret_cast<fftwf_complex*>(complex_result.get_p_data()));
     }
     return;
 }
 
-void Fftw::forward_window(Scale_vars & vars)
+void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr)
 {
-    int n_channels = vars.patch_feats.size();
+    (void) real_input_arr;
 
-    ComplexMat *result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_model_xf_ptr :
-                                                  vars.flag & Tracker_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
+    int n_channels = patch_feats.size();
 
     for (int i = 0; i < n_channels; ++i) {
-        cv::Mat in_roi(vars.fw_all, cv::Rect(0, i*m_height, m_width, m_height));
-        in_roi = vars.patch_feats[i].mul(m_window);
+        cv::Mat in_roi(fw_all, cv::Rect(0, i*m_height, m_width, m_height));
+        in_roi = patch_feats[i].mul(m_window);
     }
 
-    float *in = reinterpret_cast<float*>(vars.fw_all.data);
-    fftwf_complex *out = reinterpret_cast<fftwf_complex*>(result->get_p_data());
+    float *in = reinterpret_cast<float*>(fw_all.data);
+    fftwf_complex *out = reinterpret_cast<fftwf_complex*>(complex_result.get_p_data());
 
     if (n_channels <= (int) m_num_of_feats)
         fftwf_execute_dft_r2c(plan_fw, in, out);
@@ -234,14 +231,13 @@ void Fftw::forward_window(Scale_vars & vars)
     return;
 }
 
-void Fftw::inverse(Scale_vars & vars)
+void Fftw::inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr)
 {
-    ComplexMat *input = vars.flag & Tracker_flags::RESPONSE ? & vars.kzf : &  vars.xyf;
-    cv::Mat *real_result = vars.flag & Tracker_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
+    (void) real_input_arr;
 
-    int n_channels = input->n_channels;
-    fftwf_complex *in = reinterpret_cast<fftwf_complex*>(input->get_p_data());
-    float *out = reinterpret_cast<float*>(real_result->data);
+    int n_channels = complex_input.n_channels;
+    fftwf_complex *in = reinterpret_cast<fftwf_complex*>(complex_input.get_p_data());
+    float *out = reinterpret_cast<float*>(real_result.data);
 
     if(n_channels == 1)
         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
@@ -252,7 +248,7 @@ void Fftw::inverse(Scale_vars & vars)
     else
         fftwf_execute_dft_c2r(plan_i_features, in, out);
 
-    *real_result = *real_result/(m_width*m_height);
+    real_result = real_result/(m_width*m_height);
     return;
 }
 
index bd1ed92468a442d84b94f9f62fcb3c04f68aa41e..afe2c974205b7ebdf6e893b5a5fdb0b78dc5e613 100644 (file)
@@ -23,9 +23,9 @@ public:
     Fftw(int num_of_threads);
     void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
     void set_window(const cv::Mat & window) override;
-    void forward(Scale_vars & vars) override;
-    void forward_window(Scale_vars & vars) override;
-    void inverse(Scale_vars & vars) override;
+    void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
+    void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
+    void inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr) override;
     ~Fftw() override;
 private:
     unsigned m_num_threads = 6;
index 6c39c8317a87967022eb4cb1ad053e013912e1d7..4b8d8f39da113981f3168cec3a55dbaa1c12cce0 100644 (file)
@@ -15,47 +15,43 @@ void FftOpencv::set_window(const cv::Mat & window)
      m_window = window;
 }
 
-void FftOpencv::forward(Scale_vars & vars)
+void FftOpencv::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr)
 {
-    ComplexMat *complex_result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_yf_ptr :
-                                                  vars.flag & Tracker_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf;
-    cv::Mat *input = vars.flag & Tracker_flags::TRACKER_INIT ? & vars.rot_labels : & vars.in_all;
+    (void) real_input_arr;
 
     cv::Mat tmp;
-    cv::dft(*input, tmp, cv::DFT_COMPLEX_OUTPUT);
-    *complex_result = ComplexMat(tmp);
+    cv::dft(real_input, tmp, cv::DFT_COMPLEX_OUTPUT);
+    complex_result = ComplexMat(tmp);
     return;
 }
 
-void FftOpencv::forward_window(Scale_vars & vars)
+void FftOpencv::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr)
 {
-    int n_channels = vars.patch_feats.size();
-
-    ComplexMat *result = vars.flag & Tracker_flags::TRACKER_INIT ? vars.p_model_xf_ptr :
-                                                  vars.flag & Tracker_flags::TRACKER_UPDATE ? & vars.xf : & vars.zf;
+    (void) real_input_arr;
+    (void) fw_all;
 
+    int n_channels = patch_feats.size();
     for (int i = 0; i < n_channels; ++i) {
-        cv::Mat complex_result;
-        cv::dft(vars.patch_feats[i].mul(m_window), complex_result, cv::DFT_COMPLEX_OUTPUT);
-        result->set_channel(i, complex_result);
+        cv::Mat complex_res;
+        cv::dft(patch_feats[i].mul(m_window), complex_res, cv::DFT_COMPLEX_OUTPUT);
+        complex_result.set_channel(i, complex_res);
     }
     return;
 }
 
-void FftOpencv::inverse(Scale_vars & vars)
+void FftOpencv::inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr)
 {
-    ComplexMat *input = vars.flag & Tracker_flags::RESPONSE ? & vars.kzf : & vars.xyf;
-    cv::Mat *result = vars.flag & Tracker_flags::RESPONSE ? & vars.response : & vars.ifft2_res;
+    (void) real_result_arr;
 
-    if (input->n_channels == 1) {
-        cv::dft(input->to_cv_mat(), *result, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
+    if (complex_input.n_channels == 1) {
+        cv::dft(complex_input.to_cv_mat(), real_result, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
     } else {
-        std::vector<cv::Mat> mat_channels = input->to_cv_mat_vector();
-        std::vector<cv::Mat> ifft_mats(input->n_channels);
-        for (int i = 0; i < input->n_channels; ++i) {
+        std::vector<cv::Mat> mat_channels = complex_input.to_cv_mat_vector();
+        std::vector<cv::Mat> ifft_mats(complex_input.n_channels);
+        for (int i = 0; i < complex_input.n_channels; ++i) {
             cv::dft(mat_channels[i], ifft_mats[i], cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
         }
-        cv::merge(ifft_mats, *result);
+        cv::merge(ifft_mats, real_result);
     }
     return;
 }
index 557cd0b428ecf4b82ea4317bab2f582a6260ce17..061b7e8063882e7acd4f78b931f2b21317f4e762 100644 (file)
@@ -11,9 +11,9 @@ class FftOpencv : public Fft
 public:
     void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
     void set_window(const cv::Mat & window) override;
-    void forward(Scale_vars & vars) override;
-    void forward_window(Scale_vars & vars) override;
-    void inverse(Scale_vars & vars) override;
+    void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
+    void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
+    void inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr) override;
     ~FftOpencv() override;
 private:
     cv::Mat m_window;
index 831e70be10bd253a5a52783eaf0badd3a813ac1f..c04c0087c3a2d945fb1068348eefd3971c173699 100644 (file)
@@ -20,7 +20,7 @@
 #endif //OPENMP
 
 #define DEBUG_PRINT(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl;}
-#define DEBUG_PRINTM(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl /*<< (obj) << std::endl*/;}
+#define DEBUG_PRINTM(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;}
 
 KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) :
     fft(*new FFT()),
@@ -35,18 +35,18 @@ KCF_Tracker::~KCF_Tracker()
     delete &fft;
 #ifdef CUFFT
     for (int i = 0;i < p_num_scales;++i) {
-        CudaSafeCall(cudaFreeHost(scale_vars[i].xf_sqr_norm));
-        CudaSafeCall(cudaFreeHost(scale_vars[i].yf_sqr_norm));
-        CudaSafeCall(cudaFreeHost(scale_vars[i].data_i_1ch));
-        CudaSafeCall(cudaFreeHost(scale_vars[i].data_i_features));
-        CudaSafeCall(cudaFree(scale_vars[i].gauss_corr_res));
-        CudaSafeCall(cudaFreeHost(scale_vars[i].rot_labels_data));
-        CudaSafeCall(cudaFreeHost(scale_vars[i].data_features));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].xf_sqr_norm));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].yf_sqr_norm));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].data_i_1ch));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].data_i_features));
+        CudaSafeCall(cudaFree(p_scale_vars[i].gauss_corr_res_d));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].rot_labels_data));
+        CudaSafeCall(cudaFreeHost(p_scale_vars[i].data_features));
     }
 #else
     for (int i = 0;i < p_num_scales;++i) {
-        free(scale_vars[i].xf_sqr_norm);
-        free(scale_vars[i].yf_sqr_norm);
+        free(p_scale_vars[i].xf_sqr_norm);
+        free(p_scale_vars[i].yf_sqr_norm);
     }
 #endif
 }
@@ -140,9 +140,20 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
     else
         p_scales.push_back(1.);
 
-    for (int i = 0;i<p_num_scales;++i) {
-        scale_vars.push_back(Scale_vars());
+#ifdef CUFFT
+    if (p_windows_size[1]/p_cell_size*(p_windows_size[0]/p_cell_size/2+1) > 1024) {
+        std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set "
+        "the window dimensions so its size is less or equal to " << 1024*p_cell_size*p_cell_size*2+1 <<
+        " pixels . Currently the size of the window is: " <<  p_windows_size[0] << "x" <<  p_windows_size[1] <<
+        " which is  " <<  p_windows_size[0]*p_windows_size[1] << " pixels. " << std::endl;
+        std::exit(EXIT_FAILURE);
+    }
+
+    if (m_use_linearkernel){
+        std::cerr << "cuFFT supports only Gaussian kernel." << std::endl;
+        std::exit(EXIT_FAILURE);
     }
+#endif
 
     p_num_of_feats = 31;
     if(m_use_color) p_num_of_feats += 3;
@@ -150,7 +161,12 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
     p_roi_width = p_windows_size[0]/p_cell_size;
     p_roi_height = p_windows_size[1]/p_cell_size;
 
-    init_scale_vars();
+    for (int i = 0;i<p_num_scales;++i) {
+        if (i == 0)
+            p_scale_vars.push_back(Scale_vars(p_windows_size, p_cell_size, p_num_of_feats, &p_model_xf, &p_yf, true));
+        else
+            p_scale_vars.push_back(Scale_vars(p_windows_size, p_cell_size, p_num_of_feats));
+    }
 
     p_current_scale = 1.;
 
@@ -168,18 +184,15 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
     fft.init(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size, p_num_of_feats, p_num_scales, m_use_big_batch);
     fft.set_window(cosine_window_function(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));
 
-    scale_vars[0].flag = Tracker_flags::TRACKER_INIT;
     //window weights, i.e. labels
-    gaussian_shaped_labels(scale_vars[0], p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size);
-
-    fft.forward(scale_vars[0]);
+     fft.forward(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size), p_yf,
+                                                                                m_use_cuda ? p_scale_vars[0].rot_labels_data_d: nullptr);
     DEBUG_PRINTM(p_yf);
 
     //obtain a sub-window for training initial model
-    get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], scale_vars[0]);
-    fft.forward_window(scale_vars[0]);
+    get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_scale_vars[0]);
+    fft.forward_window(p_scale_vars[0].patch_feats, p_model_xf, p_scale_vars[0].fw_all, m_use_cuda ? p_scale_vars[0].data_features_d : nullptr);
     DEBUG_PRINTM(p_model_xf);
-    scale_vars[0].flag = Tracker_flags::AUTO_CORRELATION;
 
 
     if (m_use_linearkernel) {
@@ -188,11 +201,11 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
         p_model_alphaf_den = (p_model_xf * xfconj);
     } else {
         //Kernel Ridge Regression, calculate alphas (in Fourier domain)
-        gaussian_correlation(scale_vars[0], p_model_xf, p_model_xf, p_kernel_sigma, true);
-        DEBUG_PRINTM(scale_vars[0].kf);
-        p_model_alphaf_num = p_yf * scale_vars[0].kf;
+        gaussian_correlation(p_scale_vars[0], p_model_xf, p_model_xf, p_kernel_sigma, true);
+        DEBUG_PRINTM(p_scale_vars[0].kf);
+        p_model_alphaf_num = p_yf * p_scale_vars[0].kf;
         DEBUG_PRINTM(p_model_alphaf_num);
-        p_model_alphaf_den = scale_vars[0].kf * (scale_vars[0].kf + p_lambda);
+        p_model_alphaf_den = p_scale_vars[0].kf * (p_scale_vars[0].kf + p_lambda);
         DEBUG_PRINTM(p_model_alphaf_den);
     }
     p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den;
@@ -200,116 +213,6 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox, int fit_size_x, int
 //        p_model_alphaf = p_yf / (kf + p_lambda);   //equation for fast training
 }
 
-void KCF_Tracker::init_scale_vars()
-{
-    double alloc_size;
-
-#ifdef CUFFT
-    if (p_windows_size[1]/p_cell_size*(p_windows_size[0]/p_cell_size/2+1) > 1024) {
-        std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set "
-        "the window dimensions so its size is less or equal to " << 1024*p_cell_size*p_cell_size*2+1 <<
-        " pixels . Currently the size of the window is: " <<  p_windows_size[0] << "x" <<  p_windows_size[1] <<
-        " which is  " <<  p_windows_size[0]*p_windows_size[1] << " pixels. " << std::endl;
-        std::exit(EXIT_FAILURE);
-    }
-
-    if (m_use_linearkernel){
-        std::cerr << "cuFFT supports only Gaussian kernel." << std::endl;
-        std::exit(EXIT_FAILURE);
-    }
-    cudaSetDeviceFlags(cudaDeviceMapHost);
-
-    for (int i = 0;i<p_num_scales;++i) {
-        alloc_size = p_windows_size[0]/p_cell_size*p_windows_size[1]/p_cell_size*sizeof(cufftReal);
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].data_i_1ch, alloc_size, cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].data_i_1ch_d, (void*)scale_vars[i].data_i_1ch, 0));
-
-        alloc_size = p_windows_size[0]/p_cell_size*p_windows_size[1]/p_cell_size*p_num_of_feats*sizeof(cufftReal);
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].data_i_features, alloc_size, cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].data_i_features_d, (void*)scale_vars[i].data_i_features, 0));
-
-
-        scale_vars[i].ifft2_res = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC(p_num_of_feats), scale_vars[i].data_i_features);
-        scale_vars[i].response = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC1, scale_vars[i].data_i_1ch);
-
-        scale_vars[i].zf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
-        scale_vars[i].kzf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
-        scale_vars[i].kf = ComplexMat(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
-
-#ifdef BIG_BATCH
-        alloc_size = p_num_of_feats;
-#else
-        alloc_size = 1;
-#endif
-
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].xf_sqr_norm, alloc_size*sizeof(float), cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].xf_sqr_norm_d, (void*)scale_vars[i].xf_sqr_norm, 0));
-
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].yf_sqr_norm, sizeof(float), cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].yf_sqr_norm_d, (void*)scale_vars[i].yf_sqr_norm, 0));
-
-        alloc_size =(p_windows_size[0]/p_cell_size)*(p_windows_size[1]/p_cell_size)*alloc_size*sizeof(float);
-        CudaSafeCall(cudaMalloc((void**)&scale_vars[i].gauss_corr_res, alloc_size));
-        scale_vars[i].in_all = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC1, scale_vars[i].gauss_corr_res);
-
-        alloc_size = (p_windows_size[0]/p_cell_size)*(p_windows_size[1]/p_cell_size)*alloc_size*sizeof(float);
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].rot_labels_data, alloc_size, cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].rot_labels_data_d, (void*)scale_vars[i].rot_labels_data, 0));
-        scale_vars[i].rot_labels = cv::Mat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, CV_32FC1, scale_vars[i].rot_labels_data);
-
-        alloc_size = (p_windows_size[0]/p_cell_size)*((p_windows_size[1]/p_cell_size)*p_num_of_feats)*sizeof(cufftReal);
-        CudaSafeCall(cudaHostAlloc((void**)&scale_vars[i].data_features, alloc_size, cudaHostAllocMapped));
-        CudaSafeCall(cudaHostGetDevicePointer((void**)&scale_vars[i].data_features_d, (void*)scale_vars[i].data_features, 0));
-        scale_vars[i].fw_all = cv::Mat((p_windows_size[1]/p_cell_size)*p_num_of_feats, p_windows_size[0]/p_cell_size, CV_32F, scale_vars[i].data_features);
-    }
-#else
-if(m_use_big_batch)
-        alloc_size = p_num_of_feats;
-else
-        alloc_size = 1;
-
-    for (int i = 0;i<p_num_scales;++i) {
-        scale_vars[i].xf_sqr_norm = (float*) malloc(alloc_size*sizeof(float));
-        scale_vars[i].yf_sqr_norm = (float*) malloc(sizeof(float));
-
-        scale_vars[i].patch_feats.reserve(p_num_of_feats);
-
-        int height = p_windows_size[1]/p_cell_size;
-#ifdef FFTW
-        int width = (p_windows_size[0]/p_cell_size)/2+1;
-#else
-        int width = p_windows_size[0]/p_cell_size;
-#endif
-
-        scale_vars[i].ifft2_res = cv::Mat(height, p_windows_size[0]/p_cell_size, CV_32FC(p_num_of_feats));
-        scale_vars[i].response = cv::Mat(height, p_windows_size[0]/p_cell_size, CV_32FC1);
-
-        scale_vars[i].zf = ComplexMat(height, width, p_num_of_feats);
-        scale_vars[i].kzf = ComplexMat(height, width, 1);
-        scale_vars[i].kf = ComplexMat(height, width, 1);
-        scale_vars[i].rot_labels = cv::Mat(height, p_windows_size[0]/p_cell_size, CV_32FC1);
-#ifdef FFTW
-        scale_vars[i].in_all = cv::Mat((p_windows_size[1]/p_cell_size)*p_num_of_feats, p_windows_size[0]/p_cell_size, CV_32F);
-        scale_vars[i].fw_all = cv::Mat((p_windows_size[1]/p_cell_size)*p_num_of_feats, p_windows_size[0]/p_cell_size, CV_32F);
-#else
-        scale_vars[i].in_all = cv::Mat((p_windows_size[1]/p_cell_size), p_windows_size[0]/p_cell_size, CV_32F);
-#endif
-    }
-#endif
-#if defined(FFTW) || defined(CUFFT)
-    p_model_xf.create(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
-    p_yf.create(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, 1);
-    //We use scale_vars[0] for updating the tracker, so we only allocate memory for  its xf only.
-    scale_vars[0].xf.create(p_windows_size[1]/p_cell_size, (p_windows_size[0]/p_cell_size)/2+1, p_num_of_feats);
-#else
-    p_model_xf.create(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, p_num_of_feats);
-    p_yf.create(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, 1);
-    scale_vars[0].xf = ComplexMat(p_windows_size[1]/p_cell_size, p_windows_size[0]/p_cell_size, p_num_of_feats);
-#endif
-    scale_vars[0].p_model_xf_ptr = & p_model_xf;
-    scale_vars[0].p_yf_ptr = & p_yf;
-}
-
 void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat & img, int fit_size_x, int fit_size_y)
 {
     init(img, bbox.get_rect(), fit_size_x, fit_size_y);
@@ -382,30 +285,30 @@ void KCF_Tracker::track(cv::Mat &img)
 
     if(m_use_multithreading) {
         std::vector<std::future<void>> async_res(p_scales.size());
-        for (size_t i = 0; i < scale_vars.size(); ++i) {
+        for (size_t i = 0; i < p_scale_vars.size(); ++i) {
             async_res[i] = std::async(std::launch::async,
                                 [this, &input_gray, &input_rgb, i]() -> void
-                                {return scale_track(this->scale_vars[i], input_rgb, input_gray, this->p_scales[i]);});
+                                {return scale_track(this->p_scale_vars[i], input_rgb, input_gray, this->p_scales[i]);});
         }
         for (size_t i = 0; i < p_scales.size(); ++i) {
             async_res[i].wait();
-            if (this->scale_vars[i].max_response > max_response) {
-                max_response = this->scale_vars[i].max_response;
-                max_response_pt = & this->scale_vars[i].max_loc;
-                max_response_map = & this->scale_vars[i].response;
+            if (this->p_scale_vars[i].max_response > max_response) {
+                max_response = this->p_scale_vars[i].max_response;
+                max_response_pt = & this->p_scale_vars[i].max_loc;
+                max_response_map = & this->p_scale_vars[i].response;
                 scale_index = i;
             }
         }
     } else {
 #pragma omp parallel for schedule(dynamic)
-        for (size_t i = 0; i < scale_vars.size(); ++i) {
-            scale_track(this->scale_vars[i], input_rgb, input_gray, this->p_scales[i]);
+        for (size_t i = 0; i < p_scale_vars.size(); ++i) {
+            scale_track(this->p_scale_vars[i], input_rgb, input_gray, this->p_scales[i]);
 #pragma omp critical
             {
-                if (this->scale_vars[i].max_response > max_response) {
-                    max_response = this->scale_vars[i].max_response;
-                    max_response_pt = & this->scale_vars[i].max_loc;
-                    max_response_map = & this->scale_vars[i].response;
+                if (this->p_scale_vars[i].max_response > max_response) {
+                    max_response = this->p_scale_vars[i].max_response;
+                    max_response_pt = & this->p_scale_vars[i].max_loc;
+                    max_response_map = & this->p_scale_vars[i].response;
                     scale_index = i;
                 }
             }
@@ -454,27 +357,26 @@ void KCF_Tracker::track(cv::Mat &img)
     if (p_current_scale > p_min_max_scale[1])
         p_current_scale = p_min_max_scale[1];
     //obtain a subwindow for training at newly estimated target position
-    get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], scale_vars[0], p_current_scale);
-    scale_vars[0].flag = Tracker_flags::TRACKER_UPDATE;
-    fft.forward_window(scale_vars[0]);
+    get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_scale_vars[0], p_current_scale);
+    fft.forward_window(p_scale_vars[0].patch_feats, p_scale_vars[0].xf, p_scale_vars[0].fw_all, m_use_cuda ? p_scale_vars[0].data_features_d : nullptr);
 
     //subsequent frames, interpolate model
-    p_model_xf = p_model_xf * (1. - p_interp_factor) + scale_vars[0].xf * p_interp_factor;
+    p_model_xf = p_model_xf * (1. - p_interp_factor) + p_scale_vars[0].xf * p_interp_factor;
 
     ComplexMat alphaf_num, alphaf_den;
 
     if (m_use_linearkernel) {
-        ComplexMat xfconj = scale_vars[0].xf.conj();
+        ComplexMat xfconj = p_scale_vars[0].xf.conj();
         alphaf_num = xfconj.mul(p_yf);
-        alphaf_den = (scale_vars[0].xf * xfconj);
+        alphaf_den = (p_scale_vars[0].xf * xfconj);
     } else {
-        scale_vars[0].flag = Tracker_flags::AUTO_CORRELATION;
+        p_scale_vars[0].flag = Tracker_flags::AUTO_CORRELATION;
         //Kernel Ridge Regression, calculate alphas (in Fourier domain)
-        gaussian_correlation(scale_vars[0], scale_vars[0].xf, scale_vars[0].xf, p_kernel_sigma, true);
+        gaussian_correlation(p_scale_vars[0], p_scale_vars[0].xf, p_scale_vars[0].xf, p_kernel_sigma, true);
 //        ComplexMat alphaf = p_yf / (kf + p_lambda); //equation for fast training
 //        p_model_alphaf = p_model_alphaf * (1. - p_interp_factor) + alphaf * p_interp_factor;
-        alphaf_num = p_yf * scale_vars[0].kf;
-        alphaf_den = scale_vars[0].kf * (scale_vars[0].kf + p_lambda);
+        alphaf_num = p_yf * p_scale_vars[0].kf;
+        alphaf_den = p_scale_vars[0].kf * (p_scale_vars[0].kf + p_lambda);
     }
 
     p_model_alphaf_num = p_model_alphaf_num * (1. - p_interp_factor) + alphaf_num * p_interp_factor;
@@ -487,24 +389,20 @@ void KCF_Tracker::scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat &
     get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy, this->p_windows_size[0], this->p_windows_size[1],
                                 vars, this->p_current_scale * scale);
 
-    vars.flag = Tracker_flags::SCALE_RESPONSE;
-    fft.forward_window(vars);
+    fft.forward_window(vars.patch_feats, vars.zf, vars.fw_all, m_use_cuda ? vars.data_features_d : nullptr);
     DEBUG_PRINTM(vars.zf);
 
     if (m_use_linearkernel) {
                 vars.kzf = (vars.zf.mul2(this->p_model_alphaf)).sum_over_channels();
-                vars.flag = Tracker_flags::RESPONSE;
-                fft.inverse(vars);
+                fft.inverse(vars.kzf, vars.response, m_use_cuda ? vars.data_i_1ch_d : nullptr);
     } else {
-        vars.flag = Tracker_flags::CROSS_CORRELATION;
         gaussian_correlation(vars, vars.zf, this->p_model_xf, this->p_kernel_sigma);
         DEBUG_PRINTM(this->p_model_alphaf);
         DEBUG_PRINTM(vars.kzf);
         DEBUG_PRINTM(this->p_model_alphaf * vars.kzf);
-        vars.flag = Tracker_flags::RESPONSE;
         vars.kzf = this->p_model_alphaf * vars.kzf;
         //TODO Add support for fft.inverse(vars) for CUFFT
-        fft.inverse(vars);
+        fft.inverse(vars.kzf, vars.response, m_use_cuda ? vars.data_i_1ch_d : nullptr);
     }
 
     DEBUG_PRINTM(vars.response);
@@ -577,7 +475,7 @@ void KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx
     return;
 }
 
-void KCF_Tracker::gaussian_shaped_labels(Scale_vars & vars, double sigma, int dim1, int dim2)
+cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2)
 {
     cv::Mat labels(dim2, dim1, CV_32FC1);
     int range_y[2] = {-dim2 / 2, dim2 - dim2 / 2};
@@ -594,12 +492,19 @@ void KCF_Tracker::gaussian_shaped_labels(Scale_vars & vars, double sigma, int di
     }
 
     //rotate so that 1 is at top-left corner (see KCF paper for explanation)
+#ifdef CUFFT
     cv::Mat tmp = circshift(labels, range_x[0], range_y[0]);
-    tmp.copyTo(vars.rot_labels);
+    tmp.copyTo(p_scale_vars[0].rot_labels);
+
+    assert(p_scale_vars[0].rot_labels.at<float>(0,0) >= 1.f - 1e-10f);
+    return tmp;
+#else
+    cv::Mat rot_labels = circshift(labels, range_x[0], range_y[0]);
     //sanity check, 1 at top left corner
-    assert(vars.rot_labels.at<float>(0,0) >= 1.f - 1e-10f);
+    assert(rot_labels.at<float>(0,0) >= 1.f - 1e-10f);
 
-    return;
+    return rot_labels;
+#endif
 }
 
 cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot)
@@ -753,15 +658,14 @@ void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMa
 #endif
     vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj());
     DEBUG_PRINTM(vars.xyf);
+    fft.inverse(vars.xyf, vars.ifft2_res, m_use_cuda ? vars.data_i_features_d : nullptr);
 #ifdef CUFFT
-    fft.inverse(vars);
     if(auto_correlation)
-        cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
+        cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res_d, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
     else
-        cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
+        cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res_d, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
 #else
     //ifft2 and sum over 3rd dimension, we dont care about individual channels
-    fft.inverse(vars);
     DEBUG_PRINTM(vars.ifft2_res);
     cv::Mat xy_sum;
     if (xf.channels() != p_num_scales*p_num_of_feats)
@@ -792,7 +696,7 @@ void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMa
     }
 #endif
     DEBUG_PRINTM(vars.in_all);
-    fft.forward(vars);
+    fft.forward(vars.in_all, auto_correlation ? vars.kf : vars.kzf, m_use_cuda ? vars.gauss_corr_res_d : nullptr);
     return;
 }
 
@@ -872,7 +776,7 @@ double KCF_Tracker::sub_grid_scale(int index)
             A.at<float>(i, 0) = p_scales[i] * p_scales[i];
             A.at<float>(i, 1) = p_scales[i];
             A.at<float>(i, 2) = 1;
-            fval.at<float>(i) = scale_vars[i].max_response;
+            fval.at<float>(i) = p_scale_vars[i].max_response;
         }
     } else {
         //only from neighbours
@@ -883,7 +787,7 @@ double KCF_Tracker::sub_grid_scale(int index)
              p_scales[index-1] * p_scales[index-1], p_scales[index-1], 1,
              p_scales[index] * p_scales[index], p_scales[index], 1,
              p_scales[index+1] * p_scales[index+1], p_scales[index+1], 1);
-        fval = (cv::Mat_<float>(3, 1) << scale_vars[index-1].max_response, scale_vars[index].max_response, scale_vars[index+1].max_response);
+        fval = (cv::Mat_<float>(3, 1) << p_scale_vars[index-1].max_response, p_scale_vars[index].max_response, p_scale_vars[index+1].max_response);
     }
 
     cv::Mat x;
index 1c0f5d22295990ebc43519c7836b9a79c926886c..8f217f473a9cd8ece6d9748ef3e80e3b38edde11 100644 (file)
--- a/src/kcf.h
+++ b/src/kcf.h
@@ -69,6 +69,11 @@ public:
 #else
     bool m_use_big_batch {false};
 #endif
+#ifdef CUFFT
+    bool m_use_cuda {true};
+#else
+    bool m_use_cuda {false};
+#endif
 
     /*
     padding             ... extra area surrounding the target           (1.5)
@@ -123,7 +128,7 @@ private:
     int p_num_of_feats;
     int p_roi_height, p_roi_width;
 
-    std::vector<Scale_vars> scale_vars;
+    std::vector<Scale_vars> p_scale_vars;
 
     //model
     ComplexMat p_yf;
@@ -134,7 +139,7 @@ private:
     //helping functions
     void scale_track(Scale_vars & vars, cv::Mat & input_rgb, cv::Mat & input_gray, double scale);
     cv::Mat get_subwindow(const cv::Mat & input, int cx, int cy, int size_x, int size_y);
-    void gaussian_shaped_labels(Scale_vars & vars, double sigma, int dim1, int dim2);
+    cv::Mat gaussian_shaped_labels(double sigma, int dim1, int dim2);
     void gaussian_correlation(struct Scale_vars &vars, const ComplexMat & xf, const ComplexMat & yf, double sigma, bool auto_correlation = false);
     cv::Mat circshift(const cv::Mat & patch, int x_rot, int y_rot);
     cv::Mat cosine_window_function(int dim1, int dim2);
index ed0ec9e1ca9679df7e03acbd00c1cb3fdde1db3c..16e06af6c4e5dfd9237144ac9188fe133770b759 100644 (file)
@@ -19,29 +19,121 @@ enum Tracker_flags
 
 struct Scale_vars
 {
-    float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
+public:
+    Scale_vars();
+    Scale_vars(int windows_size[2], int cell_size, int num_of_feats, ComplexMat *model_xf = nullptr, ComplexMat *yf = nullptr,bool zero_index = false)
+    {
+        size_t alloc_size; // scratch size for the next allocation: a byte count or an element count, depending on the step
+        // Allocates this instance's work buffers; when zero_index is set it also sizes *model_xf, *yf and this->xf, so both pointers must then be non-null.
+#ifdef CUFFT
-    float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr, *gauss_corr_res_d = nullptr, *rot_labels_data = nullptr,
-              *rot_labels_data_d = nullptr, *data_features = nullptr, *data_features_d = nullptr;
+        if (zero_index)
+            cudaSetDeviceFlags(cudaDeviceMapHost);
+        // Mapped host buffer (with its device alias) for a single-channel result; this->response wraps it below.
+        alloc_size = windows_size[0]/cell_size*windows_size[1]/cell_size*sizeof(cufftReal);
+        CudaSafeCall(cudaHostAlloc((void**)&this->data_i_1ch, alloc_size, cudaHostAllocMapped));
+        CudaSafeCall(cudaHostGetDevicePointer((void**)&this->data_i_1ch_d, (void*)this->data_i_1ch, 0));
+        // Same kind of buffer with num_of_feats channels; this->ifft2_res wraps it below.
+        alloc_size = windows_size[0]/cell_size*windows_size[1]/cell_size*num_of_feats*sizeof(cufftReal);
+        CudaSafeCall(cudaHostAlloc((void**)&this->data_i_features, alloc_size, cudaHostAllocMapped));
+        CudaSafeCall(cudaHostGetDevicePointer((void**)&this->data_i_features_d, (void*)this->data_i_features, 0));
+
+        // cv::Mat headers over the mapped buffers above; the matrices do not own that memory.
+        this->ifft2_res = cv::Mat(windows_size[1]/cell_size, windows_size[0]/cell_size, CV_32FC(num_of_feats), this->data_i_features);
+        this->response = cv::Mat(windows_size[1]/cell_size, windows_size[0]/cell_size, CV_32FC1, this->data_i_1ch);
+        // Spectra are allocated with (width/2 + 1) columns.
+        this->zf = ComplexMat(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_feats);
+        this->kzf = ComplexMat(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, 1);
+        this->kf = ComplexMat(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, 1);
+        // Element count of xf_sqr_norm: num_of_feats in BIG_BATCH builds, otherwise 1.
+#ifdef BIG_BATCH
+        alloc_size = num_of_feats;
+#else
+        alloc_size = 1;
+#endif
+
+        CudaSafeCall(cudaHostAlloc((void**)&this->xf_sqr_norm, alloc_size*sizeof(float), cudaHostAllocMapped));
+        CudaSafeCall(cudaHostGetDevicePointer((void**)&this->xf_sqr_norm_d, (void*)this->xf_sqr_norm, 0));
+
+        CudaSafeCall(cudaHostAlloc((void**)&this->yf_sqr_norm, sizeof(float), cudaHostAllocMapped));
+        CudaSafeCall(cudaHostGetDevicePointer((void**)&this->yf_sqr_norm_d, (void*)this->yf_sqr_norm, 0));
+        // From here on alloc_size is a byte count (it was an element count just above).
+        alloc_size = (windows_size[0]/cell_size)*(windows_size[1]/cell_size)*alloc_size*sizeof(float);
+        CudaSafeCall(cudaMalloc((void**)&this->gauss_corr_res_d, alloc_size));
+        this->in_all = cv::Mat(windows_size[1]/cell_size, windows_size[0]/cell_size, CV_32FC1, this->gauss_corr_res_d);
+        // NOTE(review): in_all wraps a cudaMalloc'ed pointer; presumably it must never be dereferenced on the host -- confirm.
+        if (zero_index) {
+            alloc_size = (windows_size[0]/cell_size)*(windows_size[1]/cell_size)*sizeof(float); // one float per cell; do not reuse the byte count computed above
+            CudaSafeCall(cudaHostAlloc((void**)&this->rot_labels_data, alloc_size, cudaHostAllocMapped));
+            CudaSafeCall(cudaHostGetDevicePointer((void**)&this->rot_labels_data_d, (void*)this->rot_labels_data, 0));
+            this->rot_labels = cv::Mat(windows_size[1]/cell_size, windows_size[0]/cell_size, CV_32FC1, this->rot_labels_data);
+        }
+        // Mapped buffer behind fw_all: num_of_feats feature planes stacked along the row axis.
+        alloc_size = (windows_size[0]/cell_size)*((windows_size[1]/cell_size)*num_of_feats)*sizeof(cufftReal);
+        CudaSafeCall(cudaHostAlloc((void**)&this->data_features, alloc_size, cudaHostAllocMapped));
+        CudaSafeCall(cudaHostGetDevicePointer((void**)&this->data_features_d, (void*)this->data_features, 0));
+        this->fw_all = cv::Mat((windows_size[1]/cell_size)*num_of_feats, windows_size[0]/cell_size, CV_32F, this->data_features);
+#else
+#ifdef BIG_BATCH
+        alloc_size = num_of_feats;
+#else
+        alloc_size = 1;
+#endif
+
+        this->xf_sqr_norm = (float*) malloc(alloc_size*sizeof(float));
+        this->yf_sqr_norm = (float*) malloc(sizeof(float));
+
+        this->patch_feats.reserve(num_of_feats);
+        // Only the FFTW build halves the spectrum width (+1); the spatial-domain matrices below always use the full width.
+        int height = windows_size[1]/cell_size;
+#ifdef FFTW
+        int width = (windows_size[0]/cell_size)/2+1;
+#else
+        int width = windows_size[0]/cell_size;
+#endif
+
+        this->ifft2_res = cv::Mat(height, windows_size[0]/cell_size, CV_32FC(num_of_feats));
+        this->response = cv::Mat(height, windows_size[0]/cell_size, CV_32FC1);
+
+        this->zf = ComplexMat(height, width, num_of_feats);
+        this->kzf = ComplexMat(height, width, 1);
+        this->kf = ComplexMat(height, width, 1);
+#ifdef FFTW
+        this->in_all = cv::Mat((windows_size[1]/cell_size)*num_of_feats, windows_size[0]/cell_size, CV_32F);
+        this->fw_all = cv::Mat((windows_size[1]/cell_size)*num_of_feats, windows_size[0]/cell_size, CV_32F);
+#else
+        this->in_all = cv::Mat((windows_size[1]/cell_size), windows_size[0]/cell_size, CV_32F);
+#endif
+#endif
+#if defined(FFTW) || defined(CUFFT)
+        if (zero_index) {
+            model_xf->create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_feats);
+            yf->create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, 1);
+            //p_scale_vars[0] is used for updating the tracker, so xf is allocated only for the zero-index instance.
+            this->xf.create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_feats);
+        }
+#else
+        if (zero_index) {
+            model_xf->create(windows_size[1]/cell_size, windows_size[0]/cell_size, num_of_feats);
+            yf->create(windows_size[1]/cell_size, windows_size[0]/cell_size, 1);
+            this->xf.create(windows_size[1]/cell_size, windows_size[0]/cell_size, num_of_feats);
+        }
+#endif
+    }
+
+    float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr, *rot_labels_data = nullptr;
+    cv::Mat rot_labels;
+    float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res_d = nullptr, *rot_labels_data_d = nullptr,
+              *data_features = nullptr, *data_features_d = nullptr;
     float *data_f = nullptr, *data_fw = nullptr, *data_fw_d = nullptr,  *data_i_features = nullptr,
               *data_i_features_d = nullptr, *data_i_1ch = nullptr, *data_i_1ch_d = nullptr;
-#ifdef BIG_BATCH
     float *data_f_all_scales = nullptr, *data_fw_all_scales = nullptr, *data_fw_all_scales_d = nullptr, *data_i_features_all_scales = nullptr,
               *data_i_features_all_scales_d = nullptr, *data_i_1ch_all_scales = nullptr, *data_i_1ch_all_scales_d = nullptr;
-#endif
-    bool cuda_gauss = true;
-#endif
 
     std::vector<cv::Mat> patch_feats;
 
     cv::Mat in_all, fw_all, ifft2_res, response;
     ComplexMat zf, kzf, kf, xyf, xf;
 
-    //Used only for the initialization of the KCF tracker
-    ComplexMat * p_model_xf_ptr, *p_yf_ptr;
-    cv::Mat rot_labels;
-
-
     Tracker_flags flag;
 
     cv::Point2i max_loc;