From b9a298f551b17fdd54e3a1c7f1cc2e5a13963003 Mon Sep 17 00:00:00 2001 From: Shanigen Date: Fri, 3 Aug 2018 10:55:15 +0200 Subject: [PATCH] CuFFT uses only inverse(Scale_vars & vars) CuFFT version of the tracker now only uses one inverse function. The next goal is to do the same for forward FFT function. --- src/fft.h | 2 -- src/fft_cufft.cpp | 88 +++++++++++----------------------------------- src/fft_cufft.h | 2 -- src/kcf.cpp | 5 +-- src/scale_vars.hpp | 8 +++-- 5 files changed, 28 insertions(+), 77 deletions(-) diff --git a/src/fft.h b/src/fft.h index 1e6d03c..f4a7698 100644 --- a/src/fft.h +++ b/src/fft.h @@ -24,9 +24,7 @@ public: virtual void forward_raw(Scale_vars & vars, bool all_scales) = 0; virtual ComplexMat forward_window(const std::vector & input) = 0; virtual void forward_window(Scale_vars & vars) = 0; - virtual cv::Mat inverse(const ComplexMat & input) = 0; virtual void inverse(Scale_vars & vars) = 0; - virtual float* inverse_raw(const ComplexMat & input) = 0; virtual ~Fft() = 0; }; diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp index c703932..d42ce61 100644 --- a/src/fft_cufft.cpp +++ b/src/fft_cufft.cpp @@ -74,9 +74,6 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne } //FFT inverse one scale { - CudaSafeCall(cudaHostAlloc(&data_i_features, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped)); - CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_d, data_i_features, 0)); - int rank = 2; int n[] = {(int)m_height, (int)m_width}; int howmany = m_num_of_feats; @@ -90,6 +87,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne CUFFT_C2R, howmany)); } //FFT inverse all scales +#ifdef BIG_BATCH if(m_num_of_scales > 1) { CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped)); @@ -107,11 +105,9 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne onembed, ostride, odist, CUFFT_C2R, howmany)); } +#endif //FFT inverse one channel one scale { - CudaSafeCall(cudaHostAlloc(&data_i_1ch, m_height*m_width*sizeof(cufftReal), cudaHostAllocMapped)); - CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_d, data_i_1ch, 0)); - int rank = 2; int n[] = {(int)m_height, (int)m_width}; int howmany = 1; @@ -124,6 +120,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne onembed, ostride, odist, CUFFT_C2R, howmany)); } +#ifdef BIG_BATCH //FFT inverse one channel all scales if(m_num_of_scales > 1 && m_big_batch_mode) { @@ -142,6 +139,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne onembed, ostride, odist, CUFFT_C2R, howmany)); } +#endif } void cuFFT::set_window(const cv::Mat & window) @@ -169,6 +167,16 @@ ComplexMat cuFFT::forward(const cv::Mat & input) void cuFFT::forward(Scale_vars & vars) { + ComplexMat *complex_result = vars.flag & Track_flags::AUTO_CORRELATION ? & vars.kf : & vars.kzf; + if(m_big_batch_mode && vars.in_all.rows == (int)(m_height*m_num_of_scales)){ + CudaSafeCall(cudaMemcpy(data_f_all_scales, vars.in_all.ptr(), m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice)); + CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast(data_f_all_scales), + complex_result->get_p_data())); + } else { + CudaSafeCall(cudaMemcpy(data_f, vars.in_all.ptr(), m_height*m_width*sizeof(cufftReal), cudaMemcpyHostToDevice)); + CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast(data_f), + complex_result->get_p_data())); + } return; } @@ -237,42 +245,6 @@ void cuFFT::forward_window(Scale_vars & vars) return; } -cv::Mat cuFFT::inverse(const ComplexMat & input) -{ - int n_channels = input.n_channels; - cufftComplex *in = reinterpret_cast(input.get_p_data()); - - if(n_channels == 1){ - cv::Mat real_result(m_height, m_width, CV_32FC1, data_i_1ch); - - CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast(data_i_1ch_d))); - cudaDeviceSynchronize(); - - return real_result/(m_width*m_height); - } else if(n_channels == (int) m_num_of_scales){ - cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_1ch_all_scales); - - CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast(data_i_1ch_all_scales_d))); - cudaDeviceSynchronize(); - - return real_result/(m_width*m_height); - } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){ - cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales); - - CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast(data_i_features_all_scales_d))); - cudaDeviceSynchronize(); - - return real_result/(m_width*m_height); - } - - cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features); - - CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast(data_i_features_d))); - cudaDeviceSynchronize(); - - return real_result/(m_width*m_height); -} - void cuFFT::inverse(Scale_vars & vars) { ComplexMat *input = vars.flag & Track_flags::RESPONSE ? & vars.kzf : & vars.xyf; @@ -307,34 +279,14 @@ void cuFFT::inverse(Scale_vars & vars) #endif CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast(vars.data_i_features_d))); - cudaDeviceSynchronize(); - - *real_result = *real_result/(m_width*m_height); - return; -} - -float* cuFFT::inverse_raw(const ComplexMat & input) -{ - int n_channels = input.n_channels; - cufftComplex *in = reinterpret_cast(input.get_p_data()); - - if(n_channels == 1){ - CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast(data_i_1ch_d))); - - return data_i_1ch_d; - } else if(n_channels == (int) m_num_of_scales){ - CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast(data_i_1ch_all_scales_d))); - - return data_i_1ch_all_scales_d; - } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){ - CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast(data_i_features_all_scales_d))); - return data_i_features_all_scales_d; + if (vars.cuda_gauss) + return; + else { + cudaDeviceSynchronize(); + *real_result = *real_result/(m_width*m_height); } - - CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast(data_i_features_d))); - - return data_i_features_d; + return; } cuFFT::~cuFFT() diff --git a/src/fft_cufft.h b/src/fft_cufft.h index 44c3556..ae47e14 100644 --- a/src/fft_cufft.h +++ b/src/fft_cufft.h @@ -28,9 +28,7 @@ public: void forward_raw(Scale_vars & vars, bool all_scales) override; ComplexMat forward_window(const std::vector & input) override; void forward_window(Scale_vars & vars) override; - cv::Mat inverse(const ComplexMat & input) override; void inverse(Scale_vars & vars) override; - float* inverse_raw(const ComplexMat & input) override; ~cuFFT() override; private: cv::Mat m_window; diff --git a/src/kcf.cpp b/src/kcf.cpp index 61dc0f8..e6209a2 100644 --- a/src/kcf.cpp +++ b/src/kcf.cpp @@ -711,10 +711,11 @@ void KCF_Tracker::gaussian_correlation(struct Scale_vars & vars, const ComplexMa vars.xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj()); DEBUG_PRINTM(vars.xyf); #ifdef CUFFT + fft.inverse(vars); if(auto_correlation) - cuda_gaussian_correlation(fft.inverse_raw(vars.xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width); + cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.xf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width); else - cuda_gaussian_correlation(fft.inverse_raw(vars.xyf), vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width); + cuda_gaussian_correlation(vars.data_i_features, vars.gauss_corr_res, vars.xf_sqr_norm_d, vars.yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width); fft.forward_raw(vars, xf.n_scales==p_num_scales); return; diff --git a/src/scale_vars.hpp b/src/scale_vars.hpp index c7d48da..b805ce9 100644 --- a/src/scale_vars.hpp +++ b/src/scale_vars.hpp @@ -14,6 +14,7 @@ enum Track_flags CROSS_CORRELATION = 1 << 2, // binary 0100 SCALE_RESPONSE = 1 << 3,// binary 1000 TRACKER_UPDATE = 1 << 4,// binary 0001 0000 + TRACKER_INIT = 1 << 5, // binary 0010 0000 }; struct Scale_vars @@ -22,11 +23,12 @@ struct Scale_vars #ifdef CUFFT float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr; float *data_f = nullptr, *data_fw = nullptr, *data_fw_d = nullptr, *data_i_features = nullptr, - *data_i_features_d = nullptr, *data_i_1ch = nullptr, *data_i_1ch_d = nullptr; + *data_i_features_d = nullptr, *data_i_1ch = nullptr, *data_i_1ch_d = nullptr; #ifdef BIG_BATCH - float *data_f_all_scales = nullptr, *data_fw_all_scales = nullptr, *data_fw_all_scales_d = nullptr, - *data_i_features_all_scales = nullptr, *data_i_features_all_scales_d = nullptr, *data_i_1ch_all_scales = nullptr, *data_i_1ch_all_scales_d = nullptr + float *data_f_all_scales = nullptr, *data_fw_all_scales = nullptr, *data_fw_all_scales_d = nullptr, *data_i_features_all_scales = nullptr, + *data_i_features_all_scales_d = nullptr, *data_i_1ch_all_scales = nullptr, *data_i_1ch_all_scales_d = nullptr; #endif + bool cuda_gauss = true; #endif std::vector patch_feats; -- 2.39.2