rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Changed destructor of cuFFT to only destroy plans and delete data used in big batch...
author Shanigen <vkaraf@gmail.com>
Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
committer Shanigen <vkaraf@gmail.com>
Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
src/fft_cufft.cpp

index fe3b6f494ec65d5fedfe206b21c7941b4db4aebf..714c599bf0b19ba8ca3326a908f96c71e653decf 100644 (file)
@@ -9,7 +9,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     m_big_batch_mode = big_batch_mode;
 
     std::cout << "FFT: cuFFT" << std::endl;
-    
+
     if(m_height*(m_width/2+1) > 1024){
         std::cerr << "Image dimension after forward FFT are too big for CUDA kernels." << std::endl;
         std::exit(EXIT_FAILURE);
@@ -18,16 +18,16 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     //FFT forward one scale
     {
         CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
-        
+
        CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
-        
-        
+
+
     }
     //FFT forward all scales
     if(m_num_of_scales > 1 && m_big_batch_mode)
     {
         CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
-        
+
        int rank = 2;
        int n[] = {(int)m_height, (int)m_width};
        int howmany = m_num_of_scales;
@@ -44,7 +44,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     {
         CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = m_num_of_feats;
@@ -74,14 +74,14 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
                  inembed, istride, idist,
                  onembed, ostride, odist,
                  CUFFT_R2C, howmany));
-        
-        
+
+
     }
     //FFT inverse one scale
     {
         CudaSafeCall(cudaHostAlloc(&data_i_features, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_d, data_i_features, 0));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = m_num_of_feats;
@@ -99,7 +99,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     {
         CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = m_num_of_feats*m_num_of_scales;
@@ -116,7 +116,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     {
         CudaSafeCall(cudaHostAlloc(&data_i_1ch, m_height*m_width*sizeof(cufftReal), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_d, data_i_1ch, 0));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = 1;
@@ -134,7 +134,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
     {
         CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
         CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = m_num_of_scales;
@@ -168,7 +168,7 @@ ComplexMat cuFFT::forward(const cv::Mat &input)
         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
                                 complex_result.get_p_data()));
     }
-    
+
     return complex_result;
 }
 
@@ -197,9 +197,9 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
             in_roi = input[i].mul(m_window);
         }
-        
+
         result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
-        
+
         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
     } else {
         cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
@@ -207,9 +207,9 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
             cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
             in_roi = input[i].mul(m_window);
         }
-        
+
         result.create(m_height, m_width/2 + 1, n_channels);
-        
+
         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
     }
     return result;
@@ -219,35 +219,35 @@ cv::Mat cuFFT::inverse(const ComplexMat &input)
 {
     int n_channels = input.n_channels;
     cufftComplex *in = reinterpret_cast<cufftComplex*>(input.get_p_data());
-    
+
     if(n_channels == 1){
         cv::Mat real_result(m_height, m_width, CV_32FC1, data_i_1ch);
-        
+
         CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(data_i_1ch_d)));
         cudaDeviceSynchronize();
-        
+
         return real_result/(m_width*m_height);
     } else if(n_channels == (int) m_num_of_scales){
         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_1ch_all_scales);
-        
+
         CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(data_i_1ch_all_scales_d)));
         cudaDeviceSynchronize();
-        
+
         return real_result/(m_width*m_height);
     } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
         cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
-        
+
         CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
         cudaDeviceSynchronize();
-        
+
         return real_result/(m_width*m_height);
     }
-    
+
     cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features);
-    
+
     CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
     cudaDeviceSynchronize();
-    
+
     return real_result/(m_width*m_height);
 }
 
@@ -271,27 +271,31 @@ float* cuFFT::inverse_raw(const ComplexMat &input)
     }
 
     CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
-    
+
     return data_i_features_d;
 }
 
 cuFFT::~cuFFT()
 {
   CufftErrorCheck(cufftDestroy(plan_f));
-  CufftErrorCheck(cufftDestroy(plan_f_all_scales));
   CufftErrorCheck(cufftDestroy(plan_fw));
-  CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
   CufftErrorCheck(cufftDestroy(plan_i_1ch));
-  CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
   CufftErrorCheck(cufftDestroy(plan_i_features));
-  CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
-  
+
   CudaSafeCall(cudaFree(data_f));
-  CudaSafeCall(cudaFree(data_f_all_scales));
   CudaSafeCall(cudaFreeHost(data_fw));
-  CudaSafeCall(cudaFreeHost(data_fw_all_scales));
   CudaSafeCall(cudaFreeHost(data_i_1ch));
-  CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
   CudaSafeCall(cudaFreeHost(data_i_features));
-  CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
+  
+  if (m_big_batch_mode) {
+      CufftErrorCheck(cufftDestroy(plan_f_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
+      
+      CudaSafeCall(cudaFree(data_f_all_scales));
+      CudaSafeCall(cudaFreeHost(data_fw_all_scales));
+      CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
+      CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
+  }
 }