Changed destructor of cuFFT to only destroy plans and delete data used in big batch mode when that mode is enabled

author Shanigen <vkaraf@gmail.com>

Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)

committer Shanigen <vkaraf@gmail.com>

Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
author Shanigen <vkaraf@gmail.com>
Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
committer Shanigen <vkaraf@gmail.com>
Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp

index fe3b6f494ec65d5fedfe206b21c7941b4db4aebf..714c599bf0b19ba8ca3326a908f96c71e653decf 100644 (file)
--- a/src/fft_cufft.cpp
+++ b/src/fft_cufft.cpp
@@ -9,7 +9,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      m_big_batch_mode = big_batch_mode;
  
      std::cout << "FFT: cuFFT" << std::endl;
-    
+
      if(m_height*(m_width/2+1) > 1024){
          std::cerr << "Image dimension after forward FFT are too big for CUDA kernels." << std::endl;
          std::exit(EXIT_FAILURE);
@@ -18,16 +18,16 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      //FFT forward one scale
      {
          CudaSafeCall(cudaMalloc(&data_f, m_height*m_width*sizeof(cufftReal)));
-        
+
         CufftErrorCheck(cufftPlan2d(&plan_f, m_height, m_width, CUFFT_R2C));
-        
-        
+
+
      }
      //FFT forward all scales
      if(m_num_of_scales > 1 && m_big_batch_mode)
      {
          CudaSafeCall(cudaMalloc(&data_f_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal)));
-        
+
         int rank = 2;
         int n[] = {(int)m_height, (int)m_width};
         int howmany = m_num_of_scales;
@@ -44,7 +44,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      {
          CudaSafeCall(cudaHostAlloc(&data_fw, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
          CudaSafeCall(cudaHostGetDevicePointer(&data_fw_d, data_fw, 0));
-        
+
          int rank = 2;
          int n[] = {(int)m_height, (int)m_width};
          int howmany = m_num_of_feats;
@@ -74,14 +74,14 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
                   inembed, istride, idist,
                   onembed, ostride, odist,
                   CUFFT_R2C, howmany));
-        
-        
+
+
      }
      //FFT inverse one scale
      {
          CudaSafeCall(cudaHostAlloc(&data_i_features, m_height*m_num_of_feats*m_width*sizeof(cufftReal), cudaHostAllocMapped));
          CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_d, data_i_features, 0));
-        
+
          int rank = 2;
          int n[] = {(int)m_height, (int)m_width};
          int howmany = m_num_of_feats;
@@ -99,7 +99,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      {
          CudaSafeCall(cudaHostAlloc(&data_i_features_all_scales, m_height*m_num_of_feats*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
          CudaSafeCall(cudaHostGetDevicePointer(&data_i_features_all_scales_d, data_i_features_all_scales, 0));
-        
+
          int rank = 2;
          int n[] = {(int)m_height, (int)m_width};
          int howmany = m_num_of_feats*m_num_of_scales;
@@ -116,7 +116,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      {
          CudaSafeCall(cudaHostAlloc(&data_i_1ch, m_height*m_width*sizeof(cufftReal), cudaHostAllocMapped));
          CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_d, data_i_1ch, 0));
-        
+
          int rank = 2;
          int n[] = {(int)m_height, (int)m_width};
          int howmany = 1;
@@ -134,7 +134,7 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
      {
          CudaSafeCall(cudaHostAlloc(&data_i_1ch_all_scales, m_height*m_num_of_scales*m_width*sizeof(cufftReal), cudaHostAllocMapped));
          CudaSafeCall(cudaHostGetDevicePointer(&data_i_1ch_all_scales_d, data_i_1ch_all_scales, 0));
-        
+
          int rank = 2;
          int n[] = {(int)m_height, (int)m_width};
          int howmany = m_num_of_scales;
@@ -168,7 +168,7 @@ ComplexMat cuFFT::forward(const cv::Mat &input)
          CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(data_f),
                                  complex_result.get_p_data()));
      }
-    
+
      return complex_result;
  }
  
@@ -197,9 +197,9 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
              cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
              in_roi = input[i].mul(m_window);
          }
-        
+
          result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
-        
+
          CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(data_fw_all_scales_d), result.get_p_data()));
      } else {
          cv::Mat in_all(m_height * n_channels, m_width, CV_32F, data_fw);
@@ -207,9 +207,9 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
              cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
              in_roi = input[i].mul(m_window);
          }
-        
+
          result.create(m_height, m_width/2 + 1, n_channels);
-        
+
          CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(data_fw_d), result.get_p_data()));
      }
      return result;
@@ -219,35 +219,35 @@ cv::Mat cuFFT::inverse(const ComplexMat &input)
  {
      int n_channels = input.n_channels;
      cufftComplex *in = reinterpret_cast<cufftComplex*>(input.get_p_data());
-    
+
      if(n_channels == 1){
          cv::Mat real_result(m_height, m_width, CV_32FC1, data_i_1ch);
-        
+
          CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(data_i_1ch_d)));
          cudaDeviceSynchronize();
-        
+
          return real_result/(m_width*m_height);
      } else if(n_channels == (int) m_num_of_scales){
          cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_1ch_all_scales);
-        
+
          CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(data_i_1ch_all_scales_d)));
          cudaDeviceSynchronize();
-        
+
          return real_result/(m_width*m_height);
      } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
          cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features_all_scales);
-        
+
          CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
          cudaDeviceSynchronize();
-        
+
          return real_result/(m_width*m_height);
      }
-    
+
      cv::Mat real_result(m_height, m_width, CV_32FC(n_channels), data_i_features);
-    
+
      CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
      cudaDeviceSynchronize();
-    
+
      return real_result/(m_width*m_height);
  }
  
@@ -271,27 +271,31 @@ float* cuFFT::inverse_raw(const ComplexMat &input)
      }
  
      CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
-    
+
      return data_i_features_d;
  }
  
  cuFFT::~cuFFT()
  {
    CufftErrorCheck(cufftDestroy(plan_f));
-  CufftErrorCheck(cufftDestroy(plan_f_all_scales));
    CufftErrorCheck(cufftDestroy(plan_fw));
-  CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
    CufftErrorCheck(cufftDestroy(plan_i_1ch));
-  CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
    CufftErrorCheck(cufftDestroy(plan_i_features));
-  CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
-  
+
    CudaSafeCall(cudaFree(data_f));
-  CudaSafeCall(cudaFree(data_f_all_scales));
    CudaSafeCall(cudaFreeHost(data_fw));
-  CudaSafeCall(cudaFreeHost(data_fw_all_scales));
    CudaSafeCall(cudaFreeHost(data_i_1ch));
-  CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
    CudaSafeCall(cudaFreeHost(data_i_features));
-  CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
+  
+  if (m_big_batch_mode) {
+      CufftErrorCheck(cufftDestroy(plan_f_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
+      CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
+      
+      CudaSafeCall(cudaFree(data_f_all_scales));
+      CudaSafeCall(cudaFreeHost(data_fw_all_scales));
+      CudaSafeCall(cudaFreeHost(data_i_1ch_all_scales));
+      CudaSafeCall(cudaFreeHost(data_i_features_all_scales));
+  }
  }
author	Shanigen <vkaraf@gmail.com>
	Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)
committer	Shanigen <vkaraf@gmail.com>
	Wed, 9 May 2018 15:14:50 +0000 (17:14 +0200)