#include <algorithm>
#include <functional>
// When built with cuFFT support, pull in the CUDA managed-memory allocator and
// define managed_vector: a std::vector whose backing store is CUDA unified
// memory, addressable from both host and device.
+#ifdef CUFFT
+ #include "managed_allocator.h"
+
+ template<class T>
+ using managed_vector = std::vector<T,managed_allocator<T>>;
+#endif
+
template<typename T> class ComplexMat_
{
public:
private:
// NOTE(review): diff hunk — with CUFFT enabled the element storage switches to
// a managed_vector so the cuFFT path can hand the same buffer to the GPU
// without explicit copies; otherwise it stays a plain std::vector.
+#ifdef CUFFT
+ mutable managed_vector<std::complex<T>> p_data;
+#else
mutable std::vector<std::complex<T>> p_data;
-
+#endif
// Convert a 2-channel cv::Mat (channel 0 = real, channel 1 = imag) into a
// flat vector of std::complex, row by row. Body truncated in this hunk.
std::vector<std::complex<T>> convert(const cv::Mat & mat)
{
#include "fft_cufft.h"
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
-{
- if (code != cudaSuccess)
- {
- fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
- if (abort) exit(code);
- }
-}
-
// Construct the cuFFT wrapper with a fixed pool size of 4 CUDA streams;
// the streams themselves are created later, in init().
cuFFT::cuFFT(): m_num_of_streams(4)
{}
+
// One-time setup: record the FFT geometry and create the CUDA stream pool.
// Hunk is truncated here — the plan-creation part of the body is not visible.
void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
{
m_width = width;
std::cout << "FFT: cuFFT" << std::endl;
// NOTE(review): this hunk deletes the gpuErrchk wrapper, so the return codes
// of cudaSetDeviceFlags and cudaStreamCreate below are now silently ignored.
// A failure here will only surface later as a mysterious error from an
// unrelated call — recommend keeping the error checks.
-    for (unsigned i = 0; i < m_num_of_streams; i++) gpuErrchk(cudaStreamCreate(&streams[i]));
// cudaDeviceMapHost enables mapped pinned host allocations for this device;
// presumably needed by the zero-copy buffers used elsewhere — TODO confirm.
+    cudaSetDeviceFlags(cudaDeviceMapHost);
+
+    for (unsigned i = 0; i < m_num_of_streams; i++) cudaStreamCreate(&streams[i]);
//FFT forward one scale
{
// Forward FFT of one input image (and, judging by the fused hunks below, a
// windowed multi-channel variant as well).
// NOTE(review): this span fuses fragments of at least two diff hunks — `i`,
// `in_all` and `n_channels` are used without visible declarations, and neither
// function body is closed in view. Comments below are hedged accordingly.
ComplexMat cuFFT::forward(const cv::Mat &input)
{
// NOTE(review): "CUDA::GpuMat" — confirm the namespace; OpenCV's GPU matrix
// type lives in cv::cuda::GpuMat. Also note the upload's completion is not
// synchronized with anything visible here.
+    CUDA::GpuMat input_d(input);
ComplexMat complex_result;
// Batched path: input stacked as m_num_of_scales tiles of height m_height.
if(input.rows == (int)(m_height*m_num_of_scales)){
complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
// Apply the cosine window to each scale's ROI before transforming —
// presumably part of forward_window(); verify against the full file.
cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
in_roi = input[i].mul(m_window);
}
+    CUDA::GpuMat in_all_d(in_all);
ComplexMat result;
if(n_channels > (int) m_num_of_feats)
result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
// Tear down the CUDA stream pool created in init() and release the device.
cuFFT::~cuFFT()
{
    // The patch dropped gpuErrchk here, silently ignoring failures. Keep
    // checking the return codes, but only report — never exit() from a
    // destructor, since it may run during stack unwinding.
    for (unsigned i = 0; i < m_num_of_streams; i++) {
        cudaError_t err = cudaStreamDestroy(streams[i]);
        if (err != cudaSuccess)
            fprintf(stderr, "cuFFT::~cuFFT: cudaStreamDestroy(%u) failed: %s\n",
                    i, cudaGetErrorString(err));
    }
    // Destroy the device context (flushes profiling data, frees allocations).
    cudaDeviceReset();
}
--- /dev/null
+// https://github.com/jaredhoberock/managed_allocator
+#ifndef MANAGED_ALLOCATOR_H
+#define MANAGED_ALLOCATOR_H
+
+#include <cuda_runtime.h>
+#include <thrust/system_error.h>
+#include <thrust/system/cuda/error.h>
+
+template<class T>
+class managed_allocator
+{
+public:
+ using value_type = T;
+
+ managed_allocator() {}
+
+ template<class U>
+ managed_allocator(const managed_allocator<U>&) {}
+
+ value_type* allocate(size_t n)
+ {
+ value_type* result = nullptr;
+
+ cudaError_t error = cudaMallocManaged(&result, n*sizeof(T), cudaMemAttachGlobal);
+
+ if(error != cudaSuccess)
+ {
+ throw thrust::system_error(error, thrust::cuda_category(), "managed_allocator::allocate(): cudaMallocManaged");
+ }
+
+ return result;
+ }
+
+ void deallocate(value_type* ptr, size_t)
+ {
+ cudaError_t error = cudaFree(ptr);
+
+ if(error != cudaSuccess)
+ {
+ throw thrust::system_error(error, thrust::cuda_category(), "managed_allocator::deallocate(): cudaFree");
+ }
+ }
+};
+ template<class T1, class T2>
+ bool operator ==(const managed_allocator<T1>&, const managed_allocator<T2>&)
+ {
+ return true;
+ }
+
+ template<class T1, class T2>
+ bool operator!=(const managed_allocator<T1>& lhs, const managed_allocator<T2>& rhs)
+ {
+ return !(lhs == rhs);
+ }
+
+#endif // MANAGED_ALLOCATOR_H