rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Added custom allocator for vectors in ComplexMat when using cuFFT.
author Shanigen <vkaraf@gmail.com>
Wed, 28 Mar 2018 08:37:56 +0000 (10:37 +0200)
committer Shanigen <vkaraf@gmail.com>
Wed, 28 Mar 2018 08:37:56 +0000 (10:37 +0200)
src/complexmat.hpp
src/fft_cufft.cpp
src/managed_allocator.h [new file with mode: 0644]

index 0dad25c722bf4415203181a3098ebb1d67582d2e..9dbe1ae83194172e5f9b1afc13c2205ee8d8457a 100644 (file)
@@ -6,6 +6,13 @@
 #include <algorithm>
 #include <functional>
 
+#ifdef CUFFT
+  #include "managed_allocator.h"
+  
+  template<class T>
+  using managed_vector = std::vector<T,managed_allocator<T>>;
+#endif
+
 template<typename T> class ComplexMat_
 {
 public:
@@ -187,8 +194,11 @@ public:
 
 
 private:
+#ifdef CUFFT 
+    mutable managed_vector<std::complex<T>> p_data;
+#else
     mutable std::vector<std::complex<T>> p_data;
-
+#endif
     //convert 2 channel mat (real, imag) to vector row-by-row
     std::vector<std::complex<T>> convert(const cv::Mat & mat)
     {
index 6269992707e010ace175206c310c12061dc40f8e..10ee873a2dd5252a61fda33feb37642160e6658b 100644 (file)
@@ -1,17 +1,8 @@
 #include "fft_cufft.h"
 
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
-{
-   if (code != cudaSuccess)
-   {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
-}
-
 cuFFT::cuFFT(): m_num_of_streams(4)
 {}
+
 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
 {
     m_width = width;
@@ -21,7 +12,9 @@ void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigne
 
     std::cout << "FFT: cuFFT" << std::endl;
 
-    for (unsigned i = 0; i < m_num_of_streams; i++) gpuErrchk(cudaStreamCreate(&streams[i]));
+    cudaSetDeviceFlags(cudaDeviceMapHost);
+
+    for (unsigned i = 0; i < m_num_of_streams; i++) cudaStreamCreate(&streams[i]);
 
     //FFT forward one scale
     {
@@ -138,6 +131,7 @@ void cuFFT::set_window(const cv::Mat &window)
 
 ComplexMat cuFFT::forward(const cv::Mat &input)
 {
+    CUDA::GpuMat input_d(input);
     ComplexMat complex_result;
     if(input.rows == (int)(m_height*m_num_of_scales)){
         complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
@@ -159,6 +153,7 @@ ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
         cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
         in_roi = input[i].mul(m_window);
     }
+    CUDA::GpuMat in_all_d(in_all);
     ComplexMat result;
     if(n_channels > (int) m_num_of_feats)
         result.create(m_height, m_width/2 + 1, n_channels,m_num_of_scales);
@@ -198,7 +193,7 @@ cv::Mat cuFFT::inverse(const ComplexMat &inputf)
 cuFFT::~cuFFT()
 {
 
-  for(unsigned i = 0; i < m_num_of_streams; i++) gpuErrchk(cudaStreamDestroy(streams[i]));
+  for(unsigned i = 0; i < m_num_of_streams; i++) cudaStreamDestroy(streams[i]);
 
   cudaDeviceReset();
 }
diff --git a/src/managed_allocator.h b/src/managed_allocator.h
new file mode 100644 (file)
index 0000000..0e6c5f1
--- /dev/null
@@ -0,0 +1,56 @@
+// https://github.com/jaredhoberock/managed_allocator
+#ifndef MANAGED_ALLOCATOR_H
+#define MANAGED_ALLOCATOR_H
+
+#include <cuda_runtime.h>
+#include <thrust/system_error.h>
+#include <thrust/system/cuda/error.h>
+
+template<class T>
+class managed_allocator
+{
+public:
+    using value_type = T;
+    
+    managed_allocator() {}
+    
+    template<class U>
+    managed_allocator(const managed_allocator<U>&) {}
+    
+    value_type* allocate(size_t n)
+    {
+      value_type* result = nullptr;
+      
+      cudaError_t error = cudaMallocManaged(&result, n*sizeof(T), cudaMemAttachGlobal);
+      
+      if(error != cudaSuccess)
+      {
+       throw thrust::system_error(error, thrust::cuda_category(), "managed_allocator::allocate(): cudaMallocManaged");
+      }
+      
+      return result;
+    }
+    
+    void deallocate(value_type* ptr, size_t)
+    {
+      cudaError_t error = cudaFree(ptr);
+      
+      if(error != cudaSuccess)
+      {
+       throw thrust::system_error(error, thrust::cuda_category(), "managed_allocator::deallocate(): cudaFree");
+      }
+    }
+};   
+    template<class T1, class T2>
+    bool operator ==(const managed_allocator<T1>&, const managed_allocator<T2>&)
+    {
+      return true;
+    }
+    
+    template<class T1, class T2>
+    bool operator!=(const managed_allocator<T1>& lhs, const managed_allocator<T2>& rhs)
+    {
+      return !(lhs == rhs);
+    }
+
+#endif // MANAGED_ALLOCATOR_H