From: Michal Sojka Date: Mon, 21 Jan 2019 23:59:32 +0000 (+0100) Subject: Add simple CUDA memory manager X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/commitdiff_plain/93502b823a0d73c5dd36f31261d4dd377ad09e08 Add simple CUDA memory manager Thanks to this, we avoid allocating CUDA host-mapped memory at runtime, which acts as implicit sync point and slows down the execution. --- diff --git a/src/dynmem.hpp b/src/dynmem.hpp index f456094..2d62352 100644 --- a/src/dynmem.hpp +++ b/src/dynmem.hpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #if defined(CUFFT) || defined(CUFFTW) #include "cuda_runtime.h" @@ -13,11 +15,33 @@ #endif #endif +class MemoryManager { + std::mutex mutex; + std::map<size_t, std::stack<void *> > map; + +public: + void *get(size_t size) { + std::lock_guard<std::mutex> guard(mutex); + auto &stack = map[size]; + void *ptr = nullptr; + if (!stack.empty()) { + ptr = stack.top(); + stack.pop(); + } + return ptr; + } + void put(void *ptr, size_t size) { + std::lock_guard<std::mutex> guard(mutex); + map[size].push(ptr); + } +}; + template <typename T> class DynMem_ { private: T *ptr_h = nullptr; #ifdef CUFFT T *ptr_d = nullptr; + static MemoryManager mmng; #endif public: typedef T value_type; @@ -26,7 +50,11 @@ template <typename T> class DynMem_ { DynMem_(size_t num_elem) : num_elem(num_elem) { #ifdef CUFFT - CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&ptr_h), num_elem * sizeof(T), cudaHostAllocMapped)); + ptr_h = reinterpret_cast<T *>(mmng.get(num_elem)); + if (!ptr_h) { + printf("malloc(%zu)\n", num_elem); + CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&ptr_h), num_elem * sizeof(T), cudaHostAllocMapped)); + } CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&ptr_d), reinterpret_cast<void *>(ptr_h), 0)); #else ptr_h = new T[num_elem]; @@ -75,13 +103,20 @@ private: void release() { #ifdef CUFFT - CudaSafeCall(cudaFreeHost(ptr_h)); + if (ptr_h) + mmng.put(ptr_h, num_elem); + //CudaSafeCall(cudaFreeHost(ptr_h)); #else delete[] ptr_h; #endif } }; +#ifdef CUFFT +template <typename T> +MemoryManager DynMem_<T>::mmng;
+#endif + typedef DynMem_ DynMem;