X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/blobdiff_plain/435ce10fdd459c9e95b2fb10545710ba28029ff4..dc09fcf3326df0870b03c0b29378d4b8915fa1b6:/src/dynmem.hpp diff --git a/src/dynmem.hpp b/src/dynmem.hpp index f456094..8e49643 100644 --- a/src/dynmem.hpp +++ b/src/dynmem.hpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #if defined(CUFFT) || defined(CUFFTW) #include "cuda_runtime.h" @@ -13,11 +15,33 @@ #endif #endif +class MemoryManager { + std::mutex mutex; + std::map > map; + +public: + void *get(size_t size) { + std::lock_guard guard(mutex); + auto &stack = map[size]; + void *ptr = nullptr; + if (!stack.empty()) { + ptr = stack.top(); + stack.pop(); + } + return ptr; + } + void put(void *ptr, size_t size) { + std::lock_guard guard(mutex); + map[size].push(ptr); + } +}; + template class DynMem_ { private: T *ptr_h = nullptr; #ifdef CUFFT T *ptr_d = nullptr; + static MemoryManager mmng; #endif public: typedef T value_type; @@ -26,7 +50,10 @@ template class DynMem_ { DynMem_(size_t num_elem) : num_elem(num_elem) { #ifdef CUFFT - CudaSafeCall(cudaHostAlloc(reinterpret_cast(&ptr_h), num_elem * sizeof(T), cudaHostAllocMapped)); + ptr_h = reinterpret_cast(mmng.get(num_elem)); + if (!ptr_h) + CudaSafeCall(cudaHostAlloc(reinterpret_cast(&ptr_h), num_elem * sizeof(T), cudaHostAllocMapped)); + CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast(&ptr_d), reinterpret_cast(ptr_h), 0)); #else ptr_h = new T[num_elem]; @@ -75,13 +102,20 @@ private: void release() { #ifdef CUFFT - CudaSafeCall(cudaFreeHost(ptr_h)); + if (ptr_h) + mmng.put(ptr_h, num_elem); + //CudaSafeCall(cudaFreeHost(ptr_h)); #else delete[] ptr_h; #endif } }; +#ifdef CUFFT +template +MemoryManager DynMem_::mmng; +#endif + typedef DynMem_ DynMem;