5 #include <opencv2/opencv.hpp>
11 #if defined(CUFFT) || defined(CUFFTW)
12 #include "cuda_runtime.h"
14 #include "cuda_error_check.hpp"
// Size-bucketed free-list pool: for each allocation size, a stack of
// previously released pointers ready for reuse; the class mutex guards the map.
// NOTE(review): this chunk is a gapped listing (original line numbers are
// embedded in each line and intermediate lines are missing); comments
// annotate only what is visible.
20 std::map<size_t, std::stack<void*> > map;
// Fetch a block for `size`: lock the pool and look up the bucket for this
// size. The hit/miss handling after the lookup is not visible here --
// presumably pops a cached pointer, or signals a miss so the caller
// allocates fresh memory. TODO confirm against the full source.
23 void *get(size_t size) {
24 std::lock_guard<std::mutex> guard(mutex);
25 auto &stack = map[size];
// Return `ptr` to the bucket for `size` so a later get(size) can reuse it.
// The push itself is on a line not visible in this chunk.
33 void put(void *ptr, size_t size) {
34 std::lock_guard<std::mutex> guard(mutex);
// Fixed-size dynamically allocated array of `num_elem` elements of T.
// In CUDA builds the storage is recycled through a shared MemoryManager pool
// and, on a pool miss, freshly allocated as page-locked host memory mapped
// into the device address space (ptr_h/ptr_d are host/device aliases of the
// same mapped allocation); otherwise it is a plain new[] heap array.
// NOTE(review): interior lines (#ifdef branches, braces, member declarations,
// parts of the destructor) are missing from this gapped chunk; comments
// describe only the visible code.
39 template <typename T> class DynMem_ {
// Pool shared by every DynMem_<T> instance; defined after the class below.
44 static MemoryManager mmng;
// Element count; const, so assignment asserts matching sizes instead of
// reallocating.
48 const size_t num_elem;
50 DynMem_(size_t num_elem) : num_elem(num_elem)
// Try to reuse a pooled buffer first.
// NOTE(review): the pool is keyed by element count (num_elem) while
// cudaHostAlloc below allocates num_elem * sizeof(T) bytes. This stays
// consistent only because the destructor's mmng.put(ptr_h, num_elem) uses the
// same key -- confirm no caller mixes byte counts into the pool.
53 ptr_h = reinterpret_cast<T*>(mmng.get(num_elem));
// Presumably the pool-miss path (guarding branch not visible): log and
// allocate fresh pinned, device-mappable host memory.
55 printf("malloc(%zu)\n", num_elem);
56 CudaSafeCall(cudaHostAlloc(reinterpret_cast<void **>(&ptr_h), num_elem * sizeof(T), cudaHostAllocMapped));
// Obtain the device-side alias of the mapped host allocation.
58 CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void **>(&ptr_d), reinterpret_cast<void *>(ptr_h), 0));
// Non-CUDA build: ordinary heap array.
60 ptr_h = new T[num_elem];
// Copy constructor: allocate via the delegating ctor, then deep-copy contents.
63 DynMem_(const DynMem_ &other) : DynMem_(other.num_elem)
65 memcpy(ptr_h, other.ptr_h, num_elem * sizeof(T));
// Move constructor: steal the pointers and null out the source so its
// destructor does not release the buffer.
67 DynMem_(DynMem_ &&other) : num_elem(other.num_elem)
70 other.ptr_h = nullptr;
73 other.ptr_d = nullptr;
// Accessors for the host- and device-side views of the same buffer.
80 T *hostMem() { return ptr_h; }
81 const T *hostMem() const { return ptr_h; }
83 T *deviceMem() { return ptr_d; }
84 const T *deviceMem() const { return ptr_d; }
// Copy assignment: sizes must match (num_elem is const); deep-copies data.
// NOTE(review): takes a non-const reference and returns void, unlike the
// canonical operator= signature -- confirm callers never need chaining or
// assignment from a const source.
86 void operator=(DynMem_ &rhs) {
87 assert(num_elem == rhs.num_elem);
88 memcpy(ptr_h, rhs.ptr_h, num_elem * sizeof(T));
// Move assignment: same size requirement; the pointer-handoff lines are not
// visible in this chunk.
90 void operator=(DynMem_ &&rhs)
92 assert(num_elem == rhs.num_elem);
// Read-only element access (returns by value).
101 T operator[](uint i) const { return ptr_h[i]; }
// Destructor (CUDA path): recycle the buffer into the shared pool instead of
// freeing it outright; the commented-out line shows the former direct free.
107 mmng.put(ptr_h, num_elem);
108 //CudaSafeCall(cudaFreeHost(ptr_h));
// Definition of the pool shared across all instantiations of DynMem_<T>.
116 template <typename T>
117 MemoryManager DynMem_<T>::mmng;
// The rest of the file only uses float buffers.
120 typedef DynMem_<float> DynMem;
// cv::Mat whose pixel storage lives in a DynMem buffer, so the same data is
// reachable as a Mat header on the host and (in CUDA builds) via deviceMem().
// Every constructor asserts CV_32F depth, matching DynMem = DynMem_<float>.
// NOTE(review): interior lines (access specifiers, braces, parts of volume())
// are missing from this gapped chunk; only comments are added here.
123 class MatDynMem : public DynMem, public cv::Mat {
// 2-D matrix: DynMem is sized in float elements (pixels * channels) and the
// Mat header aliases hostMem() without owning or copying it.
125 MatDynMem(cv::Size size, int type)
126 : DynMem(size.area() * CV_MAT_CN(type)), cv::Mat(size, type, hostMem())
128 assert((type & CV_MAT_DEPTH_MASK) == CV_32F);
// Same as above with explicit height/width.
130 MatDynMem(int height, int width, int type)
131 : DynMem(width * height * CV_MAT_CN(type)), cv::Mat(height, width, type, hostMem())
133 assert((type & CV_MAT_DEPTH_MASK) == CV_32F);
// N-dimensional variant; volume() multiplies the extents together.
135 MatDynMem(int ndims, const int *sizes, int type)
136 : DynMem(volume(ndims, sizes) * CV_MAT_CN(type)), cv::Mat(ndims, sizes, type, hostMem())
138 assert((type & CV_MAT_DEPTH_MASK) == CV_32F);
// NOTE(review): unlike the other constructors, this one does not multiply the
// element count by CV_MAT_CN(type) -- correct for single-channel types only.
// Verify it is never used with a multi-channel type.
140 MatDynMem(std::vector<int> size, int type)
141 : DynMem(std::accumulate(size.begin(), size.end(), 1, std::multiplies<int>()))
142 , cv::Mat(size.size(), size.data(), type, hostMem()) {}
143 MatDynMem(MatDynMem &&other) = default;
// NOTE(review): cv::Mat(other) makes this header share `other`'s data rather
// than point at the freshly allocated DynMem buffer (hostMem() is not passed),
// so the DynMem allocation appears unused by this constructor -- verify intent.
144 MatDynMem(const cv::Mat &other)
145 : DynMem(other.total()) , cv::Mat(other) {}
// Evaluate a matrix expression into this matrix.
// NOTE(review): static_cast to cv::Mat *by value* assigns into a temporary
// header; the result reaches the shared buffer only if the expression
// evaluates in place. static_cast<cv::Mat &>(*this) would target this
// object's own header -- confirm which was intended.
147 void operator=(const cv::MatExpr &expr) {
148 static_cast<cv::Mat>(*this) = expr;
// Product of the first `ndims` entries of `sizes` (the accumulation line is
// not visible in this chunk).
152 static int volume(int ndims, const int *sizes)
155 for (int i = 0; i < ndims; i++)
// Re-expose cv::Mat::create, which would otherwise be hidden in this class.
160 using cv::Mat::create;
// 3-D float matrix (dim0 x height x width) with cheap 2-D per-plane views.
163 class Mat3d : public MatDynMem
166 Mat3d(uint dim0, cv::Size size) : MatDynMem({{int(dim0), size.height, size.width}}, CV_32F) {}
// 2-D header over plane `idx`; shares storage with this matrix (no copy).
168 cv::Mat plane(uint idx) {
170 assert(int(idx) < size[0]);
171 return cv::Mat(size[1], size[2], cv::Mat::type(), ptr(idx));
// Const overload. The const_cast is needed because the external-data cv::Mat
// constructor takes a non-const pointer; note the const-ness of the returned
// header is shallow -- a copy of it could still write the shared data.
173 const cv::Mat plane(uint idx) const {
175 assert(int(idx) < size[0]);
176 return cv::Mat(size[1], size[2], cv::Mat::type(), const_cast<uchar*>(ptr(idx)));
// Thin semantic wrapper over Mat3d: the first dimension is feature channels.
181 class MatFeats : public Mat3d
184 MatFeats(uint num_features, cv::Size size) : Mat3d(num_features, size) {}
// Thin semantic wrapper over Mat3d: the first dimension is scale index.
186 class MatScales : public Mat3d
189 MatScales(uint num_scales, cv::Size size) : Mat3d(num_scales, size) {}
// 4-D float matrix (scales x features x height x width) with 2-D and 3-D
// sub-views. NOTE(review): the end of this class lies beyond the visible
// chunk; comments cover only the visible portion.
192 class MatScaleFeats : public MatDynMem
195 MatScaleFeats(uint num_scales, uint num_features, cv::Size size)
196 : MatDynMem({{int(num_scales), int(num_features), size.height, size.width}}, CV_32F) {}
// 2-D header over one (scale, feature) plane; shares storage, no copy.
198 cv::Mat plane(uint scale, uint feature) {
200 assert(int(scale) < size[0]);
201 assert(int(feature) < size[1]);
202 return cv::Mat(size[2], size[3], cv::Mat::type(), ptr(scale, feature));
// 3-D header over all feature planes of one scale.
// The std::vector temporary's data() is read by the cv::Mat constructor
// before the temporary dies at the end of the full expression, so this is
// valid -- though a plain int[3] would avoid the allocation.
204 cv::Mat scale(uint scale) {
206 assert(int(scale) < size[0]);
207 return cv::Mat(3, std::vector<int>({size[1], size[2], size[3]}).data(), cv::Mat::type(), ptr(scale));