From 7a8d1f09595a8ba7f41ab0c89b35135d54f35f3d Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 5 Oct 2018 00:44:47 +0200 Subject: [PATCH] ComplexMat: Add CUDA stream synchronization before accessing host memory --- src/complexmat.cu | 7 ++++++- src/complexmat.hpp | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/complexmat.cu b/src/complexmat.cu index 8982f70..b9b5c48 100644 --- a/src/complexmat.cu +++ b/src/complexmat.cu @@ -35,7 +35,7 @@ void ComplexMat_::sqr_norm(DynMem &result) const sqr_norm_kernel<<>>((const float*)p_data.deviceMem(), block_res.deviceMem(), total); CudaCheckError(); - CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread)); + cudaSync(); T res = 0; for (int i = 0; i < blocks.x; i++) @@ -339,3 +339,8 @@ ComplexMat_ ComplexMat_::mul(const ComplexMat_ &rhs) const // rhs.p_data = nullptr; // } + +void ComplexMat_::cudaSync() const +{ + CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread)); +} diff --git a/src/complexmat.hpp b/src/complexmat.hpp index 9da81e6..f6aaef2 100644 --- a/src/complexmat.hpp +++ b/src/complexmat.hpp @@ -32,6 +32,7 @@ class ComplexMat_ { ComplexMat_(const cv::Mat &mat) : cols(uint(mat.cols)), rows(uint(mat.rows)), n_channels(1), n_scales(1) , p_data(n_channels * cols * rows) { + cudaSync(); memcpy(p_data.hostMem(), mat.ptr>(), mat.total() * mat.elemSize()); } @@ -48,6 +49,7 @@ class ComplexMat_ { void set_channel(uint idx, const cv::Mat &mat) { assert(idx < n_channels); + cudaSync(); for (uint i = 0; i < rows; ++i) { const std::complex *row = mat.ptr>(i); for (uint j = 0; j < cols; ++j) @@ -83,8 +85,14 @@ class ComplexMat_ { return result; } - std::complex *get_p_data() { return p_data.hostMem(); } - const std::complex *get_p_data() const { return p_data.hostMem(); } + std::complex *get_p_data() { + cudaSync(); + return p_data.hostMem(); + } + const std::complex *get_p_data() const { + cudaSync(); + return p_data.hostMem(); + } #ifdef CUFFT cufftComplex *get_dev_data() { return (cufftComplex*)p_data.deviceMem(); } @@ -160,6 +168,12 @@ class ComplexMat_ { } return result; } + +#ifdef CUFFT + void cudaSync() const; +#else + void cudaSync() const {} +#endif }; typedef ComplexMat_ ComplexMat; -- 2.39.2