From 2b6ca3ac7d86d4a06f0b22fbf3589aa344c676e2 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 5 Oct 2018 00:29:50 +0200 Subject: [PATCH] Add cudaStreamSynchronize after FFT This is just a temporary solution; it should be possible to get rid of CPU/GPU synchronization in most places. --- src/fft_cufft.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp index 61bf1c9..e551eaa 100644 --- a/src/fft_cufft.cpp +++ b/src/fft_cufft.cpp @@ -92,6 +92,7 @@ void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatS else cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data())); #endif + CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread)); } void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result) @@ -110,6 +111,7 @@ void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result) #endif // TODO: Investigate whether this scalling is needed or not cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1)); + CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread)); } cuFFT::~cuFFT() -- 2.39.2