rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Add cudaStreamSynchronize after FFT
author: Michal Sojka <michal.sojka@cvut.cz>
Thu, 4 Oct 2018 22:29:50 +0000 (00:29 +0200)
committer: Michal Sojka <michal.sojka@cvut.cz>
Thu, 4 Oct 2018 22:29:50 +0000 (00:29 +0200)
This is just a temporary solution; it should be possible to get rid of
CPU/GPU synchronization in most places.

src/fft_cufft.cpp

index 61bf1c9550bdc829f7eedeeb54540838bb2adb79..e551eaa41726690a1bce8d42b1f415931e691ff6 100644 (file)
@@ -92,6 +92,7 @@ void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatS
     else
         cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
 #endif
+    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
 void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
@@ -110,6 +111,7 @@ void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
 #endif
     // TODO: Investigate whether this scalling is needed or not
     cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
+    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
 cuFFT::~cuFFT()