rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Remove unnecessary calls to cudaStreamSynchronize()
author Michal Sojka <michal.sojka@cvut.cz>
Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
src/complexmat.cu
src/debug.h
src/fft_cufft.cpp

index 1ebd883993581f583952ef333dc124bb068442e0..c8430ed9f6314c9311efe05d556c2613abec502f 100644 (file)
@@ -24,6 +24,7 @@ __global__ void sqr_norm_kernel(const float *in, float *block_res, int total)
 
 void ComplexMat_::sqr_norm(DynMem &result) const
 {
+
     assert(result.num_elem == n_scales);
 
     const uint total = n_channels / n_scales * rows * cols;
@@ -127,7 +128,6 @@ ComplexMat_ ComplexMat_::sum_over_channels() const
                                           reinterpret_cast<const float*>(p_data.deviceMem() + scale * n_channels_per_scale * rows * cols),
                                           n_channels_per_scale, total);
     }
-    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
     return result;
 }
 
index 07c6ee77a532ebc5bf2208e476441b26792c138b..cce8327201b44732d131ff4f080a01bed7813180 100644 (file)
@@ -83,9 +83,6 @@ class DbgTracer {
     {
         (void)line;
         if (debug || always) {
-#ifdef CUFFT
-            CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
-#endif
             IOSave s(std::cerr);
             std::cerr << std::setprecision(precision);
             std::cerr << indent() << name /*<< " @" << line */ << " " << print(obj) << std::endl;
index c7a9e2f1f3197de294b6adde40f60b610462eacb..5c0da667c6916ac78582b5dd9a4578b5cfc85a6d 100644 (file)
@@ -93,7 +93,6 @@ void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatS
     else
         cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
 #endif
-    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
 void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
@@ -111,8 +110,9 @@ void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
     else
         cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, out));
 #endif
-    // TODO: Investigate whether this scalling is needed or not
     cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
+    // The result is a cv::Mat, which will be accessed by the CPU, so we
+    // must synchronize with the GPU here
     CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
 }