Remove unnecessary calls to cudaStreamSynchronize()

author Michal Sojka <michal.sojka@cvut.cz>

Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)

committer Michal Sojka <michal.sojka@cvut.cz>

Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
author Michal Sojka <michal.sojka@cvut.cz>
Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
diff --git a/src/complexmat.cu b/src/complexmat.cu

index 1ebd883993581f583952ef333dc124bb068442e0..c8430ed9f6314c9311efe05d556c2613abec502f 100644 (file)
--- a/src/complexmat.cu
+++ b/src/complexmat.cu
@@ -24,6 +24,7 @@ __global__ void sqr_norm_kernel(const float *in, float *block_res, int total)
  
  void ComplexMat_::sqr_norm(DynMem &result) const
  {
+
      assert(result.num_elem == n_scales);
  
      const uint total = n_channels / n_scales * rows * cols;
@@ -127,7 +128,6 @@ ComplexMat_ ComplexMat_::sum_over_channels() const
                                            reinterpret_cast<const float*>(p_data.deviceMem() + scale * n_channels_per_scale * rows * cols),
                                            n_channels_per_scale, total);
      }
-    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
      return result;
  }
  
diff --git a/src/debug.h b/src/debug.h

index 07c6ee77a532ebc5bf2208e476441b26792c138b..cce8327201b44732d131ff4f080a01bed7813180 100644 (file)
--- a/src/debug.h
+++ b/src/debug.h
@@ -83,9 +83,6 @@ class DbgTracer {
      {
          (void)line;
          if (debug || always) {
-#ifdef CUFFT
-            CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
-#endif
              IOSave s(std::cerr);
              std::cerr << std::setprecision(precision);
              std::cerr << indent() << name /*<< " @" << line */ << " " << print(obj) << std::endl;
diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp

index c7a9e2f1f3197de294b6adde40f60b610462eacb..5c0da667c6916ac78582b5dd9a4578b5cfc85a6d 100644 (file)
--- a/src/fft_cufft.cpp
+++ b/src/fft_cufft.cpp
@@ -93,7 +93,6 @@ void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatS
      else
          cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
  #endif
-    CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
  }
  
  void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
@@ -111,8 +110,9 @@ void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
      else
          cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, out));
  #endif
-    // TODO: Investigate whether this scalling is needed or not
      cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
+    // The result is a cv::Mat, which will be accessed by the CPU, so we
+    // must synchronize with the GPU here
      CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
  }
author	Michal Sojka <michal.sojka@cvut.cz>
	Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
committer	Michal Sojka <michal.sojka@cvut.cz>
	Fri, 12 Oct 2018 13:13:58 +0000 (15:13 +0200)
src/complexmat.cu		patch \| blob \| history
src/debug.h		patch \| blob \| history
src/fft_cufft.cpp		patch \| blob \| history