void ComplexMat_::sqr_norm(DynMem &result) const
{
+
assert(result.num_elem == n_scales);
const uint total = n_channels / n_scales * rows * cols;
reinterpret_cast<const float*>(p_data.deviceMem() + scale * n_channels_per_scale * rows * cols),
n_channels_per_scale, total);
}
- CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
return result;
}
{
(void)line;
if (debug || always) {
-#ifdef CUFFT
- CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
-#endif
IOSave s(std::cerr);
std::cerr << std::setprecision(precision);
std::cerr << indent() << name /*<< " @" << line */ << " " << print(obj) << std::endl;
else
cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
#endif
- CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
}
void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
else
cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, out));
#endif
- // TODO: Investigate whether this scalling is needed or not
cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
+ // The result is a cv::Mat, which will be accessed by the CPU, so we
+ // must synchronize with the GPU here
CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
}