From: Michal Sojka Date: Mon, 8 Oct 2018 12:04:47 +0000 (+0200) Subject: Fix CUDA bugs that manifest themselves in BIG_BATCH mode X-Git-Url: https://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/commitdiff_plain/2230424ce6be376f11647c7b90876cbe2a7f4087 Fix CUDA bugs that manifest themselves in BIG_BATCH mode --- diff --git a/src/complexmat.cu b/src/complexmat.cu index 1580673..1ebd883 100644 --- a/src/complexmat.cu +++ b/src/complexmat.cu @@ -115,7 +115,6 @@ ComplexMat_ ComplexMat_::sum_over_channels() const assert(p_data.num_elem == n_channels * rows * cols); uint n_channels_per_scale = n_channels / n_scales; - uint scale_offset = n_channels_per_scale * rows * cols; ComplexMat_ result(this->rows, this->cols, 1, n_scales); @@ -124,8 +123,8 @@ ComplexMat_ ComplexMat_::sum_over_channels() const const dim3 blocks((total + threads.x - 1) / threads.x); for (uint scale = 0; scale < n_scales; ++scale) { - sum_channels<<>>(reinterpret_cast(result.p_data.deviceMem() + scale * scale_offset), - reinterpret_cast(p_data.deviceMem() + scale * scale_offset), + sum_channels<<>>(reinterpret_cast(result.p_data.deviceMem() + scale * rows * cols), + reinterpret_cast(p_data.deviceMem() + scale * n_channels_per_scale * rows * cols), n_channels_per_scale, total); } CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread)); diff --git a/src/fft_cufft.cpp b/src/fft_cufft.cpp index e551eaa..963c8b7 100644 --- a/src/fft_cufft.cpp +++ b/src/fft_cufft.cpp @@ -100,13 +100,14 @@ void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result) Fft::inverse(complex_input, real_result); uint n_channels = complex_input.n_channels; - cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data()); + cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_dev_data()); cufftReal *out = real_result.deviceMem(); float alpha = 1.0 / (m_width * m_height); if (n_channels == 1) cudaErrorCheck(cufftExecC2R(plan_i_1ch, in, out)); #ifdef BIG_BATCH + else cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, 
out)); #endif // TODO: Investigate whether this scalling is needed or not