rtime.felk.cvut.cz Git - hercules2020/kcf.git/blobdiff - src/cuda_functions.cu
Work done so far on CUDA streams
[hercules2020/kcf.git] / src / cuda_functions.cu
index 44d41be8f27391b4020c02b485a467b3858b1747..4eef14cdc6f68683c4602fcc93ec04bc4c482b6e 100644 (file)
@@ -36,12 +36,12 @@ __global__ void  gaussian_correlation_kernel(float *data_in, float *data_out, fl
         }
 }
 
-void cuda_gaussian_correlation(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm, double sigma, int n_channels, int n_scales,int rows, int cols)
+void cuda_gaussian_correlation(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm, double sigma, int n_channels, int n_scales,int rows, int cols, cudaStream_t stream)
 {
     dim3 threadsPerBlock((n_channels/n_scales)/2);
     dim3 numBlocks(n_scales, rows*cols);
 
-    gaussian_correlation_kernel<<<numBlocks, threadsPerBlock, ((n_channels/n_scales)/2)*sizeof(float)>>>(data_in, data_out, xf_sqr_norm, yf_sqr_norm, rows, cols, n_channels/n_scales,  sigma);
+    gaussian_correlation_kernel<<<numBlocks, threadsPerBlock, ((n_channels/n_scales)/2)*sizeof(float), stream>>>(data_in, data_out, xf_sqr_norm, yf_sqr_norm, rows, cols, n_channels/n_scales,  sigma);
     CudaCheckError();
     
 //    float *data_cpu = (float*) malloc(rows*cols*n_scales*sizeof(float));