// Use CPU version for now, because the CUDA version seems to have problems.
DEBUG_PRINTM(xyf_sum);
kcf.fft.inverse(xyf_sum, ifft_res);
DEBUG_PRINTM(ifft_res);
DEBUG_PRINTM(xyf_sum);
kcf.fft.inverse(xyf_sum, ifft_res);
DEBUG_PRINTM(ifft_res);
// FIXME
cuda_gaussian_correlation(ifft_res.deviceMem(), k.deviceMem(), xf_sqr_norm.deviceMem(),
auto_correlation ? xf_sqr_norm.deviceMem() : yf_sqr_norm.deviceMem(), sigma,
// FIXME
cuda_gaussian_correlation(ifft_res.deviceMem(), k.deviceMem(), xf_sqr_norm.deviceMem(),
auto_correlation ? xf_sqr_norm.deviceMem() : yf_sqr_norm.deviceMem(), sigma,