# Makefile to build all the available variants
-BUILDS = opencvfft-st opencvfft-async opencvfft-openmp fftw fftw-async fftw-openmp fftw-big fftw-big-openmp cufftw cufftw-big cufftw-big-openmp cufft cufft-big cufft-big-openmp
+BUILDS = opencvfft-st opencvfft-async opencvfft-openmp fftw fftw-async fftw-openmp fftw-big fftw-big-openmp cufftw cufftw-big cufftw-big-openmp cufft cufft-openmp cufft-big cufft-big-openmp
all: $(foreach build,$(BUILDS),build-$(build)/kcf_vot)
CMAKE_OTPS_cufftw-big = -DFFT=cuFFTW -DBIG_BATCH=ON
CMAKE_OTPS_cufftw-big-openmp = -DFFT=cuFFTW -DBIG_BATCH=ON -DOPENMP=ON
CMAKE_OTPS_cufft = -DFFT=cuFFT
+CMAKE_OTPS_cufft-openmp = -DFFT=cuFFT -DOPENMP=ON
CMAKE_OTPS_cufft-big = -DFFT=cuFFT -DBIG_BATCH=ON
CMAKE_OTPS_cufft-big-openmp = -DFFT=cuFFT -DBIG_BATCH=ON -DOPENMP=ON
CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(real_input_arr),
complex_result.get_p_data()));
} else {
+#pragma omp critical
+ {
CufftErrorCheck(cufftSetStream(plan_f, stream));
- CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(real_input_arr),
- complex_result.get_p_data()));
+ CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
+ cudaStreamSynchronize(stream);
+ }
}
return;
}
cv::Mat in_roi(fw_all, cv::Rect(0, int(i*m_height), int(m_width), int(m_height)));
in_roi = patch_feats[i].mul(m_window);
}
+#pragma omp critical
+ {
CufftErrorCheck(cufftSetStream(plan_fw, stream));
CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
+ cudaStreamSynchronize(stream);
+ }
}
return;
}
cufftComplex *in = reinterpret_cast<cufftComplex*>(complex_input.get_p_data());
if(n_channels == 1){
+#pragma omp critical
+ {
CufftErrorCheck(cufftSetStream(plan_i_1ch, stream));
CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(real_result_arr)));
cudaStreamSynchronize(stream);
+ }
real_result = real_result/(m_width*m_height);
return;
} else if(n_channels == int(m_num_of_scales)){
CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(real_result_arr)));
return;
}
+#pragma omp critical
+ {
CufftErrorCheck(cufftSetStream(plan_i_features, stream));
CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(real_result_arr)));
+#if defined(OPENMP) && !defined(BIG_BATCH)
+ cudaStreamSynchronize(stream);
+#endif
+ }
return;
}
m_num_of_scales = num_of_scales;
m_big_batch_mode = big_batch_mode;
-#if (!defined(ASYNC) && !defined(CUFFTW))|| defined(OPENMP)
+#if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
fftw_init_threads();
#endif //OPENMP
#define DEBUG_PRINT(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl;}
#define DEBUG_PRINTM(obj) if (m_debug) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;}
+#define DEBUG_PRINTD(obj) {std::cout << #obj << " @" << __LINE__ << " " << (obj).size() << " CH: " << (obj).channels() << std::endl << (obj) << std::endl;}
+
KCF_Tracker::KCF_Tracker(double padding, double kernel_sigma, double lambda, double interp_factor, double output_sigma_factor, int cell_size) :
fft(*new FFT()),
else {
p_scale_vars.emplace_back(new Scale_vars(p_windows_size, p_cell_size, p_num_of_feats, 1));
}
-#ifdef CUFFT
- std::cout << p_scale_vars.back()->zf.stream << std::endl;
- std::cout << p_scale_vars.back()->kzf.stream << std::endl;
- std::cout << p_scale_vars.back()->kf.stream << std::endl << std::endl;
-
- std::cout << p_scale_vars.back()->zf.n_scales << std::endl;
- std::cout << p_scale_vars.back()->kzf.n_scales << std::endl;
- std::cout << p_scale_vars.back()->kf.n_scales << std::endl << std::endl;
-#endif
}
p_current_scale = 1.;
p_scale_vars.front()->model_xf = p_model_xf;
p_scale_vars.front()->model_xf.set_stream(p_scale_vars.front()->stream);
p_yf.set_stream(p_scale_vars.front()->stream);
+ p_model_xf.set_stream(p_scale_vars.front()->stream);
#endif
if (m_use_linearkernel) {
p_current_scale = p_min_max_scale[0];
if (p_current_scale > p_min_max_scale[1])
p_current_scale = p_min_max_scale[1];
+
//obtain a subwindow for training at newly estimated target position
p_scale_vars.front()->patch_feats.clear();
get_features(input_rgb, input_gray, int(p_pose.cx), int(p_pose.cy), p_windows_size[0], p_windows_size[1], *p_scale_vars.front(), p_current_scale);
#include "complexmat.cuh"
#else
#include "complexmat.hpp"
+#ifndef CUFFTW
// For compatibility between the cuFFT version and the FFTW/OpenCV FFT versions.
typedef int* cudaStream_t;
+#else
+ #include "cuda_runtime.h"
+#endif
#endif
struct Scale_vars
ComplexMat *yf = nullptr,bool zero_index = false)
{
uint alloc_size;
- std::cout << __PRETTY_FUNCTION__ << std::endl;
#ifdef CUFFT
if (zero_index) {
cudaSetDeviceFlags(cudaDeviceMapHost);
CudaSafeCall(cudaStreamCreate(&this->stream));
#endif
+ this->patch_feats.reserve(uint(num_of_feats));
+
alloc_size = uint(windows_size[0]/cell_size*windows_size[1]/cell_size*num_of_scales)*sizeof(cufftReal);
CudaSafeCall(cudaHostAlloc(reinterpret_cast<void**>(&this->data_i_1ch), alloc_size, cudaHostAllocMapped));
CudaSafeCall(cudaHostGetDevicePointer(reinterpret_cast<void**>(&this->data_i_1ch_d), reinterpret_cast<void*>(this->data_i_1ch), 0));
this->response = cv::Mat(windows_size[1]/cell_size, windows_size[0]/cell_size, CV_32FC(num_of_scales), this->data_i_1ch);
this->zf.create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_feats, num_of_scales, this->stream);
- std::cout << this->zf.stream << std::endl;
- std::cout << this->zf.n_scales << std::endl;
this->kzf.create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_scales, this->stream);
- std::cout << this->kzf.stream << std::endl;
- std::cout << this->kzf.n_scales << std::endl;
this->kf.create(windows_size[1]/cell_size, (windows_size[0]/cell_size)/2+1, num_of_scales, this->stream);
- std::cout << this->kf.stream << std::endl;
- std::cout << this->kf.n_scales << std::endl << std::endl;
alloc_size = uint(num_of_scales);
~Scale_vars() {
#ifdef CUFFT
- std::cout << __PRETTY_FUNCTION__ << std::endl;
CudaSafeCall(cudaFreeHost(this->xf_sqr_norm));
CudaSafeCall(cudaFreeHost(this->yf_sqr_norm));
CudaSafeCall(cudaFreeHost(this->data_i_1ch));