# Build configuration for the kcf_tracker project: global compiler flags,
# feature switches exposed as preprocessor definitions, and the kcf_vot
# executable.
project(kcf_tracker)
# Append C++11, warning, -O3 and frame-pointer-preserving flags to whatever
# CXX flags are already set.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic -O3 -Wno-long-long -fno-omit-frame-pointer")
-#set(OpenCV_DIR /home/nvidia/Libraries/opencv-3.2-release/share/OpenCV)
+
# User-selectable boolean build switches.
#   OPENCV_CUFFT    - default OFF
#   VISULIZE_RESULT - default OFF
#   ASYNC           - default ON (only takes effect when OPENCV_CUFFT is OFF)
+option(OPENCV_CUFFT "If OFF CPU implementation using OpenCV implementation of fft will be used. If ON Nvidia CUFFT implemented in OpenCV will be used. Together with Hostmem from OpenCV." OFF)
+option(VISULIZE_RESULT "Check if you want to visulize the result." OFF)
+option(ASYNC "Default setting is on. Works only if OPENCV_CUFFT is not ON. Will enable C++ async directive." ON)
# NOTE: the option is spelled VISULIZE_RESULT and the same spelling is passed
# as -DVISULIZE_RESULT, so sources must test that exact macro name.
+if(VISULIZE_RESULT)
+ add_definitions(-DVISULIZE_RESULT)
+ MESSAGE(STATUS "Visulize result")
+endif()
# Backend selection. OPENCV_CUFFT takes precedence: -DASYNC is only added when
# OPENCV_CUFFT is OFF and ASYNC is ON; with both OFF neither macro is defined.
+if(OPENCV_CUFFT)
+ add_definitions(-DOPENCV_CUFFT)
+ MESSAGE(STATUS "OPENCV+CUFFT version")
+elseif(ASYNC)
+ add_definitions(-DASYNC)
+ MESSAGE(STATUS "CPU+ASYNC version")
+else()
+ MESSAGE(STATUS "CPU+SERIAL version")
+endif() #OPENCV_CUFFT
# OpenCV is mandatory (REQUIRED): configuration stops if it cannot be found.
FIND_PACKAGE( OpenCV REQUIRED )
link_directories ( ${OpenCV_LIB_DIR} )
# NOTE(review): cuda_add_executable is provided by the FindCUDA module and no
# find_package(CUDA) is visible in this excerpt - confirm it is called
# elsewhere. The call is made regardless of the OPENCV_CUFFT setting.
cuda_add_executable( kcf_vot main_vot.cpp vot.hpp )
# Link kcf_vot against the CUDA libraries, OpenCV and the kcf library.
-target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf ${CUFFT_LIB} ${CUDA_LIB})
+target_link_libraries(kcf_vot ${CUDA_LIBRARIES} ${OpenCV_LIBS} kcf)
#target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf)
#ENDIF()
p_cos_window = cosine_window_function(p_yf.cols, p_yf.rows);
//obtain a sub-window for training initial model
- std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
+ std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
p_model_xf = fft2(path_feat, p_cos_window);
if (m_use_linearkernel) {
} else {
for (size_t i = 0; i < p_scales.size(); ++i) {
patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]);
- ComplexMat zf = fft2(patch_feat, p_cos_window);
+ ComplexMat zf = fft2(patch_feat, p_cos_window);
cv::Mat response;
if (m_use_linearkernel)
response = ifft2((p_model_alphaf * zf).sum_over_channels());
// Forward DFT of a single matrix, returned wrapped in a ComplexMat.
//
// With OPENCV_CUFFT defined the transform is issued through cv::cuda::dft on
// the member `stream`, using cv::cuda::HostMem buffers allocated with the
// SHARED flag. Otherwise cv::dft is called with cv::DFT_COMPLEX_OUTPUT.
//
// NOTE(review): `real_h`, and the code that fills `flip_h` / `imag_h`, are not
// defined in the lines visible here - presumably they sit in lines elided from
// this excerpt; confirm before relying on the CUFFT path.
ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
{
- cv::Mat flip_h,imag_h,complex_result_h;
+#ifdef OPENCV_CUFFT
+ cv::Mat flip_h,imag_h,complex_result;
    // Input and output buffers for the GPU transform.
    // NOTE(review): the output is sized (width = cols, height = rows/2+1);
    // check this against the packed layout documented for cv::cuda::dft.
- cv::cuda::HostMem hostmem_input(input.size(), input.type(), cv::cuda::HostMem::SHARED);
+ cv::cuda::HostMem hostmem_input(input, cv::cuda::HostMem::SHARED);
cv::cuda::HostMem hostmem_real(cv::Size(input.cols,input.rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED);
- input.copyTo(hostmem_input);
-
    // Launch the transform on `stream` and block until it has completed.
cv::cuda::dft(hostmem_input,hostmem_real,hostmem_input.size(),0,stream);
stream.waitForCompletion();
    // Concatenate real_h and imag_h horizontally into the result matrix.
std::vector<cv::Mat> matarray = {real_h,imag_h};
- cv::hconcat(matarray,complex_result_h);
-
+ cv::hconcat(matarray,complex_result);
+
// //extract x and y channels
// cv::Mat xy[2]; //X,Y
// cv::split(complex_result_h, xy);
// cv::imshow("DFT", bgr);
//
// cv::waitKey(0);
+#else
    // CPU path: OpenCV's dft, asked for a full complex output array.
+ cv::Mat complex_result;
+ cv::dft(input, complex_result, cv::DFT_COMPLEX_OUTPUT);
+#endif //OPENCV_CUFFT
- return ComplexMat(complex_result_h);
+ return ComplexMat(complex_result);
}
// Forward DFT of every channel in `input`, each multiplied by a cosine window
// first; channel i of the returned ComplexMat receives the transform of
// input[i].
//
// Precondition: `input` must be non-empty - input[0] is read unconditionally
// to size the result (and the GPU output buffer).
//
// NOTE(review): in the OPENCV_CUFFT path the `cos_window` parameter is not
// used; the multiplication uses the member p_cos_window_d instead. Confirm
// callers always pass the window that was uploaded to p_cos_window_d.
ComplexMat KCF_Tracker::fft2(const std::vector<cv::Mat> &input, const cv::Mat &cos_window)
{
int n_channels = input.size();
    // Scratch output reused across all channels.
+ cv::Mat complex_result;
+#ifdef OPENCV_CUFFT
+ cv::Mat flip_h,imag_h;
    // NOTE(review): this local has the same name as the src_gpu member declared
    // in the class and hides it inside this function - confirm that is intended.
+ cv::cuda::GpuMat src_gpu;
    // NOTE(review): sized (width = cols, height = rows/2+1); check against the
    // packed output layout documented for cv::cuda::dft, and against the
    // rows x cols size that `result` is created with below.
+ cv::cuda::HostMem hostmem_real(cv::Size(input[0].cols,input[0].rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED);
+#endif //OPENCV_CUFFT
ComplexMat result(input[0].rows, input[0].cols, n_channels);
for (int i = 0; i < n_channels; ++i){
- cv::Mat complex_result;
-// cv::Mat padded; //expand input image to optimal size
-// int m = cv::getOptimalDFTSize( input[0].rows );
-// int n = cv::getOptimalDFTSize( input[0].cols ); // on the border add zero pixels
-
-// copyMakeBorder(input[i].mul(cos_window), padded, 0, m - input[0].rows, 0, n - input[0].cols, cv::BORDER_CONSTANT, cv::Scalar::all(0));
-// cv::dft(padded, complex_result, cv::DFT_COMPLEX_OUTPUT);
-// result.set_channel(i, complex_result(cv::Range(0, input[0].rows), cv::Range(0, input[0].cols)));
-
- cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT);
+#ifdef OPENCV_CUFFT
        // A fresh SHARED host buffer is built from the channel on every iteration.
+ cv::cuda::HostMem hostmem_input(input[i], cv::cuda::HostMem::SHARED);
        // Apply the window, run the transform on `stream`, and wait for it.
+ cv::cuda::multiply(hostmem_input,p_cos_window_d,src_gpu);
+ cv::cuda::dft(src_gpu,hostmem_real,src_gpu.size(),0,stream);
+ stream.waitForCompletion();
+
        // Host-side Mat header over the transform output.
+ cv::Mat real_h = hostmem_real.createMatHeader();
+
+ //create reversed copy of result and merge them
        // Flip horizontally (flipCode 1), drop column 0 of the flipped copy,
        // then append it to the right of the original.
+ cv::flip(hostmem_real,flip_h,1);
+ flip_h(cv::Range(0, flip_h.rows), cv::Range(1, flip_h.cols)).copyTo(imag_h);
+
+ std::vector<cv::Mat> matarray = {real_h,imag_h};
+
+ cv::hconcat(matarray,complex_result);
+#else
        // CPU path: window the channel and compute a full complex DFT.
+ cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT);
+#endif //OPENCV_CUFFT
+
result.set_channel(i, complex_result);
}
return result;
for (int i = 0; i < dim2; ++i)
m2.at<float>(i) = 0.5*(1. - std::cos(2. * CV_PI * static_cast<double>(i) * N_inv));
cv::Mat ret = m2*m1;
- cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_gpu);
- p_cos_window_gpu.upload(ret);
+#ifdef OPENCV_CUFFT
+ cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_d);
+ p_cos_window_d.upload(ret);
+#endif
return ret;
}
class KCF_Tracker
{
public:
- bool m_use_scale {true};//true
- bool m_use_color {true};//true
+#ifdef OPENCV_CUFFT
+ bool m_use_scale {false};
+ bool m_use_color {false};
+#else //OPENCV_CUFFT
+ bool m_use_scale {true};
+ bool m_use_color {true};
+#endif //OPENCV_CUFFT
+#ifdef ASYNC
+ bool m_use_multithreading {true};
+#else
+ bool m_use_multithreading {false};
+#endif //ASYNC
bool m_use_subpixel_localization {true};
bool m_use_subgrid_scale {true};
- bool m_use_multithreading {true};
bool m_use_cnfeat {true};
bool m_use_linearkernel {false};
bool p_resize_image = false;
bool first = true;
- cv::cuda::Stream stream;
double p_padding = 1.5;
double p_output_sigma_factor = 0.1;
double p_current_scale = 1.;
double p_min_max_scale[2];
std::vector<double> p_scales;
- cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_gpu;
-
+
+ #ifdef OPENCV_CUFFT
+ cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_d;
+ cv::cuda::Stream stream;
+ #endif //OPENCV_CUFFT
+
//model
ComplexMat p_yf;
ComplexMat p_model_alphaf;
cv::Mat cosine_window_function(int dim1, int dim2);
ComplexMat fft2(const cv::Mat & input);
ComplexMat fft2(const std::vector<cv::Mat> & input, const cv::Mat & cos_window);
+
cv::Mat ifft2(const ComplexMat & inputf);
std::vector<cv::Mat> get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.);
cv::Point2f sub_pixel_peak(cv::Point & max_loc, cv::Mat & response);