From: Shanigen Date: Fri, 15 Dec 2017 13:54:40 +0000 (+0100) Subject: Updated cmake. Now you can enable visualization of results, opencv cufft implementatio... X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/commitdiff_plain/03184acb7170b5fa34466b004a43aff7a4ca0988 Updated cmake. Now you can enable visualization of results, opencv cufft implementation (not complete) and opencv fft cpu async or serial version. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 5df7e74..ed4d50a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,23 @@ cmake_minimum_required(VERSION 2.8) project(kcf_tracker) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic -O3 -Wno-long-long -fno-omit-frame-pointer") -#set(OpenCV_DIR /home/nvidia/Libraries/opencv-3.2-release/share/OpenCV) + +option(OPENCV_CUFFT "If OFF CPU implementation using OpenCV implementation of fft will be used. If ON Nvidia CUFFT implemented in OpenCV will be used. Together with Hostmem from OpenCV." OFF) +option(VISULIZE_RESULT "Check if you want to visulize the result." OFF) +option(ASYNC "Default setting is on. Works only if OPENCV_CUFFT is not ON. Will enable C++ async directive." 
ON) +if(VISULIZE_RESULT) + add_definitions(-DVISULIZE_RESULT) + MESSAGE(STATUS "Visulize result") +endif() +if(OPENCV_CUFFT) + add_definitions(-DOPENCV_CUFFT) + MESSAGE(STATUS "OPENCV+CUFFT version") +elseif(ASYNC) + add_definitions(-DASYNC) + MESSAGE(STATUS "CPU+ASYNC version") +else() + MESSAGE(STATUS "CPU+SERIAL version") +endif() #OPENCV_CUFFT FIND_PACKAGE( OpenCV REQUIRED ) link_directories ( ${OpenCV_LIB_DIR} ) @@ -38,7 +54,7 @@ list( APPEND CUDA_NVCC_FLAGS "-O3 --gpu-architecture compute_62 -std=c++11") cuda_add_executable( kcf_vot main_vot.cpp vot.hpp ) -target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf ${CUFFT_LIB} ${CUDA_LIB}) +target_link_libraries(kcf_vot ${CUDA_LIBRARIES} ${OpenCV_LIBS} kcf) #target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf) #ENDIF() diff --git a/main_vot.cpp b/main_vot.cpp index 6fa8277..5d52eba 100644 --- a/main_vot.cpp +++ b/main_vot.cpp @@ -31,10 +31,11 @@ int main() bb = tracker.getBBox(); vot_io.outputBoundingBox(cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h)); - -// cv::rectangle(image, cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h), CV_RGB(0,255,0), 2); -// cv::imshow("output", image); -// cv::waitKey(); +#ifdef VISULIZE_RESULT + cv::rectangle(image, cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h), CV_RGB(0,255,0), 2); + cv::imshow("output", image); + cv::waitKey(); +#endif //VISULIZE // std::stringstream s; // std::string ss; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bbc8895..a37fa8c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,5 +8,5 @@ set(KCF_LIB_SRC kcf.cpp kcf.h complexmat.hpp) include_directories(${CUDA_INCLUDE_DIRS}) add_library(kcf STATIC ${KCF_LIB_SRC}) -target_link_libraries(kcf ${OpenCV_LIBS} fhog cndata) +target_link_libraries(kcf ${CUDA_LIBRARIES} ${OpenCV_LIBS} fhog cndata) set_target_properties(kcf PROPERTIES VERSION 1.0.0 SOVERSION 1) \ No newline at end of file diff --git a/src/kcf.cpp b/src/kcf.cpp index 6eb69b9..747a089 100644 --- a/src/kcf.cpp +++ 
b/src/kcf.cpp @@ -87,7 +87,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox) p_cos_window = cosine_window_function(p_yf.cols, p_yf.rows); //obtain a sub-window for training initial model - std::vector path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]); + std::vector path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]); p_model_xf = fft2(path_feat, p_cos_window); if (m_use_linearkernel) { @@ -197,7 +197,7 @@ void KCF_Tracker::track(cv::Mat &img) } else { for (size_t i = 0; i < p_scales.size(); ++i) { patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]); - ComplexMat zf = fft2(patch_feat, p_cos_window); + ComplexMat zf = fft2(patch_feat, p_cos_window); cv::Mat response; if (m_use_linearkernel) response = ifft2((p_model_alphaf * zf).sum_over_channels()); @@ -426,13 +426,12 @@ cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot) ComplexMat KCF_Tracker::fft2(const cv::Mat &input) { - cv::Mat flip_h,imag_h,complex_result_h; +#ifdef OPENCV_CUFFT + cv::Mat flip_h,imag_h,complex_result; - cv::cuda::HostMem hostmem_input(input.size(), input.type(), cv::cuda::HostMem::SHARED); + cv::cuda::HostMem hostmem_input(input, cv::cuda::HostMem::SHARED); cv::cuda::HostMem hostmem_real(cv::Size(input.cols,input.rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED); - input.copyTo(hostmem_input); - cv::cuda::dft(hostmem_input,hostmem_real,hostmem_input.size(),0,stream); stream.waitForCompletion(); @@ -444,8 +443,8 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input) std::vector matarray = {real_h,imag_h}; - cv::hconcat(matarray,complex_result_h); - + cv::hconcat(matarray,complex_result); + // //extraxt x and y channels // cv::Mat xy[2]; //X,Y // cv::split(complex_result_h, xy); @@ -472,25 +471,44 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input) // 
cv::imshow("DFT", bgr); // // cv::waitKey(0); +#else + cv::Mat complex_result; + cv::dft(input, complex_result, cv::DFT_COMPLEX_OUTPUT); +#endif //OPENCV_CUFFT - return ComplexMat(complex_result_h); + return ComplexMat(complex_result); } ComplexMat KCF_Tracker::fft2(const std::vector &input, const cv::Mat &cos_window) { int n_channels = input.size(); + cv::Mat complex_result; +#ifdef OPENCV_CUFFT + cv::Mat flip_h,imag_h; + cv::cuda::GpuMat src_gpu; + cv::cuda::HostMem hostmem_real(cv::Size(input[0].cols,input[0].rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED); +#endif //OPENCV_CUFFT ComplexMat result(input[0].rows, input[0].cols, n_channels); for (int i = 0; i < n_channels; ++i){ - cv::Mat complex_result; -// cv::Mat padded; //expand input image to optimal size -// int m = cv::getOptimalDFTSize( input[0].rows ); -// int n = cv::getOptimalDFTSize( input[0].cols ); // on the border add zero pixels - -// copyMakeBorder(input[i].mul(cos_window), padded, 0, m - input[0].rows, 0, n - input[0].cols, cv::BORDER_CONSTANT, cv::Scalar::all(0)); -// cv::dft(padded, complex_result, cv::DFT_COMPLEX_OUTPUT); -// result.set_channel(i, complex_result(cv::Range(0, input[0].rows), cv::Range(0, input[0].cols))); - - cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT); +#ifdef OPENCV_CUFFT + cv::cuda::HostMem hostmem_input(input[i], cv::cuda::HostMem::SHARED); + cv::cuda::multiply(hostmem_input,p_cos_window_d,src_gpu); + cv::cuda::dft(src_gpu,hostmem_real,src_gpu.size(),0,stream); + stream.waitForCompletion(); + + cv::Mat real_h = hostmem_real.createMatHeader(); + + //create reversed copy of result and merge them + cv::flip(hostmem_real,flip_h,1); + flip_h(cv::Range(0, flip_h.rows), cv::Range(1, flip_h.cols)).copyTo(imag_h); + + std::vector matarray = {real_h,imag_h}; + + cv::hconcat(matarray,complex_result); +#else + cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT); +#endif //OPENCV_CUFFT + result.set_channel(i, complex_result); } return 
result; @@ -532,8 +550,10 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2) for (int i = 0; i < dim2; ++i) m2.at(i) = 0.5*(1. - std::cos(2. * CV_PI * static_cast(i) * N_inv)); cv::Mat ret = m2*m1; - cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_gpu); - p_cos_window_gpu.upload(ret); +#ifdef OPENCV_CUFFT + cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_d); + p_cos_window_d.upload(ret); +#endif return ret; } diff --git a/src/kcf.h b/src/kcf.h index 6c733b4..8f935e4 100644 --- a/src/kcf.h +++ b/src/kcf.h @@ -31,11 +31,20 @@ struct BBox_c class KCF_Tracker { public: - bool m_use_scale {true};//true - bool m_use_color {true};//true +#ifdef OPENCV_CUFFT + bool m_use_scale {false}; + bool m_use_color {false}; +#else //OPENCV_CUFFT + bool m_use_scale {true}; + bool m_use_color {true}; +#endif //OPENCV_CUFFT +#ifdef ASYNC + bool m_use_multithreading {true}; +#else + bool m_use_multithreading {false}; +#endif //ASYNC bool m_use_subpixel_localization {true}; bool m_use_subgrid_scale {true}; - bool m_use_multithreading {true}; bool m_use_cnfeat {true}; bool m_use_linearkernel {false}; @@ -66,7 +75,6 @@ private: bool p_resize_image = false; bool first = true; - cv::cuda::Stream stream; double p_padding = 1.5; double p_output_sigma_factor = 0.1; @@ -82,8 +90,12 @@ private: double p_current_scale = 1.; double p_min_max_scale[2]; std::vector p_scales; - cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_gpu; - + + #ifdef OPENCV_CUFFT + cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_d; + cv::cuda::Stream stream; + #endif //OPENCV_CUFFT + //model ComplexMat p_yf; ComplexMat p_model_alphaf; @@ -99,6 +111,7 @@ private: cv::Mat cosine_window_function(int dim1, int dim2); ComplexMat fft2(const cv::Mat & input); ComplexMat fft2(const std::vector & input, const cv::Mat & cos_window); + cv::Mat ifft2(const ComplexMat & inputf); std::vector get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int 
size_x, int size_y, double scale = 1.); cv::Point2f sub_pixel_peak(cv::Point & max_loc, cv::Mat & response); diff --git a/src/piotr_fhog/sse.hpp b/src/piotr_fhog/sse.hpp index 1c7c0ee..fc08b1a 100644 --- a/src/piotr_fhog/sse.hpp +++ b/src/piotr_fhog/sse.hpp @@ -6,10 +6,6 @@ #ifndef _SSE_HPP_ #define _SSE_HPP_ -#ifndef __ARM_NEON -#define __ARM_NEON -#endif - #ifdef __ARM_NEON #include "SSE2NEON.h" #else