rtime.felk.cvut.cz Git - hercules2020/kcf.git/commitdiff
Updated cmake. Now you can enable visualization of results, opencv cufft implementatio...
author Shanigen <karafvit@tx2-stud.rtime.felk.cvut.cz>
Fri, 15 Dec 2017 13:54:40 +0000 (14:54 +0100)
committer Shanigen <karafvit@tx2-stud.rtime.felk.cvut.cz>
Fri, 15 Dec 2017 13:54:40 +0000 (14:54 +0100)
CMakeLists.txt
main_vot.cpp
src/CMakeLists.txt
src/kcf.cpp
src/kcf.h
src/piotr_fhog/sse.hpp

index 5df7e740acaa37c5e911cff8c5680453f6578b0f..ed4d50ad92de3820be3f108dcaf41f0dcdb90be2 100644 (file)
@@ -3,7 +3,23 @@ cmake_minimum_required(VERSION 2.8)
 project(kcf_tracker)
 
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic -O3 -Wno-long-long -fno-omit-frame-pointer")
-#set(OpenCV_DIR /home/nvidia/Libraries/opencv-3.2-release/share/OpenCV)
+
+option(OPENCV_CUFFT "If OFF CPU implementation using OpenCV implementation of fft will be used. If ON Nvidia CUFFT implemented in OpenCV will be used. Together with Hostmem from OpenCV." OFF)
+option(VISULIZE_RESULT "Check if you want to visulize the result." OFF)
+option(ASYNC "Default setting is on. Works only if OPENCV_CUFFT is not ON. Will enable C++ async directive." ON)
+if(VISULIZE_RESULT)
+  add_definitions(-DVISULIZE_RESULT)
+  MESSAGE(STATUS "Visulize result")
+endif()
+if(OPENCV_CUFFT)
+  add_definitions(-DOPENCV_CUFFT)
+  MESSAGE(STATUS "OPENCV+CUFFT version")
+elseif(ASYNC)
+  add_definitions(-DASYNC)
+  MESSAGE(STATUS "CPU+ASYNC version")
+else()
+  MESSAGE(STATUS "CPU+SERIAL version")
+endif() #OPENCV_CUFFT
 
 FIND_PACKAGE( OpenCV REQUIRED )
 link_directories ( ${OpenCV_LIB_DIR} )
@@ -38,7 +54,7 @@ list( APPEND CUDA_NVCC_FLAGS "-O3 --gpu-architecture compute_62 -std=c++11")
 
 cuda_add_executable( kcf_vot main_vot.cpp vot.hpp )
 
-target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf ${CUFFT_LIB} ${CUDA_LIB})
+target_link_libraries(kcf_vot ${CUDA_LIBRARIES} ${OpenCV_LIBS}  kcf)
 #target_link_libraries(kcf_vot ${OpenCV_LIBS} kcf)
 #ENDIF()
 
index 6fa827752b077d45d1abb64e44fcfde61fe1e431..5d52ebaedf818545a00c6e9df9259aad0c6952ce 100644 (file)
@@ -31,10 +31,11 @@ int main()
 
         bb = tracker.getBBox();
         vot_io.outputBoundingBox(cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h));
-
-//        cv::rectangle(image, cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h), CV_RGB(0,255,0), 2);
-//        cv::imshow("output", image);
-//        cv::waitKey();
+#ifdef VISULIZE_RESULT
+       cv::rectangle(image, cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h), CV_RGB(0,255,0), 2);
+       cv::imshow("output", image);
+       cv::waitKey();
+#endif //VISULIZE
 
 //        std::stringstream s;
 //        std::string ss;
index bbc88952c27da06721548f26dead2165b12d4ba2..a37fa8c1cb4e53dcbb8b03bcab7786460fb4a85a 100644 (file)
@@ -8,5 +8,5 @@ set(KCF_LIB_SRC kcf.cpp kcf.h complexmat.hpp)
 include_directories(${CUDA_INCLUDE_DIRS})
 
 add_library(kcf STATIC ${KCF_LIB_SRC})
-target_link_libraries(kcf ${OpenCV_LIBS} fhog cndata)
+target_link_libraries(kcf ${CUDA_LIBRARIES} ${OpenCV_LIBS} fhog cndata)
 set_target_properties(kcf PROPERTIES VERSION 1.0.0 SOVERSION 1)
\ No newline at end of file
index 6eb69b9cf171609deb1d69d738a8c596d366fc71..747a08953602fac192bad66a0d1ff229583b1b3c 100644 (file)
@@ -87,7 +87,7 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox)
     p_cos_window = cosine_window_function(p_yf.cols, p_yf.rows);
     
     //obtain a sub-window for training initial model
-    std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
+    std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);   
     p_model_xf = fft2(path_feat, p_cos_window);
     
     if (m_use_linearkernel) {
@@ -197,7 +197,7 @@ void KCF_Tracker::track(cv::Mat &img)
     } else {
         for (size_t i = 0; i < p_scales.size(); ++i) {
             patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]);
-            ComplexMat zf = fft2(patch_feat, p_cos_window);
+           ComplexMat zf = fft2(patch_feat, p_cos_window);
             cv::Mat response;
             if (m_use_linearkernel)
                 response = ifft2((p_model_alphaf * zf).sum_over_channels());
@@ -426,13 +426,12 @@ cv::Mat KCF_Tracker::circshift(const cv::Mat &patch, int x_rot, int y_rot)
 
 ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
 {
-    cv::Mat flip_h,imag_h,complex_result_h;
+#ifdef OPENCV_CUFFT
+    cv::Mat flip_h,imag_h,complex_result;
 
-    cv::cuda::HostMem hostmem_input(input.size(), input.type(), cv::cuda::HostMem::SHARED);
+    cv::cuda::HostMem hostmem_input(input, cv::cuda::HostMem::SHARED);
     cv::cuda::HostMem hostmem_real(cv::Size(input.cols,input.rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED);
 
-    input.copyTo(hostmem_input);
-
     cv::cuda::dft(hostmem_input,hostmem_real,hostmem_input.size(),0,stream);
     stream.waitForCompletion();
 
@@ -444,8 +443,8 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
 
     std::vector<cv::Mat> matarray = {real_h,imag_h};
 
-    cv::hconcat(matarray,complex_result_h);
-
+    cv::hconcat(matarray,complex_result);
+    
 //     //extraxt x and y channels
 //     cv::Mat xy[2]; //X,Y
 //     cv::split(complex_result_h, xy);
@@ -472,25 +471,44 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
 //     cv::imshow("DFT", bgr);
 // 
 //     cv::waitKey(0);
+#else
+    cv::Mat complex_result;
+    cv::dft(input, complex_result, cv::DFT_COMPLEX_OUTPUT);
+#endif //OPENCV_CUFFT
     
-    return ComplexMat(complex_result_h);
+    return ComplexMat(complex_result);
 }
 
 ComplexMat KCF_Tracker::fft2(const std::vector<cv::Mat> &input, const cv::Mat &cos_window)
 {
     int n_channels = input.size();
+    cv::Mat complex_result;
+#ifdef OPENCV_CUFFT
+    cv::Mat flip_h,imag_h;
+    cv::cuda::GpuMat src_gpu;
+    cv::cuda::HostMem hostmem_real(cv::Size(input[0].cols,input[0].rows/2+1), CV_32FC2, cv::cuda::HostMem::SHARED);
+#endif //OPENCV_CUFFT
     ComplexMat result(input[0].rows, input[0].cols, n_channels);
     for (int i = 0; i < n_channels; ++i){
-        cv::Mat complex_result;
-//        cv::Mat padded;                            //expand input image to optimal size
-//        int m = cv::getOptimalDFTSize( input[0].rows );
-//        int n = cv::getOptimalDFTSize( input[0].cols ); // on the border add zero pixels
-
-//        copyMakeBorder(input[i].mul(cos_window), padded, 0, m - input[0].rows, 0, n - input[0].cols, cv::BORDER_CONSTANT, cv::Scalar::all(0));
-//        cv::dft(padded, complex_result, cv::DFT_COMPLEX_OUTPUT);
-//        result.set_channel(i, complex_result(cv::Range(0, input[0].rows), cv::Range(0, input[0].cols)));
-
-        cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT);
+#ifdef OPENCV_CUFFT
+       cv::cuda::HostMem hostmem_input(input[i], cv::cuda::HostMem::SHARED);
+       cv::cuda::multiply(hostmem_input,p_cos_window_d,src_gpu);
+       cv::cuda::dft(src_gpu,hostmem_real,src_gpu.size(),0,stream);
+       stream.waitForCompletion();
+
+       cv::Mat real_h = hostmem_real.createMatHeader();
+
+       //create reversed copy of result and merge them
+       cv::flip(hostmem_real,flip_h,1);
+       flip_h(cv::Range(0, flip_h.rows), cv::Range(1, flip_h.cols)).copyTo(imag_h);
+
+       std::vector<cv::Mat> matarray = {real_h,imag_h};
+       
+       cv::hconcat(matarray,complex_result);
+#else
+       cv::dft(input[i].mul(cos_window), complex_result, cv::DFT_COMPLEX_OUTPUT);
+#endif //OPENCV_CUFFT
+       
         result.set_channel(i, complex_result);
     }
     return result;
@@ -532,8 +550,10 @@ cv::Mat KCF_Tracker::cosine_window_function(int dim1, int dim2)
     for (int i = 0; i < dim2; ++i)
         m2.at<float>(i) = 0.5*(1. - std::cos(2. * CV_PI * static_cast<double>(i) * N_inv));
     cv::Mat ret = m2*m1;
-    cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_gpu);
-    p_cos_window_gpu.upload(ret);
+#ifdef OPENCV_CUFFT
+    cv::cuda::createContinuous(cv::Size(ret.cols,ret.rows),CV_32FC1,p_cos_window_d);
+    p_cos_window_d.upload(ret);
+#endif
     return ret;
 }
 
index 6c733b4a93ee9d5c319da73cb4524e75d6a21473..8f935e431671f2acd97beb353025f807e4e65759 100644 (file)
--- a/src/kcf.h
+++ b/src/kcf.h
@@ -31,11 +31,20 @@ struct BBox_c
 class KCF_Tracker
 {
 public:
-    bool m_use_scale {true};//true
-    bool m_use_color {true};//true
+#ifdef OPENCV_CUFFT
+    bool m_use_scale {false};
+    bool m_use_color {false};
+#else //OPENCV_CUFFT
+    bool m_use_scale {true};
+    bool m_use_color {true};
+#endif //OPENCV_CUFFT
+#ifdef ASYNC
+    bool m_use_multithreading {true};
+#else
+    bool m_use_multithreading {false};
+#endif //ASYNC
     bool m_use_subpixel_localization {true};
     bool m_use_subgrid_scale {true};
-    bool m_use_multithreading {true};
     bool m_use_cnfeat {true};
     bool m_use_linearkernel {false};
 
@@ -66,7 +75,6 @@ private:
     bool p_resize_image = false;
     
     bool first = true;
-    cv::cuda::Stream stream;
 
     double p_padding = 1.5;
     double p_output_sigma_factor = 0.1;
@@ -82,8 +90,12 @@ private:
     double p_current_scale = 1.;
     double p_min_max_scale[2];
     std::vector<double> p_scales;
-     cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_gpu;
-
+    
+    #ifdef OPENCV_CUFFT
+    cv::cuda::GpuMat src_gpu,dst_gpu,p_cos_window_d;
+    cv::cuda::Stream stream;
+    #endif //OPENCV_CUFFT
+    
     //model
     ComplexMat p_yf;
     ComplexMat p_model_alphaf;
@@ -99,6 +111,7 @@ private:
     cv::Mat cosine_window_function(int dim1, int dim2);
     ComplexMat fft2(const cv::Mat & input);
     ComplexMat fft2(const std::vector<cv::Mat> & input, const cv::Mat & cos_window);
+
     cv::Mat ifft2(const ComplexMat & inputf);
     std::vector<cv::Mat> get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.);
     cv::Point2f sub_pixel_peak(cv::Point & max_loc, cv::Mat & response);
index 1c7c0ee1d1ef063d2b842c7538d6d672862c0437..fc08b1aa11544cd6a04e97e57c22c65b1ef2a4ec 100644 (file)
@@ -6,10 +6,6 @@
 #ifndef _SSE_HPP_
 #define _SSE_HPP_
 
-#ifndef __ARM_NEON
-#define __ARM_NEON
-#endif
-
 #ifdef __ARM_NEON
 #include "SSE2NEON.h"
 #else