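Added async support to the FFTW code paths (plan creation and destruction in kcf.cpp are now guarded by mutexes when both FFTW and ASYNC are defined) and deleted the obsolete, commented-out CUDA extraction code in fhog.hpp.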

author Shanigen <vkaraf@gmail.com>

Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)

committer Shanigen <vkaraf@gmail.com>

Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)
author Shanigen <vkaraf@gmail.com>
Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)
committer Shanigen <vkaraf@gmail.com>
Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)
diff --git a/src/kcf.cpp b/src/kcf.cpp

index 03555abbf428cc233a7875bc8dabc575037dd221..b5ee5cde22cf631dc07df980f778ba4279d07a3d 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -488,10 +488,16 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
      float* outdata = new float[2*width * height];
  
  //     data_in =  fftwf_alloc_real(width * height);
-    #pragma omp critical
+#pragma omp critical
      {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_i(fftw_init);
+#endif
      fft = fftwf_alloc_complex((width/2+1) * height);
      plan_f=fftwf_plan_dft_r2c_2d( height , width , (float*)input.data , fft ,  FFTW_ESTIMATE );
+#if defined(FFTW) && defined(ASYNC)
+    lock_i.unlock();
+#endif
      }
      // Prepare input data
  //     for(int i = 0,k=0; i < height; ++i) {
@@ -530,8 +536,14 @@ ComplexMat KCF_Tracker::fft2(const cv::Mat &input)
      // Destroy FFTW plan and variables
  #pragma omp critical
      {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_d(fftw_destroy);
+#endif
      fftwf_destroy_plan(plan_f);
      fftwf_free(fft); /*fftwf_free(data_in);*/
+#if defined(FFTW) && defined(ASYNC)
+      lock_d.unlock();
+#endif
      }
  #endif
  #if !defined OPENCV_CUFFT || !defined FFTW
@@ -594,10 +606,16 @@ ComplexMat KCF_Tracker::fft2(const std::vector<cv::Mat> &input, const cv::Mat &c
      float* outdata = new float[2*width * height];
      cv::Mat in_img  = cv::Mat::zeros(height, width, CV_32FC1);
  //     data_in =  fftwf_alloc_real(width * height);
-    #pragma omp critical 
+#pragma omp critical 
      {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_i(fftw_init);
+#endif
      fft = fftwf_alloc_complex((width/2+1) * height);
      plan_f=fftwf_plan_dft_r2c_2d( height , width , (float*) in_img.data , fft ,  FFTW_ESTIMATE );
+#if defined(FFTW) && defined(ASYNC)
+      lock_i.unlock();
+#endif
      }
  #endif
  
@@ -662,10 +680,16 @@ ComplexMat KCF_Tracker::fft2(const std::vector<cv::Mat> &input, const cv::Mat &c
      }
  #ifdef FFTW
      // Destroy FFT plans and variables
-    #pragma omp critical
+#pragma omp critical
  {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_d(fftw_destroy);
+#endif
      fftwf_destroy_plan(plan_f);
      fftwf_free(fft); /*fftwf_free(data_in);*/
+#if defined(FFTW) && defined(ASYNC)
+      lock_d.unlock();
+#endif
  }
  #endif //FFTW
      return result;
@@ -692,10 +716,15 @@ cv::Mat KCF_Tracker::ifft2(const ComplexMat &inputf)
          float* outdata = new float[width * height];
  #pragma omp critical
          {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_i(fftw_init);
+#endif
          data_in =  fftwf_alloc_complex(2*(width/2+1) * height);
          ifft = fftwf_alloc_real(width * height);
-
          plan_if=fftwf_plan_dft_c2r_2d( height , width , data_in , ifft ,  FFTW_MEASURE );
+#if defined(FFTW) && defined(ASYNC)
+      lock_i.unlock();
+#endif
          }
          //Prepare input data
          for(int x = 0,k=0; x< height; ++x) {
@@ -726,8 +755,14 @@ cv::Mat KCF_Tracker::ifft2(const ComplexMat &inputf)
          // Destroy FFTW plans and variables
  #pragma omp critical
          {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_d(fftw_destroy);
+#endif
          fftwf_destroy_plan(plan_if);
          fftwf_free(ifft); fftwf_free(data_in);
+#if defined(FFTW) && defined(ASYNC)
+      lock_d.unlock();
+#endif
          }
  #else
          cv::dft(inputf.to_cv_mat(),real_result, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
@@ -743,10 +778,15 @@ cv::Mat KCF_Tracker::ifft2(const ComplexMat &inputf)
          float* outdata = new float[width * height];
  #pragma omp critical
          {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_i(fftw_init);
+#endif
          data_in =  fftwf_alloc_complex(2*(width/2+1) * height);
          ifft = fftwf_alloc_real(width * height);
-            
          plan_if=fftwf_plan_dft_c2r_2d( height , width , data_in , ifft ,  FFTW_MEASURE );
+#if defined(FFTW) && defined(ASYNC)
+      lock_i.unlock();
+#endif
          }
  #endif //FFTW
          for (int i = 0; i < inputf.n_channels; ++i) {
@@ -787,8 +827,14 @@ cv::Mat KCF_Tracker::ifft2(const ComplexMat &inputf)
          // Destroy FFTW plans and variables
  #pragma omp critical
  {
+#if defined(FFTW) && defined(ASYNC)
+      std::unique_lock<std::mutex> lock_d(fftw_destroy);
+#endif
          fftwf_destroy_plan(plan_if);
          fftwf_free(ifft); fftwf_free(data_in);
+#if defined(FFTW) && defined(ASYNC)
+      lock_d.unlock();
+#endif
  }
  #endif //FFTW
          cv::merge(ifft_mats, real_result);
diff --git a/src/kcf.h b/src/kcf.h

index 59873fd534071b42fbd422e88fde99fc79476752..8c27e39973bae6fe644a6e495911c2a8a7c053e2 100644 (file)
--- a/src/kcf.h
+++ b/src/kcf.h
@@ -7,6 +7,10 @@
  #include "complexmat.hpp"
  #include "cnfeat.hpp"
  
+#if defined(FFTW) && defined(ASYNC)
+#include <mutex>
+#endif
+
  struct BBox_c
  {
      double cx, cy, w, h;
@@ -100,6 +104,10 @@ private:
      ComplexMat p_model_alphaf_num;
      ComplexMat p_model_alphaf_den;
      ComplexMat p_model_xf;
+    
+#if defined(FFTW) && defined(ASYNC)
+    std::mutex fftw_init, fftw_destroy;
+#endif
  
      //helping functions
      cv::Mat get_subwindow(const cv::Mat & input, int cx, int cy, int size_x, int size_y);
diff --git a/src/piotr_fhog/fhog.hpp b/src/piotr_fhog/fhog.hpp

index 4e8e0504b4b347f9c4ad9a7b7611ada945035df5..d3b9d161b57001eb569e75afd07d3145624c081d 100644 (file)
--- a/src/piotr_fhog/fhog.hpp
+++ b/src/piotr_fhog/fhog.hpp
@@ -96,89 +96,6 @@ public:
          return res;
      }
  
-//////////////////////////////////////////////////
-//CUDA
-//     static std::vector<cv::cuda::GpuMat> extract_cuda(const cv::cuda::GpuMat & img, int use_hog = 2, int bin_size = 4, int n_orients = 9, int soft_bin = -1, float clip = 0.2)
-//     {
-//         // d image dimension -> gray image d = 1
-//         // h, w -> height, width of image
-//         // full -> ??
-//         // I -> input image, M, O -> mag, orientation OUTPUT
-//         int h = img.rows, w = img.cols, d = 1;
-//         bool full = true;
-//         if (h < 2 || w < 2) {
-//             std::cerr << "I must be at least 2x2." << std::endl;
-//             return std::vector<cv::cuda::GpuMat>();
-//         }
-//
-// //        //image rows-by-rows
-//        float * I = new float[h*w];
-//        for (int y = 0; y < h; ++y) {
-//            const float * row_ptr = img.ptr<float>(y);
-//            for (int x = 0; x < w; ++x) {
-//                I[y*w + x] = row_ptr[x];
-//            }
-//        }
-//
-//
-//
-//
-//         //image cols-by-cols
-// //         float * I = new float[h*w];
-// //         for (int x = 0; x < w; ++x) {
-// //             for (int y = 0; y < h; ++y) {
-// //                 I[x*h + y] = img.at<float>(y, x)/255.f;
-// //             }
-// //         }
-//
-//         float *M = new float[h*w], *O = new float[h*w];
-//         gradMag(I, M, O, h, w, d, full);
-//
-//         int n_chns = (use_hog == 0) ? n_orients : (use_hog==1 ? n_orients*4 : n_orients*3+5);
-//         int hb = h/bin_size, wb = w/bin_size;
-//
-//         float *H = new float[hb*wb*n_chns];
-//         memset(H, 0, hb*wb*n_chns*sizeof(float));
-//
-//         if (use_hog == 0) {
-//             full = false;   //by default
-//             gradHist( M, O, H, h, w, bin_size, n_orients, soft_bin, full );
-//         } else if (use_hog == 1) {
-//             full = false;   //by default
-//             hog( M, O, H, h, w, bin_size, n_orients, soft_bin, full, clip );
-//         } else {
-//             fhog( M, O, H, h, w, bin_size, n_orients, soft_bin, clip );
-//         }
-//
-//         //convert, assuming row-by-row-by-channel storage
-//         std::vector<cv::cuda::GpuMat> res;
-//         int n_res_channels = (use_hog == 2) ? n_chns-1 : n_chns;    //last channel all zeros for fhog
-//         res.reserve(n_res_channels);
-//         for (int i = 0; i < n_res_channels; ++i) {
-//             //output rows-by-rows
-//            cv::cuda::GpuMat desc(hb, wb, CV_32F, (H+hb*wb*i));
-//
-//             //output cols-by-cols
-// //             cv::cuda::GpuMat desc(hb, wb, CV_32F);
-// //             for (int x = 0; x < wb; ++x) {
-// //                 for (int y = 0; y < hb; ++y) {
-// //                     desc.at<float>(y,x) = H[i*hb*wb + x*hb + y];
-// //                 }
-// //             }
-//
-//             res.push_back(desc.clone());
-//         }
-//
-//         //clean
-//         delete [] I;
-//         delete [] M;
-//         delete [] O;
-//         delete [] H;
-//
-//         return res;
-//     }
-/////////////////////////////////
-
  };
  
  #endif //FHOG_HEADER_7813784354687
author	Shanigen <vkaraf@gmail.com>
	Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)
committer	Shanigen <vkaraf@gmail.com>
	Fri, 2 Mar 2018 22:37:26 +0000 (23:37 +0100)
src/kcf.cpp		patch | blob | history
src/kcf.h		patch | blob | history
src/piotr_fhog/fhog.hpp		patch | blob | history