add_definitions(-DFFTW -DCUFFTW)
set(use_cuda ON)
ELSEIF(FFT STREQUAL "cuFFT")
- list(APPEND KCF_LIB_SRC fft_cufft.cpp complexmat.cuh)
+ list(APPEND KCF_LIB_SRC fft_cufft.cpp complexmat.cuh cuda_functions.cuh)
add_definitions(-DCUFFT)
set(use_cuda ON)
iF(CUDA_DEBUG)
IF(FFT STREQUAL "cuFFT")
add_subdirectory(cuda)
cuda_add_library(complexmat complexmat.cu)
+ cuda_add_library(cuda_func cuda_functions.cu)
ENDIF()
ENDIF()
ENDIF() #cuFFTW
IF(FFT STREQUAL "cuFFT")
- target_link_libraries(kcf ${CUDA_cufft_LIBRARY} ${CUDA_LIBRARIES} complexmat)
+ target_link_libraries(kcf ${CUDA_cufft_LIBRARY} ${CUDA_LIBRARIES} complexmat cuda_func)
ENDIF()
IF(PROFILING)
//text output
// Debug stream-print of a GPU-resident ComplexMat. p_data lives in device
// memory here, so it is staged into a host buffer before printing; values are
// stored as interleaved (re, im) float pairs, 2*cols floats per row.
friend std::ostream & operator<<(std::ostream & os, const ComplexMat & mat)
{
+ // Host staging copy of the whole device buffer (freed at the end).
+ float *data_cpu = (float*) malloc(mat.rows*mat.cols*mat.n_channels*sizeof(cufftComplex));
+ CudaSafeCall(cudaMemcpy(data_cpu, mat.p_data, mat.rows*mat.cols*mat.n_channels*sizeof(cufftComplex), cudaMemcpyDeviceToHost));
//for (int i = 0; i < mat.n_channels; ++i){
// NOTE(review): only channel 0 is printed; the all-channels loop above is
// deliberately commented out to keep debug output short.
for (int i = 0; i < 1; ++i){
os << "Channel " << i << std::endl;
for (int j = 0; j < mat.rows; ++j) {
// Print cols-1 complex values followed by the last one without a
// trailing comma; k steps by 2 over the interleaved floats.
for (int k = 0; k < 2*mat.cols-2; k+=2)
- os << "(" << mat.p_data[j*2*mat.cols + k] << "," << mat.p_data[j*2*mat.cols + (k+1)] << ")" << ", ";
- os << "(" << mat.p_data[j*2*mat.cols + 2*mat.cols-2] << "," << mat.p_data[j*2*mat.cols + 2*mat.cols-1] << ")" << std::endl;
+ os << "(" << data_cpu[j*2*mat.cols + k] << "," << data_cpu[j*2*mat.cols + (k+1)] << ")" << ", ";
+ os << "(" << data_cpu[j*2*mat.cols + 2*mat.cols-2] << "," << data_cpu[j*2*mat.cols + 2*mat.cols-1] << ")" << std::endl;
}
}
+ free(data_cpu);
return os;
}
#ifndef CUDA_ERROR_CHECK_H
#define CUDA_ERROR_CHECK_H
+#include <iostream>
+
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
return "<unknown>";
}
-#endif
#define CufftErrorCheck(call) __cufftErrorCheck(call, __FILE__, __LINE__ )
return;
}
+#endif
#endif
--- /dev/null
+#include "cuda_functions.cuh"
+
+// Gaussian-correlation response kernel for the KCF tracker.
+// Launch layout (see cuda_gaussian_correlation): gridDim.x = n_scales,
+// gridDim.y = rows*cols (one block per (scale, pixel) pair),
+// blockDim.x = channels_per_scale/2, with blockDim.x floats of dynamic
+// shared memory. Each block sums the per-channel ifft products for its
+// pixel, then thread 0 writes exp(-(|xf|^2 + |yf|^2 - 2*sum)/(sigma^2*N)).
+// NOTE(review): the paired load below assumes channels_per_scale is even —
+// confirm with callers.
+__global__ void gaussian_correlation_kernel(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm, int rows, int cols, int channels_per_scale, double sigma)
+{
+ extern __shared__ float sdata[];
+ int blockId = blockIdx.y * gridDim.x + blockIdx.x;
+ int threadId = blockId *( blockDim.x+channels_per_scale/2) + threadIdx.x;
+
+ // First reduction step is folded into the load: each thread sums a pair of
+ // channel values, so shared memory only needs channels_per_scale/2 slots.
+ sdata[threadIdx.x] = 0;
+ sdata[threadIdx.x] = data_in[threadId] + data_in[threadId+blockDim.x];
+ __syncthreads();
+
+ // Shared-memory tree reduction. Odd element counts are handled by
+ // rounding s up (s+=1) so the trailing element is not dropped; the
+ // `threadIdx.x+s < old_s` guard prevents reading past the live range.
+ for (unsigned int s= (channels_per_scale/2+1)/2, old_s = channels_per_scale/2;s>0; s>>=1) {
+
+ if(old_s&1) s+=1;
+
+ if (threadIdx.x < s && threadIdx.x+s < old_s) {
+ sdata[threadIdx.x] += sdata[threadIdx.x + s];
+ }
+ old_s = s;
+ __syncthreads();
+ }
+
+ if(threadIdx.x == 0){
+ // Spatial-average of the accumulated cross-correlation for this pixel.
+ float accumulate_res = sdata[0]/(rows*cols);
+
+ // 1 / element count of the half-spectrum (R2C) frequency representation.
+ float numel_xf_inv = 1.f/((cols/2+1) * rows * (channels_per_scale));
+
+ float tmp = (xf_sqr_norm[blockIdx.x] + yf_sqr_norm[0] - 2 * accumulate_res) * numel_xf_inv;
+
+ // Clamp the argument at 0 before exponentiating (expf(0) == 1),
+ // mirroring cv::max(..., 0) in the CPU path.
+ if (tmp > 0) {
+ data_out[blockIdx.x*rows*cols+blockIdx.y] = expf(- 1.f / (sigma * sigma) * tmp);
+ } else {
+ data_out[blockIdx.x*rows*cols+blockIdx.y] = expf(0);
+ }
+ }
+}
+
+// Host-side launcher for gaussian_correlation_kernel.
+// Grid: (n_scales, rows*cols) blocks; block: (n_channels/n_scales)/2 threads;
+// dynamic shared memory sized to one float per thread. All pointer arguments
+// must be device-accessible (xf/yf_sqr_norm may be mapped host memory —
+// presumably allocated with cudaHostAllocMapped; verify against caller).
+// Kernel-launch errors are surfaced through CudaCheckError().
+void cuda_gaussian_correlation(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm, double sigma, int n_channels, int n_scales,int rows, int cols)
+{
+ dim3 threadsPerBlock((n_channels/n_scales)/2);
+ dim3 numBlocks(n_scales, rows*cols);
+
+ gaussian_correlation_kernel<<<numBlocks, threadsPerBlock, ((n_channels/n_scales)/2)*sizeof(float)>>>(data_in, data_out, xf_sqr_norm, yf_sqr_norm, rows, cols, n_channels/n_scales, sigma);
+ CudaCheckError();
+
+// Debug dump of the device result, kept for reference:
+// float *data_cpu = (float*) malloc(rows*cols*n_scales*sizeof(float));
+// CudaSafeCall(cudaMemcpy(data_cpu, data_out, rows*cols*n_scales*sizeof(float), cudaMemcpyDeviceToHost));
+// for (int j = 0; j < rows*n_scales; ++j) {
+// for (int k = 0; k < cols-1; ++k)
+// std::cout << data_cpu[j*cols + k] << ", ";
+// std::cout << data_cpu[j*cols + cols-1] << std::endl;
+// }
+// free(data_cpu);
+ return;
+}
--- /dev/null
+#ifndef CUDA_FUNCTIONS_H
+#define CUDA_FUNCTIONS_H
+
+#include "cuda_runtime.h"
+#include "cuda/cuda_error_check.cuh"
+
+// Computes the KCF Gaussian-correlation response entirely on the GPU:
+// reduces the per-channel ifft products in data_in and writes one response
+// value per (scale, pixel) into data_out. All pointers are device pointers
+// (the *_sqr_norm buffers may be zero-copy mapped host memory).
+// Implemented in cuda_functions.cu.
+void cuda_gaussian_correlation(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm, double sigma, int n_channels, int n_scales, int rows, int cols);
+
+#endif
virtual void init(unsigned width, unsigned height,unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) = 0;
virtual void set_window(const cv::Mat &window) = 0;
virtual ComplexMat forward(const cv::Mat &input) = 0;
- virtual ComplexMat forward_raw(float *input) = 0;
+ virtual ComplexMat forward_raw(float *input, bool all_scales) = 0;
virtual ComplexMat forward_window(const std::vector<cv::Mat> &input) = 0;
virtual cv::Mat inverse(const ComplexMat &input) = 0;
virtual float* inverse_raw(const ComplexMat &input) = 0;
return complex_result;
}
-ComplexMat cuFFT::forward_raw(float *input)
+// Forward real-to-complex FFT of a raw device buffer (no OpenCV wrapping).
+// all_scales selects the batched plan covering m_num_of_scales channels;
+// otherwise the single-channel plan is used. The result holds the Hermitian
+// half-spectrum, i.e. m_width/2 + 1 columns per row.
+ComplexMat cuFFT::forward_raw(float *input, bool all_scales)
{
- ComplexMat dummy;
- return dummy;
+ ComplexMat complex_result;
+ if (all_scales){
+ complex_result.create(m_height, m_width / 2 + 1, m_num_of_scales);
+ CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(input),
+ complex_result.get_p_data()));
+ } else {
+ complex_result.create(m_height, m_width/ 2 + 1, 1);
+ CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(input),
+ complex_result.get_p_data()));
+ }
+ return complex_result;
}
ComplexMat cuFFT::forward_window(const std::vector<cv::Mat> &input)
// Inverse complex-to-real FFT returning a raw device pointer.
// Dispatches on the input's channel count to the matching pre-created cufft
// plan and its pre-allocated output buffer:
//   1 channel                      -> plan_i_1ch
//   m_num_of_scales channels       -> plan_i_1ch_all_scales
//   feats * scales channels        -> plan_i_features_all_scales
//   anything else (per-feature)    -> plan_i_features
// NOTE(review): the returned buffer is owned by cuFFT and reused on the next
// call — callers must consume it before invoking inverse_raw again.
float* cuFFT::inverse_raw(const ComplexMat &input)
{
+ int n_channels = input.n_channels;
cufftComplex *in = reinterpret_cast<cufftComplex*>(input.get_p_data());
-
- CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
- return data_i_features_all_scales;
+ if(n_channels == 1){
+ CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(data_i_1ch_d)));
+
+ return data_i_1ch_d;
+ } else if(n_channels == (int) m_num_of_scales){
+ CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(data_i_1ch_all_scales_d)));
+
+ return data_i_1ch_all_scales_d;
+ } else if(n_channels == (int) m_num_of_feats * (int) m_num_of_scales){
+ CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(data_i_features_all_scales_d)));
+
+ return data_i_features_all_scales_d;
+ }
+
+ CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(data_i_features_d)));
+
+ return data_i_features_d;
}
cuFFT::~cuFFT()
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
void set_window(const cv::Mat &window) override;
ComplexMat forward(const cv::Mat &input) override;
- ComplexMat forward_raw(float *input) override;
+ ComplexMat forward_raw(float *input, bool all_scales) override;
ComplexMat forward_window(const std::vector<cv::Mat> &input) override;
cv::Mat inverse(const ComplexMat &input) override;
float* inverse_raw(const ComplexMat &input) override;
return complex_result;
}
-ComplexMat Ffftw::forward_raw(float *input)
+ComplexMat Fftw::forward_raw(float *input, bool all_scales)
{
ComplexMat dummy;
return dummy;
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
void set_window(const cv::Mat &window) override;
ComplexMat forward(const cv::Mat &input) override;
- ComplexMat forward_raw(float *input) override;
+ ComplexMat forward_raw(float *input, bool all_scales) override;
ComplexMat forward_window(const std::vector<cv::Mat> &input) override;
cv::Mat inverse(const ComplexMat &input) override;
float* inverse_raw(const ComplexMat &input) override;
return ComplexMat(complex_result);
}
-ComplexMat FftOpencv::forward_raw(float *input)
+ComplexMat FftOpencv::forward_raw(float *input, bool all_scales)
{
ComplexMat dummy;
return dummy;
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
void set_window(const cv::Mat &window) override;
ComplexMat forward(const cv::Mat &input) override;
- ComplexMat forward_raw(float *input) override;
+ ComplexMat forward_raw(float *input, bool all_scales) override;
ComplexMat forward_window(const std::vector<cv::Mat> &input) override;
cv::Mat inverse(const ComplexMat &input) override;
float* inverse_raw(const ComplexMat &input) override;
#ifdef CUFFT
CudaSafeCall(cudaFreeHost(xf_sqr_norm));
CudaSafeCall(cudaFreeHost(yf_sqr_norm));
+ CudaSafeCall(cudaFree(gauss_corr_res));
#else
free(xf_sqr_norm);
free(yf_sqr_norm);
cudaSetDeviceFlags(cudaDeviceMapHost);
CudaSafeCall(cudaHostAlloc((void**)&xf_sqr_norm, p_num_scales*sizeof(float), cudaHostAllocMapped));
CudaSafeCall(cudaHostGetDevicePointer((void**)&xf_sqr_norm_d, (void*)xf_sqr_norm, 0));
- std::cout << &xf_sqr_norm << std::endl;
+
CudaSafeCall(cudaHostAlloc((void**)&yf_sqr_norm, sizeof(float), cudaHostAllocMapped));
CudaSafeCall(cudaHostGetDevicePointer((void**)&yf_sqr_norm_d, (void*)yf_sqr_norm, 0));
#else
xf_sqr_norm = (float*) malloc(p_num_scales*sizeof(float));
- xf_sqr_norm = (float*) malloc(sizeof(float));
+ yf_sqr_norm = (float*) malloc(sizeof(float));
#endif
p_current_scale = 1.;
p_num_of_feats = 31;
if(m_use_color) p_num_of_feats += 3;
if(m_use_cnfeat) p_num_of_feats += 10;
- p_poi_width = p_windows_size[0]/p_cell_size;
- p_poi_height = p_windows_size[1]/p_cell_size;
+ p_roi_width = p_windows_size[0]/p_cell_size;
+ p_roi_height = p_windows_size[1]/p_cell_size;
fft.init(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size, p_num_of_feats, p_num_scales, m_use_big_batch);
p_yf = fft.forward(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));
fft.set_window(cosine_window_function(p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));
+#ifdef CUFFT
+ CudaSafeCall(cudaMalloc((void**)&gauss_corr_res, (p_windows_size[0]/p_cell_size)*(p_windows_size[1]/p_cell_size)*p_num_scales*sizeof(float)));
+#endif
//obtain a sub-window for training initial model
std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
p_model_xf = fft.forward_window(path_feat);
}
scale_responses.push_back(max_val*weight);
}
- } else if(m_use_big_batch){
+ } else if (m_use_big_batch){
#pragma omp parallel for ordered
for (size_t i = 0; i < p_scales.size(); ++i) {
std::vector<cv::Mat> tmp = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]);
{
#ifdef CUFFT
xf.sqr_norm(xf_sqr_norm_d);
+ if (auto_correlation){
+ cudaDeviceSynchronize();
+ yf_sqr_norm[0] = xf_sqr_norm[0];
+ } else {
+ yf.sqr_norm(yf_sqr_norm_d);
+ }
#else
xf.sqr_norm(xf_sqr_norm);
-#endif
- if(auto_correlation){
+ if (auto_correlation){
yf_sqr_norm[0] = xf_sqr_norm[0];
} else {
-#ifdef CUFFT
- yf.sqr_norm(yf_sqr_norm_d);
-#else
yf.sqr_norm(yf_sqr_norm);
-#endif
}
-
+#endif
ComplexMat xyf;
xyf = auto_correlation ? xf.sqr_mag() : xf.mul2(yf.conj());
DEBUG_PRINTM(xyf);
+#ifdef CUFFT
+ cuda_gaussian_correlation(fft.inverse_raw(xyf), gauss_corr_res, xf_sqr_norm_d, yf_sqr_norm_d, sigma, xf.n_channels, xf.n_scales, p_roi_height, p_roi_width);
+ return fft.forward_raw(gauss_corr_res, xf.n_scales==p_num_scales);
+#else
//ifft2 and sum over 3rd dimension, we dont care about individual channels
cv::Mat ifft2_res = fft.inverse(xyf);
+ DEBUG_PRINTM(ifft2_res);
cv::Mat xy_sum;
- if(xf.channels() != p_num_scales*p_num_of_feats)
+ if (xf.channels() != p_num_scales*p_num_of_feats)
xy_sum.create(ifft2_res.size(), CV_32FC1);
else
xy_sum.create(ifft2_res.size(), CV_32FC(p_scales.size()));
float * row_ptr_sum = xy_sum.ptr<float>(y);
for (int x = 0; x < ifft2_res.cols; ++x) {
for (int sum_ch = 0; sum_ch < xy_sum.channels(); ++sum_ch) {
- row_ptr_sum[(x*xy_sum.channels())+sum_ch] += std::accumulate(row_ptr + x*ifft2_res.channels() + sum_ch*(ifft2_res.channels()/xy_sum.channels()),
- (row_ptr + x*ifft2_res.channels() + (sum_ch+1)*(ifft2_res.channels()/xy_sum.channels())), 0.f);
+ row_ptr_sum[(x*xy_sum.channels())+sum_ch] += std::accumulate(row_ptr + x*ifft2_res.channels() + sum_ch*(ifft2_res.channels()/xy_sum.channels()), (row_ptr + x*ifft2_res.channels() + (sum_ch+1)*(ifft2_res.channels()/xy_sum.channels())), 0.f);
}
}
}
- DEBUG_PRINTM(ifft2_res);
DEBUG_PRINTM(xy_sum);
std::vector<cv::Mat> scales;
cv::Mat in_all(scales[0].rows * xf.n_scales, scales[0].cols, CV_32F);
float numel_xf_inv = 1.f/(xf.cols * xf.rows * (xf.channels()/xf.n_scales));
- for(int i = 0; i < xf.n_scales; ++i){
+ for (int i = 0; i < xf.n_scales; ++i){
cv::Mat in_roi(in_all, cv::Rect(0, i*scales[0].rows, scales[0].cols, scales[0].rows));
cv::exp(- 1.f / (sigma * sigma) * cv::max((xf_sqr_norm[i] + yf_sqr_norm[0] - 2 * scales[i]) * numel_xf_inv, 0), in_roi);
DEBUG_PRINTM(in_roi);
DEBUG_PRINTM(in_all);
return fft.forward(in_all);
+#endif
}
float get_response_circular(cv::Point2i & pt, cv::Mat & response)
#ifdef CUFFT
#include "complexmat.cuh"
+ #include "cuda_functions.cuh"
#include "cuda/cuda_error_check.cuh"
#include <cuda_runtime.h>
#else
//for big batch
int p_num_of_feats;
- int p_poi_height, p_poi_width;
+ int p_roi_height, p_roi_width;
float *xf_sqr_norm = nullptr, *yf_sqr_norm = nullptr;
#ifdef CUFFT
- float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr;
+ float *xf_sqr_norm_d = nullptr, *yf_sqr_norm_d = nullptr, *gauss_corr_res = nullptr;
#endif
//model