Merge remote-tracking branch 'upstream/master' into rotation

[hercules2020/kcf.git] / src / kcf.cpp
diff --git a/src/kcf.cpp b/src/kcf.cpp

index 663f6f118627fb8f465d09e6c678596b8abac495..3195bab37fdeb0c7e44adc44c68e9f71845735e7 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -19,7 +19,7 @@
  #endif // OPENMP
  
  #define DEBUG_PRINT(obj)                                                                                               \
-    if (m_debug) {                                                                                                     \
+    if (m_debug || m_visual_debug) {                                                                                   \
          std::cout << #obj << " @" << __LINE__ << std::endl << (obj) << std::endl;                                      \
      }
  #define DEBUG_PRINTM(obj)                                                                                              \
@@ -134,24 +134,31 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
      p_roi.width = p_windows_size.width / p_cell_size;
      p_roi.height = p_windows_size.height / p_cell_size;
  
-    p_num_of_feats = 31;
-    if (m_use_color) p_num_of_feats += 3;
-    if (m_use_cnfeat) p_num_of_feats += 10;
-
      p_scales.clear();
-    if (m_use_scale)
+    if (m_use_scale) {
          for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i)
              p_scales.push_back(std::pow(p_scale_step, i));
-    else
+    } else {
          p_scales.push_back(1.);
+        p_num_scales = 1;
+    }
+
+    if (m_use_angle) {
+        for (int i = p_angle_min; i <= p_angle_max; i += p_angle_step)
+            p_angles.push_back(i);
+    } else {
+        p_angles.push_back(0);
+        p_num_angles = 1;
+    }
  
  #ifdef CUFFT
      if (p_roi.height * (p_roi.width / 2 + 1) > 1024) {
          std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set "
                       "the window dimensions so its size is less or equal to "
                    << 1024 * p_cell_size * p_cell_size * 2 + 1
-                  << " pixels . Currently the size of the window is: " << p_windows_size.width << "x" << p_windows_size.height
-                  << " which is  " << p_windows_size.width * p_windows_size.height << " pixels. " << std::endl;
+                  << " pixels . Currently the size of the window is: " << p_windows_size.width << "x"
+                  << p_windows_size.height << " which is  " << p_windows_size.width * p_windows_size.height
+                  << " pixels. " << std::endl;
          std::exit(EXIT_FAILURE);
      }
  
@@ -159,11 +166,11 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
          std::cerr << "cuFFT supports only Gaussian kernel." << std::endl;
          std::exit(EXIT_FAILURE);
      }
+
      CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
+
      p_rot_labels_data = DynMem(p_roi.width * p_roi.height * sizeof(float));
      p_rot_labels = cv::Mat(p_roi, CV_32FC1, p_rot_labels_data.hostMem());
-#else
-    p_xf.create(p_roi.height, p_roi.height / 2 + 1, p_num_of_feats);
  #endif
  
  #if defined(CUFFT) || defined(FFTW)
@@ -175,12 +182,16 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
      p_yf.create(p_roi.height, width, 1);
      p_xf.create(p_roi.height, width, p_num_of_feats);
  
-    int max = BIG_BATCH_MODE ? 2 : p_num_scales;
-    for (int i = 0; i < max; ++i) {
-        if (BIG_BATCH_MODE && i == 1)
-            p_threadctxs.emplace_back(p_roi, p_num_of_feats * p_num_scales, 1, p_num_scales);
-        else
-            p_threadctxs.emplace_back(p_roi, p_num_of_feats, p_scales[i], 1);
+    int max1 = BIG_BATCH_MODE ? 2 : p_num_scales;
+    int max2 = BIG_BATCH_MODE ? 1 : p_num_angles;
+    for (int i = 0; i < max1; ++i) {
+        for (int j = 0; j < max2; ++j) {
+            if (BIG_BATCH_MODE && i == 1)
+                p_threadctxs.emplace_back(p_roi, p_num_of_feats * p_num_scales * p_num_angles, 1, 0, p_num_scales,
+                                          p_num_angles);
+            else
+                p_threadctxs.emplace_back(p_roi, p_num_of_feats, p_scales[i], p_angles[j]);
+        }
      }
  
      p_current_scale = 1.;
@@ -203,18 +214,29 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
      fft.set_window(cosine_window_function(p_roi.width, p_roi.height));
  
      // window weights, i.e. labels
-    fft.forward(
-        gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf,
-        m_use_cuda ? p_rot_labels_data.deviceMem() : nullptr, p_threadctxs.front().stream);
+    fft.forward(gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf,
+                m_use_cuda ? p_rot_labels_data.deviceMem() : nullptr, p_threadctxs.front().stream);
      DEBUG_PRINTM(p_yf);
  
      // obtain a sub-window for training initial model
-    std::vector<cv::Mat> patch_feats = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy,
-                                                    p_windows_size.width, p_windows_size.height);
+    int size_x_scaled = floor(p_windows_size.width);
+    int size_y_scaled = floor(p_windows_size.height);
+
+    cv::Mat patch_gray = get_subwindow(input_gray, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+    geometric_transformations(patch_gray, p_windows_size.width, p_windows_size.height, 0, false);
+
+    cv::Mat patch_rgb;
+    if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
+        patch_rgb = get_subwindow(input_rgb, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+        geometric_transformations(patch_rgb, p_windows_size.width, p_windows_size.height, 0, false);
+    }
+
+    std::vector<cv::Mat> patch_feats = get_features(patch_rgb, patch_gray);
      fft.forward_window(patch_feats, p_model_xf, p_threadctxs.front().fw_all,
                         m_use_cuda ? p_threadctxs.front().data_features.deviceMem() : nullptr,
                         p_threadctxs.front().stream);
      DEBUG_PRINTM(p_model_xf);
+
  #if !defined(BIG_BATCH) && defined(CUFFT) && (defined(ASYNC) || defined(OPENMP))
      p_threadctxs.front().model_xf = p_model_xf;
      p_threadctxs.front().model_xf.set_stream(p_threadctxs.front().stream);
@@ -284,6 +306,7 @@ BBox_c KCF_Tracker::getBBox()
      BBox_c tmp = p_pose;
      tmp.w *= p_current_scale;
      tmp.h *= p_current_scale;
+    tmp.a = p_current_angle;
  
      if (p_resize_image) tmp.scale(1 / p_downscale_factor);
      if (p_fit_to_pw2) {
@@ -301,7 +324,7 @@ double KCF_Tracker::getFilterResponse() const
  
  void KCF_Tracker::track(cv::Mat &img)
  {
-    if (m_debug) std::cout << "NEW FRAME" << '\n';
+    if (m_debug || m_visual_debug) std::cout << "\nNEW FRAME" << std::endl;
      cv::Mat input_gray, input_rgb = img.clone();
      if (img.channels() == 3) {
          cv::cvtColor(img, input_gray, CV_BGR2GRAY);
@@ -323,7 +346,6 @@ void KCF_Tracker::track(cv::Mat &img)
              cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_scale_factor_x, p_scale_factor_y, cv::INTER_LINEAR);
          }
      }
-
      max_response = -1.;
      ThreadCtx *max = nullptr;
      cv::Point2i *max_response_pt = nullptr;
@@ -336,11 +358,9 @@ void KCF_Tracker::track(cv::Mat &img)
          });
      for (auto const &it : p_threadctxs)
          it.async_res.wait();
-
  #else  // !ASYNC
-    // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list
      NORMAL_OMP_PARALLEL_FOR
-    for (uint i = 0; i < p_threadctxs.size(); ++i)
+    for (uint i =  BIG_BATCH_MODE ? 1 : 0; i < p_threadctxs.size(); ++i)
          scale_track(p_threadctxs[i], input_rgb, input_gray);
  #endif
  
@@ -354,16 +374,40 @@ void KCF_Tracker::track(cv::Mat &img)
          }
      }
  #else
-    // FIXME: Iterate correctly in big batch mode - perhaps have only one element in the list
-    for (uint j = 0; j < p_scales.size(); ++j) {
-        if (p_threadctxs[0].max_responses[j] > max_response) {
-            max_response = p_threadctxs[0].max_responses[j];
-            max_response_pt = &p_threadctxs[0].max_locs[j];
-            max_response_map = &p_threadctxs[0].response_maps[j];
-            max = &p_threadctxs[0];
+    for (uint j = 0; j < p_num_scales; ++j) {
+        for (uint k = 0; k < p_num_angles; ++k) {
+            if (p_threadctxs.back().max_responses[j + k] > max_response) {
+                max_response = p_threadctxs.back().max_responses[j + k];
+                max_response_pt = &p_threadctxs.back().max_locs[j + k];
+                max_response_map = &p_threadctxs.back().response_maps[j + k];
+            }
          }
      }
+    max = &p_threadctxs.back();
  #endif
+    if (m_visual_debug) {
+        cv::Mat all_responses(cv::Size(p_num_angles* p_debug_image_size, p_num_scales * p_debug_image_size),
+                              p_debug_scale_responses[0].type(), cv::Scalar::all(0));
+        cv::Mat all_subwindows(cv::Size(p_num_angles* p_debug_image_size, p_num_scales* p_debug_image_size),
+                               p_debug_subwindows[0].type(), cv::Scalar::all(0));
+        for (size_t i = 0; i < p_num_scales; ++i) {
+            for (size_t j = 0; j < p_num_angles; ++j) {
+                cv::Mat in_roi(all_responses, cv::Rect(j * p_debug_image_size, i * p_debug_image_size,
+                                                       p_debug_image_size, p_debug_image_size));
+                p_debug_scale_responses[5 * i + j].copyTo(in_roi);
+                in_roi = all_subwindows(
+                    cv::Rect(j * p_debug_image_size, i * p_debug_image_size, p_debug_image_size, p_debug_image_size));
+                p_debug_subwindows[5 * i + j].copyTo(in_roi);
+            }
+        }
+        cv::namedWindow("All subwindows", CV_WINDOW_AUTOSIZE);
+        cv::imshow("All subwindows", all_subwindows);
+        cv::namedWindow("All responses", CV_WINDOW_AUTOSIZE);
+        cv::imshow("All responses", all_responses);
+        cv::waitKey();
+        p_debug_scale_responses.clear();
+        p_debug_subwindows.clear();
+    }
  
      DEBUG_PRINTM(*max_response_map);
      DEBUG_PRINT(*max_response_pt);
@@ -381,8 +425,13 @@ void KCF_Tracker::track(cv::Mat &img)
          new_location = sub_pixel_peak(*max_response_pt, *max_response_map);
      DEBUG_PRINT(new_location);
  
+    if (m_visual_debug) std::cout << "Old p_pose, cx: " << p_pose.cx << " cy: " << p_pose.cy << std::endl;
+
      p_pose.cx += p_current_scale * p_cell_size * double(new_location.x);
      p_pose.cy += p_current_scale * p_cell_size * double(new_location.y);
+
+    if (m_visual_debug) std::cout << "New p_pose, cx: " << p_pose.cx << " cy: " << p_pose.cy << std::endl;
+
      if (p_fit_to_pw2) {
          clamp2(p_pose.cx, 0.0, (img.cols * p_scale_factor_x) - 1);
          clamp2(p_pose.cy, 0.0, (img.rows * p_scale_factor_y) - 1);
@@ -401,13 +450,29 @@ void KCF_Tracker::track(cv::Mat &img)
  
      clamp2(p_current_scale, p_min_max_scale[0], p_min_max_scale[1]);
  
-    ThreadCtx &ctx = p_threadctxs.front();
+    if (p_current_scale < p_min_max_scale[0]) p_current_scale = p_min_max_scale[0];
+    if (p_current_scale > p_min_max_scale[1]) p_current_scale = p_min_max_scale[1];
+
+    p_current_angle = (p_current_angle + max->angle) < 0
+                          ? -std::abs(p_current_angle + max->angle) % 360
+                          : (p_current_angle + max->angle) % 360;
+
      // obtain a subwindow for training at newly estimated target position
-    std::vector<cv::Mat> patch_feats = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy,
-                                                    p_windows_size.width, p_windows_size.height,
-                                                    p_current_scale);
-    fft.forward_window(patch_feats, p_xf, ctx.fw_all,
-                       m_use_cuda ? ctx.data_features.deviceMem() : nullptr, ctx.stream);
+    int size_x_scaled = floor(p_windows_size.width * p_current_scale);
+    int size_y_scaled = floor(p_windows_size.height * p_current_scale);
+
+    cv::Mat patch_gray = get_subwindow(input_gray, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+    geometric_transformations(patch_gray, p_windows_size.width, p_windows_size.height, p_current_angle, false);
+
+    cv::Mat patch_rgb = cv::Mat::zeros(size_y_scaled, size_x_scaled, CV_32F);
+    if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
+        patch_rgb = get_subwindow(input_rgb, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+        geometric_transformations(patch_rgb, p_windows_size.width, p_windows_size.height, p_current_angle, false);
+    }
+
+    ThreadCtx &ctx = p_threadctxs.front();
+    std::vector<cv::Mat> patch_feats = get_features(patch_rgb, patch_gray);
+    fft.forward_window(patch_feats, p_xf, ctx.fw_all, m_use_cuda ? ctx.data_features.deviceMem() : nullptr, ctx.stream);
  
      // subsequent frames, interpolate model
      p_model_xf = p_model_xf * float((1. - p_interp_factor)) + p_xf * float(p_interp_factor);
@@ -447,15 +512,42 @@ void KCF_Tracker::scale_track(ThreadCtx &vars, cv::Mat &input_rgb, cv::Mat &inpu
      std::vector<cv::Mat> patch_feats;
      if (BIG_BATCH_MODE) {
          BIG_BATCH_OMP_PARALLEL_FOR
-        for (uint i = 0; i < p_num_scales; ++i) {
-            patch_feats = get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy,
-                                       this->p_windows_size.width, this->p_windows_size.height,
-                                       this->p_current_scale * this->p_scales[i]);
+        for (uint i = 0; i < this->p_scales.size(); ++i) {
+            for (uint j = 0; j < this->p_angles.size(); ++j) {
+                int size_x_scaled = floor(this->p_windows_size.width * this->p_current_scale * this->p_scales[i]);
+                int size_y_scaled = floor(this->p_windows_size.height * this->p_current_scale * this->p_scales[i]);
+
+                cv::Mat patch_gray =
+                    get_subwindow(input_gray, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+                geometric_transformations(patch_gray, p_windows_size.width, p_windows_size.height,
+                                          p_current_scale * this->p_scales[i], p_current_angle + this->p_angles[j]);
+
+                cv::Mat patch_rgb;
+                if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
+                    patch_rgb =
+                        get_subwindow(input_rgb, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+                    geometric_transformations(patch_rgb, p_windows_size.width, p_windows_size.height,
+                                              p_current_scale * this->p_scales[i], p_current_angle + this->p_angles[j]);
+                }
+                std::vector<cv::Mat> tmp = get_features(patch_rgb, patch_gray);
+                BIG_BATCH_OMP_ORDERED
+                patch_feats.insert(patch_feats.end(), tmp.begin(), tmp.end());
+            }
          }
      } else {
-        patch_feats = get_features(input_rgb, input_gray, this->p_pose.cx, this->p_pose.cy,
-                                   this->p_windows_size.width, this->p_windows_size.height,
-                                   this->p_current_scale * vars.scale);
+        int size_x_scaled = floor(this->p_windows_size.width * this->p_current_scale * vars.scale);
+        int size_y_scaled = floor(this->p_windows_size.height * this->p_current_scale * vars.scale);
+
+        cv::Mat patch_gray = get_subwindow(input_gray, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+        geometric_transformations(patch_gray, p_windows_size.width, p_windows_size.height, p_current_scale * vars.scale);
+
+        cv::Mat patch_rgb;
+        if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
+            patch_rgb = get_subwindow(input_rgb, this->p_pose.cx, this->p_pose.cy, size_x_scaled, size_y_scaled);
+            geometric_transformations(patch_rgb, p_windows_size.width, p_windows_size.height, p_current_scale * vars.scale,
+                                      p_current_angle + vars.angle);
+        }
+        patch_feats = get_features(patch_rgb, patch_gray);
      }
  
      fft.forward_window(patch_feats, vars.zf, vars.fw_all, m_use_cuda ? vars.data_features.deviceMem() : nullptr,
@@ -512,38 +604,15 @@ void KCF_Tracker::scale_track(ThreadCtx &vars, cv::Mat &input_rgb, cv::Mat &inpu
  
  // ****************************************************************************
  
-std::vector<cv::Mat> KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale)
+std::vector<cv::Mat> KCF_Tracker::get_features(cv::Mat &patch_rgb, cv::Mat &patch_gray)
  {
-    int size_x_scaled = floor(size_x * scale);
-    int size_y_scaled = floor(size_y * scale);
-
-    cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled);
-    cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled);
-
-    // resize to default size
-    if (scale > 1.) {
-        // if we downsample use  INTER_AREA interpolation
-        cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA);
-    } else {
-        cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR);
-    }
-
      // get hog(Histogram of Oriented Gradients) features
      std::vector<cv::Mat> hog_feat = FHoG::extract(patch_gray, 2, p_cell_size, 9);
  
      // get color rgb features (simple r,g,b channels)
      std::vector<cv::Mat> color_feat;
-    if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
-        // resize to default size
-        if (scale > 1.) {
-            // if we downsample use  INTER_AREA interpolation
-            cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_AREA);
-        } else {
-            cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_LINEAR);
-        }
-    }
  
-    if (m_use_color && input_rgb.channels() == 3) {
+    if (m_use_color && patch_rgb.channels() == 3) {
          // use rgb color space
          cv::Mat patch_rgb_norm;
          patch_rgb.convertTo(patch_rgb_norm, CV_32F, 1. / 255., -0.5);
@@ -555,7 +624,7 @@ std::vector<cv::Mat> KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & in
          color_feat.insert(color_feat.end(), rgb.begin(), rgb.end());
      }
  
-    if (m_use_cnfeat && input_rgb.channels() == 3) {
+    if (m_use_cnfeat && patch_rgb.channels() == 3) {
          std::vector<cv::Mat> cn_feat = CNFeat::extract(patch_rgb);
          color_feat.insert(color_feat.end(), cn_feat.begin(), cn_feat.end());
      }
@@ -718,12 +787,9 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid
  
      if (x2 - x1 == 0 || y2 - y1 == 0)
          patch = cv::Mat::zeros(height, width, CV_32FC1);
-    else {
+    else
          cv::copyMakeBorder(input(cv::Range(y1, y2), cv::Range(x1, x2)), patch, top, bottom, left, right,
                             cv::BORDER_REPLICATE);
-        //      imshow( "copyMakeBorder", patch);
-        //      cv::waitKey();
-    }
  
      // sanity check
      assert(patch.cols == width && patch.rows == height);
@@ -731,8 +797,47 @@ cv::Mat KCF_Tracker::get_subwindow(const cv::Mat &input, int cx, int cy, int wid
      return patch;
  }
  
-void KCF_Tracker::gaussian_correlation(struct ThreadCtx &vars, const ComplexMat &xf, const ComplexMat &yf,
-                                       double sigma, bool auto_correlation)
+// Optionally rotates `patch` in place, then resizes it to the tracker's working size.
+//
+// patch       - sub-window to transform; overwritten with the result.
+// size_x/y    - target size in pixels; 3-channel patches are resized to size / p_cell_size instead.
+// angle       - rotation handed to cv::getRotationMatrix2D; only applied when m_use_angle is set.
+// allow_debug - when set together with m_visual_debug, a labelled thumbnail of a 3-channel patch
+//               is appended to p_debug_subwindows.
+void KCF_Tracker::geometric_transformations(cv::Mat &patch, int size_x, int size_y, int angle, bool allow_debug)
+{
+    if (m_use_angle) {
+        // Rotate about the patch centre, keeping the patch dimensions unchanged.
+        const cv::Point2f center((patch.cols - 1) / 2., (patch.rows - 1) / 2.);
+        const cv::Mat r = cv::getRotationMatrix2D(center, angle, 1.0);
+        cv::warpAffine(patch, patch, r, cv::Size(patch.cols, patch.rows), cv::INTER_LINEAR, cv::BORDER_REPLICATE);
+    }
+
+    // Compare the widths directly: the former `patch.cols / size_x > 1.` was an integer division,
+    // so INTER_AREA was only selected once the patch was at least twice the target width.
+    const int interpolation = patch.cols > size_x ? cv::INTER_AREA : cv::INTER_LINEAR;
+    const bool is_color = patch.channels() == 3;
+
+    // 3-channel patches are resized to the cell grid, everything else to the full window size.
+    const cv::Size target = is_color ? cv::Size(size_x / p_cell_size, size_y / p_cell_size)
+                                     : cv::Size(size_x, size_y);
+    cv::resize(patch, patch, target, 0., 0., interpolation);
+
+    // Keep an annotated, fixed-size copy of the colour patch for the visual debug output.
+    if (is_color && m_visual_debug && allow_debug) {
+        cv::Mat input_clone;
+        cv::resize(patch, input_clone, cv::Size(p_debug_image_size, p_debug_image_size), 0., 0., cv::INTER_LINEAR);
+
+        const std::string angle_string = std::to_string(p_current_angle + angle);
+        cv::putText(input_clone, angle_string, cv::Point(1, input_clone.rows - 5), cv::FONT_HERSHEY_COMPLEX_SMALL,
+                    0.5, cv::Scalar(0, 255, 0), 1);
+
+        p_debug_subwindows.push_back(input_clone);
+    }
+}
+
+void KCF_Tracker::gaussian_correlation(struct ThreadCtx &vars, const ComplexMat &xf, const ComplexMat &yf, double sigma,
+                                       bool auto_correlation)
  {
      xf.sqr_norm(vars.xf_sqr_norm);
      if (auto_correlation) {