Rework patch fitting to specific dimensions

author Michal Sojka <michal.sojka@cvut.cz>

Wed, 26 Sep 2018 21:38:44 +0000 (23:38 +0200)

committer Michal Sojka <michal.sojka@cvut.cz>

Thu, 27 Sep 2018 05:31:54 +0000 (07:31 +0200)
author Michal Sojka <michal.sojka@cvut.cz>
Wed, 26 Sep 2018 21:38:44 +0000 (23:38 +0200)
committer Michal Sojka <michal.sojka@cvut.cz>
Thu, 27 Sep 2018 05:31:54 +0000 (07:31 +0200)
diff --git a/README.md b/README.md

index 8cf2c6388337a4cdbdbf481e84e20672400e7e48..856a3ead71ef61bfad88bf3dfc73c53aaad4c929 100644 (file)
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ top_left_y, width, height".
  | --visualize, -v[delay_ms] | Visualize the output, optionally with specified delay. If the delay is 0 the program will wait for a key press. |
  | --output, -o <output.txt>     | Specify name of output file. |
  | --debug, -d                           | Generate debug output. |
-| --fit, -f[W[xH]] | Specifies the dimension to which the extracted patch should be scaled. It should be divisible by 4. No dimension is the same as `128x128`, a single dimension `W` will result in patch size of `W`×`W`. |
+| --fit, -f[W[xH]] | Specifies the dimension to which the extracted patch should be scaled. It should be divisible by 4. No dimension or zero rounds the dimensions to the nearest smaller power of 2, a single dimension `W` will result in patch size of `W`×`W`. |
  
  
  ## Authors
diff --git a/main_vot.cpp b/main_vot.cpp

index eddb39f9dbedfd167dd4310c1c54c604f9b26e5b..7743540aa1a975edf8800e5cfac1912a0f92d433 100644 (file)
--- a/main_vot.cpp
+++ b/main_vot.cpp
@@ -67,7 +67,7 @@ int main(int argc, char *argv[])
                        << " --visualize | -v[delay_ms]\n"
                        << " --output    | -o <output.txt>\n"
                        << " --debug     | -d\n"
-                      << " --fit       | -f[WxH]\n";
+                      << " --fit       | -f[W[xH]]\n";
              exit(0);
              break;
          case 'o':
@@ -78,7 +78,7 @@ int main(int argc, char *argv[])
              break;
          case 'f':
              if (!optarg) {
-                fit_size_x = fit_size_y = 128;
+                fit_size_x = fit_size_y = 0;
              } else {
                  char tail;
                  if (sscanf(optarg, "%d%c", &fit_size_x, &tail) == 1) {
@@ -88,12 +88,6 @@ int main(int argc, char *argv[])
                      return 1;
                  }
              }
-            int min_size = 2 * tracker.p_cell_size;
-            if (fit_size_x <  min_size || fit_size_x < min_size) {
-                fprintf(stderr, "Fit size %dx%d too small. Minimum is %dx%d.\n",
-                        fit_size_x, fit_size_y, min_size, min_size);
-                return 1;
-            }
              break;
          }
      }
diff --git a/src/kcf.cpp b/src/kcf.cpp

index 75119bd74ffcb5b5f0c3f9e00d348157e5ab1770..fd622bfe7825f20d04a9dca8c2e98d321c93cdc5 100644 (file)
--- a/src/kcf.cpp
+++ b/src/kcf.cpp
@@ -92,6 +92,15 @@ void KCF_Tracker::train(cv::Mat input_rgb, cv::Mat input_gray, double interp_fac
      //        p_model_alphaf = p_yf / (kf + p_lambda);   //equation for fast training
  }
  
+static int round_pw2_down(int x)
+{
+        for (int i = 1; i < 32; i <<= 1)
+            x |= x >> i;
+        x++;
+        return x >> 1;
+}
+
+
  void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int fit_size_y)
  {
      __dbgTracer.debug = m_debug;
@@ -140,53 +149,41 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
          img.convertTo(input_gray, CV_32FC1);
  
      // don't need too large image
-    if (p_init_pose.w * p_init_pose.h > 100. * 100. && (fit_size_x == -1 || fit_size_y == -1)) {
+    if (p_init_pose.w * p_init_pose.h > 100. * 100.) {
          std::cout << "resizing image by factor of " << 1 / p_downscale_factor << std::endl;
          p_resize_image = true;
          p_init_pose.scale(p_downscale_factor);
          cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA);
          cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA);
-    } else if (!(fit_size_x == -1 && fit_size_y == -1)) {
-        if (fit_size_x % p_cell_size != 0 || fit_size_y % p_cell_size != 0) {
-            std::cerr << "Error: Fit size is not multiple of HOG cell size (" << p_cell_size << ")" << std::endl;
-            std::exit(EXIT_FAILURE);
-        }
-        p_fit_factor_x = (double)fit_size_x / round(p_init_pose.w * (1. + p_padding));
-        p_fit_factor_y = (double)fit_size_y / round(p_init_pose.h * (1. + p_padding));
-        std::cout << "resizing image horizontaly by factor of " << p_fit_factor_x << " and verticaly by factor of "
-                  << p_fit_factor_y << std::endl;
-        p_fit_to_pw2 = true;
-        p_init_pose.scale_x(p_fit_factor_x);
-        p_init_pose.scale_y(p_fit_factor_y);
-        if (fabs(p_fit_factor_x - 1) > p_floating_error || fabs(p_fit_factor_y - 1) > p_floating_error) {
-            if (p_fit_factor_x < 1 && p_fit_factor_y < 1) {
-                cv::resize(input_gray, input_gray, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_AREA);
-                cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_AREA);
-            } else {
-                cv::resize(input_gray, input_gray, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_LINEAR);
-                cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_LINEAR);
-            }
-        }
      }
  
-
      // compute win size + fit to fhog cell size
      p_windows_size.width = round(p_init_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size;
      p_windows_size.height = round(p_init_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size;
-    feature_size.width = p_windows_size.width / p_cell_size;
-    feature_size.height = p_windows_size.height / p_cell_size;
+
+    if (fit_size_x == 0 || fit_size_y == 0) {
+        // Round down to the next highest power of 2
+        fit_size = cv::Size(round_pw2_down(p_windows_size.width),
+                            round_pw2_down(p_windows_size.height));
+    } else if (fit_size_x == -1 || fit_size_y == -1) {
+        fit_size =  p_windows_size;
+    } else {
+        fit_size = cv::Size(fit_size_x, fit_size_y);
+    }
+
+    feature_size = fit_size / p_cell_size;
  
      p_scales.clear();
      for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i)
          p_scales.push_back(std::pow(p_scale_step, i));
  
  #ifdef CUFFT
-    if (feature_size.height * (feature_size.width / 2 + 1) > 1024) {
+    if (Fft::freq_size(feature_size).area() > 1024) {
          std::cerr << "Window after forward FFT is too big for CUDA kernels. Plese use -f to set "
                       "the window dimensions so its size is less or equal to "
                    << 1024 * p_cell_size * p_cell_size * 2 + 1
-                  << " pixels . Currently the size of the window is: " << p_windows_size.width << "x" << p_windows_size.height
-                  << " which is  " << p_windows_size.width * p_windows_size.height << " pixels. " << std::endl;
+                  << " pixels. Currently the size of the window is: " << fit_size
+                  << " which is  " << fit_size.area() << " pixels. " << std::endl;
          std::exit(EXIT_FAILURE);
      }
  
@@ -226,12 +223,16 @@ void KCF_Tracker::init(cv::Mat &img, const cv::Rect &bbox, int fit_size_x, int f
      p_min_max_scale[0] = std::pow(p_scale_step, std::ceil(std::log(min_size_ratio) / log(p_scale_step)));
      p_min_max_scale[1] = std::pow(p_scale_step, std::floor(std::log(max_size_ratio) / log(p_scale_step)));
  
-    std::cout << "init: img size " << img.cols << "x" << img.rows << std::endl;
-    std::cout << "init: win size " << p_windows_size.width << "x" << p_windows_size.height << std::endl;
-    std::cout << "init: FFT size " << feature_size.width << "x" << feature_size.height << std::endl;
+    std::cout << "init: img size " << img.size() << std::endl;
+    std::cout << "init: win size " << p_windows_size;
+    if (p_windows_size != fit_size)
+        std::cout << " resized to " << fit_size;
+    std::cout << std::endl;
+    std::cout << "init: FFT size " << feature_size << std::endl;
      std::cout << "init: min max scales factors: " << p_min_max_scale[0] << " " << p_min_max_scale[1] << std::endl;
  
-    p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h) * p_output_sigma_factor / p_cell_size;
+    p_output_sigma = std::sqrt(p_init_pose.w * p_init_pose.h * double(fit_size.area()) / p_windows_size.area())
+           * p_output_sigma_factor / p_cell_size;
  
      fft.init(feature_size.width, feature_size.height, p_num_of_feats, p_num_scales);
      fft.set_window(MatDynMem(cosine_window_function(feature_size.width, feature_size.height)));
@@ -256,9 +257,6 @@ void KCF_Tracker::updateTrackerPosition(BBox_c &bbox)
      BBox_c tmp = bbox;
      if (p_resize_image) {
          tmp.scale(p_downscale_factor);
-    } else if (p_fit_to_pw2) {
-        tmp.scale_x(p_fit_factor_x);
-        tmp.scale_y(p_fit_factor_y);
      }
      p_current_center = tmp.center();
  }
@@ -273,10 +271,6 @@ BBox_c KCF_Tracker::getBBox()
  
      if (p_resize_image)
          tmp.scale(1 / p_downscale_factor);
-    if (p_fit_to_pw2) {
-        tmp.scale_x(1 / p_fit_factor_x);
-        tmp.scale_y(1 / p_fit_factor_y);
-    }
  
      return tmp;
  }
@@ -291,15 +285,6 @@ void KCF_Tracker::resizeImgs(cv::Mat &input_rgb, cv::Mat &input_gray)
      if (p_resize_image) {
          cv::resize(input_gray, input_gray, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA);
          cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_downscale_factor, p_downscale_factor, cv::INTER_AREA);
-    } else if (p_fit_to_pw2 && fabs(p_fit_factor_x - 1) > p_floating_error &&
-               fabs(p_fit_factor_y - 1) > p_floating_error) {
-        if (p_fit_factor_x < 1 && p_fit_factor_y < 1) {
-            cv::resize(input_gray, input_gray, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_AREA);
-            cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_AREA);
-        } else {
-            cv::resize(input_gray, input_gray, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_LINEAR);
-            cv::resize(input_rgb, input_rgb, cv::Size(0, 0), p_fit_factor_x, p_fit_factor_y, cv::INTER_LINEAR);
-        }
      }
  }
  
@@ -376,15 +361,13 @@ void KCF_Tracker::track(cv::Mat &img)
      uint max_idx;
      max_response = findMaxReponse(max_idx, new_location);
  
+    new_location.x *= double(p_windows_size.width) / fit_size.width;
+    new_location.y *= double(p_windows_size.height) / fit_size.height;
+
      p_current_center += p_current_scale * p_cell_size * new_location;
  
-    if (p_fit_to_pw2) {
-        clamp2(p_current_center.x, 0.0, (img.cols * p_fit_factor_x) - 1);
-        clamp2(p_current_center.y, 0.0, (img.rows * p_fit_factor_y) - 1);
-    } else {
-        clamp2(p_current_center.x, 0.0, img.cols - 1.0);
-        clamp2(p_current_center.y, 0.0, img.rows - 1.0);
-    }
+    clamp2(p_current_center.x, 0.0, img.cols - 1.0);
+    clamp2(p_current_center.y, 0.0, img.rows - 1.0);
  
      // sub grid scale interpolation
      if (m_use_subgrid_scale) {
@@ -458,18 +441,18 @@ void ThreadCtx::track(const KCF_Tracker &kcf, cv::Mat &input_rgb, cv::Mat &input
  cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, int cx, int cy,
                                    int size_x, int size_y, double scale) const
  {
-    int size_x_scaled = floor(size_x * scale);
-    int size_y_scaled = floor(size_y * scale);
+    cv::Size scaled = cv::Size(floor(size_x * scale), floor(size_y * scale));
  
-    cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled);
-    cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled);
+    cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, scaled.width, scaled.height);
+    cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, scaled.width, scaled.height);
  
      // resize to default size
-    if (scale > 1.) {
+    if (scaled.area() > fit_size.area()) {
          // if we downsample use  INTER_AREA interpolation
-        cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA);
+        // note: this is just a guess - we may downsample in X and upsample in Y (or vice versa)
+        cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_AREA);
      } else {
-        cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR);
+        cv::resize(patch_gray, patch_gray, fit_size, 0., 0., cv::INTER_LINEAR);
      }
  
      // get hog(Histogram of Oriented Gradients) features
@@ -479,11 +462,11 @@ cv::Mat KCF_Tracker::get_features(cv::Mat &input_rgb, cv::Mat &input_gray, int c
      std::vector<cv::Mat> color_feat;
      if ((m_use_color || m_use_cnfeat) && input_rgb.channels() == 3) {
          // resize to default size
-        if (scale > 1.) {
+        if (scaled.area() > (fit_size / p_cell_size).area()) {
              // if we downsample use  INTER_AREA interpolation
-            cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_AREA);
+            cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_AREA);
          } else {
-            cv::resize(patch_rgb, patch_rgb, cv::Size(size_x / p_cell_size, size_y / p_cell_size), 0., 0., cv::INTER_LINEAR);
+            cv::resize(patch_rgb, patch_rgb, fit_size / p_cell_size, 0., 0., cv::INTER_LINEAR);
          }
      }
  
diff --git a/src/kcf.h b/src/kcf.h

index 42c68f7c3ed1a8e80b0cbbe5dfd8ffea50dee913..2916939c8af4499377721622ea76539461c96113 100644 (file)
--- a/src/kcf.h
+++ b/src/kcf.h
@@ -36,18 +36,6 @@ struct BBox_c
          h  *= factor;
      }
  
-    inline void scale_x(double factor)
-    {
-        cx *= factor;
-        w  *= factor;
-    }
-
-    inline void scale_y(double factor)
-    {
-        cy *= factor;
-        h  *= factor;
-    }
-
      inline cv::Rect get_rect()
      {
          return cv::Rect(int(cx-w/2.), int(cy-h/2.), int(w), int(h));
@@ -104,11 +92,8 @@ private:
      double max_response = -1.;
  
      bool p_resize_image = false;
-    bool p_fit_to_pw2 = false;
  
      const double p_downscale_factor = 0.5;
-    double p_fit_factor_x = 1;
-    double p_fit_factor_y = 1;
      const double p_floating_error = 0.0001;
  
      const double p_padding = 1.5;
@@ -117,7 +102,8 @@ private:
      const double p_kernel_sigma = 0.5;    //def = 0.5
      const double p_lambda = 1e-4;         //regularization in learning step
      const double p_interp_factor = 0.02;  //def = 0.02, linear interpolation factor for adaptation
-    cv::Size p_windows_size;
+    cv::Size p_windows_size;              // size of the patch to find the tracked object in
+    cv::Size fit_size;                    // size to which rescale the patch for better FFT performance
  
      const uint p_num_scales = m_use_scale ? 7 : 1;
      const double p_scale_step = 1.02;
author	Michal Sojka <michal.sojka@cvut.cz>
	Wed, 26 Sep 2018 21:38:44 +0000 (23:38 +0200)
committer	Michal Sojka <michal.sojka@cvut.cz>
	Thu, 27 Sep 2018 05:31:54 +0000 (07:31 +0200)
README.md		patch \| blob \| history
main_vot.cpp		patch \| blob \| history
src/kcf.cpp		patch \| blob \| history
src/kcf.h		patch \| blob \| history