# Optional build features; every switch below defaults to OFF.
option(OPENMP "Use OpenMP library. Works with FFTW and OpenCV implementation." OFF)
option(ASYNC "Works only if OPENCV_CUFFT is not ON. Will enable C++ async directive." OFF)
option(PROFILING "Enable profiling using perf_event_open together with libpfm4. " OFF)
+option(BIG_BATCH "Enable transforming all features from all scales together." OFF)
# When the PROFILING option is ON, pass -DPROFILING to the compiler and report it.
IF(PROFILING)
add_definitions(-DPROFILING )
MESSAGE(STATUS "Profiling mode")
ENDIF()
+IF(BIG_BATCH) # when the option is ON, pass -DBIG_BATCH to the compiler and report it
+ add_definitions(-DBIG_BATCH )
+ MESSAGE(STATUS "Big_batch mode")
+ENDIF()
+
SET(use_cuda OFF)
IF(FFT STREQUAL "OpenCV")
p_data = convert(mat);
}
+ /// Build a matrix of the given dimensions around an already filled element buffer.
+ /// The contents of `data` become p_data unchanged; the size of `data` is not
+ /// checked against _rows * _cols * _n_channels.
+ ComplexMat_(int _rows, int _cols, int _n_channels,std::vector<std::complex<T>> data) : cols(_cols), rows(_rows), n_channels(_n_channels)
+ {
+     // `data` is already our private by-value copy: take over its storage in O(1)
+     // instead of deep-copying every element a second time.
+     p_data.swap(data);
+ }
// cv::Mat API compatibility
// Dimensions as cv::Size(cols, rows); const so it can be called on const matrices.
cv::Size size() const { return cv::Size(cols, rows); }
int channels() { return n_channels; }
}
}
+
T sqr_norm() const
{
T sum_sqr_norm = 0;
return p_data;
}
+ /// Return a copy of the id-th block of n_of_feat channels as a new matrix.
+ ///
+ /// The result has the same rows/cols as this matrix and n_of_feat channels; it
+ /// holds the elements [id*rows*cols*n_of_feat, (id+1)*rows*cols*n_of_feat) of p_data.
+ /// No bounds check is performed: the caller must make sure that range lies
+ /// inside p_data.
+ ComplexMat_<T> get_part(int id, int n_of_feat) const
+ {
+     // number of elements occupied by one block of n_of_feat channels
+     const auto part_len = rows*cols*n_of_feat;
+     // The temporary vector is constructed directly as the constructor argument,
+     // so the slice is copied out of p_data only once.
+     return ComplexMat_<T>(this->rows, this->cols, n_of_feat,
+                           std::vector<std::complex<T>>(p_data.begin() + id*part_len,
+                                                        p_data.begin() + (id+1)*part_len));
+ }
+
//element-wise per channel multiplication, division and addition
ComplexMat_<T> operator*(const ComplexMat_<T> & rhs) const
{
m_height = height;
plan_f = NULL;
plan_fw = NULL;
+ plan_fwh = NULL;
plan_if = NULL;
plan_ir = NULL;
float *in = reinterpret_cast<float*>(in_all.data);
fftwf_complex *out = reinterpret_cast<fftwf_complex*>(complex_result.data);
- if(!plan_fw){
- int rank = 2;
- int n[] = {(int)m_height, (int)m_width};
- int howmany = n_channels;
- int idist = m_height*m_width, odist = m_height*(m_width/2+1);
- int istride = 1, ostride = 1;
- int *inembed = NULL, *onembed = NULL;
+ if(n_channels <= 44){
+ if(!plan_fw){
+ int rank = 2;
+ int n[] = {(int)m_height, (int)m_width};
+ int howmany = n_channels;
+ int idist = m_height*m_width, odist = m_height*(m_width/2+1);
+ int istride = 1, ostride = 1;
+ int *inembed = NULL, *onembed = NULL;
#pragma omp critical
#ifdef ASYNC
- std::unique_lock<std::mutex> lock(fftw_mut);
- fftw_plan_with_nthreads(2);
+ std::unique_lock<std::mutex> lock(fftw_mut);
+ fftw_plan_with_nthreads(2);
#elif OPENMP
#pragma omp critical
- fftw_plan_with_nthreads(omp_get_max_threads());
+ fftw_plan_with_nthreads(omp_get_max_threads());
#endif
- plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany,
- in, inembed, istride, idist,
- out, onembed, ostride, odist,
- FFTW_ESTIMATE);
- fftwf_execute(plan_fw);
- }else{fftwf_execute_dft_r2c(plan_fw,in,out);}
-
+ plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany,
+ in, inembed, istride, idist,
+ out, onembed, ostride, odist,
+ FFTW_ESTIMATE);
+ fftwf_execute(plan_fw);
+ }else{fftwf_execute_dft_r2c(plan_fw,in,out);}
+ } else {
+ if(!plan_fwh){
+ int rank = 2;
+ int n[] = {(int)m_height, (int)m_width};
+ int howmany = n_channels;
+ int idist = m_height*m_width, odist = m_height*(m_width/2+1);
+ int istride = 1, ostride = 1;
+ int *inembed = NULL, *onembed = NULL;
+#pragma omp critical
+#ifdef ASYNC
+ std::unique_lock<std::mutex> lock(fftw_mut);
+ fftw_plan_with_nthreads(2);
+#elif OPENMP
+#pragma omp critical
+ fftw_plan_with_nthreads(omp_get_max_threads());
+#endif
+ plan_fwh = fftwf_plan_many_dft_r2c(rank, n, howmany,
+ in, inembed, istride, idist,
+ out, onembed, ostride, odist,
+ FFTW_ESTIMATE);
+ fftwf_execute(plan_fwh);
+ }else{fftwf_execute_dft_r2c(plan_fwh,in,out);}
+ }
ComplexMat result(m_height, m_width/2 + 1, n_channels);
for (int i = 0; i < n_channels; ++i)
result.set_channel(i, complex_result(cv::Rect(0, i*m_height, m_width/2+1, m_height)));
-
return result;
}
{
fftwf_destroy_plan(plan_f);
fftwf_destroy_plan(plan_fw);
+ fftwf_destroy_plan(plan_fwh);
fftwf_destroy_plan(plan_if);
fftwf_destroy_plan(plan_ir);
}
private:
unsigned m_width, m_height;
cv::Mat m_window;
- fftwf_plan plan_f, plan_fw, plan_if, plan_ir;
+ fftwf_plan plan_f, plan_fw, plan_fwh, plan_if, plan_ir;
#if defined(ASYNC)
std::mutex fftw_mut;
#endif
//obtain a sub-window for training initial model
std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
+ if(m_use_big_batch) num_of_feats = path_feat.size();
p_model_xf = fft.forward_window(path_feat);
DEBUG_PRINTM(p_model_xf);
ComplexMat kf = gaussian_correlation(p_model_xf, p_model_xf, p_kernel_sigma, true);
DEBUG_PRINTM(kf);
p_model_alphaf_num = p_yf * kf;
+ DEBUG_PRINTM(p_model_alphaf_num);
p_model_alphaf_den = kf * (kf + p_lambda);
+ DEBUG_PRINTM(p_model_alphaf_den);
}
p_model_alphaf = p_model_alphaf_num / p_model_alphaf_den;
+ DEBUG_PRINTM(p_model_alphaf);
// p_model_alphaf = p_yf / (kf + p_lambda); //equation for fast training
}
cv::Point2i min_loc, max_loc;
cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc);
+ double weight = p_scales[i] < 1. ? p_scales[i] : 1./p_scales[i];
+ if (max_val*weight > max_response) {
+ max_response = max_val*weight;
+ max_response_map = response;
+ max_response_pt = max_loc;
+ scale_index = i;
+ }
+ scale_responses.push_back(max_val*weight);
+ }
+ } else if(m_use_big_batch){
+ for (size_t i = 0; i < p_scales.size(); ++i) {
+ std::vector<cv::Mat> tmp = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale * p_scales[i]);
+ patch_feat.insert(std::end(patch_feat), std::begin(tmp), std::end(tmp));
+ }
+ ComplexMat zf = fft.forward_window(patch_feat);
+ DEBUG_PRINTM(zf);
+ cv::Mat response;
+ for (size_t i = 0; i < p_scales.size(); ++i) {
+ if (m_use_linearkernel)
+ response = fft.inverse((p_model_alphaf * zf.get_part(i,num_of_feats)).sum_over_channels());
+ else {
+ ComplexMat kzf = gaussian_correlation(zf.get_part(i,num_of_feats), p_model_xf, p_kernel_sigma);
+ DEBUG_PRINTM(p_model_alphaf);
+ DEBUG_PRINTM(kzf);
+ DEBUG_PRINTM(p_model_alphaf * kzf);
+ response = fft.inverse(p_model_alphaf * kzf);
+ }
+ DEBUG_PRINTM(response);
+
+ /* target location is at the maximum response. we must take into
+ account the fact that, if the target doesn't move, the peak
+ will appear at the top-left corner, not at the center (this is
+ discussed in the paper). the responses wrap around cyclically. */
+ double min_val, max_val;
+ cv::Point2i min_loc, max_loc;
+ cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc);
+ DEBUG_PRINT(max_loc);
+
double weight = p_scales[i] < 1. ? p_scales[i] : 1./p_scales[i];
if (max_val*weight > max_response) {
max_response = max_val*weight;
ComplexMat kzf = gaussian_correlation(zf, p_model_xf, p_kernel_sigma);
DEBUG_PRINTM(p_model_alphaf);
DEBUG_PRINTM(kzf);
+ DEBUG_PRINTM(p_model_alphaf * kzf);
response = fft.inverse(p_model_alphaf * kzf);
}
DEBUG_PRINTM(response);
bool m_use_subgrid_scale {true};
bool m_use_cnfeat {true};
bool m_use_linearkernel {false};
+#ifdef BIG_BATCH // compile-time switch: transform the features of all scales together
+ bool m_use_big_batch {true};
+#else
+ bool m_use_big_batch {false}; // default when BIG_BATCH is not defined
+#endif
/*
padding ... extra area surrounding the target (1.5)
double p_min_max_scale[2];
std::vector<double> p_scales;
+ // big batch: number of feature maps get_features() returned for the initial window;
+ // used to slice the batched FFT result per scale. Zero until it is assigned
+ // (only done when m_use_big_batch is set), so it never holds an indeterminate value.
+ int num_of_feats {0};
+
//model
ComplexMat p_yf;
ComplexMat p_model_alphaf;
ComplexMat p_model_alphaf_num;
ComplexMat p_model_alphaf_den;
ComplexMat p_model_xf;
-
//helping functions
cv::Mat get_subwindow(const cv::Mat & input, int cx, int cy, int size_x, int size_y);
cv::Mat gaussian_shaped_labels(double sigma, int dim1, int dim2);