DynMem has a device pointer only when compiling for CUDA.
#endif
template <typename T> class DynMem_ {
- T *ptr = nullptr;
+ private:
+ T *ptr_h = nullptr;
+#ifdef CUFFT
T *ptr_d = nullptr;
-
+#endif
public:
typedef T type;
-
- DynMem_()
- {}
+ DynMem_() {}
DynMem_(size_t size)
{
#ifdef CUFFT
CudaSafeCall(
cudaHostGetDevicePointer(reinterpret_cast<void **>(&this->ptr_d), reinterpret_cast<void *>(this->ptr), 0));
#else
- this->ptr = new float[size];
+ this->ptr_h = new float[size];
#endif
}
DynMem_(DynMem_&& other) {
- this->ptr = other.ptr;
+ this->ptr_h = other.ptr_h; // move constructor: adopt other's host buffer pointer
+ other.ptr_h = nullptr; // other gives up the buffer so only one object refers to it
+#ifdef CUFFT
this->ptr_d = other.ptr_d;
-
- other.ptr = nullptr;
other.ptr_d = nullptr;
+#endif // ptr_d is declared only when CUFFT is defined
}
~DynMem_()
{
#ifdef CUFFT
CudaSafeCall(cudaFreeHost(this->ptr));
#else
- delete[] this->ptr;
+ delete[] this->ptr_h;
#endif
}
- T *hostMem() { return ptr; }
+ T *hostMem() { return ptr_h; } // host-side buffer pointer; nullptr after default construction or after being moved from
+#ifdef CUFFT // deviceMem() exists only in builds that define CUFFT, like the ptr_d member it returns
T *deviceMem() { return ptr_d; }
-
+#endif
void operator=(DynMem_ &&rhs)
{
- this->ptr = rhs.ptr;
+ this->ptr_h = rhs.ptr_h;
+ rhs.ptr_h = nullptr;
+#ifdef CUFFT
this->ptr_d = rhs.ptr_d;
-
- rhs.ptr = nullptr;
rhs.ptr_d = nullptr;
+#endif
}
};
+
typedef DynMem_<float> DynMem;
-class MatDynMem : public DynMem, public cv::Mat {
+
+class MatDynMem : protected DynMem, public cv::Mat {
public:
MatDynMem(cv::Size size, int type)
: DynMem(size.area() * sizeof(DynMem::type) * CV_MAT_CN(type)), cv::Mat(size, type, hostMem())
assert((type & CV_MAT_DEPTH_MASK) == CV_32F);
}
MatDynMem(int height, int width, int type) { MatDynMem(cv::Size(width, height), type); }
+ // N-dimensional constructor: requests volume(ndims, sizes) * sizeof(float) *
+ // channel-count from DynMem, then builds an ndims-dimensional cv::Mat header
+ // over hostMem(). DynMem is listed first among the bases, so the buffer
+ // exists before the cv::Mat base is constructed.
+ MatDynMem(int ndims, const int *sizes, int type)
+ : DynMem(volume(ndims, sizes) * sizeof(DynMem::type) * CV_MAT_CN(type)),
+ cv::Mat(ndims, sizes, type, hostMem())
+ {
+ // Only 32-bit float depth is accepted.
+ assert((type & CV_MAT_DEPTH_MASK) == CV_32F);
+ }
+ // Assigns a matrix expression to the cv::Mat base of this object.
+ // The cast must be to a reference: casting to cv::Mat by value creates a
+ // temporary header and the assignment would go to that temporary.
+ // NOTE(review): if expr's size or type differs from this matrix, cv::Mat
+ // presumably switches to storage of its own and no longer uses the DynMem
+ // buffer - confirm callers only assign same-shaped expressions.
+ void operator=(const cv::MatExpr &expr) {
+ static_cast<cv::Mat &>(*this) = expr;
+ }
+
+ private:
+ // Product of the first ndims entries of sizes; yields 1 when ndims <= 0.
+ static int volume(int ndims, const int *sizes)
+ {
+ int product = 1;
+ const int *extent = sizes;
+ for (int remaining = ndims; remaining > 0; --remaining, ++extent) {
+ product *= *extent;
+ }
+ return product;
+ }
};
#endif // DYNMEM_HPP
#include <opencv2/opencv.hpp>
#include <vector>
+#include <cassert>
#ifdef CUFFT
#include "complexmat.cuh"
{
public:
virtual void init(unsigned width, unsigned height,unsigned num_of_feats, unsigned num_of_scales) = 0;
- virtual void set_window(const cv::Mat & window) = 0;
- virtual void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) = 0;
- virtual void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) = 0;
- virtual void inverse(ComplexMat & complex_input, cv::Mat & real_result, float *real_result_arr) = 0;
+ virtual void set_window(const MatDynMem &window) = 0;
+ virtual void forward(const cv::Mat & real_input, ComplexMat & complex_result) = 0;
+ virtual void forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp) = 0;
+ virtual void inverse(ComplexMat & complex_input, MatDynMem & real_result) = 0;
virtual ~Fft() = 0;
static cv::Size freq_size(cv::Size space_size)
#endif
return ret;
}
+
+protected:
+ // Reports whether patch_feats is a 3-dimensional matrix. The per-axis
+ // extent checks are kept below but remain disabled.
+ bool is_patch_feats_valid(const MatDynMem &patch_feats)
+ {
+ const bool is_three_dimensional = (3 == patch_feats.dims);
+ // && patch_feats.size[1] == width
+ // && patch_feats.size[2] == height
+ return is_three_dimensional;
+ }
};
#endif // FFT_H
#endif
}
-void cuFFT::set_window(const cv::Mat &window)
+void cuFFT::set_window(const MatDynMem &window) // assigns window to the cv::Mat member m_window
{
m_window = window; // NOTE(review): cv::Mat assignment shares the data buffer instead of copying it, so window presumably has to outlive m_window's use - confirm
}
return;
}
-void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
- float *real_input_arr)
+void cuFFT::forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp)
{
int n_channels = int(patch_feats.size());
return;
}
-void cuFFT::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr)
+void cuFFT::inverse(ComplexMat & complex_input, MatDynMem & real_result)
{
int n_channels = complex_input.n_channels;
cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
{
public:
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales) override;
- void set_window(const cv::Mat & window) override;
- void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
- void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
- void inverse(ComplexMat & complex_input, cv::Mat & real_result, float *real_result_arr) override;
+ void set_window(const MatDynMem &window) override;
+ void forward(const cv::Mat & real_input, ComplexMat & complex_result) override;
+ void forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp) override;
+ void inverse(ComplexMat & complex_input, MatDynMem & real_result) override;
~cuFFT() override;
private:
cv::Mat m_window;
#endif
}
-void Fftw::set_window(const cv::Mat &window)
+void Fftw::set_window(const MatDynMem &window) // assigns window to the member m_window (its declaration is not visible in this hunk)
{
m_window = window; // NOTE(review): if m_window is a cv::Mat, this shares window's buffer rather than copying it - confirm the lifetime of window
}
-void Fftw::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr)
+void Fftw::forward(const cv::Mat & real_input, ComplexMat & complex_result)
{
- (void)real_input_arr;
-
if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
return;
}
-void Fftw::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
- float *real_input_arr)
+void Fftw::forward_window(MatDynMem &feat, ComplexMat & complex_result, MatDynMem &temp)
{
- (void)real_input_arr;
+ assert(is_patch_feats_valid(feat));
- int n_channels = int(patch_feats.size());
+ int n_channels = feat.size[0];
for (int i = 0; i < n_channels; ++i) {
- cv::Mat in_roi(fw_all, cv::Rect(0, i * int(m_height), int(m_width), int(m_height)));
- in_roi = patch_feats[uint(i)].mul(m_window);
+ cv::Mat feat_plane(feat.dims - 1, feat.size + 1, feat.cv::Mat::type(), feat.ptr<void>(i));
+ cv::Mat temp_plane(temp.dims - 1, temp.size + 1, temp.cv::Mat::type(), temp.ptr(i));
+ temp_plane = feat_plane.mul(m_window);
}
- float *in = reinterpret_cast<float *>(fw_all.data);
+ float *in = temp.ptr<float>();
fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
if (n_channels <= int(m_num_of_feats))
return;
}
-void Fftw::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr)
+void Fftw::inverse(ComplexMat & complex_input, MatDynMem & real_result)
{
- (void)real_result_arr;
-
int n_channels = complex_input.n_channels;
fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
- float *out = reinterpret_cast<float *>(real_result.data);
+ float *out = real_result.ptr<float>();
if (n_channels == 1)
fftwf_execute_dft_c2r(plan_i_1ch, in, out);
fftwf_execute_dft_c2r(plan_i_features, in, out);
real_result = real_result / (m_width * m_height);
- return;
}
Fftw::~Fftw()
public:
Fftw();
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales) override;
- void set_window(const cv::Mat & window) override;
- void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
- void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
- void inverse(ComplexMat & complex_input, cv::Mat & real_result, float *real_result_arr) override;
+ void set_window(const MatDynMem &window) override;
+ void forward(const cv::Mat & real_input, ComplexMat & complex_result) override;
+ void forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp) override;
+ void inverse(ComplexMat & complex_input, MatDynMem & real_result) override;
~Fftw() override;
private:
unsigned m_width, m_height, m_num_of_feats, m_num_of_scales;
std::cout << "FFT: OpenCV" << std::endl;
}
-void FftOpencv::set_window(const cv::Mat &window)
+void FftOpencv::set_window(const MatDynMem &window) // assigns window to the cv::Mat member m_window
{
m_window = window; // NOTE(review): cv::Mat assignment shares the data buffer instead of copying it, so window presumably has to outlive m_window's use - confirm
}
return;
}
-void FftOpencv::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
- float *real_input_arr)
+void FftOpencv::forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp)
{
(void)real_input_arr;
(void)fw_all;
return;
}
-void FftOpencv::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr)
+void FftOpencv::inverse(ComplexMat & complex_input, MatDynMem & real_result)
{
(void)real_result_arr;
{
public:
void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales) override;
- void set_window(const cv::Mat & window) override;
- void forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr) override;
- void forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr) override;
- void inverse(ComplexMat & complex_input, cv::Mat & real_result, float *real_result_arr) override;
+ void set_window(const MatDynMem &window) override;
+ void forward(const cv::Mat & real_input, ComplexMat & complex_result) override;
+ void forward_window(MatDynMem &patch_feats_in, ComplexMat & complex_result, MatDynMem &tmp) override;
+ void inverse(ComplexMat & complex_input, MatDynMem & real_result) override;
~FftOpencv() override;
private:
cv::Mat m_window;
p_roi.width = p_windows_size.width / p_cell_size;
p_roi.height = p_windows_size.height / p_cell_size;
- p_num_of_feats = 31;
- if (m_use_color) p_num_of_feats += 3;
- if (m_use_cnfeat) p_num_of_feats += 10;
-
p_scales.clear();
if (m_use_scale)
for (int i = -int(p_num_scales) / 2; i <= int(p_num_scales) / 2; ++i)
fft.set_window(cosine_window_function(p_roi.width, p_roi.height));
// window weights, i.e. labels
- fft.forward(
- gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf,
- m_use_cuda ? p_rot_labels_data.deviceMem() : nullptr);
+ fft.forward(gaussian_shaped_labels(p_output_sigma, p_roi.width, p_roi.height), p_yf);
DEBUG_PRINTM(p_yf);
// obtain a sub-window for training initial model
std::vector<cv::Mat> patch_feats = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy,
p_windows_size.width, p_windows_size.height);
- fft.forward_window(patch_feats, p_model_xf, d.threadctxs.front().fw_all,
- m_use_cuda ? d.threadctxs.front().data_features.deviceMem() : nullptr);
+ fft.forward_window(patch_feats, p_model_xf);
DEBUG_PRINTM(p_model_xf);
if (m_use_linearkernel) {
// ****************************************************************************
-std::vector<cv::Mat> KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale)
+void KCF_Tracker::get_features(MatDynMem &result_3d, cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale)
{
+ assert(result_3d.size[0] == num_of_feats());
+ assert(result_3d.size[1] == size_x);
+ assert(result_3d.size[2] == size_y);
+
int size_x_scaled = floor(size_x * scale);
int size_y_scaled = floor(size_y * scale);
double p_min_max_scale[2];
std::vector<double> p_scales;
- //for big batch
- int p_num_of_feats;
+ // Number of feature channels: 31, plus 3 when m_use_color is set, plus 10
+ // when m_use_cnfeat is set. Each conditional is parenthesised because ?:
+ // binds weaker than +; without the parentheses the whole expression parses
+ // as (31 + m_use_color) ? 3 : ... and always yields 3.
+ // NOTE(review): this initializer reads m_use_color / m_use_cnfeat at
+ // construction time - confirm both are declared before this member and are
+ // not changed afterwards.
+ const int p_num_of_feats = 31 + (m_use_color ? 3 : 0) + (m_use_cnfeat ? 10 : 0);
cv::Size p_roi;
Kcf_Tracker_Private &d;
std::unique_ptr<GaussianCorrelation> gaussian_correlation;
cv::Mat circshift(const cv::Mat & patch, int x_rot, int y_rot);
cv::Mat cosine_window_function(int dim1, int dim2);
- std::vector<cv::Mat> get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.);
+ void get_features(MatDynMem &feat_3d, cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.);
cv::Point2f sub_pixel_peak(cv::Point & max_loc, cv::Mat & response);
double sub_grid_scale(uint index);
#ifdef BIG_BATCH
// Stores value of responses, location of maximal response and response maps for each scale
- std::vector<double> max_responses{num_of_scales};
- std::vector<cv::Point2i> max_locs{num_of_scales};
- std::vector<cv::Mat> response_maps{num_of_scales};
+ std::vector<double> max_responses = std::vector<double>(num_of_scales);
+ std::vector<cv::Point2i> max_locs = std::vector<cv::Point2i>(num_of_scales);
+ std::vector<cv::Mat> response_maps = std::vector<cv::Mat>(num_of_scales);
#else
const double scale;
#endif