]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blobdiff - src/fft_fftw.cpp
test: Decrease accuracy limit
[hercules2020/kcf.git] / src / fft_fftw.cpp
index 132b2459d92fcb7cf00da9964321078146c36c6c..1d26269baaaa3a59dcfdb66e863bd9ef49dda9db 100644 (file)
@@ -1,26 +1,57 @@
 #include "fft_fftw.h"
-
-#include "fft.h"
+#include <unistd.h>
 
 #ifdef OPENMP
 #include <omp.h>
 #endif
 
-#if !defined(ASYNC) && !defined(OPENMP) && !defined(CUFFTW)
-#define FFTW_PLAN_WITH_THREADS() fftw_plan_with_nthreads(4);
-#else
-#define FFTW_PLAN_WITH_THREADS()
-#endif
-
 Fftw::Fftw(){}
 
+fftwf_plan Fftw::create_plan_fwd(uint howmany) const
+{
+    cv::Mat mat_in = cv::Mat::zeros(howmany * m_height, m_width, CV_32F);
+    ComplexMat mat_out(m_height, m_width / 2 + 1, howmany);
+    float *in = reinterpret_cast<float *>(mat_in.data);
+    fftwf_complex *out = reinterpret_cast<fftwf_complex *>(mat_out.get_p_data());
+
+    int rank = 2;
+    int n[] = {(int)m_height, (int)m_width};
+    int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
+    int istride = 1, ostride = 1;
+    int *inembed = NULL, *onembed = NULL;
+
+    return fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, FFTW_PATIENT);
+}
+
+fftwf_plan Fftw::create_plan_inv(uint howmany) const
+{
+    ComplexMat mat_in(m_height, m_width / 2 + 1, howmany);
+    cv::Mat mat_out = cv::Mat::zeros(howmany * m_height, m_width, CV_32F);
+    fftwf_complex *in = reinterpret_cast<fftwf_complex *>(mat_in.get_p_data());
+    float *out = reinterpret_cast<float *>(mat_out.data);
+
+    int rank = 2;
+    int n[] = {(int)m_height, (int)m_width};
+    int idist = m_height * (m_width / 2 + 1), odist = m_height * m_width;
+    int istride = 1, ostride = 1;
+    int *inembed = nullptr, *onembed = nullptr;
+
+    return fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, FFTW_PATIENT);
+}
+
 void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
 {
     Fft::init(width, height, num_of_feats, num_of_scales);
 
-#if (!defined(ASYNC) && !defined(CUFFTW)) && defined(OPENMP)
+#if !defined(CUFFTW) && defined(BIG_BATCH)
     fftw_init_threads();
-#endif // OPENMP
+  #if defined(OPENMP)
+    fftw_plan_with_nthreads(omp_get_max_threads());
+  #else
+    int np = sysconf(_SC_NPROCESSORS_ONLN);
+    fftw_plan_with_nthreads(np);
+  #endif
+#endif
 
 #ifndef CUFFTW
     std::cout << "FFT: FFTW" << std::endl;
@@ -28,121 +59,16 @@ void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned
     std::cout << "FFT: cuFFTW" << std::endl;
 #endif
     fftwf_cleanup();
-    // FFT forward one scale
-    {
-        cv::Mat in_f = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
-        ComplexMat out_f(int(m_height), m_width / 2 + 1, 1);
-        plan_f = fftwf_plan_dft_r2c_2d(int(m_height), int(m_width), reinterpret_cast<float *>(in_f.data),
-                                       reinterpret_cast<fftwf_complex *>(out_f.get_p_data()), FFTW_PATIENT);
-    }
-#ifdef BIG_BATCH
-    // FFT forward all scales
-    if (m_num_of_scales > 1) {
-        cv::Mat in_f_all = cv::Mat::zeros(m_height * m_num_of_scales, m_width, CV_32F);
-        ComplexMat out_f_all(m_height, m_width / 2 + 1, m_num_of_scales);
-        float *in = reinterpret_cast<float *>(in_f_all.data);
-        fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_f_all.get_p_data());
-        int rank = 2;
-        int n[] = {(int)m_height, (int)m_width};
-        int howmany = m_num_of_scales;
-        int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
-        int istride = 1, ostride = 1;
-        int *inembed = NULL, *onembed = NULL;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_f_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
-                                                    ostride, odist, FFTW_PATIENT);
-    }
-#endif
-    // FFT forward window one scale
-    {
-        cv::Mat in_fw = cv::Mat::zeros(int(m_height * m_num_of_feats), int(m_width), CV_32F);
-        ComplexMat out_fw(int(m_height), m_width / 2 + 1, int(m_num_of_feats));
-        float *in = reinterpret_cast<float *>(in_fw.data);
-        fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_fw.get_p_data());
-        int rank = 2;
-        int n[] = {int(m_height), int(m_width)};
-        int howmany = int(m_num_of_feats);
-        int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
-        int istride = 1, ostride = 1;
-        int *inembed = nullptr, *onembed = nullptr;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist,
-                                          FFTW_PATIENT);
-    }
-#ifdef BIG_BATCH
-    // FFT forward window all scales all feats
-    if (m_num_of_scales > 1) {
-        cv::Mat in_all = cv::Mat::zeros(m_height * (m_num_of_scales * m_num_of_feats), m_width, CV_32F);
-        ComplexMat out_all(m_height, m_width / 2 + 1, m_num_of_scales * m_num_of_feats);
-        float *in = reinterpret_cast<float *>(in_all.data);
-        fftwf_complex *out = reinterpret_cast<fftwf_complex *>(out_all.get_p_data());
-        int rank = 2;
-        int n[] = {(int)m_height, (int)m_width};
-        int howmany = m_num_of_scales * m_num_of_feats;
-        int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
-        int istride = 1, ostride = 1;
-        int *inembed = NULL, *onembed = NULL;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_fw_all_scales = fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed,
-                                                     ostride, odist, FFTW_PATIENT);
-    }
-#endif
-    // FFT inverse one scale
-    {
-        ComplexMat in_i(m_height, m_width, m_num_of_feats);
-        cv::Mat out_i = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC(int(m_num_of_feats)));
-        fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i.get_p_data());
-        float *out = reinterpret_cast<float *>(out_i.data);
-        int rank = 2;
-        int n[] = {int(m_height), int(m_width)};
-        int howmany = int(m_num_of_feats);
-        int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
-        int istride = 1, ostride = int(m_num_of_feats);
-        int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_i_features = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
-                                                  odist, FFTW_PATIENT);
-    }
-    // FFT inverse all scales
+
+    plan_f = create_plan_fwd(1);
+    plan_fw = create_plan_fwd(m_num_of_feats);
+    plan_i_1ch = create_plan_inv(1);
+
 #ifdef BIG_BATCH
-    if (m_num_of_scales > 1) {
-        ComplexMat in_i_all(m_height, m_width, m_num_of_feats * m_num_of_scales);
-        cv::Mat out_i_all = cv::Mat::zeros(m_height, m_width, CV_32FC(m_num_of_feats * m_num_of_scales));
-        fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i_all.get_p_data());
-        float *out = reinterpret_cast<float *>(out_i_all.data);
-        int rank = 2;
-        int n[] = {(int)m_height, (int)m_width};
-        int howmany = m_num_of_feats * m_num_of_scales;
-        int idist = m_height * (m_width / 2 + 1), odist = 1;
-        int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
-        int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_i_features_all_scales = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out,
-                                                             onembed, ostride, odist, FFTW_PATIENT);
-    }
+    plan_f_all_scales = create_plan_fwd(m_num_of_scales);
+    plan_fw_all_scales = create_plan_fwd(m_num_of_scales * m_num_of_feats);
+    plan_i_all_scales = create_plan_inv(m_num_of_scales);
 #endif
-    // FFT inverse one channel
-    {
-        ComplexMat in_i1(int(m_height), int(m_width), 1);
-        cv::Mat out_i1 = cv::Mat::zeros(int(m_height), int(m_width), CV_32FC1);
-        fftwf_complex *in = reinterpret_cast<fftwf_complex *>(in_i1.get_p_data());
-        float *out = reinterpret_cast<float *>(out_i1.data);
-        int rank = 2;
-        int n[] = {int(m_height), int(m_width)};
-        int howmany = IF_BIG_BATCH(m_num_of_scales, 1);
-        int idist = m_height * (m_width / 2 + 1), odist = 1;
-        int istride = 1, ostride = IF_BIG_BATCH(m_num_of_scales, 1);
-        int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
-
-        FFTW_PLAN_WITH_THREADS();
-        plan_i_1ch = fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride,
-                                             odist, FFTW_PATIENT);
-    }
 }
 
 void Fftw::set_window(const MatDynMem &window)
@@ -155,25 +81,25 @@ void Fftw::forward(const MatScales &real_input, ComplexMat &complex_result)
 {
     Fft::forward(real_input, complex_result);
 
-    if (BIG_BATCH_MODE && real_input.rows == int(m_height * IF_BIG_BATCH(m_num_of_scales, 1))) {
-        fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
-                              reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
-    } else {
+    if (real_input.size[0] == 1)
         fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
                               reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
-    }
-    return;
+#ifdef BIG_BATCH
+    else
+        fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
+                              reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
+#endif
 }
 
 void Fftw::forward_window(MatScaleFeats  &feat, ComplexMat & complex_result, MatScaleFeats &temp)
 {
     Fft::forward_window(feat, complex_result, temp);
 
-    uint n_channels = feat.size[0];
-    for (uint i = 0; i < n_channels; ++i) {
-        for (uint j = 0; j < uint(feat.size[1]); ++j) {
-            cv::Mat feat_plane = feat.plane(i, j);
-            cv::Mat temp_plane = temp.plane(i, j);
+    uint n_scales = feat.size[0];
+    for (uint s = 0; s < n_scales; ++s) {
+        for (uint ch = 0; ch < uint(feat.size[1]); ++ch) {
+            cv::Mat feat_plane = feat.plane(s, ch);
+            cv::Mat temp_plane = temp.plane(s, ch);
             temp_plane = feat_plane.mul(m_window);
         }
     }
@@ -181,14 +107,15 @@ void Fftw::forward_window(MatScaleFeats  &feat, ComplexMat & complex_result, Mat
     float *in = temp.ptr<float>();
     fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
 
-    if (n_channels <= m_num_of_feats)
+    if (n_scales == 1)
         fftwf_execute_dft_r2c(plan_fw, in, out);
+#ifdef BIG_BATCH
     else
         fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
-    return;
+#endif
 }
 
-void Fftw::inverse(ComplexMat &  complex_input, MatDynMem & real_result)
+void Fftw::inverse(ComplexMat &complex_input, MatScales &real_result)
 {
     Fft::inverse(complex_input, real_result);
 
@@ -196,27 +123,24 @@ void Fftw::inverse(ComplexMat &  complex_input, MatDynMem & real_result)
     fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
     float *out = real_result.ptr<float>();
 
-    if (n_channels == 1|| (BIG_BATCH_MODE && n_channels == int(IF_BIG_BATCH(m_num_of_scales, 1))))
+    if (n_channels == 1)
         fftwf_execute_dft_c2r(plan_i_1ch, in, out);
-    else if (BIG_BATCH_MODE && n_channels == int(m_num_of_feats) * int(IF_BIG_BATCH(m_num_of_scales, 1)))
-        fftwf_execute_dft_c2r(plan_i_features_all_scales, in, out);
+#ifdef BIG_BATCH
     else
-        fftwf_execute_dft_c2r(plan_i_features, in, out);
-
-    real_result = real_result / (m_width * m_height);
+        fftwf_execute_dft_c2r(plan_i_all_scales, in, out);
+#endif
+    real_result *= 1.0 / (m_width * m_height);
 }
 
 Fftw::~Fftw()
 {
-    fftwf_destroy_plan(plan_f);
-    fftwf_destroy_plan(plan_fw);
-    fftwf_destroy_plan(plan_i_features);
-    fftwf_destroy_plan(plan_i_1ch);
-
-    if (BIG_BATCH_MODE) {
-        fftwf_destroy_plan(plan_f_all_scales);
-        fftwf_destroy_plan(plan_i_features_all_scales);
-        fftwf_destroy_plan(plan_fw_all_scales);
-        fftwf_destroy_plan(plan_i_1ch_all_scales);
-    }
+    if (plan_f) fftwf_destroy_plan(plan_f);
+    if (plan_fw) fftwf_destroy_plan(plan_fw);
+    if (plan_i_1ch) fftwf_destroy_plan(plan_i_1ch);
+
+#ifdef BIG_BATCH
+    if (plan_f_all_scales) fftwf_destroy_plan(plan_f_all_scales);
+    if (plan_fw_all_scales) fftwf_destroy_plan(plan_fw_all_scales);
+    if (plan_i_all_scales) fftwf_destroy_plan(plan_i_all_scales);
+#endif
 }