]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blobdiff - src/fft_fftw.cpp
test: Decrease accuracy limit
[hercules2020/kcf.git] / src / fft_fftw.cpp
index c69d46e77213a10695139629c39a207ff7b51f3f..1d26269baaaa3a59dcfdb66e863bd9ef49dda9db 100644 (file)
 #include "fft_fftw.h"
-
-#include "fft.h"
+#include <unistd.h>
 
 #ifdef OPENMP
-  #include <omp.h>
+#include <omp.h>
 #endif
 
-Fftw::Fftw()
+Fftw::Fftw(){}
+
+fftwf_plan Fftw::create_plan_fwd(uint howmany) const
+{
+    cv::Mat mat_in = cv::Mat::zeros(howmany * m_height, m_width, CV_32F);
+    ComplexMat mat_out(m_height, m_width / 2 + 1, howmany);
+    float *in = reinterpret_cast<float *>(mat_in.data);
+    fftwf_complex *out = reinterpret_cast<fftwf_complex *>(mat_out.get_p_data());
+
+    int rank = 2;
+    int n[] = {(int)m_height, (int)m_width};
+    int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
+    int istride = 1, ostride = 1;
+    int *inembed = NULL, *onembed = NULL;
+
+    return fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, FFTW_PATIENT);
+}
+
+fftwf_plan Fftw::create_plan_inv(uint howmany) const
 {
+    ComplexMat mat_in(m_height, m_width / 2 + 1, howmany);
+    cv::Mat mat_out = cv::Mat::zeros(howmany * m_height, m_width, CV_32F);
+    fftwf_complex *in = reinterpret_cast<fftwf_complex *>(mat_in.get_p_data());
+    float *out = reinterpret_cast<float *>(mat_out.data);
+
+    int rank = 2;
+    int n[] = {(int)m_height, (int)m_width};
+    int idist = m_height * (m_width / 2 + 1), odist = m_height * m_width;
+    int istride = 1, ostride = 1;
+    int *inembed = nullptr, *onembed = nullptr;
+
+    return fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, FFTW_PATIENT);
 }
 
-void Fftw::init(unsigned width, unsigned height)
+void Fftw::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
 {
-    m_width = width;
-    m_height = height;
-    plan_f = NULL;
-    plan_fw = NULL;
-    plan_fwh = NULL;
-    plan_if = NULL;
-    plan_ir = NULL;
-
-#if defined(ASYNC) || defined(OPENMP)
+    Fft::init(width, height, num_of_feats, num_of_scales);
+
+#if !defined(CUFFTW) && defined(BIG_BATCH)
     fftw_init_threads();
-#endif //OPENMP
+  #if defined(OPENMP)
+    fftw_plan_with_nthreads(omp_get_max_threads());
+  #else
+    int np = sysconf(_SC_NPROCESSORS_ONLN);
+    fftw_plan_with_nthreads(np);
+  #endif
+#endif
 
 #ifndef CUFFTW
     std::cout << "FFT: FFTW" << std::endl;
 #else
     std::cout << "FFT: cuFFTW" << std::endl;
+#endif
+    fftwf_cleanup();
+
+    plan_f = create_plan_fwd(1);
+    plan_fw = create_plan_fwd(m_num_of_feats);
+    plan_i_1ch = create_plan_inv(1);
+
+#ifdef BIG_BATCH
+    plan_f_all_scales = create_plan_fwd(m_num_of_scales);
+    plan_fw_all_scales = create_plan_fwd(m_num_of_scales * m_num_of_feats);
+    plan_i_all_scales = create_plan_inv(m_num_of_scales);
 #endif
 }
 
-void Fftw::set_window(const cv::Mat &window)
+void Fftw::set_window(const MatDynMem &window)
 {
+    Fft::set_window(window);
     m_window = window;
 }
 
-ComplexMat Fftw::forward(const cv::Mat &input)
+void Fftw::forward(const MatScales &real_input, ComplexMat &complex_result)
 {
-    cv::Mat complex_result(m_height, m_width / 2 + 1, CV_32FC2);
-
-    if(!plan_f){
-#ifdef ASYNC
-        std::unique_lock<std::mutex> lock(fftw_mut);
-        fftw_plan_with_nthreads(2);
-#elif OPENMP
-#pragma omp critical
-        fftw_plan_with_nthreads(omp_get_max_threads());
+    Fft::forward(real_input, complex_result);
+
+    if (real_input.size[0] == 1)
+        fftwf_execute_dft_r2c(plan_f, reinterpret_cast<float *>(real_input.data),
+                              reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
+#ifdef BIG_BATCH
+    else
+        fftwf_execute_dft_r2c(plan_f_all_scales, reinterpret_cast<float *>(real_input.data),
+                              reinterpret_cast<fftwf_complex *>(complex_result.get_p_data()));
 #endif
-#pragma omp critical
-        plan_f = fftwf_plan_dft_r2c_2d(m_height, m_width,
-                                                  reinterpret_cast<float*>(input.data),
-                                                  reinterpret_cast<fftwf_complex*>(complex_result.data),
-                                                  FFTW_ESTIMATE);
-        fftwf_execute(plan_f);
-    }else{fftwf_execute_dft_r2c(plan_f,reinterpret_cast<float*>(input.data),reinterpret_cast<fftwf_complex*>(complex_result.data));}
-
-    return ComplexMat(complex_result);
 }
 
-ComplexMat Fftw::forward_window(const std::vector<cv::Mat> &input)
+void Fftw::forward_window(MatScaleFeats  &feat, ComplexMat & complex_result, MatScaleFeats &temp)
 {
-    int n_channels = input.size();
-    cv::Mat in_all(m_height * n_channels, m_width, CV_32F);
-    for (int i = 0; i < n_channels; ++i) {
-        cv::Mat in_roi(in_all, cv::Rect(0, i*m_height, m_width, m_height));
-        in_roi = input[i].mul(m_window);
+    Fft::forward_window(feat, complex_result, temp);
+
+    uint n_scales = feat.size[0];
+    for (uint s = 0; s < n_scales; ++s) {
+        for (uint ch = 0; ch < uint(feat.size[1]); ++ch) {
+            cv::Mat feat_plane = feat.plane(s, ch);
+            cv::Mat temp_plane = temp.plane(s, ch);
+            temp_plane = feat_plane.mul(m_window);
+        }
     }
-    cv::Mat complex_result(n_channels*m_height, m_width/2+1, CV_32FC2);
-
-    float *in = reinterpret_cast<float*>(in_all.data);
-    fftwf_complex *out = reinterpret_cast<fftwf_complex*>(complex_result.data);
-    if(n_channels <= 44){
-        if(!plan_fw){
-            int rank = 2;
-            int n[] = {(int)m_height, (int)m_width};
-            int howmany = n_channels;
-            int idist = m_height*m_width, odist = m_height*(m_width/2+1);
-            int istride = 1, ostride = 1;
-            int *inembed = NULL, *onembed = NULL;
-#pragma omp critical
-#ifdef ASYNC
-            std::unique_lock<std::mutex> lock(fftw_mut);
-            fftw_plan_with_nthreads(2);
-#elif OPENMP
-#pragma omp critical
-            fftw_plan_with_nthreads(omp_get_max_threads());
-#endif
-            plan_fw = fftwf_plan_many_dft_r2c(rank, n, howmany,
-                                                        in,  inembed, istride, idist,
-                                                        out, onembed, ostride, odist,
-                                                        FFTW_ESTIMATE);
-            fftwf_execute(plan_fw);
-        }else{fftwf_execute_dft_r2c(plan_fw,in,out);}
-    } else {
-        if(!plan_fwh){
-            int rank = 2;
-            int n[] = {(int)m_height, (int)m_width};
-            int howmany = n_channels;
-            int idist = m_height*m_width, odist = m_height*(m_width/2+1);
-            int istride = 1, ostride = 1;
-            int *inembed = NULL, *onembed = NULL;
-#pragma omp critical
-#ifdef ASYNC
-            std::unique_lock<std::mutex> lock(fftw_mut);
-            fftw_plan_with_nthreads(2);
-#elif OPENMP
-#pragma omp critical
-            fftw_plan_with_nthreads(omp_get_max_threads());
+
+    float *in = temp.ptr<float>();
+    fftwf_complex *out = reinterpret_cast<fftwf_complex *>(complex_result.get_p_data());
+
+    if (n_scales == 1)
+        fftwf_execute_dft_r2c(plan_fw, in, out);
+#ifdef BIG_BATCH
+    else
+        fftwf_execute_dft_r2c(plan_fw_all_scales, in, out);
 #endif
-            plan_fwh = fftwf_plan_many_dft_r2c(rank, n, howmany,
-                                                        in,  inembed, istride, idist,
-                                                        out, onembed, ostride, odist,
-                                                        FFTW_ESTIMATE);
-            fftwf_execute(plan_fwh);
-        }else{fftwf_execute_dft_r2c(plan_fwh,in,out);}
-    }
-    ComplexMat result(m_height, m_width/2 + 1, n_channels);
-    for (int i = 0; i < n_channels; ++i)
-        result.set_channel(i, complex_result(cv::Rect(0, i*m_height, m_width/2+1, m_height)));
-    return result;
 }
 
-cv::Mat Fftw::inverse(const ComplexMat &inputf)
+void Fftw::inverse(ComplexMat &complex_input, MatScales &real_result)
 {
-    int n_channels = inputf.n_channels;
-    cv::Mat real_result(m_height, m_width, CV_32FC(n_channels));
-    fftwf_complex *in = reinterpret_cast<fftwf_complex*>(inputf.get_p_data());
-    float *out = reinterpret_cast<float*>(real_result.data);
-
-    if(n_channels != 1){
-        if(!plan_if){
-            int rank = 2;
-            int n[] = {(int)m_height, (int)m_width};
-            int howmany = n_channels;
-            int idist = m_height*(m_width/2+1), odist = 1;
-            int istride = 1, ostride = n_channels;
-            int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
-
-#ifdef ASYNC
-            std::unique_lock<std::mutex> lock(fftw_mut);
-            fftw_plan_with_nthreads(2);
-#elif OPENMP
-#pragma omp critical
-            fftw_plan_with_nthreads(omp_get_max_threads());
-#endif
-#pragma omp critical
-            plan_if = fftwf_plan_many_dft_c2r(rank, n, howmany,
-                                                         in,  inembed, istride, idist,
-                                                         out, onembed, ostride, odist,
-                                                         FFTW_ESTIMATE);
-            fftwf_execute(plan_if);
-        }else{fftwf_execute_dft_c2r(plan_if,in,out);}
-    }else{
-        if(!plan_ir){
-            int rank = 2;
-            int n[] = {(int)m_height, (int)m_width};
-            int howmany = n_channels;
-            int idist = m_height*(m_width/2+1), odist = 1;
-            int istride = 1, ostride = n_channels;
-            int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
-
-#ifdef ASYNC
-            std::unique_lock<std::mutex> lock(fftw_mut);
-            fftw_plan_with_nthreads(2);
-#elif OPENMP
-#pragma omp critical
-            fftw_plan_with_nthreads(omp_get_max_threads());
+    Fft::inverse(complex_input, real_result);
+
+    int n_channels = complex_input.n_channels;
+    fftwf_complex *in = reinterpret_cast<fftwf_complex *>(complex_input.get_p_data());
+    float *out = real_result.ptr<float>();
+
+    if (n_channels == 1)
+        fftwf_execute_dft_c2r(plan_i_1ch, in, out);
+#ifdef BIG_BATCH
+    else
+        fftwf_execute_dft_c2r(plan_i_all_scales, in, out);
 #endif
-#pragma omp critical
-            plan_ir = fftwf_plan_many_dft_c2r(rank, n, howmany,
-                                                         in,  inembed, istride, idist,
-                                                         out, onembed, ostride, odist,
-                                                         FFTW_ESTIMATE);
-            fftwf_execute(plan_ir);
-    }else{fftwf_execute_dft_c2r(plan_ir,in,out);}
-  }
-
-    return real_result/(m_width*m_height);
+    real_result *= 1.0 / (m_width * m_height);
 }
 
 Fftw::~Fftw()
 {
-  fftwf_destroy_plan(plan_f);
-  fftwf_destroy_plan(plan_fw);
-  fftwf_destroy_plan(plan_fwh);
-  fftwf_destroy_plan(plan_if);
-  fftwf_destroy_plan(plan_ir);
+    if (plan_f) fftwf_destroy_plan(plan_f);
+    if (plan_fw) fftwf_destroy_plan(plan_fw);
+    if (plan_i_1ch) fftwf_destroy_plan(plan_i_1ch);
+
+#ifdef BIG_BATCH
+    if (plan_f_all_scales) fftwf_destroy_plan(plan_f_all_scales);
+    if (plan_fw_all_scales) fftwf_destroy_plan(plan_fw_all_scales);
+    if (plan_i_all_scales) fftwf_destroy_plan(plan_i_all_scales);
+#endif
 }