-
#ifndef FFT_CUDA_H
#define FFT_CUDA_H
-#include "fft.h"
-
-#if CV_MAJOR_VERSION == 2
- #include <opencv2/gpu/gpu.hpp>
- #define CUDA cv::gpu
-#else
- #include "opencv2/opencv.hpp"
- #define CUDA cv::cuda
-#endif
-
#include <cufft.h>
#include <cuda_runtime.h>
+#include <cublas_v2.h>
+
+#include "fft.h"
+#include "cuda_error_check.hpp"
+#include "pragmas.h"
+
// Forward declaration only; no declaration visible in this header refers to ThreadCtx.
// NOTE(review): confirm it is still needed here (it may be used by code outside this view).
+struct ThreadCtx;
// cuFFT: subclass of Fft whose private state is a window matrix, a set of cuFFT
// plan handles and one cuBLAS handle.
// NOTE(review): this text reads as a patch -- lines prefixed '-' appear to be the
// previous declarations and lines prefixed '+' their replacements. Confirm before
// treating it as a directly compilable header.
class cuFFT : public Fft
{
public:
cuFFT();
// Previous interface (removed lines): each method was marked `override`, init() took
// an extra big_batch_mode flag, and forward / forward_window / inverse returned
// their result by value.
- void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode) override;
- void set_window(const cv::Mat &window) override;
- ComplexMat forward(const cv::Mat &input) override;
- ComplexMat forward_window(const std::vector<cv::Mat> &input) override;
- cv::Mat inverse(const ComplexMat &inputf) override;
- ~cuFFT() override;
// Replacement interface (added lines): all methods return void and take their
// result holders as non-const reference parameters; none carries `override`.
// NOTE(review): without `override` the compiler no longer checks that these
// signatures match virtual members of Fft -- verify against fft.h.
+ void init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales);
// Takes the window by const reference; presumably kept in m_window -- confirm in the implementation.
+ void set_window(const MatDynMem &window);
// real_input is read-only; complex_result is a non-const reference (presumably the output).
+ void forward(const MatScales &real_input, ComplexMat &complex_result);
// All three arguments are non-const references; `tmp` is presumably caller-provided
// scratch storage -- TODO confirm against the implementation.
+ void forward_window(MatScaleFeats &patch_feats_in, ComplexMat &complex_result, MatScaleFeats &tmp);
// NOTE(review): complex_input is non-const, so the implementation is allowed to
// modify it -- confirm whether callers rely on it being preserved.
+ void inverse(ComplexMat &complex_input, MatScales &real_result);
+ ~cuFFT();
+
+protected:
// Const helpers that each return a cufftHandle. `howmany` is presumably the batch
// count of the created plan -- confirm in the implementation.
// NOTE(review): `uint` is not a standard C++ type; it must come from a platform or
// project typedef reachable through the includes.
+ cufftHandle create_plan_fwd(uint howmany) const;
+ cufftHandle create_plan_inv(uint howmany) const;
+
private:
// NOTE(review): cv::Mat is still used here although the explicit OpenCV includes are
// among the removed lines; it presumably arrives through "fft.h" -- verify.
cv::Mat m_window;
// Previous state (removed lines): dimensions, a big-batch flag, four CUDA streams,
// eight plan handles and a set of raw float buffers.
- unsigned m_width, m_height, m_num_of_feats, m_num_of_scales, m_num_of_streams;
- bool m_big_batch_mode;
- cudaStream_t streams[4];
- cufftHandle plan_f, plan_f_all_scales, plan_fw, plan_fw_all_scales, plan_i_features,
- plan_i_features_all_scales, plan_i_1ch, plan_i_1ch_all_scales;
- float *data_f, *data_f_all_scales, *data_fw, *data_fw_d, *data_fw_all_scales, *data_fw_all_scales_d, *data_i_features, *data_i_features_d,
- *data_i_features_all_scales, *data_i_features_all_scales_d, *data_i_1ch, *data_i_1ch_d, *data_i_1ch_all_scales, *data_i_1ch_all_scales_d;
// Replacement state (added lines): three plan handles that are always present
// (names suggest forward, forward-with-window and single-channel inverse -- confirm).
+ cufftHandle plan_f, plan_fw, plan_i_1ch;
// The *_all_scales plan handles exist only when BIG_BATCH is defined at compile time.
+#ifdef BIG_BATCH
+ cufftHandle plan_f_all_scales, plan_fw_all_scales, plan_i_all_scales;
+#endif
// cuBLAS library handle; its use is not visible in this header.
+ cublasHandle_t cublas;
};
#endif // FFT_CUDA_H