]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_cufft.cpp
Make KCF member variables constexpr where possible
[hercules2020/kcf.git] / src / fft_cufft.cpp
1 #include "fft_cufft.h"
2
3 cuFFT::cuFFT()
4 {
5     CudaSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
6     cudaErrorCheck(cublasCreate(&cublas));
7     cudaErrorCheck(cublasSetStream(cublas, cudaStreamPerThread));
8 }
9
10 cufftHandle cuFFT::create_plan_fwd(uint howmany) const
11 {
12     int rank = 2;
13     int n[] = {(int)m_height, (int)m_width};
14     int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
15     int istride = 1, ostride = 1;
16     int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
17
18     cufftHandle plan;
19     cudaErrorCheck(cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
20     cudaErrorCheck(cufftSetStream(plan, cudaStreamPerThread));
21     return plan;
22 }
23
24 cufftHandle cuFFT::create_plan_inv(uint howmany) const
25 {
26     int rank = 2;
27     int n[] = {(int)m_height, (int)m_width};
28     int idist = m_height * (m_width / 2 + 1), odist = m_height * m_width;
29     int istride = 1, ostride = 1;
30     int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
31
32     cufftHandle plan;
33     cudaErrorCheck(cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
34     cudaErrorCheck(cufftSetStream(plan, cudaStreamPerThread));
35     return plan;
36 }
37
38
39 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
40 {
41     Fft::init(width, height, num_of_feats, num_of_scales);
42
43     std::cout << "FFT: cuFFT" << std::endl;
44
45     plan_f = create_plan_fwd(1);
46     plan_fw = create_plan_fwd(m_num_of_feats);
47     plan_i_1ch = create_plan_inv(1);
48
49 #ifdef BIG_BATCH
50     plan_f_all_scales = create_plan_fwd(m_num_of_scales);
51     plan_fw_all_scales = create_plan_fwd(m_num_of_scales * m_num_of_feats);
52     plan_i_all_scales = create_plan_inv(m_num_of_scales);
53 #endif
54 }
55
56 void cuFFT::set_window(const MatDynMem &window)
57 {
58     Fft::set_window(window);
59     m_window = window;
60 }
61
62 void cuFFT::forward(const MatScales &real_input, ComplexMat &complex_result)
63 {
64     Fft::forward(real_input, complex_result);
65     auto in = static_cast<cufftReal *>(const_cast<MatScales&>(real_input).deviceMem());
66
67     if (real_input.size[0] == 1)
68         cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_dev_data()));
69 #ifdef BIG_BATCH
70     else
71         cudaErrorCheck(cufftExecR2C(plan_f_all_scales, in, complex_result.get_dev_data()));
72 #endif
73 }
74
75 void cuFFT::forward_window(MatScaleFeats &feat, ComplexMat &complex_result, MatScaleFeats &temp)
76 {
77     Fft::forward_window(feat, complex_result, temp);
78
79     cufftReal *temp_data = temp.deviceMem();
80     uint n_scales = feat.size[0];
81
82     for (uint s = 0; s < n_scales; ++s) {
83         for (uint ch = 0; ch < uint(feat.size[1]); ++ch) {
84             cv::Mat feat_plane = feat.plane(s, ch);
85             cv::Mat temp_plane = temp.plane(s, ch);
86             temp_plane = feat_plane.mul(m_window);
87         }
88     }
89
90     if (n_scales == 1)
91         cudaErrorCheck(cufftExecR2C(plan_fw, temp_data, complex_result.get_dev_data()));
92 #ifdef BIG_BATCH
93     else
94         cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
95 #endif
96 }
97
98 void cuFFT::inverse(ComplexMat &complex_input, MatScales &real_result)
99 {
100     Fft::inverse(complex_input, real_result);
101
102     uint n_channels = complex_input.n_channels;
103     cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_dev_data());
104     cufftReal *out = real_result.deviceMem();
105     float alpha = 1.0 / (m_width * m_height);
106
107     if (n_channels == 1)
108         cudaErrorCheck(cufftExecC2R(plan_i_1ch, in, out));
109 #ifdef BIG_BATCH
110     else
111         cudaErrorCheck(cufftExecC2R(plan_i_all_scales, in, out));
112 #endif
113     cudaErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
114     // The result is a cv::Mat, which will be accesses by CPU, so we
115     // must synchronize with the GPU here
116     CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
117 }
118
119 cuFFT::~cuFFT()
120 {
121     cudaErrorCheck(cublasDestroy(cublas));
122
123     cudaErrorCheck(cufftDestroy(plan_f));
124     cudaErrorCheck(cufftDestroy(plan_fw));
125     cudaErrorCheck(cufftDestroy(plan_i_1ch));
126
127 #ifdef BIG_BATCH
128     cudaErrorCheck(cufftDestroy(plan_f_all_scales));
129     cudaErrorCheck(cufftDestroy(plan_fw_all_scales));
130     cudaErrorCheck(cufftDestroy(plan_i_all_scales));
131 #endif
132 }