// Records the feature and scale counts and creates the cuFFT plans used by forward(),
// forward_window() and inverse():
//   plan_f / plan_f_all_scales                   - real-to-complex, one image / one image per scale
//   plan_fw / plan_fw_all_scales                 - real-to-complex, one batch entry per feature (x scale)
//   plan_i_features / plan_i_features_all_scales - complex-to-real, one batch entry per feature (x scale)
//   plan_i_1ch / plan_i_1ch_all_scales           - complex-to-real, single channel (x scale)
// The *_all_scales plans are created only when m_num_of_scales > 1 && BIG_BATCH_MODE.
// Every plan for which a cufftSetStream call is visible below is bound to cudaStreamPerThread.
// NOTE(review): all plans are sized from m_width / m_height; confirm both are assigned from
// `width` / `height` before the first plan is created.
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
7 m_num_of_feats = num_of_feats;
8 m_num_of_scales = num_of_scales;
10 std::cout << "FFT: cuFFT" << std::endl;
12 // FFT forward one scale
14 CufftErrorCheck(cufftPlan2d(&plan_f, int(m_height), int(m_width), CUFFT_R2C));
17 // FFT forward all scales
18 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
20 int n[] = {(int)m_height, (int)m_width};
21 int howmany = m_num_of_scales;
// Real input: m_height x m_width per batch entry; complex output: m_height x (m_width / 2 + 1).
22 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
23 int istride = 1, ostride = 1;
24 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
26 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
28 CufftErrorCheck(cufftSetStream(plan_f_all_scales, cudaStreamPerThread));
31 // FFT forward window one scale
34 int n[] = {int(m_height), int(m_width)};
35 int howmany = int(m_num_of_feats);
36 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
37 int istride = 1, ostride = 1;
38 int *inembed = n, onembed[] = {int(m_height), int(m_width / 2 + 1)};
41 cufftPlanMany(&plan_fw, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
42 CufftErrorCheck(cufftSetStream(plan_fw, cudaStreamPerThread));
45 // FFT forward window all scales all feats
46 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
48 int n[] = {(int)m_height, (int)m_width};
49 int howmany = m_num_of_scales * m_num_of_feats;
50 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
51 int istride = 1, ostride = 1;
52 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
54 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
56 CufftErrorCheck(cufftSetStream(plan_fw_all_scales, cudaStreamPerThread));
59 // FFT inverse one scale
62 int n[] = {int(m_height), int(m_width)};
63 int howmany = int(m_num_of_feats);
// odist = 1 combined with ostride = batch count: in cuFFT's advanced data layout this
// interleaves the real outputs of the batch element by element instead of storing them
// back to back.
64 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
65 int istride = 1, ostride = int(m_num_of_feats);
66 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
68 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n, inembed, istride, idist, onembed, ostride, odist,
70 CufftErrorCheck(cufftSetStream(plan_i_features, cudaStreamPerThread));
72 // FFT inverse all scales
74 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
76 int n[] = {(int)m_height, (int)m_width};
77 int howmany = m_num_of_feats * m_num_of_scales;
// Same interleaved output layout as above, across all features of all scales.
78 int idist = m_height * (m_width / 2 + 1), odist = 1;
79 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
80 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
82 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n, inembed, istride, idist, onembed, ostride,
83 odist, CUFFT_C2R, howmany));
84 CufftErrorCheck(cufftSetStream(plan_i_features_all_scales, cudaStreamPerThread));
87 // FFT inverse one channel one scale
90 int n[] = {int(m_height), int(m_width)};
92 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
93 int istride = 1, ostride = 1;
94 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
97 cufftPlanMany(&plan_i_1ch, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
98 CufftErrorCheck(cufftSetStream(plan_i_1ch, cudaStreamPerThread));
101 // FFT inverse one channel all scales
102 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
104 int n[] = {(int)m_height, (int)m_width};
105 int howmany = m_num_of_scales;
// One output channel per scale, interleaved (ostride = number of scales, odist = 1).
106 int idist = m_height * (m_width / 2 + 1), odist = 1;
107 int istride = 1, ostride = m_num_of_scales;
108 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
110 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
111 CUFFT_C2R, howmany));
112 CufftErrorCheck(cufftSetStream(plan_i_1ch_all_scales, cudaStreamPerThread));
// NOTE(review): presumably stores `window` as the m_window that forward_window() multiplies
// into every feature channel - confirm against the function body.
117 void cuFFT::set_window(const cv::Mat &window)
// Real-to-complex forward FFT: reads real samples from real_input_arr and writes the spectrum
// to complex_result.get_p_data().
// The row count of real_input selects the plan: with BIG_BATCH_MODE set and
// rows == m_height * m_num_of_scales the batched plan_f_all_scales is executed, otherwise the
// single-image plan_f.
122 void cuFFT::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr)
124 if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
125 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
126 complex_result.get_p_data()));
131 cufftExecR2C(plan_f, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
// NOTE(review): the return value of this synchronisation is not checked, unlike the
// CudaSafeCall-wrapped cudaStreamSynchronize in inverse().
132 cudaStreamSynchronize(cudaStreamPerThread);
// Windowed forward FFT over a set of feature channels.
// Each patch_feats[i] is multiplied element-wise by m_window and assigned to an ROI of fw_all
// that starts at row i * m_height and is m_width x m_height in size; the real-to-complex FFT
// then reads from real_input_arr and writes to complex_result.get_p_data().
// Plan selection: more channels than m_num_of_feats -> plan_fw_all_scales, otherwise plan_fw.
// patch_feats is taken by value, so the vector is copied on every call.
// NOTE(review): presumably fw_all wraps the memory behind real_input_arr, and the ROI
// assignment writes in place only if sizes and types match - verify against the caller.
// NOTE(review): plan_fw_all_scales is only created in init() when
// m_num_of_scales > 1 && BIG_BATCH_MODE; confirm callers never pass more than m_num_of_feats
// channels otherwise.
138 void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
139 float *real_input_arr)
141 int n_channels = int(patch_feats.size());
143 if (n_channels > int(m_num_of_feats)) {
144 for (uint i = 0; i < uint(n_channels); ++i) {
145 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
146 in_roi = patch_feats[i].mul(m_window);
148 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
149 complex_result.get_p_data()));
151 for (uint i = 0; i < uint(n_channels); ++i) {
152 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
153 in_roi = patch_feats[i].mul(m_window);
158 cufftExecR2C(plan_fw, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
// NOTE(review): the result of this synchronisation is not checked.
159 cudaStreamSynchronize(cudaStreamPerThread);
// Complex-to-real inverse FFT of complex_input into real_result_arr.
// The plan is chosen from complex_input.n_channels alone:
//   1                                -> plan_i_1ch
//   m_num_of_scales                  -> plan_i_1ch_all_scales
//   m_num_of_feats * m_num_of_scales -> plan_i_features_all_scales
//   anything else                    -> plan_i_features
// In the two single-channel branches real_result is divided by m_width * m_height after the
// transform.
// NOTE(review): the *_all_scales plans are only created in init() when
// m_num_of_scales > 1 && BIG_BATCH_MODE, but the branches here test channel counts only;
// confirm those branches cannot be reached when the plans were never created.
165 void cuFFT::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr)
167 int n_channels = complex_input.n_channels;
168 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
170 if (n_channels == 1) {
173 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// NOTE(review): the return values of the three unwrapped cudaStreamSynchronize calls in this
// function are ignored, while the last one is wrapped in CudaSafeCall - make these consistent.
174 cudaStreamSynchronize(cudaStreamPerThread);
// Scale by 1 / (width * height).
176 real_result = real_result / (m_width * m_height);
178 } else if (n_channels == int(m_num_of_scales)) {
179 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
180 cudaStreamSynchronize(cudaStreamPerThread);
182 real_result = real_result / (m_width * m_height);
184 } else if (n_channels == int(m_num_of_feats) * int(m_num_of_scales)) {
185 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
186 cudaStreamSynchronize(cudaStreamPerThread);
191 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// This synchronisation is compiled in only for OPENMP builds without BIG_BATCH.
192 #if defined(OPENMP) && !defined(BIG_BATCH)
193 CudaSafeCall(cudaStreamSynchronize(cudaStreamPerThread));
// Destroys the cuFFT plans: the four single-scale plans unconditionally, and the four
// *_all_scales plans when BIG_BATCH_MODE is set.
// NOTE(review): init() creates the *_all_scales plans only when
// m_num_of_scales > 1 && BIG_BATCH_MODE, but the guard below tests BIG_BATCH_MODE alone. With
// a single scale in BIG_BATCH_MODE this calls cufftDestroy on handles that were never created;
// the guard should probably match the one used in init().
201 CufftErrorCheck(cufftDestroy(plan_f));
202 CufftErrorCheck(cufftDestroy(plan_fw));
203 CufftErrorCheck(cufftDestroy(plan_i_1ch));
204 CufftErrorCheck(cufftDestroy(plan_i_features));
206 if (BIG_BATCH_MODE) {
207 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
208 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
209 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
210 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));