// Creates the cuFFT plans used by forward(), forward_window() and inverse().
//
// Parameters:
//   width, height  - image size; NOTE(review): the plans read m_width / m_height,
//                    which are presumably assigned from these arguments on lines
//                    not shown here - confirm.
//   num_of_feats   - stored in m_num_of_feats; batch count of the per-feature plans.
//   num_of_scales  - stored in m_num_of_scales; batch count of the "all scales" plans.
//
// The four single-scale plans (plan_f, plan_fw, plan_i_features, plan_i_1ch) are
// always created. The four *_all_scales plans are created only when
// m_num_of_scales > 1 && BIG_BATCH_MODE.
//
// NOTE(review): `rank` is passed to every cufftPlanMany call but its declaration
// is not visible in this excerpt; n[] has two entries, so it is presumably 2 - confirm.
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
7 m_num_of_feats = num_of_feats;
8 m_num_of_scales = num_of_scales;
10 std::cout << "FFT: cuFFT" << std::endl;
12 // FFT forward one scale
// Single 2-D real-to-complex transform; height is passed before width.
14 CufftErrorCheck(cufftPlan2d(&plan_f, int(m_height), int(m_width), CUFFT_R2C));
17 // FFT forward all scales
18 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
20 int n[] = {(int)m_height, (int)m_width};
21 int howmany = m_num_of_scales;
// One real input image spans height * width elements; one output spectrum spans
// height * (width / 2 + 1) complex elements (see onembed below).
22 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
23 int istride = 1, ostride = 1;
24 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
26 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
30 // FFT forward window one scale
// Batched R2C: one transform per feature channel, inputs and outputs stored
// back to back (stride 1, distance = one full image / one full spectrum).
33 int n[] = {int(m_height), int(m_width)};
34 int howmany = int(m_num_of_feats);
35 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
36 int istride = 1, ostride = 1;
37 int *inembed = n, onembed[] = {int(m_height), int(m_width / 2 + 1)};
40 cufftPlanMany(&plan_fw, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
43 // FFT forward window all scales all feats
44 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
46 int n[] = {(int)m_height, (int)m_width};
// Same layout as plan_fw, but with one batch entry per (scale, feature) pair.
47 int howmany = m_num_of_scales * m_num_of_feats;
48 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
49 int istride = 1, ostride = 1;
50 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
52 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
56 // FFT inverse one scale
// Batched inverse, one transform per feature channel. Input spectra are stored
// back to back. On the output side odist = 1 and ostride = m_num_of_feats, so
// under cuFFT's advanced data layout sample k of batch b is written at
// b + k * m_num_of_feats, i.e. the real outputs are interleaved per element.
59 int n[] = {int(m_height), int(m_width)};
60 int howmany = int(m_num_of_feats);
61 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
62 int istride = 1, ostride = int(m_num_of_feats);
63 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
65 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n, inembed, istride, idist, onembed, ostride, odist,
68 // FFT inverse all scales
70 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
72 int n[] = {(int)m_height, (int)m_width};
// Same interleaved output layout as plan_i_features, with the interleave factor
// and batch count both equal to m_num_of_feats * m_num_of_scales.
73 int howmany = m_num_of_feats * m_num_of_scales;
74 int idist = m_height * (m_width / 2 + 1), odist = 1;
75 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
76 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
78 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n, inembed, istride, idist, onembed, ostride,
79 odist, CUFFT_C2R, howmany));
82 // FFT inverse one channel one scale
// NOTE(review): `howmany` for this plan is declared on a line not visible here;
// presumably 1 for a single channel - confirm.
85 int n[] = {int(m_height), int(m_width)};
87 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
88 int istride = 1, ostride = 1;
89 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
92 cufftPlanMany(&plan_i_1ch, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
95 // FFT inverse one channel all scales
96 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
98 int n[] = {(int)m_height, (int)m_width};
// One inverse transform per scale; outputs interleaved with factor m_num_of_scales
// (odist = 1, ostride = m_num_of_scales).
99 int howmany = m_num_of_scales;
100 int idist = m_height * (m_width / 2 + 1), odist = 1;
101 int istride = 1, ostride = m_num_of_scales;
102 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
104 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
105 CUFFT_C2R, howmany));
// Receives the window matrix for this FFT object.
// NOTE(review): the body is not visible here. forward_window() multiplies every
// feature patch element-wise by m_window, so this presumably stores `window`
// in m_window - confirm.
110 void cuFFT::set_window(const cv::Mat &window)
// Forward real-to-complex FFT of the data at real_input_arr into
// complex_result.get_p_data().
//
// Parameters:
//   real_input      - only its row count is inspected here, to choose the plan.
//   complex_result  - destination; its get_p_data() buffer receives the spectrum.
//   real_input_arr  - buffer handed to cuFFT as the real input.
//                     NOTE(review): presumably device-accessible memory backing
//                     real_input - confirm against the caller.
//   stream          - CUDA stream used by the single-scale path.
//
// When BIG_BATCH_MODE is set and real_input has m_height * m_num_of_scales rows,
// the batched plan_f_all_scales is executed. The remaining visible lines bind
// plan_f to `stream`, execute it and block until the stream has finished
// (presumably the else branch; the branch keywords are not visible here).
//
// NOTE(review): no cufftSetStream call for plan_f_all_scales is visible, so the
// batched path does not appear to use `stream` - confirm.
// NOTE(review): the return value of cudaStreamSynchronize is not checked here,
// whereas inverse() wraps one such call in CudaSafeCall.
115 void cuFFT::forward(const cv::Mat &real_input, ComplexMat &complex_result, float *real_input_arr, cudaStream_t stream)
117 if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
118 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
119 complex_result.get_p_data()));
123 CufftErrorCheck(cufftSetStream(plan_f, stream));
125 cufftExecR2C(plan_f, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
126 cudaStreamSynchronize(stream);
// Applies m_window to every feature patch and runs a batched forward
// real-to-complex FFT over the result.
//
// Parameters:
//   patch_feats     - feature patches, one cv::Mat per channel (taken by value).
//   complex_result  - destination; its get_p_data() buffer receives the spectra.
//   fw_all          - staging matrix; patch i is placed in the m_width x m_height
//                     region starting at row i * m_height.
//   real_input_arr  - buffer handed to cuFFT as the real input.
//                     NOTE(review): presumably the same memory that backs fw_all,
//                     since nothing visible copies fw_all into it - confirm.
//   stream          - CUDA stream used by the single-scale path.
//
// If there are more channels than m_num_of_feats, plan_fw_all_scales is used.
// The remaining visible lines fill fw_all the same way, then run plan_fw on
// `stream` and wait for it (presumably the else branch; not visible here).
//
// NOTE(review): `in_roi = patch_feats[i].mul(m_window)` only updates fw_all if the
// assignment writes into the ROI's existing buffer rather than rebinding in_roi;
// that requires the product to match the ROI's size and type - confirm.
// NOTE(review): the return value of cudaStreamSynchronize is not checked.
132 void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat &complex_result, cv::Mat &fw_all,
133 float *real_input_arr, cudaStream_t stream)
135 int n_channels = int(patch_feats.size());
137 if (n_channels > int(m_num_of_feats)) {
138 for (uint i = 0; i < uint(n_channels); ++i) {
// ROI header over rows [i * m_height, (i + 1) * m_height) of fw_all.
139 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
140 in_roi = patch_feats[i].mul(m_window);
142 CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal *>(real_input_arr),
143 complex_result.get_p_data()));
145 for (uint i = 0; i < uint(n_channels); ++i) {
146 cv::Mat in_roi(fw_all, cv::Rect(0, int(i * m_height), int(m_width), int(m_height)));
147 in_roi = patch_feats[i].mul(m_window);
151 CufftErrorCheck(cufftSetStream(plan_fw, stream));
153 cufftExecR2C(plan_fw, reinterpret_cast<cufftReal *>(real_input_arr), complex_result.get_p_data()));
154 cudaStreamSynchronize(stream);
// Inverse complex-to-real FFT of complex_input into real_result_arr, choosing the
// plan from complex_input.n_channels. The checks run in this order:
//   n_channels == 1                              -> plan_i_1ch (bound to `stream`)
//   n_channels == m_num_of_scales                -> plan_i_1ch_all_scales
//   n_channels == m_num_of_feats * m_num_of_scales -> plan_i_features_all_scales
//   remaining visible lines (presumably the final else) -> plan_i_features
//                                                   (bound to `stream`)
// Because the == 1 test comes first, a single-scale configuration never reaches
// the plan_i_1ch_all_scales branch.
//
// Parameters:
//   complex_input   - spectra to invert; read through get_p_data().
//   real_result     - divided by m_width * m_height in the one-channel branches.
//                     cuFFT transforms are unnormalized, so this rescales the output.
//                     NOTE(review): presumably shares memory with real_result_arr - confirm.
//   real_result_arr - buffer handed to cuFFT as the real output.
//   stream          - CUDA stream used by plan_i_1ch and plan_i_features.
//
// NOTE(review): no cufftSetStream call is visible for the *_all_scales plans, yet
// the plan_i_1ch_all_scales branch synchronizes on `stream` - confirm the intent.
// NOTE(review): any normalization for the multi-feature branches is not visible here.
160 void cuFFT::inverse(ComplexMat &complex_input, cv::Mat &real_result, float *real_result_arr, cudaStream_t stream)
162 int n_channels = complex_input.n_channels;
163 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
165 if (n_channels == 1) {
168 CufftErrorCheck(cufftSetStream(plan_i_1ch, stream));
169 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// Return value of the synchronize call is not checked here.
170 cudaStreamSynchronize(stream);
172 real_result = real_result / (m_width * m_height);
174 } else if (n_channels == int(m_num_of_scales)) {
175 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
176 cudaStreamSynchronize(stream);
178 real_result = real_result / (m_width * m_height);
180 } else if (n_channels == int(m_num_of_feats) * int(m_num_of_scales)) {
181 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal *>(real_result_arr)));
186 CufftErrorCheck(cufftSetStream(plan_i_features, stream));
187 CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal *>(real_result_arr)));
// The checked synchronize below is compiled only when OPENMP is defined and
// BIG_BATCH is not.
188 #if defined(OPENMP) && !defined(BIG_BATCH)
189 CudaSafeCall(cudaStreamSynchronize(stream));
// Releases the cuFFT plans. The enclosing signature is not visible in this
// excerpt; presumably this is the class destructor - confirm.
//
// The four single-scale plans are always destroyed; the four *_all_scales plans
// are destroyed whenever BIG_BATCH_MODE is set.
//
// NOTE(review): init() creates the *_all_scales plans only when
// m_num_of_scales > 1 && BIG_BATCH_MODE, but the guard here tests BIG_BATCH_MODE
// alone. With a single scale these handles would be destroyed without having been
// created by init() - confirm they are initialized elsewhere or tighten the guard.
197 CufftErrorCheck(cufftDestroy(plan_f));
198 CufftErrorCheck(cufftDestroy(plan_fw));
199 CufftErrorCheck(cufftDestroy(plan_i_1ch));
200 CufftErrorCheck(cufftDestroy(plan_i_features));
202 if (BIG_BATCH_MODE) {
203 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
204 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
205 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
206 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));