6 CublasErrorCheck(cublasCreate(&cublas));
// Records the feature and scale counts and builds the cuFFT plans that the
// forward and inverse transforms of this class execute.
//
// Parameters:
//   width, height  - NOTE(review): presumably stored in m_width / m_height,
//                    which size every plan below; those assignments are not
//                    among the lines visible here - confirm.
//   num_of_feats   - stored in m_num_of_feats; batch count of the per-feature
//                    plans.
//   num_of_scales  - stored in m_num_of_scales; the *_all_scales plans are
//                    only built when this is > 1 and BIG_BATCH_MODE is set.
//
// Every plan whose cufftSetStream call appears below is bound to
// cudaStreamPerThread, and the result of that call goes through
// CufftErrorCheck.
// NOTE(review): `rank` is used by all cufftPlanMany calls but is declared on
// lines that are not visible here - confirm its value matches the 2-element
// `n` arrays.
9 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales)
13 m_num_of_feats = num_of_feats;
14 m_num_of_scales = num_of_scales;
// Announces the selected FFT backend on standard output.
16 std::cout << "FFT: cuFFT" << std::endl;
18 // FFT forward one scale
// A single 2-D real-to-complex transform of m_height x m_width samples.
20 CufftErrorCheck(cufftPlan2d(&plan_f, int(m_height), int(m_width), CUFFT_R2C));
23 // FFT forward all scales
// Batched R2C: one transform per scale. Inputs are unit-stride images placed
// m_height * m_width elements apart; each output holds
// m_height x (m_width / 2 + 1) complex values.
24 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
26 int n[] = {(int)m_height, (int)m_width};
27 int howmany = m_num_of_scales;
28 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
29 int istride = 1, ostride = 1;
30 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
32 CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
34 CufftErrorCheck(cufftSetStream(plan_f_all_scales, cudaStreamPerThread));
37 // FFT forward window one scale
// Batched R2C with one transform per feature channel (howmany =
// m_num_of_feats); same contiguous input/output layout as above.
40 int n[] = {int(m_height), int(m_width)};
41 int howmany = int(m_num_of_feats);
42 int idist = int(m_height * m_width), odist = int(m_height * (m_width / 2 + 1));
43 int istride = 1, ostride = 1;
44 int *inembed = n, onembed[] = {int(m_height), int(m_width / 2 + 1)};
47 cufftPlanMany(&plan_fw, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, howmany));
48 CufftErrorCheck(cufftSetStream(plan_fw, cudaStreamPerThread));
51 // FFT forward window all scales all feats
// Batched R2C covering every feature channel of every scale
// (howmany = m_num_of_scales * m_num_of_feats).
52 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
54 int n[] = {(int)m_height, (int)m_width};
55 int howmany = m_num_of_scales * m_num_of_feats;
56 int idist = m_height * m_width, odist = m_height * (m_width / 2 + 1);
57 int istride = 1, ostride = 1;
58 int *inembed = n, onembed[] = {(int)m_height, (int)m_width / 2 + 1};
60 CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
62 CufftErrorCheck(cufftSetStream(plan_fw_all_scales, cudaStreamPerThread));
65 // FFT inverse one scale
// Batched inverse, one transform per feature channel. Input spectra are
// unit-stride and m_height * (m_width / 2 + 1) elements apart. The output
// uses ostride = m_num_of_feats with odist = 1, so element k of batch b is
// written at offset k * m_num_of_feats + b: the channels come out
// interleaved rather than plane by plane.
68 int n[] = {int(m_height), int(m_width)};
69 int howmany = int(m_num_of_feats);
70 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
71 int istride = 1, ostride = int(m_num_of_feats);
72 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
74 CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n, inembed, istride, idist, onembed, ostride, odist,
76 CufftErrorCheck(cufftSetStream(plan_i_features, cudaStreamPerThread));
78 // FFT inverse all scales
// Complex-to-real counterpart for all features of all scales; the output is
// interleaved across m_num_of_feats * m_num_of_scales batches in the same
// way as the one-scale inverse plan.
80 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
82 int n[] = {(int)m_height, (int)m_width};
83 int howmany = m_num_of_feats * m_num_of_scales;
84 int idist = m_height * (m_width / 2 + 1), odist = 1;
85 int istride = 1, ostride = m_num_of_feats * m_num_of_scales;
86 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
88 CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n, inembed, istride, idist, onembed, ostride,
89 odist, CUFFT_C2R, howmany));
90 CufftErrorCheck(cufftSetStream(plan_i_features_all_scales, cudaStreamPerThread));
93 // FFT inverse one channel one scale
// Complex-to-real plan with unit output stride.
// NOTE(review): `howmany` for this plan is declared on a line that is not
// visible here; with odist = 1 the batches would overlap unless it is 1 -
// confirm.
96 int n[] = {int(m_height), int(m_width)};
98 int idist = int(m_height * (m_width / 2 + 1)), odist = 1;
99 int istride = 1, ostride = 1;
100 int inembed[] = {int(m_height), int(m_width / 2 + 1)}, *onembed = n;
103 cufftPlanMany(&plan_i_1ch, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_C2R, howmany));
104 CufftErrorCheck(cufftSetStream(plan_i_1ch, cudaStreamPerThread));
107 // FFT inverse one channel all scales
// Complex-to-real, one transform per scale; the output is interleaved across
// scales (ostride = m_num_of_scales, odist = 1).
108 if (m_num_of_scales > 1 && BIG_BATCH_MODE) {
110 int n[] = {(int)m_height, (int)m_width};
111 int howmany = m_num_of_scales;
112 int idist = m_height * (m_width / 2 + 1), odist = 1;
113 int istride = 1, ostride = m_num_of_scales;
114 int inembed[] = {(int)m_height, (int)m_width / 2 + 1}, *onembed = n;
116 CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n, inembed, istride, idist, onembed, ostride, odist,
117 CUFFT_C2R, howmany));
118 CufftErrorCheck(cufftSetStream(plan_i_1ch_all_scales, cudaStreamPerThread));
// Receives the window matrix by const reference.
// NOTE(review): presumably stores it in m_window, which forward_window
// multiplies into every feature plane - the body is not visible here;
// confirm.
123 void cuFFT::set_window(const MatDynMem &window)
// Real-to-complex FFT of real_input, written to complex_result's buffer.
//
// Parameters:
//   real_input     - source samples; its device pointer is handed to cuFFT.
//   complex_result - destination; its data pointer receives the spectrum.
//
// The cufftExecR2C call in the all-scales branch is wrapped in
// CufftErrorCheck.
128 void cuFFT::forward(MatDynMem & real_input, ComplexMat & complex_result)
// An input with m_height * m_num_of_scales rows is treated as all scales
// stacked together and is run through the batched plan.
130 if (BIG_BATCH_MODE && real_input.rows == int(m_height * m_num_of_scales)) {
131 CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal *>(real_input.deviceMem()),
132 complex_result.get_p_data()));
// NOTE(review): presumably the fallback for a single-scale input; the lines
// joining it to the branch above are not visible here - confirm.
137 cufftExecR2C(plan_f, reinterpret_cast<cufftReal *>(real_input.deviceMem()), complex_result.get_p_data()));
// Blocks the host until the work queued on the per-thread stream is done.
// NOTE(review): the cudaError_t returned here is not checked.
138 cudaStreamSynchronize(cudaStreamPerThread);
// Multiplies every channel plane of feat by m_window, then runs a batched
// real-to-complex FFT over the windowed planes.
//
// Parameters:
//   feat           - 3-D matrix (asserted); size[0] is taken as the channel
//                    count.
//   complex_result - destination; its data pointer receives the spectra.
//   temp           - scratch matrix that holds the windowed planes and is the
//                    FFT input (via temp.deviceMem()).
144 void cuFFT::forward_window(MatDynMem &feat, ComplexMat & complex_result, MatDynMem &temp)
146 uint n_channels = feat.size[0];
147 cufftReal *temp_data = temp.deviceMem();
// Both checks vanish when NDEBUG is defined.
149 assert(feat.dims == 3);
150 assert(n_channels == m_num_of_feats || n_channels == m_num_of_feats * m_num_of_scales);
// Per channel: wrap plane i of feat and of temp in 2-D cv::Mat headers that
// point at the existing data, then form the element-wise product with the
// window.
// NOTE(review): this relies on OpenCV writing the product into temp_plane's
// existing buffer (and on that buffer being what temp.deviceMem() exposes)
// rather than reallocating - confirm.
152 for (uint i = 0; i < n_channels; ++i) {
153 cv::Mat feat_plane(feat.dims - 1, feat.size + 1, feat.cv::Mat::type(), feat.ptr<void>(i));
154 cv::Mat temp_plane(temp.dims - 1, temp.size + 1, temp.cv::Mat::type(), temp.ptr(i));
155 temp_plane = feat_plane.mul(m_window);
// The channel count selects the plan: exactly m_num_of_feats channels use the
// one-scale plan, anything else the all-scales plan.
// NOTE(review): plan_fw_all_scales is only created in init when
// m_num_of_scales > 1 && BIG_BATCH_MODE - confirm callers never reach the
// second plan otherwise.
157 CufftErrorCheck(cufftExecR2C((n_channels == m_num_of_feats) ? plan_fw : plan_fw_all_scales,
158 temp_data, complex_result.get_p_data()));
// Complex-to-real inverse FFT of complex_input into real_result, choosing the
// plan from the number of input channels.
//
// Parameters:
//   complex_input - spectra; n_channels selects the plan, get_p_data()
//                   supplies the input pointer.
//   real_result   - destination; written through its device pointer.
161 void cuFFT::inverse(ComplexMat &complex_input, MatDynMem &real_result)
163 uint n_channels = complex_input.n_channels;
164 cufftComplex *in = reinterpret_cast<cufftComplex *>(complex_input.get_p_data());
165 cufftReal *out = real_result.deviceMem();
// cuFFT transforms are unnormalised, so the result is scaled by
// 1 / (width * height). The 1.0 literal makes this a double division that is
// then narrowed to float.
166 float alpha = 1.0 / (m_width * m_height);
// Single channel: inverse transform, then scale every element of real_result
// in place with cuBLAS.
// NOTE(review): the cuFFT plans run on cudaStreamPerThread; no
// cublasSetStream call for the `cublas` handle is visible here - confirm the
// scaling is ordered after the transform.
169 if (n_channels == 1) {
170 CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, out));
171 CublasErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
// One channel per scale: same steps using the batched one-channel plan.
173 } else if (n_channels == m_num_of_scales) {
174 CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, out));
175 CublasErrorCheck(cublasSscal(cublas, real_result.total(), &alpha, out, 1));
// All features of all scales.
// NOTE(review): no cublasSscal call is visible for this branch or for the
// plan_i_features call below - confirm whether the normalisation is applied
// elsewhere.
177 } else if (n_channels == m_num_of_feats * m_num_of_scales) {
178 CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, out));
// Waits for the per-thread stream; the returned cudaError_t is not checked.
179 cudaStreamSynchronize(cudaStreamPerThread);
// Per-feature inverse for one scale.
// NOTE(review): presumably the final else branch - the connecting lines are
// not visible here; confirm.
182 CufftErrorCheck(cufftExecC2R(plan_i_features, in, out));
// Teardown: releases the cuBLAS handle and the cuFFT plans, checking the
// status of every destroy call.
// NOTE(review): presumably the destructor body - the enclosing signature is
// not visible here; confirm.
188 CublasErrorCheck(cublasDestroy(cublas));
// Single-scale plans.
190 CufftErrorCheck(cufftDestroy(plan_f));
191 CufftErrorCheck(cufftDestroy(plan_fw));
192 CufftErrorCheck(cufftDestroy(plan_i_1ch));
193 CufftErrorCheck(cufftDestroy(plan_i_features));
// Batched all-scales plans.
// NOTE(review): init creates these only when
// m_num_of_scales > 1 && BIG_BATCH_MODE, but this guard tests BIG_BATCH_MODE
// alone. With a single scale, handles that were never created would be passed
// to cufftDestroy - confirm and consider using the same condition as init.
195 if (BIG_BATCH_MODE) {
196 CufftErrorCheck(cufftDestroy(plan_f_all_scales));
197 CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
198 CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
199 CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));