]> rtime.felk.cvut.cz Git - hercules2020/kcf.git/blob - src/fft_cufft.cpp
CUDA streams works
[hercules2020/kcf.git] / src / fft_cufft.cpp
1 #include "fft_cufft.h"
2
3 void cuFFT::init(unsigned width, unsigned height, unsigned num_of_feats, unsigned num_of_scales, bool big_batch_mode)
4 {
5     m_width = width;
6     m_height = height;
7     m_num_of_feats = num_of_feats;
8     m_num_of_scales = num_of_scales;
9     m_big_batch_mode = big_batch_mode;
10
11     std::cout << "FFT: cuFFT" << std::endl;
12
13     //FFT forward one scale
14     {
15        CufftErrorCheck(cufftPlan2d(&plan_f, int(m_height), int(m_width), CUFFT_R2C));
16     }
17 #ifdef BIG_BATCH
18     //FFT forward all scales
19     if(m_num_of_scales > 1 && m_big_batch_mode)
20     {
21         int rank = 2;
22         int n[] = {(int)m_height, (int)m_width};
23         int howmany = m_num_of_scales;
24         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
25         int istride = 1, ostride = 1;
26         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
27
28         CufftErrorCheck(cufftPlanMany(&plan_f_all_scales, rank, n,
29                   inembed, istride, idist,
30                   onembed, ostride, odist,
31                   CUFFT_R2C, howmany));
32     }
33 #endif
34     //FFT forward window one scale
35     {
36         int rank = 2;
37         int n[] = {int(m_height), int(m_width)};
38         int howmany = int(m_num_of_feats);
39         int idist = int(m_height*m_width), odist = int(m_height*(m_width/2+1));
40         int istride = 1, ostride = 1;
41         int *inembed = n, onembed[] = {int(m_height), int(m_width/2+1)};
42
43         CufftErrorCheck(cufftPlanMany(&plan_fw, rank, n,
44                   inembed, istride, idist,
45                   onembed, ostride, odist,
46                   CUFFT_R2C, howmany));
47     }
48 #ifdef BIG_BATCH
49     //FFT forward window all scales all feats
50     if(m_num_of_scales > 1 && m_big_batch_mode)
51     {
52         int rank = 2;
53         int n[] = {(int)m_height, (int)m_width};
54         int howmany = m_num_of_scales*m_num_of_feats;
55         int idist = m_height*m_width, odist = m_height*(m_width/2+1);
56         int istride = 1, ostride = 1;
57         int *inembed = n, onembed[] = {(int)m_height, (int)m_width/2+1};
58
59         CufftErrorCheck(cufftPlanMany(&plan_fw_all_scales, rank, n,
60                   inembed, istride, idist,
61                   onembed, ostride, odist,
62                   CUFFT_R2C, howmany));
63     }
64 #endif
65     //FFT inverse one scale
66     {
67         int rank = 2;
68         int n[] = {int(m_height), int(m_width)};
69         int howmany = int(m_num_of_feats);
70         int idist = int(m_height*(m_width/2+1)), odist = 1;
71         int istride = 1, ostride = int(m_num_of_feats);
72         int inembed[] = {int(m_height), int(m_width/2+1)}, *onembed = n;
73
74         CufftErrorCheck(cufftPlanMany(&plan_i_features, rank, n,
75                   inembed, istride, idist,
76                   onembed, ostride, odist,
77                   CUFFT_C2R, howmany));
78     }
79     //FFT inverse all scales
80 #ifdef BIG_BATCH
81     if(m_num_of_scales > 1 && m_big_batch_mode)
82     {
83         int rank = 2;
84         int n[] = {(int)m_height, (int)m_width};
85         int howmany = m_num_of_feats*m_num_of_scales;
86         int idist = m_height*(m_width/2+1), odist = 1;
87         int istride = 1, ostride = m_num_of_feats*m_num_of_scales;
88         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
89
90         CufftErrorCheck(cufftPlanMany(&plan_i_features_all_scales, rank, n,
91                   inembed, istride, idist,
92                   onembed, ostride, odist,
93                   CUFFT_C2R, howmany));
94     }
95 #endif
96     //FFT inverse one channel one scale
97     {
98         int rank = 2;
99         int n[] = {int(m_height), int(m_width)};
100         int howmany = 1;
101         int idist = int(m_height*(m_width/2+1)), odist = 1;
102         int istride = 1, ostride = 1;
103         int inembed[] = {int(m_height), int(m_width/2+1)}, *onembed = n;
104
105         CufftErrorCheck(cufftPlanMany(&plan_i_1ch, rank, n,
106                   inembed, istride, idist,
107                   onembed, ostride, odist,
108                   CUFFT_C2R, howmany));
109     }
110 #ifdef BIG_BATCH
111     //FFT inverse one channel all scales
112     if(m_num_of_scales > 1 && m_big_batch_mode)
113     {
114         int rank = 2;
115         int n[] = {(int)m_height, (int)m_width};
116         int howmany = m_num_of_scales;
117         int idist = m_height*(m_width/2+1), odist = 1;
118         int istride = 1, ostride = m_num_of_scales;
119         int inembed[] = {(int)m_height, (int)m_width/2+1}, *onembed = n;
120
121         CufftErrorCheck(cufftPlanMany(&plan_i_1ch_all_scales, rank, n,
122                   inembed, istride, idist,
123                   onembed, ostride, odist,
124                   CUFFT_C2R, howmany));
125     }
126 #endif
127 }
128
129 void cuFFT::set_window(const cv::Mat & window)
130 {
131      m_window = window;
132 }
133
134 void cuFFT::forward(const cv::Mat & real_input, ComplexMat & complex_result, float *real_input_arr, cudaStream_t stream)
135 {
136     (void) real_input;
137
138     if(m_big_batch_mode && real_input.rows == int(m_height*m_num_of_scales)){
139         CufftErrorCheck(cufftExecR2C(plan_f_all_scales, reinterpret_cast<cufftReal*>(real_input_arr),
140                                 complex_result.get_p_data()));
141     } else {
142 #pragma omp critical
143         {
144         CufftErrorCheck(cufftSetStream(plan_f, stream));
145         CufftErrorCheck(cufftExecR2C(plan_f, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
146         cudaStreamSynchronize(stream);
147         }
148     }
149     return;
150 }
151
152 void cuFFT::forward_window(std::vector<cv::Mat> patch_feats, ComplexMat & complex_result, cv::Mat & fw_all, float *real_input_arr, cudaStream_t stream)
153 {
154     int n_channels = int(patch_feats.size());
155
156     if(n_channels > int(m_num_of_feats)){
157         for (uint i = 0; i < uint(n_channels); ++i) {
158             cv::Mat in_roi(fw_all, cv::Rect(0, int(i*m_height), int(m_width), int(m_height)));
159             in_roi = patch_feats[i].mul(m_window);
160         }
161         CufftErrorCheck(cufftExecR2C(plan_fw_all_scales, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
162     } else {
163         for (uint i = 0; i < uint(n_channels); ++i) {
164             cv::Mat in_roi(fw_all, cv::Rect(0, int(i*m_height), int(m_width), int(m_height)));
165             in_roi = patch_feats[i].mul(m_window);
166         }
167 #pragma omp critical
168         {
169         CufftErrorCheck(cufftSetStream(plan_fw, stream));
170         CufftErrorCheck(cufftExecR2C(plan_fw, reinterpret_cast<cufftReal*>(real_input_arr), complex_result.get_p_data()));
171         cudaStreamSynchronize(stream);
172         }
173     }
174     return;
175 }
176
177 void cuFFT::inverse(ComplexMat &  complex_input, cv::Mat & real_result, float *real_result_arr, cudaStream_t stream)
178 {
179     int n_channels = complex_input.n_channels;
180     cufftComplex *in = reinterpret_cast<cufftComplex*>(complex_input.get_p_data());
181
182     if(n_channels == 1){
183 #pragma omp critical
184         {
185         CufftErrorCheck(cufftSetStream(plan_i_1ch, stream));
186         CufftErrorCheck(cufftExecC2R(plan_i_1ch, in, reinterpret_cast<cufftReal*>(real_result_arr)));
187         cudaStreamSynchronize(stream);
188         }
189         real_result = real_result/(m_width*m_height);
190         return;
191     } else if(n_channels == int(m_num_of_scales)){
192         CufftErrorCheck(cufftExecC2R(plan_i_1ch_all_scales, in, reinterpret_cast<cufftReal*>(real_result_arr)));
193         cudaStreamSynchronize(stream);
194
195         real_result = real_result/(m_width*m_height);
196         return;
197     } else if(n_channels == int(m_num_of_feats) * int(m_num_of_scales)){
198         CufftErrorCheck(cufftExecC2R(plan_i_features_all_scales, in, reinterpret_cast<cufftReal*>(real_result_arr)));
199         return;
200     }
201 #pragma omp critical
202     {
203     CufftErrorCheck(cufftSetStream(plan_i_features, stream));
204     CufftErrorCheck(cufftExecC2R(plan_i_features, in, reinterpret_cast<cufftReal*>(real_result_arr)));
205 #if defined(OPENMP) && !defined(BIG_BATCH)
206     cudaStreamSynchronize(stream);
207 #endif
208     }
209     return;
210 }
211
212 cuFFT::~cuFFT()
213 {
214   CufftErrorCheck(cufftDestroy(plan_f));
215   CufftErrorCheck(cufftDestroy(plan_fw));
216   CufftErrorCheck(cufftDestroy(plan_i_1ch));
217   CufftErrorCheck(cufftDestroy(plan_i_features));
218   
219   if (m_big_batch_mode) {
220       CufftErrorCheck(cufftDestroy(plan_f_all_scales));
221       CufftErrorCheck(cufftDestroy(plan_fw_all_scales));
222       CufftErrorCheck(cufftDestroy(plan_i_1ch_all_scales));
223       CufftErrorCheck(cufftDestroy(plan_i_features_all_scales));
224   }
225 }