1 #include "complexmat.cuh"
3 __global__ void sqr_norm_kernel(int n, float* out, float* data, float rows, float cols)
5 extern __shared__ float sdata[];
6 int i = blockDim.x * threadIdx.y + threadIdx.x;
7 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
8 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
11 sdata[i] = data[threadId]*data[threadId] + data[threadId+1]*data[threadId+1];
14 for (unsigned int s=(blockDim.x*blockDim.y+1)/2, old_s = blockDim.x*blockDim.y;s>0; s>>=1) {
18 if (i < s && i+s < old_s) {
19 sdata[i] += sdata[i + s];
26 atomicAdd(&out[blockId/n], sdata[0]/(rows*cols));
30 float ComplexMat::sqr_norm() const
33 CudaSafeCall(cudaMemset(result, 0, n_scales*sizeof(float)));
35 dim3 threadsPerBlock(rows, cols);
36 dim3 numBlocks(n_channels, 1);
38 sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows*cols*sizeof(float)>>>(n_channels, result, p_data, rows, cols);
44 void ComplexMat::sqr_norm(float *result) const
46 CudaSafeCall(cudaMemset(result, 0, n_scales*sizeof(float)));
48 dim3 threadsPerBlock(rows, cols);
49 dim3 numBlocks(n_channels/n_scales, n_scales);
51 sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows*cols*sizeof(float)>>>(n_channels/n_scales, result, p_data, rows, cols);
57 __global__ void sqr_mag_kernel(float* data, float* result)
59 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
60 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
62 result[threadId] = data[threadId]*data[threadId] + data[threadId+1]*data[threadId+1];
63 result[threadId+1] = 0;
66 ComplexMat ComplexMat::sqr_mag() const
68 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
70 dim3 threadsPerBlock(rows, cols);
71 dim3 numBlocks(n_channels/n_scales, n_scales);
72 sqr_mag_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, result.p_data);
78 __global__ void conj_kernel(float* data, float* result)
80 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
81 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
83 result[threadId] = data[threadId];
84 result[threadId+1] = -data[threadId+1];
87 ComplexMat ComplexMat::conj() const
89 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
91 dim3 threadsPerBlock(rows, cols);
92 dim3 numBlocks(n_channels/n_scales, n_scales);
93 conj_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, result.p_data);
99 ComplexMat ComplexMat::sum_over_channels() const
101 // assert(p_data.size() > 1);
102 ComplexMat result(this->rows, this->cols, 1);
106 cufftComplex* ComplexMat::get_p_data() const
108 return (cufftComplex*) p_data;
111 __global__ void same_num_channels_mul_kernel(float* data_l, float* data_r, float* result)
113 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
114 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
116 result[threadId] = data_l[threadId]*data_r[threadId] - data_l[threadId+1]*data_r[threadId+1];
117 result[threadId+1] = data_l[threadId]*data_r[threadId+1] + data_l[threadId+1]*data_r[threadId];
120 //element-wise per channel multiplication, division and addition
121 ComplexMat ComplexMat::operator*(const ComplexMat & rhs) const
123 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
125 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
127 dim3 threadsPerBlock(rows, cols);
128 dim3 numBlocks(n_channels/n_scales, n_scales);
129 same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
135 __global__ void same_num_channels_div_kernel(float* data_l, float* data_r, float* result)
137 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
138 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
140 result[threadId] = (data_l[threadId]*data_r[threadId] + data_l[threadId+1]*data_r[threadId+1])/
141 (data_r[threadId]*data_r[threadId] + data_r[threadId+1]*data_r[threadId+1]);
142 result[threadId+1] = (data_l[threadId+1]*data_r[threadId] - data_l[threadId]*data_r[threadId+1])/
143 (data_r[threadId]*data_r[threadId] + data_r[threadId+1]*data_r[threadId+1]);
146 ComplexMat ComplexMat::operator/(const ComplexMat & rhs) const
148 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
150 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
152 dim3 threadsPerBlock(rows, cols);
153 dim3 numBlocks(n_channels/n_scales, n_scales);
154 same_num_channels_div_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
160 __global__ void same_num_channels_add_kernel(float* data_l, float* data_r, float* result)
162 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
163 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
165 result[threadId] = data_l[threadId]+data_r[threadId];
166 result[threadId+1] = data_l[threadId+1]+data_r[threadId+1];
169 ComplexMat ComplexMat::operator+(const ComplexMat & rhs) const
171 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
173 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
175 dim3 threadsPerBlock(rows, cols);
176 dim3 numBlocks(n_channels/n_scales, n_scales);
177 same_num_channels_add_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
183 __global__ void constant_mul_kernel(float* data_l, float constant, float* result)
185 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
186 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
188 result[threadId] = data_l[threadId]*constant;
189 result[threadId+1] = data_l[threadId+1]*constant;
192 ComplexMat ComplexMat::operator*(const float & rhs) const
194 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
196 dim3 threadsPerBlock(rows, cols);
197 dim3 numBlocks(n_channels/n_scales, n_scales);
198 constant_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs, result.p_data);
204 __global__ void constant_add_kernel(float* data_l, float constant, float* result)
206 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
207 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
209 result[threadId] = data_l[threadId]+constant;
210 result[threadId+1] = data_l[threadId+1];
213 ComplexMat ComplexMat::operator+(const float & rhs) const
215 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
217 dim3 threadsPerBlock(rows, cols);
218 dim3 numBlocks(n_channels/n_scales, n_scales);
219 constant_add_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs, result.p_data);
225 __global__ void one_channel_mul_kernel(float* data_l, float* data_r, float* result)
227 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
228 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
229 int one_ch_index = 2*((threadIdx.y * blockDim.x) + threadIdx.x);
231 result[threadId] = data_l[threadId]*data_r[one_ch_index] - data_l[threadId+1]*data_r[one_ch_index+1];
232 result[threadId+1] = data_l[threadId]*data_r[one_ch_index+1] + data_l[threadId+1]*data_r[one_ch_index];
235 //multiplying element-wise multichannel by one channel mats (rhs mat is with one channel)
236 ComplexMat ComplexMat::mul(const ComplexMat & rhs) const
238 assert(rhs.n_channels == 1 && rhs.cols == cols && rhs.rows == rows);
240 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
242 dim3 threadsPerBlock(rows, cols);
243 dim3 numBlocks(n_channels/n_scales, n_scales);
244 one_channel_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
250 __global__ void scales_channel_mul_kernel(float* data_l, float* data_r, float* result)
252 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
253 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
254 int one_ch_index = 2*((threadIdx.y * blockDim.x) + threadIdx.x+blockIdx.x*blockDim.x*blockDim.y);
256 result[threadId] = data_l[threadId]*data_r[one_ch_index] - data_l[threadId+1]*data_r[one_ch_index+1];
257 result[threadId+1] = data_l[threadId]*data_r[one_ch_index+1] + data_l[threadId+1]*data_r[one_ch_index];
260 //multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel)
261 ComplexMat ComplexMat::mul2(const ComplexMat & rhs) const
263 assert(rhs.n_channels == n_channels/n_scales && rhs.cols == cols && rhs.rows == rows);
265 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
267 dim3 threadsPerBlock(rows, cols);
268 dim3 numBlocks(n_channels/n_scales, n_scales);
269 scales_channel_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
275 void ComplexMat::operator=(ComplexMat & rhs)
279 n_channels = rhs.n_channels;
280 n_scales = rhs.n_scales;
286 void ComplexMat::operator=(ComplexMat && rhs)
290 n_channels = rhs.n_channels;
291 n_scales = rhs.n_scales;
295 rhs.p_data = nullptr;