1 #include "complexmat.cuh"
3 __global__ void sqr_norm_kernel(int n, float* out, float* data, float rows, float cols)
5 extern __shared__ float sdata[];
6 int i = blockDim.x * threadIdx.y + threadIdx.x;
7 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
8 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
11 sdata[i] = data[threadId]*data[threadId] + data[threadId+1]*data[threadId+1];
14 for (unsigned int s=(blockDim.x*blockDim.y+1)/2, old_s = blockDim.x*blockDim.y;s>0; s>>=1) {
18 if (i < s && i+s < old_s) {
19 sdata[i] += sdata[i + s];
26 atomicAdd(&out[blockId/n], sdata[0]/(rows*cols));
30 void ComplexMat::sqr_norm(float *result) const
32 CudaSafeCall(cudaMemset(result, 0, n_scales*sizeof(float)));
34 dim3 threadsPerBlock(rows, cols);
35 dim3 numBlocks(n_channels/n_scales, n_scales);
37 sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows*cols*sizeof(float)>>>(n_channels/n_scales, result, p_data, rows, cols);
43 __global__ void sqr_mag_kernel(float* data, float* result)
45 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
46 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
48 result[threadId] = data[threadId]*data[threadId] + data[threadId+1]*data[threadId+1];
49 result[threadId+1] = 0;
52 ComplexMat ComplexMat::sqr_mag() const
54 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
56 dim3 threadsPerBlock(rows, cols);
57 dim3 numBlocks(n_channels/n_scales, n_scales);
58 sqr_mag_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, result.p_data);
64 __global__ void conj_kernel(float* data, float* result)
66 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
67 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
69 result[threadId] = data[threadId];
70 result[threadId+1] = -data[threadId+1];
73 ComplexMat ComplexMat::conj() const
75 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
77 dim3 threadsPerBlock(rows, cols);
78 dim3 numBlocks(n_channels/n_scales, n_scales);
79 conj_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, result.p_data);
85 ComplexMat ComplexMat::sum_over_channels() const
87 // assert(p_data.size() > 1);
88 ComplexMat result(this->rows, this->cols, 1);
92 cufftComplex* ComplexMat::get_p_data() const
94 return (cufftComplex*) p_data;
97 __global__ void same_num_channels_mul_kernel(float* data_l, float* data_r, float* result)
99 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
100 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
102 result[threadId] = data_l[threadId]*data_r[threadId] - data_l[threadId+1]*data_r[threadId+1];
103 result[threadId+1] = data_l[threadId]*data_r[threadId+1] + data_l[threadId+1]*data_r[threadId];
106 //element-wise per channel multiplication, division and addition
107 ComplexMat ComplexMat::operator*(const ComplexMat & rhs) const
109 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
111 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
113 dim3 threadsPerBlock(rows, cols);
114 dim3 numBlocks(n_channels/n_scales, n_scales);
115 same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
121 __global__ void same_num_channels_div_kernel(float* data_l, float* data_r, float* result)
123 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
124 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
126 result[threadId] = (data_l[threadId]*data_r[threadId] + data_l[threadId+1]*data_r[threadId+1])/
127 (data_r[threadId]*data_r[threadId] + data_r[threadId+1]*data_r[threadId+1]);
128 result[threadId+1] = (data_l[threadId+1]*data_r[threadId] - data_l[threadId]*data_r[threadId+1])/
129 (data_r[threadId]*data_r[threadId] + data_r[threadId+1]*data_r[threadId+1]);
132 ComplexMat ComplexMat::operator/(const ComplexMat & rhs) const
134 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
136 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
138 dim3 threadsPerBlock(rows, cols);
139 dim3 numBlocks(n_channels/n_scales, n_scales);
140 same_num_channels_div_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
146 __global__ void same_num_channels_add_kernel(float* data_l, float* data_r, float* result)
148 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
149 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
151 result[threadId] = data_l[threadId]+data_r[threadId];
152 result[threadId+1] = data_l[threadId+1]+data_r[threadId+1];
155 ComplexMat ComplexMat::operator+(const ComplexMat & rhs) const
157 assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
159 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
161 dim3 threadsPerBlock(rows, cols);
162 dim3 numBlocks(n_channels/n_scales, n_scales);
163 same_num_channels_add_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
169 __global__ void constant_mul_kernel(float* data_l, float constant, float* result)
171 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
172 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
174 result[threadId] = data_l[threadId]*constant;
175 result[threadId+1] = data_l[threadId+1]*constant;
178 ComplexMat ComplexMat::operator*(const float & rhs) const
180 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
182 dim3 threadsPerBlock(rows, cols);
183 dim3 numBlocks(n_channels/n_scales, n_scales);
184 constant_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs, result.p_data);
190 __global__ void constant_add_kernel(float* data_l, float constant, float* result)
192 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
193 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
195 result[threadId] = data_l[threadId]+constant;
196 result[threadId+1] = data_l[threadId+1];
199 ComplexMat ComplexMat::operator+(const float & rhs) const
201 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
203 dim3 threadsPerBlock(rows, cols);
204 dim3 numBlocks(n_channels/n_scales, n_scales);
205 constant_add_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs, result.p_data);
211 __global__ void one_channel_mul_kernel(float* data_l, float* data_r, float* result)
213 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
214 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
215 int one_ch_index = 2*((threadIdx.y * blockDim.x) + threadIdx.x);
217 result[threadId] = data_l[threadId]*data_r[one_ch_index] - data_l[threadId+1]*data_r[one_ch_index+1];
218 result[threadId+1] = data_l[threadId]*data_r[one_ch_index+1] + data_l[threadId+1]*data_r[one_ch_index];
221 //multiplying element-wise multichannel by one channel mats (rhs mat is with one channel)
222 ComplexMat ComplexMat::mul(const ComplexMat & rhs) const
224 assert(rhs.n_channels == 1 && rhs.cols == cols && rhs.rows == rows);
226 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
228 dim3 threadsPerBlock(rows, cols);
229 dim3 numBlocks(n_channels/n_scales, n_scales);
230 one_channel_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
236 __global__ void scales_channel_mul_kernel(float* data_l, float* data_r, float* result)
238 int blockId = blockIdx.x + blockIdx.y * gridDim.x;
239 int threadId = 2*(blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
240 int one_ch_index = 2*((threadIdx.y * blockDim.x) + threadIdx.x+blockIdx.x*blockDim.x*blockDim.y);
242 result[threadId] = data_l[threadId]*data_r[one_ch_index] - data_l[threadId+1]*data_r[one_ch_index+1];
243 result[threadId+1] = data_l[threadId]*data_r[one_ch_index+1] + data_l[threadId+1]*data_r[one_ch_index];
246 //multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel)
247 ComplexMat ComplexMat::mul2(const ComplexMat & rhs) const
249 assert(rhs.n_channels == n_channels/n_scales && rhs.cols == cols && rhs.rows == rows);
251 ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
253 dim3 threadsPerBlock(rows, cols);
254 dim3 numBlocks(n_channels/n_scales, n_scales);
255 scales_channel_mul_kernel<<<numBlocks, threadsPerBlock>>>(this->p_data, rhs.p_data, result.p_data);
261 void ComplexMat::operator=(ComplexMat & rhs)
265 n_channels = rhs.n_channels;
266 n_scales = rhs.n_scales;
272 void ComplexMat::operator=(ComplexMat && rhs)
276 n_channels = rhs.n_channels;
277 n_scales = rhs.n_scales;
281 rhs.p_data = nullptr;