}
}
-void ComplexMat::sqr_norm(float *result) const
+void ComplexMat::sqr_norm(DynMem &result) const
{
- CudaSafeCall(cudaMemsetAsync(result, 0, n_scales * sizeof(float), this->stream));
+ CudaSafeCall(cudaMemsetAsync(result.deviceMem(), 0, n_scales * sizeof(float)));
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows * cols * sizeof(float), this->stream>>>(
- n_channels / n_scales, result, this->p_data, rows, cols);
+ sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows * cols * sizeof(float)>>>(
+ n_channels / n_scales, result.deviceMem(), this->p_data, rows, cols);
CudaCheckError();
return;
ComplexMat ComplexMat::sqr_mag() const
{
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- sqr_mag_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, result.p_data);
+ sqr_mag_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, result.p_data);
CudaCheckError();
return result;
ComplexMat ComplexMat::conj() const
{
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- conj_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, result.p_data);
+ conj_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, result.p_data);
CudaCheckError();
return result;
ComplexMat ComplexMat::sum_over_channels() const
{
// assert(p_data.size() > 1);
- ComplexMat result(this->rows, this->cols, 1, this->stream);
+ ComplexMat result(this->rows, this->cols, 1);
return result;
}
{
assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs.p_data,
+ same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
result.p_data);
CudaCheckError();
{
assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_div_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs.p_data,
+ same_num_channels_div_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
result.p_data);
CudaCheckError();
{
assert(rhs.n_channels == n_channels && rhs.cols == cols && rhs.rows == rows);
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_add_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs.p_data,
+ same_num_channels_add_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
result.p_data);
CudaCheckError();
ComplexMat ComplexMat::operator*(const float &rhs) const
{
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- constant_mul_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs, result.p_data);
+ constant_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs, result.p_data);
CudaCheckError();
return result;
ComplexMat ComplexMat::operator+(const float &rhs) const
{
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- constant_add_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs, result.p_data);
+ constant_add_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs, result.p_data);
CudaCheckError();
return result;
{
assert(rhs.n_channels == 1 && rhs.cols == cols && rhs.rows == rows);
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- one_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs.p_data, result.p_data);
+ one_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data, result.p_data);
CudaCheckError();
return result;
{
assert(rhs.n_channels == n_channels / n_scales && rhs.cols == cols && rhs.rows == rows);
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales, this->stream);
+ ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- scales_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0, this->stream>>>(this->p_data, rhs.p_data, result.p_data);
+ scales_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data, result.p_data);
CudaCheckError();
return result;
rows = rhs.rows;
n_channels = rhs.n_channels;
n_scales = rhs.n_scales;
- stream = rhs.stream;
- foreign_data = true;
p_data = rhs.p_data;
}
rows = rhs.rows;
n_channels = rhs.n_channels;
n_scales = rhs.n_scales;
- stream = rhs.stream;
p_data = rhs.p_data;