cmake_minimum_required(VERSION 2.8)
-set(KCF_LIB_SRC kcf.cpp kcf.h fft.cpp threadctx.hpp pragmas.h dynmem.hpp debug.cpp)
+set(KCF_LIB_SRC kcf.cpp kcf.h fft.cpp threadctx.hpp pragmas.h dynmem.hpp debug.cpp complexmat.hpp)
find_package(PkgConfig)
SET(use_cuda OFF)
IF(FFT STREQUAL "OpenCV")
- list(APPEND KCF_LIB_SRC fft_opencv.cpp complexmat.hpp)
+ list(APPEND KCF_LIB_SRC fft_opencv.cpp)
ELSEIF(FFT STREQUAL "fftw")
- list(APPEND KCF_LIB_SRC fft_fftw.cpp complexmat.hpp)
+ list(APPEND KCF_LIB_SRC fft_fftw.cpp)
add_definitions(-DFFTW)
pkg_check_modules(FFTW REQUIRED fftw3f)
ELSEIF(FFT STREQUAL "cuFFTW")
- list(APPEND KCF_LIB_SRC fft_fftw.cpp complexmat.hpp)
+ list(APPEND KCF_LIB_SRC fft_fftw.cpp)
add_definitions(-DFFTW -DCUFFTW)
set(use_cuda ON)
ELSEIF(FFT STREQUAL "cuFFT")
- list(APPEND KCF_LIB_SRC fft_cufft.cpp complexmat.cuh cuda_functions.cuh complexmat.cu cuda_functions.cu)
+ list(APPEND KCF_LIB_SRC fft_cufft.cpp cuda_functions.h cuda_functions.cu)
add_definitions(-DCUFFT)
set(use_cuda ON)
iF(CUDA_DEBUG)
MESSAGE(FATAL_ERROR "Invalid FFT implementation selected")
ENDIF()
+# complexmat.hpp is listed for every back-end; only the implementation file
+# differs: the CUDA translation unit for cuFFT, the plain C++ one otherwise.
+if(NOT FFT STREQUAL "cuFFT")
+    list(APPEND KCF_LIB_SRC complexmat.cpp)
+else()
+    list(APPEND KCF_LIB_SRC complexmat.cu)
+endif()
+
IF((FFT STREQUAL "OpenCV") AND BIG_BATCH)
message(SEND_ERROR "OpenCV version does not support big batch mode.")
ENDIF()
--- /dev/null
+#include "complexmat.hpp"
+
+// Sums |z|^2 over every element of every channel in host memory and divides
+// the total by the size of one channel plane (rows * cols).
+// Asserts that the matrix holds a single scale.
+ComplexMat_::T ComplexMat_::sqr_norm() const
+{
+    assert(n_scales == 1);
+
+    const int channels_per_scale = n_channels / n_scales;
+    T total = 0;
+    for (int ch = 0; ch < channels_per_scale; ++ch) {
+        // Walk one channel plane: [ch * rows * cols, (ch + 1) * rows * cols)
+        auto elem = p_data.hostMem() + ch * rows * cols;
+        const auto channel_end = p_data.hostMem() + (ch + 1) * rows * cols;
+        while (elem != channel_end) {
+            total += elem->real() * elem->real() + elem->imag() * elem->imag();
+            ++elem;
+        }
+    }
+    return total / static_cast<T>(cols * rows);
+}
+
+// Per-scale variant: for each scale, accumulates |z|^2 over all channels
+// belonging to that scale and writes the sum divided by rows * cols into
+// result.hostMem()[scale].
+void ComplexMat_::sqr_norm(DynMem_<ComplexMat_::T> &result) const
+{
+    const int channels_per_scale = n_channels / n_scales;
+    const int elems_per_scale = channels_per_scale * rows * cols;
+    for (uint s = 0; s < n_scales; ++s) {
+        T total = 0;
+        for (int ch = 0; ch < channels_per_scale; ++ch) {
+            auto elem = p_data.hostMem() + ch * rows * cols + s * elems_per_scale;
+            const auto channel_end = p_data.hostMem() + (ch + 1) * rows * cols + s * elems_per_scale;
+            for (; elem != channel_end; ++elem)
+                total += elem->real() * elem->real() + elem->imag() * elem->imag();
+        }
+        result.hostMem()[s] = total / static_cast<T>(cols * rows);
+    }
+}
+
+// Hands mat_const_operator a functor that overwrites each element with its
+// squared magnitude (a real value, so the imaginary part becomes zero).
+ComplexMat_ ComplexMat_::sqr_mag() const
+{
+    auto squared_magnitude = [](std::complex<T> &z) {
+        z = z.real() * z.real() + z.imag() * z.imag();
+    };
+    return mat_const_operator(squared_magnitude);
+}
+
+// Hands mat_const_operator a functor that replaces each element with its
+// complex conjugate (same real part, negated imaginary part).
+ComplexMat_ ComplexMat_::conj() const
+{
+    auto conjugate = [](std::complex<T> &z) { z = std::conj(z); };
+    return mat_const_operator(conjugate);
+}
+
+// Collapses the channels of each scale into one: the result has a single
+// channel per scale, and every element is the sum of the elements at the same
+// position across all channels of that scale.
+ComplexMat_ ComplexMat_::sum_over_channels() const
+{
+    assert(p_data.num_elem == n_channels * rows * cols);
+
+    const uint plane = rows * cols; // elements in one channel
+    const uint channels_per_scale = n_channels / n_scales;
+    const uint scale_stride = channels_per_scale * plane; // elements in one scale of the input
+
+    ComplexMat_ summed(this->rows, this->cols, 1, n_scales);
+    for (uint s = 0; s < n_scales; ++s) {
+        for (uint px = 0; px < plane; ++px) {
+            std::complex<T> acc = 0;
+            for (uint ch = 0; ch < channels_per_scale; ++ch)
+                acc += p_data[s * scale_stride + px + ch * plane];
+            summed.p_data.hostMem()[s * plane + px] = acc;
+        }
+    }
+    return summed;
+}
+
+// Division by rhs: mat_mat_operator receives a functor that divides a
+// left-hand element by the matching right-hand element.
+ComplexMat_ ComplexMat_::operator/(const ComplexMat_ &rhs) const
+{
+    auto divide = [](std::complex<T> &lhs_elem, const std::complex<T> &rhs_elem) { lhs_elem /= rhs_elem; };
+    return mat_mat_operator(divide, rhs);
+}
+
+// Addition of rhs: mat_mat_operator receives a functor that adds the matching
+// right-hand element to a left-hand element.
+ComplexMat_ ComplexMat_::operator+(const ComplexMat_ &rhs) const
+{
+    auto add = [](std::complex<T> &lhs_elem, const std::complex<T> &rhs_elem) { lhs_elem += rhs_elem; };
+    return mat_mat_operator(add, rhs);
+}
+
+// Scaling by a real constant: mat_const_operator receives a functor that
+// multiplies an element by rhs.
+ComplexMat_ ComplexMat_::operator*(const ComplexMat_::T &rhs) const
+{
+    auto scale = [&rhs](std::complex<T> &z) { z *= rhs; };
+    return mat_const_operator(scale);
+}
+
+// Multiplication by rhs through matn_mat1_operator, which receives a functor
+// that multiplies a left-hand element by a right-hand element.
+ComplexMat_ ComplexMat_::mul(const ComplexMat_ &rhs) const
+{
+    auto multiply = [](std::complex<T> &lhs_elem, const std::complex<T> &rhs_elem) { lhs_elem *= rhs_elem; };
+    return matn_mat1_operator(multiply, rhs);
+}
+
+// Offset by a real constant: mat_const_operator receives a functor that adds
+// rhs to an element.
+ComplexMat_ ComplexMat_::operator+(const ComplexMat_::T &rhs) const
+{
+    auto offset = [&rhs](std::complex<T> &z) { z += rhs; };
+    return mat_const_operator(offset);
+}
+
+// Multiplication by rhs: mat_mat_operator receives a functor that multiplies
+// a left-hand element by the matching right-hand element.
+ComplexMat_ ComplexMat_::operator*(const ComplexMat_ &rhs) const
+{
+    auto multiply = [](std::complex<T> &lhs_elem, const std::complex<T> &rhs_elem) { lhs_elem *= rhs_elem; };
+    return mat_mat_operator(multiply, rhs);
+}
-#include "complexmat.cuh"
+#include "complexmat.hpp"
-__global__ void sqr_norm_kernel(int n, float *out, float *data, float rows, float cols)
+__global__ void sqr_norm_kernel(int n, float *out, const float *data, float rows, float cols)
{
extern __shared__ float sdata[];
int i = blockDim.x * threadIdx.y + threadIdx.x;
dim3 numBlocks(n_channels / n_scales, n_scales);
sqr_norm_kernel<<<numBlocks, threadsPerBlock, rows * cols * sizeof(float)>>>(
- n_channels / n_scales, result.deviceMem(), this->p_data, rows, cols);
+ n_channels / n_scales, result.deviceMem(), (float*)this->p_data.deviceMem(), rows, cols);
CudaCheckError();
return;
}
-__global__ void sqr_mag_kernel(float *data, float *result)
+__global__ void sqr_mag_kernel(const float *data, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- sqr_mag_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, result.p_data);
+ sqr_mag_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(), (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void conj_kernel(float *data, float *result)
+__global__ void conj_kernel(const float *data, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- conj_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, result.p_data);
+ conj_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(), (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
return result;
}
-cufftComplex *ComplexMat::get_p_data() const
-{
- return (cufftComplex *)p_data;
-}
-
-__global__ void same_num_channels_mul_kernel(float *data_l, float *data_r, float *result)
+__global__ void same_num_channels_mul_kernel(const float *data_l, const float *data_r, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
- result.p_data);
+ same_num_channels_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ (float*)rhs.p_data.deviceMem(),
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void same_num_channels_div_kernel(float *data_l, float *data_r, float *result)
+__global__ void same_num_channels_div_kernel(const float *data_l, const float *data_r, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_div_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
- result.p_data);
+ same_num_channels_div_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ (float*)rhs.p_data.deviceMem(),
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void same_num_channels_add_kernel(float *data_l, float *data_r, float *result)
+__global__ void same_num_channels_add_kernel(const float *data_l, const float *data_r, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- same_num_channels_add_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data,
- result.p_data);
+ same_num_channels_add_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ (float*)rhs.p_data.deviceMem(),
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void constant_mul_kernel(float *data_l, float constant, float *result)
+__global__ void constant_mul_kernel(const float *data_l, float constant, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- constant_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs, result.p_data);
+ constant_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ rhs,
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void constant_add_kernel(float *data_l, float constant, float *result)
+__global__ void constant_add_kernel(const float *data_l, float constant, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- constant_add_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs, result.p_data);
+ constant_add_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ rhs,
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
-__global__ void one_channel_mul_kernel(float *data_l, float *data_r, float *result)
+__global__ void one_channel_mul_kernel(const float *data_l, const float *data_r, float *result)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = 2 * (blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x);
dim3 threadsPerBlock(rows, cols);
dim3 numBlocks(n_channels / n_scales, n_scales);
- one_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data, result.p_data);
+ one_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>((float*)this->p_data.deviceMem(),
+ (float*)rhs.p_data.deviceMem(),
+ (float*)result.p_data.deviceMem());
CudaCheckError();
return result;
}
// multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel)
-ComplexMat ComplexMat::mul2(const ComplexMat &rhs) const
-{
- assert(rhs.n_channels == n_channels / n_scales && rhs.cols == cols && rhs.rows == rows);
-
- ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
-
- dim3 threadsPerBlock(rows, cols);
- dim3 numBlocks(n_channels / n_scales, n_scales);
- scales_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data, result.p_data);
- CudaCheckError();
+// ComplexMat ComplexMat::mul2(const ComplexMat &rhs) const
+// {
+// assert(rhs.n_channels == n_channels / n_scales && rhs.cols == cols && rhs.rows == rows);
- return result;
-}
+// ComplexMat result(this->rows, this->cols, this->channels(), this->n_scales);
-void ComplexMat::operator=(ComplexMat &rhs)
-{
- cols = rhs.cols;
- rows = rhs.rows;
- n_channels = rhs.n_channels;
- n_scales = rhs.n_scales;
+// dim3 threadsPerBlock(rows, cols);
+// dim3 numBlocks(n_channels / n_scales, n_scales);
+// scales_channel_mul_kernel<<<numBlocks, threadsPerBlock, 0>>>(this->p_data, rhs.p_data, result.p_data);
+// CudaCheckError();
- p_data = rhs.p_data;
-}
+// return result;
+// }
-void ComplexMat::operator=(ComplexMat &&rhs)
-{
- cols = rhs.cols;
- rows = rhs.rows;
- n_channels = rhs.n_channels;
- n_scales = rhs.n_scales;
+// void ComplexMat::operator=(ComplexMat &&rhs)
+// {
+// cols = rhs.cols;
+// rows = rhs.rows;
+// n_channels = rhs.n_channels;
+// n_scales = rhs.n_scales;
- p_data = rhs.p_data;
+// p_data = rhs.p_data;
- rhs.p_data = nullptr;
-}
+// rhs.p_data = nullptr;
+// }
+++ /dev/null
-#ifndef COMPLEXMAT_H
-#define COMPLEXMAT_H
-
-#include <opencv2/opencv.hpp>
-
-#include "dynmem.hpp"
-#include "cuda_runtime.h"
-#include "cufft.h"
-
-#include "cuda_error_check.hpp"
-
-class ComplexMat {
- public:
- uint cols;
- uint rows;
- uint n_channels;
- uint n_scales = 1;
-
- ComplexMat() : cols(0), rows(0), n_channels(0) {}
-
- ComplexMat(uint _rows, uint _cols, uint _n_channels, uint _n_scales = 1)
- : cols(_cols), rows(_rows), n_channels(_n_channels * _n_scales), n_scales(_n_scales)
- {
- CudaSafeCall(cudaMalloc(&p_data, n_channels * cols * rows * sizeof(cufftComplex)));
- }
-
- ComplexMat(cv::Size size, uint _n_channels, uint _n_scales = 1)
- : cols(size.width), rows(size.height), n_channels(_n_channels * _n_channels), n_scales(_n_scales)
- {
- CudaSafeCall(cudaMalloc(&p_data, n_channels * cols * rows * sizeof(cufftComplex)));
- }
-
- ComplexMat(ComplexMat &&other)
- {
- cols = other.cols;
- rows = other.rows;
- n_channels = other.n_channels;
- n_scales = other.n_scales;
- p_data = other.p_data;
-
- other.p_data = nullptr;
- }
-
- ~ComplexMat()
- {
- if (p_data != nullptr) {
- CudaSafeCall(cudaFree(p_data));
- p_data = nullptr;
- }
- }
-
- void create(uint _rows, uint _cols, uint _n_channels)
- {
- rows = _rows;
- cols = _cols;
- n_channels = _n_channels;
- CudaSafeCall(cudaMalloc(&p_data, n_channels * cols * rows * sizeof(cufftComplex)));
- }
-
- void create(uint _rows, uint _cols, uint _n_channels, uint _n_scales)
- {
- rows = _rows;
- cols = _cols;
- n_channels = _n_channels;
- n_scales = _n_scales;
- CudaSafeCall(cudaMalloc(&p_data, n_channels * cols * rows * sizeof(cufftComplex)));
- }
- // cv::Mat API compatibility
- cv::Size size() const { return cv::Size(cols, rows); }
- uint channels() const { return n_channels; }
-
- void sqr_norm(DynMem &result) const;
-
- ComplexMat sqr_mag() const;
-
- ComplexMat conj() const;
-
- ComplexMat sum_over_channels() const;
-
- cufftComplex *get_p_data() const;
-
- // element-wise per channel multiplication, division and addition
- ComplexMat operator*(const ComplexMat &rhs) const;
- ComplexMat operator/(const ComplexMat &rhs) const;
- ComplexMat operator+(const ComplexMat &rhs) const;
-
- // multiplying or adding constant
- ComplexMat operator*(const float &rhs) const;
- ComplexMat operator+(const float &rhs) const;
-
- // multiplying element-wise multichannel by one channel mats (rhs mat is with one channel)
- ComplexMat mul(const ComplexMat &rhs) const;
-
- // multiplying element-wise multichannel by one channel mats (rhs mat is with multiple channel)
- ComplexMat mul2(const ComplexMat &rhs) const;
- // text output
- friend std::ostream &operator<<(std::ostream &os, const ComplexMat &mat)
- {
- float *data_cpu = reinterpret_cast<float*>(malloc(mat.rows * mat.cols * mat.n_channels * sizeof(cufftComplex)));
- CudaSafeCall(cudaMemcpy(data_cpu, mat.p_data, mat.rows * mat.cols * mat.n_channels * sizeof(cufftComplex),
- cudaMemcpyDeviceToHost));
- // for (int i = 0; i < mat.n_channels; ++i){
- for (int i = 0; i < 1; ++i) {
- os << "Channel " << i << std::endl;
- for (uint j = 0; j < mat.rows; ++j) {
- for (uint k = 0; k < 2 * mat.cols - 2; k += 2)
- os << "(" << data_cpu[j * 2 * mat.cols + k] << "," << data_cpu[j * 2 * mat.cols + (k + 1)] << ")"
- << ", ";
- os << "(" << data_cpu[j * 2 * mat.cols + 2 * mat.cols - 2] << ","
- << data_cpu[j * 2 * mat.cols + 2 * mat.cols - 1] << ")" << std::endl;
- }
- }
- free(data_cpu);
- return os;
- }
-
- void operator=(ComplexMat &rhs);
- void operator=(ComplexMat &&rhs);
-
- private:
- mutable float *p_data = nullptr;
-};
-
-#endif // COMPLEXMAT_H
#include <functional>
#include "dynmem.hpp"
+#ifdef CUFFT
+#include <cufft.h>
+#endif
+
class ComplexMat_ {
public:
typedef float T;
}
}
- T sqr_norm() const
- {
- assert(n_scales == 1);
+ T sqr_norm() const;
- int n_channels_per_scale = n_channels / n_scales;
- T sum_sqr_norm = 0;
- for (int i = 0; i < n_channels_per_scale; ++i) {
- for (auto lhs = p_data.hostMem() + i * rows * cols; lhs != p_data.hostMem() + (i + 1) * rows * cols; ++lhs)
- sum_sqr_norm += lhs->real() * lhs->real() + lhs->imag() * lhs->imag();
- }
- sum_sqr_norm = sum_sqr_norm / static_cast<T>(cols * rows);
- return sum_sqr_norm;
- }
+ void sqr_norm(DynMem_<T> &result) const;
- void sqr_norm(DynMem_<T> &result) const
- {
- int n_channels_per_scale = n_channels / n_scales;
- int scale_offset = n_channels_per_scale * rows * cols;
- for (uint scale = 0; scale < n_scales; ++scale) {
- T sum_sqr_norm = 0;
- for (int i = 0; i < n_channels_per_scale; ++i)
- for (auto lhs = p_data.hostMem() + i * rows * cols + scale * scale_offset;
- lhs != p_data.hostMem() + (i + 1) * rows * cols + scale * scale_offset; ++lhs)
- sum_sqr_norm += lhs->real() * lhs->real() + lhs->imag() * lhs->imag();
- result.hostMem()[scale] = sum_sqr_norm / static_cast<T>(cols * rows);
- }
- return;
- }
-
- ComplexMat_ sqr_mag() const
- {
- return mat_const_operator([](std::complex<T> &c) { c = c.real() * c.real() + c.imag() * c.imag(); });
- }
+ ComplexMat_ sqr_mag() const;
- ComplexMat_ conj() const
- {
- return mat_const_operator([](std::complex<T> &c) { c = std::complex<T>(c.real(), -c.imag()); });
- }
+ ComplexMat_ conj() const;
- ComplexMat_ sum_over_channels() const
- {
- assert(p_data.num_elem == n_channels * rows * cols);
-
- uint n_channels_per_scale = n_channels / n_scales;
- uint scale_offset = n_channels_per_scale * rows * cols;
-
- ComplexMat_ result(this->rows, this->cols, 1, n_scales);
- for (uint scale = 0; scale < n_scales; ++scale) {
- for (uint i = 0; i < rows * cols; ++i) {
- std::complex<T> acc = 0;
- for (uint ch = 0; ch < n_channels_per_scale; ++ch)
- acc += p_data[scale * scale_offset + i + ch * rows * cols];
- result.p_data.hostMem()[scale * rows * cols + i] = acc;
- }
- }
- return result;
- }
+ ComplexMat_ sum_over_channels() const;
// return 2 channels (real, imag) for first complex channel
cv::Mat to_cv_mat() const
std::complex<T> *get_p_data() { return p_data.hostMem(); }
const std::complex<T> *get_p_data() const { return p_data.hostMem(); }
+#ifdef CUFFT
+ // Device-side view of the matrix data, reinterpreted as cufftComplex.
+ // reinterpret_cast makes the type pun explicit instead of hiding it in a C-style cast.
+ cufftComplex *get_dev_data() { return reinterpret_cast<cufftComplex *>(p_data.deviceMem()); }
+ // Read-only device-side view of the matrix data, reinterpreted as cufftComplex.
+ // The cast keeps the const qualifier; the previous C-style cast silently dropped it.
+ const cufftComplex *get_dev_data() const { return reinterpret_cast<const cufftComplex *>(p_data.deviceMem()); }
+#endif
+
// element-wise per channel multiplication, division and addition
- ComplexMat_ operator*(const ComplexMat_ &rhs) const
- {
- return mat_mat_operator([](std::complex<T> &c_lhs, const std::complex<T> &c_rhs) { c_lhs *= c_rhs; }, rhs);
- }
- ComplexMat_ operator/(const ComplexMat_ &rhs) const
- {
- return mat_mat_operator([](std::complex<T> &c_lhs, const std::complex<T> &c_rhs) { c_lhs /= c_rhs; }, rhs);
- }
- ComplexMat_ operator+(const ComplexMat_ &rhs) const
- {
- return mat_mat_operator([](std::complex<T> &c_lhs, const std::complex<T> &c_rhs) { c_lhs += c_rhs; }, rhs);
- }
+ ComplexMat_ operator*(const ComplexMat_ &rhs) const;
+ ComplexMat_ operator/(const ComplexMat_ &rhs) const;
+ ComplexMat_ operator+(const ComplexMat_ &rhs) const;
// multiplying or adding constant
- ComplexMat_ operator*(const T &rhs) const
- {
- return mat_const_operator([&rhs](std::complex<T> &c) { c *= rhs; });
- }
- ComplexMat_ operator+(const T &rhs) const
- {
- return mat_const_operator([&rhs](std::complex<T> &c) { c += rhs; });
- }
+ ComplexMat_ operator*(const T &rhs) const;
+ ComplexMat_ operator+(const T &rhs) const;
// multiplying element-wise multichannel by one channel mats (rhs mat is with one channel)
- ComplexMat_ mul(const ComplexMat_ &rhs) const
- {
- return matn_mat1_operator([](std::complex<T> &c_lhs, const std::complex<T> &c_rhs) { c_lhs *= c_rhs; }, rhs);
- }
+ ComplexMat_ mul(const ComplexMat_ &rhs) const;
// multiplying element-wise multichannel mats - same as operator*(ComplexMat), but without allocating memory for the result
ComplexMat_ muln(const ComplexMat_ &rhs) const
-#include "cuda_functions.cuh"
+#include "cuda_functions.h"
__global__ void gaussian_correlation_kernel(float *data_in, float *data_out, float *xf_sqr_norm, float *yf_sqr_norm,
int rows, int cols, int channels_per_scale, double sigma)
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "dynmem.hpp"
-#ifdef CUFFT
-#include "complexmat.cuh"
-#else
#include "complexmat.hpp"
+
+#ifdef CUFFT
+#include <cufft.h>
#endif
const T *hostMem() const { return ptr_h; }
#ifdef CUFFT
T *deviceMem() { return ptr_d; }
+ const T *deviceMem() const { return ptr_d; }
#endif
void operator=(DynMem_ &rhs) {
assert(num_elem == rhs.num_elem);
#include <opencv2/opencv.hpp>
#include <vector>
#include <cassert>
-
-#ifdef CUFFT
- #include "complexmat.cuh"
-#else
- #include "complexmat.hpp"
-#endif
+#include "complexmat.hpp"
#ifdef BIG_BATCH
#define BIG_BATCH_MODE 1
auto in = static_cast<cufftReal *>(const_cast<MatScales&>(real_input).deviceMem());
if (real_input.size[0] == 1)
- cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_p_data()));
+ cudaErrorCheck(cufftExecR2C(plan_f, in, complex_result.get_dev_data()));
#ifdef BIG_BATCH
else
- cudaErrorCheck(cufftExecR2C(plan_f_all_scales, in, complex_result.get_p_data()));
+ cudaErrorCheck(cufftExecR2C(plan_f_all_scales, in, complex_result.get_dev_data()));
#endif
}
}
if (n_scales == 1)
- cudaErrorCheck(cufftExecR2C(plan_fw, temp_data, complex_result.get_p_data()));
+ cudaErrorCheck(cufftExecR2C(plan_fw, temp_data, complex_result.get_dev_data()));
#ifdef BIG_BATCH
else
- cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_p_data()));
+ cudaErrorCheck(cufftExecR2C(plan_fw_all_scales, temp_data, complex_result.get_dev_data()));
#endif
}
#include <memory>
#include "fhog.hpp"
+#include "complexmat.hpp"
#ifdef CUFFT
-#include "complexmat.cuh"
-#include "cuda_functions.cuh"
+#include "cuda_functions.h"
#include "cuda_error_check.hpp"
#include <cuda_runtime.h>
-#else
-#include "complexmat.hpp"
#endif
#include "cnfeat.hpp"
#include <future>
#include "dynmem.hpp"
#include "kcf.h"
-
-#ifdef CUFFT
-#include "complexmat.cuh"
-#else
#include "complexmat.hpp"
-#endif
class KCF_Tracker;