I have the following minimal piece of code and would like to know how I can multiply two matrices inside my kernel function.
For example, I cannot create a Mat (as in OpenCV) inside the kernel function.
// Kernel stub illustrating the problem: the author wants to compute a matrix
// product on the device using OpenCV's Mat type.
//
// Parameters:
//   N - not used by the body as written.
//   b - pointer to a Mat; the only statement that would touch it is commented out.
//
// As written this kernel does not compile: declaring a Mat invokes the
// cv::Mat constructor, which is a host-only function, and nvcc does not allow
// host functions to be called from __global__ code.
__global__ void myMatKernel(int N, Mat *b)
{
Mat a; // creates compilation error 1 (host-only cv::Mat constructor called in device code)
// b = a*b; <---- what I would need
// NOTE(review): b is a Mat*, so the intended operation is presumably on *b — confirm.
}
// Host driver: computes a reference matrix product on the CPU with OpenCV,
// then allocates managed memory for a Mat and launches myMatKernel on it.
//
// Returns 0 when every CUDA call and the kernel itself succeed, 1 otherwise.
int main (void)
{
    // CPU reference: (10x1) * (1x10) matrix product of double-precision values.
    Mat a(10, 1, CV_64F);
    a.setTo(Scalar(2.2));
    Mat c(1, 10, CV_64F);
    c.setTo(Scalar(3.35));
    Mat d;
    d = a*c; // works perfectly fine, but would like to do this operation on the GPU

    // NOTE(review): this reserves sizeof(Mat) bytes of raw managed memory only;
    // no Mat object is constructed in it. Even a properly constructed Mat would
    // presumably keep its element buffer in ordinary host memory, which device
    // code cannot dereference — confirm against the OpenCV documentation.
    Mat *b = nullptr;
    if (cudaMallocManaged(&b, sizeof(Mat)) != cudaSuccess)
        return 1;

    //assign somehow values to matrix b before passing it to the function
    myMatKernel<<<1,256>>>(1, b);

    // A launch does not return an error code itself: configuration errors are
    // reported by cudaGetLastError(), and faults during execution surface at
    // the next synchronizing call. Synchronize AFTER the launch (not before it)
    // so the kernel has finished and its status is known before b is released.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
        status = cudaDeviceSynchronize();

    // Always release the allocation, but report the first failure seen.
    const cudaError_t freeStatus = cudaFree(b);
    if (status == cudaSuccess)
        status = freeStatus;

    return (status == cudaSuccess) ? 0 : 1;
}
compilation error 1: "error: calling a __host__ function("cv::Mat::Mat") from a __global__ function("myMatKernel") is not allowed"
Could someone explain/show how I can solve these issues?