Hello all,
I have been struggling to use Zgemm for a week now. I use Zgemm to do several matrix multiplication operations. However, I can’t even get the first multiplication result correct. Here is what I want to do
AB = C. A is 2x54, and B is 54x2, and C is 2x2. From reading cublas library manual, AB = C, s0 C is m x n, A is m x k, and B is k x n. => m = 2, k = 54, n = 2.
Here is the example code that I wrote to try the first multiplication:
#include <stdio.h>
#include <stdlib.h>
#include
#include
#include
#include
#include <cuda.h>
#include <math.h>
#include <cuComplex.h>
#include <complex.h>
#include
#include
#include “device_launch_parameters.h”
#include <thrust/device_vector.h>
#include <thrust/system_error.h>
#include <thrust/extrema.h>
#include <cublas_v2.h>
using namespace std;
int main(void)
{
cuDoubleComplex array_host, debug_host;
cuDoubleComplex nAI, Tresultk;
cudaMalloc((void **)&Tresultk , 254sizeof(cuDoubleComplex));
cudaMalloc((void **)&nAI, 254sizeof(cuDoubleComplex));
cuDoubleComplex M1resultk;
cudaMalloc((void **)&M1resultk, 22*sizeof(cuDoubleComplex));
cuDoubleComplex test[108];
//test = (cuDoubleComplex *) malloc(2*54*sizeof(cuDoubleComplex));
//test[0].x = 0.234567; test[0].y = 1.234567;
/*
test[1].x = 0.234567; test[1].y = 1.234567;
test[2].x = 0.234567; test[2].y = 1.234567; test[3].x = 0.234567; test[3].y = 1.234567;
test[4].x = 0.234567; test[4].y = 1.234567;
*/
for (int z = 0; z<54; z++)
{
test[z].x = z+1; test[z].y = 0;
test[z+54].x = z+1; test[z+54].y = 0;
}
//test[54].x = 0.234567; test[54].y = 1.234567;
//test[1].x = 0.234567; test[1].y = 1.234567;
for (int zz = 0; zz<108; zz++)
{
cout << test[zz].x << " " << test[zz].y << '\n';
}
//cublasStatus_t statt;
cudaMemcpy(nAI,test,2*54*sizeof(cuDoubleComplex),cudaMemcpyHostToDevice);
cudaError_t code95 = cudaGetLastError();
if (code95 != cudaSuccess)
printf ("95 cudamemcpy error -- %s\n", cudaGetErrorString(code95));
test[0].x = 1; test[0].y = 0;
for (int z = 1; z<108; z++)
{
test[z].x = 0; test[z].y = 0;
}
test[1].x = 1; test[1].y =0;
for (int zz = 0; zz<108; zz++)
{
cout << test[zz].x << " " << test[zz].y << '\n';
}
cudaMemcpy(Tresultk,test,2*54*sizeof(cuDoubleComplex),cudaMemcpyHostToDevice);
cudaError_t code96 = cudaGetLastError();
if (code96 != cudaSuccess)
printf ("96 cudamemcpy error -- %s\n", cudaGetErrorString(code96));
debug_host = (cuDoubleComplex *) malloc(2*54*sizeof(cuDoubleComplex));
cudaMemcpy(debug_host,nAI,2*54*sizeof(cuDoubleComplex),cudaMemcpyDeviceToHost);
cudaError_t code92 = cudaGetLastError();
if (code92 != cudaSuccess)
printf ("92 cudamemcpy error -- %s\n", cudaGetErrorString(code92));
ofstream myfile1;
myfile1.open ("nAI_07232015.txt");
for (int kl = 0; kl<108; kl++)
{
myfile1 << "X is ";
myfile1 << debug_host[kl].x << " " << "Y is ";
myfile1 << debug_host[kl].y << '\n';
}
myfile1.close();
//cublasSetMatrix (2 ,2 , sizeof (cuDoubleComplex) ,c ,2 ,M1test , 2 );
cuDoubleComplex al, bet;
al.x=1; al.y=0; bet.x=0; bet.y=0;
cublasHandle_t handle4;
cublasCreate_v2(&handle4);
cublasZgemm_v2(handle4,CUBLAS_OP_N,CUBLAS_OP_N,2,2,54,&al,nAI,2,Tresultk,54,&bet,M1resultk,2);
array_host = (cuDoubleComplex *) malloc(2*2*sizeof(cuDoubleComplex));
cudaMemcpy(array_host,M1resultk,2*2*sizeof(cuDoubleComplex),cudaMemcpyDeviceToHost);
cudaError_t code93 = cudaGetLastError();
if (code93 != cudaSuccess)
printf ("93 cudamemcpy error -- %s\n", cudaGetErrorString(code93));
ofstream myfile;
myfile.open ("M1resultk_07232015.txt");
for (int kk = 0; kk<4; kk++)
{
myfile << "X is ";
myfile << array_host[kk].x << " " << "Y is ";
myfile << array_host[kk].y << '\n';
}
myfile.close();
cublasDestroy_v2(handle4);
cout << "done with M1 k" << '\n';
}
I make the first row of A has data from 1 to 54, such as A[0] =1, A[1] =2, A[2] =3, … , A[53] =54, and second row of A has same data as first row, such as A[54] =1, A[55] =2, A[56] =3, … , A[107] =54.
I also make B[0] = 1 and B[1]= 1 and rest of elements are all 0.
No matter how I change m, n, k, I can’t get the correct result.
Can someone please help me? Thank you very much!!!