Hi,
I have one matrix 512x512x108 and i need do some operations with your data, and when i execute the kernel and execute one line show the message:
cuda the launch timed out and was terminated.
If i remove this line:
dadosOut[indice] = 500;
The kernel execute without problems. What the problem?
yInc = 512;
zInc = 512*512;
dadosIn[512512108]
dadosOut[512512108]
tamanho = 2
__device__ int calcIndice(int x, int y, int z, int yInc, int zInc){
int indice = (x + (y*yInc))+(z*zInc);
int valor = 0;
if(indice > 0 && indice < 108*zInc){
valor = indice;
}
return valor;
}
__global__ void calc(short *dadosIn, float *dadosOut, int *vZ, int tamanho, int yInc, int zInc){
int x = blockIdx.x * tamanho + threadIdx.x;
int y = ( blockIdx.y - (256*(blockIdx.y/256)) ) * tamanho + threadIdx.y;
int z = (blockIdx.y/256) * tamanho + threadIdx.z;
int indice = (x + (y*yInc))+(z*zInc);
int gradX, gradY, gradZ;
float mHessiana[3][3], mA[3][3], mQ[3][3], mR[3][3], mS[3][3], mStemp[3][3]; //matrizes Hessiana, A, Q, R e S do método QR
float eValues[3], eVectors[3][3]; //Eigenvalues e Eigenvectorss
float valor = 0;
int cont = 0;
int isCenterline = 0;
float prodV1 = 0;
float prodV2 = 0;
if(x>5 && y>5 && z>5 && x<507 && y<507 && z <103){
//-----------------------------------------------------------------------
//passa o filtro gaussiano blur
dadosOut[indice] = (dadosIn[calcIndice(x-1, y+1, z-1, yInc, zInc)] +
2 *dadosIn[calcIndice(x, y+1, z-1, yInc, zInc)] +
dadosIn[calcIndice(x+1, y+1, z-1, yInc, zInc)] +
2 * dadosIn[calcIndice(x-1, y, z-1, yInc, zInc)] +
4 * dadosIn[calcIndice(x, y, z-1, yInc, zInc)] +
2 * dadosIn[calcIndice(x+1, y, z-1, yInc, zInc)] +
dadosIn[calcIndice(x-1, y-1, z-1, yInc, zInc)] +
2 *dadosIn[calcIndice(x, y-1, z-1, yInc, zInc)] +
dadosIn[calcIndice(x+1, y-1, z-1, yInc, zInc)] +
2 * dadosIn[calcIndice(x-1, y+1, z, yInc, zInc)] +
4 * dadosIn[calcIndice(x, y+1, z, yInc, zInc)] +
2 * dadosIn[calcIndice(x+1, y+1, z, yInc, zInc)] +
4 * dadosIn[calcIndice(x-1, y, z, yInc, zInc)] +
8 * dadosIn[calcIndice(x, y, z, yInc, zInc)] +
4 * dadosIn[calcIndice(x+1, y, z, yInc, zInc)] +
2 * dadosIn[calcIndice(x-1, y-1, z, yInc, zInc)] +
4 * dadosIn[calcIndice(x, y-1, z, yInc, zInc)] +
2 * dadosIn[calcIndice(x+1, y-1, z, yInc, zInc)] +
dadosIn[calcIndice(x-1, y+1, z+1, yInc, zInc)] +
2 *dadosIn[calcIndice(x, y+1, z+1, yInc, zInc)] +
dadosIn[calcIndice(x+1, y+1, z+1, yInc, zInc)] +
2 * dadosIn[calcIndice(x-1, y, z+1, yInc, zInc)] +
4 * dadosIn[calcIndice(x, y, z+1, yInc, zInc)] +
2 * dadosIn[calcIndice(x+1, y, z+1, yInc, zInc)] +
dadosIn[calcIndice(x-1, y-1, z+1, yInc, zInc)] +
2 *dadosIn[calcIndice(x, y-1, z+1, yInc, zInc)] +
dadosIn[calcIndice(x+1, y-1, z+1, yInc, zInc)])/64;
//------------------------------------------------------------------
//calcula os gradientes
gradX = (dadosOut[calcIndice(x-1, y-1, z, yInc, zInc)] +
2 * dadosOut[calcIndice(x, y-1, z, yInc, zInc)] +
dadosOut[calcIndice(x+1, y-1, z, yInc, zInc)])
-
(dadosOut[calcIndice(x-1, y+1, z, yInc, zInc)] +
2 * dadosOut[calcIndice(x, y+1, z, yInc, zInc)] +
dadosOut[calcIndice(x+1, y+1, z, yInc, zInc)]);
gradY = (dadosOut[calcIndice(x+1, y+1, z, yInc, zInc)] +
2 * dadosOut[calcIndice(x+1, y, z, yInc, zInc)] +
dadosOut[calcIndice(x+1, y-1, z, yInc, zInc)])
-
(dadosOut[calcIndice(x-1, y+1, z, yInc, zInc)] +
2 * dadosOut[calcIndice(x-1, y, z, yInc, zInc)] +
dadosOut[calcIndice(x-1, y-1, z, yInc, zInc)]);
gradZ = (dadosOut[calcIndice(x, y-1, z-1, yInc, zInc)] +
2 * dadosOut[calcIndice(x, y-1, z, yInc, zInc)] +
dadosOut[calcIndice(x, y-1, z+1, yInc, zInc)])
-
(dadosOut[calcIndice(x, y+1, z-1, yInc, zInc)] +
2 * dadosOut[calcIndice(x, y+1, z, yInc, zInc)] +
dadosOut[calcIndice(x, y+1, z+1, yInc, zInc)]);
vZ[0] = 3;
//------------------------------------------------------------------------
//calcula matriz Hessiana
//Matriz Hessiana
// 0 1 2
//0 Dxx Dxy Dxz
//1 Dyx Dyy Dyz
//2 Dzx Dzy Dzz
//Dxx
mHessiana[0][0] = dadosOut[calcIndice(x+1,y,z, yInc, zInc)] - 2 * dadosOut[calcIndice(x,y,z, yInc, zInc)] +
dadosOut[calcIndice(x-1,y,z, yInc, zInc)];
//Dyy
mHessiana[1][1] = dadosOut[calcIndice(x,y+1,z, yInc, zInc)] - 2 * dadosOut[calcIndice(x,y,z, yInc, zInc)] +
dadosOut[calcIndice(x,y-1,z, yInc, zInc)];
//Dzz
mHessiana[2][2] = dadosOut[calcIndice(x,y,z+1, yInc, zInc)] - 2 * dadosOut[calcIndice(x,y,z, yInc, zInc)] +
dadosOut[calcIndice(x,y,z-1, yInc, zInc)];
//Dxy e Dyx
mHessiana[0][1] = mHessiana[1][0] = (dadosOut[calcIndice(x-1,y+1,z, yInc, zInc)] - dadosOut[calcIndice(x+1,y+1,z, yInc, zInc)] +
dadosOut[calcIndice(x+1,y-1,z, yInc, zInc)] - dadosOut[calcIndice(x-1,y-1,z, yInc, zInc)])/4;
//Dxz e Dzx
mHessiana[0][2] = mHessiana[2][0] = (dadosOut[calcIndice(x-1,y,z+1, yInc, zInc)] - dadosOut[calcIndice(x+1,y,z+1, yInc, zInc)] +
dadosOut[calcIndice(x+1,y,z-1, yInc, zInc)] - dadosOut[calcIndice(x-1,y,z-1, yInc, zInc)])/4;
//Dyz e Dzy
mHessiana[1][2] = mHessiana[2][1] = (dadosOut[calcIndice(x,y-1,z+1, yInc, zInc)] - dadosOut[calcIndice(x,y+1,z+1, yInc, zInc)] +
dadosOut[calcIndice(x,y+1,z-1, yInc, zInc)] - dadosOut[calcIndice(x,y-1,z-1, yInc, zInc)])/4;
//----------------------------------------------------------------------------
//calcula os eigens
//copia os dados para a Matriz A
for (int j = 0; j <= 2; j++){
for(int i = 0; i <=2; i++){
mA[i][j] = mHessiana[i][j];
}
}
//inicializa matriz R
for (int i = 0; i <= 2; i++){
for(int j = 0; j <=2; j++){
mR[i][j] = mS[i][j] = mQ[i][j] = 0;
}
}
while(fabs(mQ[0][0]) != 1 && fabs(mQ[1][1]) != 1 && fabs(mQ[2][2]) != 1 && cont < 50){
cont++;
//calculo das matrizes Q e R
for(int j = 0; j <= 2; j++){
valor = 0;
for(int t = 0; t <= 2; t++){
valor += mA[j][t] * mA[j][t];
}
mR[j][j] = sqrt(valor);
if(mR[j][j] == 0){
break;
} else {
for(int i = 0; i <= 2; i++){
mA[j][i] = mA[j][i] / mR[j][j];
}
}
for(int k = j+1; k <= 2; k++){
valor = 0;
for(int u = 0; u <= 2; u++){
valor += mA[j][u] * mA[k][u];
}
mR[k][j] = valor;
for(int p = 0; p <=2; p++){
mA[k][p] = mA[k][p] - (mA[j][p] * mR[k][j]);
}
}
}
//copiando para a verdadeira matriz Q
for (int i = 0; i <= 2; i++){
for(int j = 0; j <=2; j++){
mQ[i][j] = mA[i][j];
if(cont == 1){
mS[i][j] = mA[i][j];
}
}
}
//calculando a matriz S (caso não seja a primeira iteração)
if(cont > 1){
for (int i = 0; i <= 2; i++){
for(int j = 0; j <=2; j++){
mStemp[i][j] = mS[0][j] * mQ[i][0] + mS[1][j] * mQ[i][1] + mS[2][j] * mQ[i][2];
}
}
}
//copiando para a verdadeira S
if(cont > 1){
for (int i = 0; i <= 2; i++){
for(int j = 0; j <=2; j++){
mS[i][j] = mStemp[i][j];
}
}
}
//nova matriz A
for (int i = 0; i <= 2; i++){
for(int j = 0; j <=2; j++){
mA[i][j] = mR[0][j] * mQ[i][0] + mR[1][j] * mQ[i][1] + mR[2][j] * mQ[i][2];
}
}
}
//------------------------------------------------------------------------
//verifica os pontos que são centerline
//copia valores para a verificação
for(int c = 0; c <= 2; c++){
eValues[c] = mA[c][c];
for(int d = 0; d <= 2; d++){
eVectors[c][d] = mS[c][d];
}
}
float temp = 0;
//ordena os eigens para a avaliação do ponto
for(int i = 0; i <= 2; i++){
for(int j = i; j <= 2; j++){
if(eValues[i] > eValues[j]){
temp = eValues[i];
eValues[i] = eValues[j];
eValues[j] = temp;
for(int d = 0; d <= 2; d++){
temp = eVectors[i][d];
eVectors[i][d] = eVectors[j][d];
eVectors[j][d] = temp;
}
}
}
}
//verificação se o ponto é centerline
if(eValues[0] <= 0 && eValues[1] <= 0){
prodV1 = produtoVetores(eVectors[0][0], eVectors[0][1], eVectors[0][2], gradX, gradY, gradZ);
prodV2 = produtoVetores(eVectors[1][0], eVectors[1][1], eVectors[1][2], gradX, gradY, gradZ);
if(prodV1 == 0 && prodV2 == 0){
//if(eValues[1]/eValues[0] >= 0.5){
dadosOut[indice] = 500; // <<<<------ line of problem
//}
}
}
}
}