Cross-posting
I also post this question on Stackoverflow
https://stackoverflow.com/questions/46338178/cuda-unspecified-launch-failure-on-cudagraphicsunmapresources
Edited on 2017.9.22(Fri) 11:30(JST)
Currently, I’m using CUDA and OpenGL to simulating ocean.
I found that when the number of vertices is around 6,000 or 25,000, the program works correctly. But if the number of vertices is around 100,000 or 400,000, I get unspecified launch failure error.
This is the code I use to update vertices positions and normals with CUDA in a frame:
while (!glfwWindowShouldClose(window))
{
...
vec3 *d_vertices = NULL, *d_normals = NULL;
cudaGraphicsMapResources(1, &cudaVboResVertices, 0);
cudaGraphicsMapResources(1, &cudaVboResNormals, 0);
cudaGraphicsResourceGetMappedPointer(
(void**)&d_vertices, NULL, cudaVboResVertices
);
cudaGraphicsResourceGetMappedPointer(
(void**)&d_normals, NULL, cudaVboResNormals
);
//update vertices positions and normals
//faceNumber*3 is the number of vertices
launchGPUKernel(faceNumber*3, d_vertices, d_normals);
t += dt;
//the error first occurs at this line at the first iteration
cudaGraphicsUnmapResources(1, &cudaVboResNormals, 0);
cudaGraphicsUnmapResources(1, &cudaVboResVertices, 0);
...
}
The launchGPUKernel is like this:
void launchGPUKernel(int num_points, vec3 *d_vtxs, vec3 *d_nmls){
dim3 grid(num_points/512 + 1, 1);
dim3 block(16, 64, 1);
d_update<<<grid, block>>>(num_points, WAVE_NUM, d_vtxs, d_nmls, d_wave_paras, t);
}
The d_update:
__global__ void d_update(
int num_points, int wave_num,
vec3 *d_vtxs, vec3 *d_nmls, float *d_wv_prs,
float d_time
){
long block_number = blockIdx.x + blockIdx.y*gridDim.x;
block_number *= (blockDim.x*blockDim.y);//the number of threads before current block
long idx = threadIdx.x + threadIdx.y*blockDim.x;
idx += block_number;
float x, z, height;
x = d_vtxs[idx].x;
z = d_vtxs[idx].z;
height = 0;
for (size_t j = 0; j < wave_num; j++) {
float a, b, theta, omega, phi;
a = d_wv_prs[j*5+0];
b = d_wv_prs[j*5+1];
theta = d_wv_prs[j*5+2];
omega = d_wv_prs[j*5+3];
phi = d_wv_prs[j*5+4];
float temp = (cos(theta)*x + sin(theta)*z)*omega + d_time*phi;
height += a*cos(temp) + b*sin(temp);
}
d_vtxs[idx].y = height;
float Hx, Hz;
Hx = 0;
Hz = 0;
for (size_t j = 0; j < wave_num; j++) {
float a, b, theta, omega, phi;
a = d_wv_prs[j*5+0];
b = d_wv_prs[j*5+1];
theta = d_wv_prs[j*5+2];
omega = d_wv_prs[j*5+3];
phi = d_wv_prs[j*5+4];
float temp = (cos(theta)*x + sin(theta)*z)*omega + d_time*phi;
Hx += -sin(temp)*omega*cos(theta)*a + cos(temp)*omega*cos(theta)*b;
Hz += -sin(temp)*omega*sin(theta)*a + cos(temp)*omega*sin(theta)*b;
}
vec3 v3_temp = normalize( vec3(-Hx, 1, -Hz) );
d_nmls[idx].x = v3_temp.x;
d_nmls[idx].y = v3_temp.y;
d_nmls[idx].z = v3_temp.z;
}
Environment
OS X EI Captitan 10.11.6
CUDA 8.0
OpenGL 4.0
Sorry for the lack of information before.
I don’t know what is wrong.
Need your help, guys :)
Thank you very much!