Hi All!
I’ve got a problem with using renderbuffer in CUDA kernel. My task is:
-
Render image in some position to renderbuffer.
-
Get image and process it pixel-by-pixel.
So I create and bind all buffers:
rbo = new GLRBO(GL_RGBA, mesh->size.Max[0] - mesh->size.Min[0], mesh->size.Max[1] - mesh->size.Min[1]);
fbo = new GLFBO(GL_COLOR_ATTACHMENT0, rbo->GetId());
vao = new GLVAO(1, &BufferIds[0]);
vao->Bind();
vbo = new GLVBO(2, &BufferIds[1]);
vbo->Bind(GL_ARRAY_BUFFER);
vbo->SetData(GL_ARRAY_BUFFER, sizeof(GLVertex)*(mesh->nVertices+1), vertices, GL_DYNAMIC_DRAW);
vbo->Bind(GL_ELEMENT_ARRAY_BUFFER);
vbo->SetData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint)*mesh->nElements, indices, GL_DYNAMIC_DRAW);
vao->Unbind();
//register in CUDA
cudaSetDevice(0);
cudaGLSetGLDevice(0)
cudaGraphicsResource* resource;
cudaGraphicsGLRegisterImage(&resource, rbo->GetId(), GL_RENDERBUFFER, cudaGraphicsMapFlagsReadOnly);
cudaGraphicsMapResources(1, &resource, 0);
// ... Render image
void GLMain::_RenderScene()
{
++FrameCount;
//render at maximum resolution to render buffer
glViewport(0, 0, rboWidth, rboHeight);
if(!bDone)
{
fbo->Bind();
glClearColor(0.0, 0.0, 0.0, 1.0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
DrawMesh();
fbo->BindReadDraw();
cudaArray* array;
cudaGraphicsSubResourceGetMappedArray(&array, resource, 0, 0);
size_t size = sizeof(float)*rboWidth*rboHeight;
float* initData = (float*)malloc(size*4);
memset(initData, 0, size*4);
ProcessImage(ar, initData, rboWidth, rboHeight);
}
//Render to screen
}
//...... Process_Image.cu
texture <float4, cudaTextureType2D, cudaReadModeElementType> tex;
extern "C" void ProcessImage(cudaArray* in, float* _outAngle, float* _outIntence, float* _inData, int w, int h )
{
cudaBindTextureToArray(tex, in);
size_t size = sizeof(float)*w*h;
float* inData;
cudaMalloc(&inData, size*4);
dim3 threads(16, 16);
dim3 blocks(w/threads.x, h/threads.y);
cuProcessKernel<<<blocks, threads>>>(inData, w, h);
cudaThreadSynchronize();
cudaUnbindTexture(tex);
cudaMemcpy(_inData, inData, size*4, cudaMemcpyDeviceToHost);
cudaFree(outAngle);
cudaFree(outIntence);
}
//...Kernel
__global__ void cuProcessKernel(float* initialData, int w, int h)
{
int x = blockIdx.x*blockDim.x + threadIdx.x;
int y = blockIdx.y*blockDim.y + threadIdx.y;
int idx = y * w + x;
float4 pix = tex2D(tex, x, y);
initialData[idx*4] = pix.x;
initialData[idx*4+1] = pix.y;
initialData[idx*4+2] = pix.z;
initialData[idx*4+3] = pix.w;
}
So I’m simply trying to get per-pixel image in float4 format. But instead I get some garbage in the output array. When I make all white image it returns incorrect values. Could someone tell me where is my mistake ?
System: OS Win 7 x64 SP1, Core i7 2600/GTX570/16Gb, CUDA Toolkit 4.2, devdriver_4.2_winvista-win7_64_301.32_general.