CUDA -OpenGL Interop
Hi All!
I've got a problem with using renderbuffer in CUDA kernel. My task is:
1. Render image in some position to renderbuffer.
2. Get image and process it pixel-by-pixel.
So I create and bind all buffers:
[code]
rbo = new GLRBO(GL_RGBA, mesh->size.Max[0] - mesh->size.Min[0], mesh->size.Max[1] - mesh->size.Min[1]);
fbo = new GLFBO(GL_COLOR_ATTACHMENT0, rbo->GetId());
vao = new GLVAO(1, &BufferIds[0]);
vao->Bind();
vbo = new GLVBO(2, &BufferIds[1]);
vbo->Bind(GL_ARRAY_BUFFER);
vbo->SetData(GL_ARRAY_BUFFER, sizeof(GLVertex)*(mesh->nVertices+1), vertices, GL_DYNAMIC_DRAW);
vbo->Bind(GL_ELEMENT_ARRAY_BUFFER);
vbo->SetData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint)*mesh->nElements, indices, GL_DYNAMIC_DRAW);
vao->Unbind();
//register in CUDA
cudaSetDevice(0);
cudaGLSetGLDevice(0)
cudaGraphicsResource* resource;
cudaGraphicsGLRegisterImage(&resource, rbo->GetId(), GL_RENDERBUFFER, cudaGraphicsMapFlagsReadOnly);
cudaGraphicsMapResources(1, &resource, 0);
// ... Render image
void GLMain::_RenderScene()
{
++FrameCount;
//render at maximum resolution to render buffer
glViewport(0, 0, rboWidth, rboHeight);

if(!bDone)
{
fbo->Bind();
glClearColor(0.0, 0.0, 0.0, 1.0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
DrawMesh();
fbo->BindReadDraw();
cudaArray* array;
cudaGraphicsSubResourceGetMappedArray(&array, resource, 0, 0);
size_t size = sizeof(float)*rboWidth*rboHeight;
float* initData = (float*)malloc(size*4);
memset(initData, 0, size*4);
ProcessImage(ar, initData, rboWidth, rboHeight);
}
//Render to screen
}

//...... Process_Image.cu
texture <float4, cudaTextureType2D, cudaReadModeElementType> tex;
extern "C" void ProcessImage(cudaArray* in, float* _outAngle, float* _outIntence, float* _inData, int w, int h )
{
cudaBindTextureToArray(tex, in);
size_t size = sizeof(float)*w*h;
float* inData;
cudaMalloc(&inData, size*4);
dim3 threads(16, 16);
dim3 blocks(w/threads.x, h/threads.y);
cuProcessKernel<<<blocks, threads>>>(inData, w, h);
cudaThreadSynchronize();
cudaUnbindTexture(tex);
cudaMemcpy(_inData, inData, size*4, cudaMemcpyDeviceToHost);
cudaFree(outAngle);
cudaFree(outIntence);
}
//...Kernel
__global__ void cuProcessKernel(float* initialData, int w, int h)
{
int x = blockIdx.x*blockDim.x + threadIdx.x;
int y = blockIdx.y*blockDim.y + threadIdx.y;
int idx = y * w + x;
float4 pix = tex2D(tex, x, y);
initialData[idx*4] = pix.x;
initialData[idx*4+1] = pix.y;
initialData[idx*4+2] = pix.z;
initialData[idx*4+3] = pix.w;
}
[/code]
So I'm simply trying to get per-pixel image in float4 format. But instead I get some garbage in the output array. When I make all white image it returns incorrect values. Could someone tell me where is my mistake ?
System: OS Win 7 x64 SP1, Core i7 2600/GTX570/16Gb, CUDA Toolkit 4.2, devdriver_4.2_winvista-win7_64_301.32_general.
Hi All!

I've got a problem with using renderbuffer in CUDA kernel. My task is:

1. Render image in some position to renderbuffer.

2. Get image and process it pixel-by-pixel.

So I create and bind all buffers:



rbo = new GLRBO(GL_RGBA, mesh->size.Max[0] - mesh->size.Min[0], mesh->size.Max[1] - mesh->size.Min[1]);

fbo = new GLFBO(GL_COLOR_ATTACHMENT0, rbo->GetId());

vao = new GLVAO(1, &BufferIds[0]);

vao->Bind();

vbo = new GLVBO(2, &BufferIds[1]);

vbo->Bind(GL_ARRAY_BUFFER);

vbo->SetData(GL_ARRAY_BUFFER, sizeof(GLVertex)*(mesh->nVertices+1), vertices, GL_DYNAMIC_DRAW);

vbo->Bind(GL_ELEMENT_ARRAY_BUFFER);

vbo->SetData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint)*mesh->nElements, indices, GL_DYNAMIC_DRAW);

vao->Unbind();

//register in CUDA

cudaSetDevice(0);

cudaGLSetGLDevice(0)

cudaGraphicsResource* resource;

cudaGraphicsGLRegisterImage(&resource, rbo->GetId(), GL_RENDERBUFFER, cudaGraphicsMapFlagsReadOnly);

cudaGraphicsMapResources(1, &resource, 0);

// ... Render image

void GLMain::_RenderScene()

{

++FrameCount;

//render at maximum resolution to render buffer

glViewport(0, 0, rboWidth, rboHeight);



if(!bDone)

{

fbo->Bind();

glClearColor(0.0, 0.0, 0.0, 1.0);

glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

DrawMesh();

fbo->BindReadDraw();

cudaArray* array;

cudaGraphicsSubResourceGetMappedArray(&array, resource, 0, 0);

size_t size = sizeof(float)*rboWidth*rboHeight;

float* initData = (float*)malloc(size*4);

memset(initData, 0, size*4);

ProcessImage(ar, initData, rboWidth, rboHeight);

}

//Render to screen

}



//...... Process_Image.cu

texture <float4, cudaTextureType2D, cudaReadModeElementType> tex;

extern "C" void ProcessImage(cudaArray* in, float* _outAngle, float* _outIntence, float* _inData, int w, int h )

{

cudaBindTextureToArray(tex, in);

size_t size = sizeof(float)*w*h;

float* inData;

cudaMalloc(&inData, size*4);

dim3 threads(16, 16);

dim3 blocks(w/threads.x, h/threads.y);

cuProcessKernel<<<blocks, threads>>>(inData, w, h);

cudaThreadSynchronize();

cudaUnbindTexture(tex);

cudaMemcpy(_inData, inData, size*4, cudaMemcpyDeviceToHost);

cudaFree(outAngle);

cudaFree(outIntence);

}

//...Kernel

__global__ void cuProcessKernel(float* initialData, int w, int h)

{

int x = blockIdx.x*blockDim.x + threadIdx.x;

int y = blockIdx.y*blockDim.y + threadIdx.y;

int idx = y * w + x;

float4 pix = tex2D(tex, x, y);

initialData[idx*4] = pix.x;

initialData[idx*4+1] = pix.y;

initialData[idx*4+2] = pix.z;

initialData[idx*4+3] = pix.w;

}


So I'm simply trying to get per-pixel image in float4 format. But instead I get some garbage in the output array. When I make all white image it returns incorrect values. Could someone tell me where is my mistake ?

System: OS Win 7 x64 SP1, Core i7 2600/GTX570/16Gb, CUDA Toolkit 4.2, devdriver_4.2_winvista-win7_64_301.32_general.

#1
Posted 04/27/2012 06:34 PM   
Scroll To Top