Hi, I am trying to take a screenshot from a Unity Computer game and hardware accelerated encode it to JPEG.
Therefore I want access to the GPU memory of Unity, which uses DirectX11 internally.
The entire program runs, but the output is a completely grey image of the requested size. I get no errors. What could this mean?
In the copy pass, I copy the RGB values byte by byte and if I override the colors, I do get that color as output, so apparently the texture I am reading is malfunctioning or for some reason really grey.
My code (stripped for simplification) is as follows:
int EncodeJPEGState_CopyRGBDataFromGPUMemory(EncodeJPEGState *EJSTate, const int flipVertical, const int width, const int height, ID3D11Resource* pD3DResource, int &error)
{
CUgraphicsResource outCUDAResource = NULL;
CUresult cuda_error = CUDA_SUCCESS;
cudaError_t CUDA_error = cudaSuccess;
D3D11_RESOURCE_DIMENSION pResourceDimension;
pD3DResource->GetType(&pResourceDimension);
if (pResourceDimension != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
error = -5;
return error;
}
cuda_error = cuGraphicsD3D11RegisterResource(&outCUDAResource, pD3DResource, /*CU_GRAPHICS_REGISTER_FLAGS_NONE*/CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST | CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER); //
// from: http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPHICS.html#axzz4hziyhyPs
cuda_error = cuGraphicsMapResources(1, &outCUDAResource, 0); // last argument is stream, but 0 has no definition like CU_STREAM_LEGACY has.
CUarray pArray = NULL;
/*
From: https://stackoverflow.com/questions/14927524/read-cudaarray-in-device-code
CUDA arrays are opaque memory layouts optimized for texture fetching. They are one dimensional, two dimensional, or three-dimensional and composed of elements,
each of which has 1, 2 or 4 components that may be signed or unsigned 8 , 16 or 32 bit integers, 16 bit floats, or 32 bit floats.
CUDA arrays are only accessible by kernels through texture fetching as described in Texture Memory or surface reading and writing as described in Surface Memory
So It is an array of pixels! What we want :-)!
*/
// CUresult cuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel)
cuda_error = cuGraphicsSubResourceGetMappedArray(&pArray, outCUDAResource, 0, 0); // must be 0,0 at end. values with 1 don't work: five error 1: CUDA_ERROR_INVALID_VALUE
CUDA_error = CUDA_texture_copy(pArray, EJSTate->RGBDataCUDA, width, height);
cuda_error = cuGraphicsUnmapResources(1, &outCUDAResource, 0); // last argument is stream, but 0 has no definition like CU_STREAM_LEGACY has.
//This reference count will be decremented when this resource is unregistered through cuGraphicsUnregisterResource().
cuda_error = cuGraphicsUnregisterResource(outCUDAResource);
return 0;
}
// Simple copy kernel
__global__ void copyKernel(cudaSurfaceObject_t inputSurfObj, Npp8u *outputRGB, int width, int height)
{
// Calculate surface coordinates
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < width && y < height) {
uchar4 data;
// Read from input surface
surf2Dread(&data, inputSurfObj, x * 4, y);
// Write to output surface
outputRGB[(y*width + x) * 3] = data.w; // R
outputRGB[(y*width + x) * 3 + 1] = data.x; // G
outputRGB[(y*width + x) * 3 + 2] = data.y; // B
}
}
cudaError_t CUDA_texture_copy(CUarray Source_CUarray, Npp8u *Dest_RGBDataCUDA, int width, int height)
{
// Specify surface
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = (cudaArray_t)Source_CUarray; // WARNING Source_CUarray = CUarray!
// from: https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__DRIVER.html
// The types CUarray and struct cudaArray * represent the same data type and may be used interchangeably by casting the two types between each other.
// In order to use a CUarray in a CUDA Runtime API function which takes a struct cudaArray *, it is necessary to explicitly cast the CUarray to a struct cudaArray *.
cudaSurfaceObject_t inputSurfObj = 0;
cudaError_t CUDA_error = cudaCreateSurfaceObject(&inputSurfObj, &resDesc);
if (CUDA_error != cudaSuccess) {
return CUDA_error;
}
// Invoke kernel
dim3 dimBlock(16, 16);
dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);
copyKernel <<<dimGrid, dimBlock>>> (inputSurfObj, Dest_RGBDataCUDA, width, height);
// Wait for GPU to finish before accessing on host
CUDA_error = cudaDeviceSynchronize();
// Destroy surface object
cudaDestroySurfaceObject(inputSurfObj);
return CUDA_error;
}