Hello,
I’m using:
GeForce GTX 1080 Ti, which has compute capability 6.1.
OpenCV 3.2 (built for VS2013 x64, Release and Debug configurations separately).
CUDA 8.0.
Visual Studio 2013, Release and Debug configurations of the x64 platform.
My goal is to process only part of the input image. That part is defined by its upper-left coordinate, its width and its height.
Problem description: An "invalid configuration argument" CUDA error is raised only when I run the Release build standalone (without debugging) via the Visual Studio Debug menu (Ctrl+F5).
If I run the same Release executable under the debugger via the Visual Studio Debug menu (F5), the error isn't raised.
Also, when I run the Debug-configuration build generated from the same application code, both F5 and Ctrl+F5 work properly and the error isn't raised.
Here is my code:
/*
 * CUDA_CHECK_ERROR(stmt, fname, line)
 *
 * Fetches the last CUDA runtime error with cudaGetLastError() (which also
 * resets it). If the result is not cudaSuccess, prints the source location,
 * the statement text, the numeric error code and the cudaGetErrorString()
 * description, then blocks on getchar() so the message stays visible.
 *
 *   stmt  - C string holding the text of the statement being checked
 *           (only printed, never evaluated)
 *   fname - C string holding the source file name
 *   line  - integer source line number
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to exactly
 * one statement; invoke it with a trailing semicolon.
 */
#define CUDA_CHECK_ERROR(stmt, fname, line) \
    do \
    { \
        cudaError_t cudaStatus = cudaGetLastError(); \
        if (cudaStatus != cudaSuccess) \
        { \
            printf("File: %s\nLine: %i\nCUDA statement: \n%s\nCUDA error - %08d\n\n", (fname), (int)(line), (stmt), (int)cudaStatus); \
            printf("CUDA error information: %s\n", cudaGetErrorString(cudaStatus)); \
            getchar(); \
        } \
    } while (0)
/*
 * CUDA(stmt)
 *
 * Executes stmt and then runs CUDA_CHECK_ERROR, passing the stringized
 * statement together with the current __FILE__ and __LINE__.
 *
 * The check does not look at the value returned by stmt; it queries the
 * runtime through cudaGetLastError(). Do NOT pass cudaGetLastError() itself
 * as stmt: that call resets the error state, so the check that follows would
 * always see cudaSuccess. To check a preceding kernel launch, pass
 * cudaPeekAtLastError() (which does not reset the error) instead.
 */
#define CUDA(stmt) \
    do \
    { \
        stmt; \
        CUDA_CHECK_ERROR(#stmt, __FILE__, __LINE__); \
    } while (0)
/* Rectangular image slice: upper-left corner plus extent, in rows/columns. */
struct sRect
{
    unsigned m_StartRow; /* row of the slice's upper-left corner */
    unsigned m_StartCol; /* column of the slice's upper-left corner */
    unsigned m_SizeRows; /* slice height (number of rows) */
    unsigned m_SizeCols; /* slice width (number of columns) */
};
/*
 * Thresholds an image slice in place: every byte whose value is <= 127
 * becomes 0 (black), every other byte becomes 255 (white).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; x indexes
 * columns and y indexes rows of the slice. Threads that fall outside the
 * slice extent return without touching memory, so the grid may overshoot.
 *
 *   SrcImage   - device pointer to the first pixel of the slice (the caller
 *                has already applied the slice's start row/column offset)
 *   iStep      - distance in bytes between the starts of consecutive rows
 *   ImageSlice - only m_SizeCols / m_SizeRows are read here, as the bounds
 *
 * Each pixel is addressed as a single byte (one unsigned char per pixel).
 */
__global__ void CleanNoisePreparation(unsigned char * SrcImage, size_t iStep, const sRect ImageSlice)
{
    /* Unsigned indices: they are compared against the unsigned slice extents. */
    const unsigned int iXPos = threadIdx.x + blockIdx.x * blockDim.x;
    const unsigned int iYPos = threadIdx.y + blockIdx.y * blockDim.y;

    if (iXPos >= ImageSlice.m_SizeCols || iYPos >= ImageSlice.m_SizeRows)
        return;

    /* iStep is size_t, so the row offset is computed in size_t and cannot wrap at 32 bits. */
    unsigned char * const pPixel = SrcImage + iYPos * iStep + iXPos;

    /* In case pixel value is less or equal to 127 set it to black (0), otherwise set it to white (255). */
    *pPixel = (*pPixel <= (unsigned char)127) ? ((unsigned char)0) : ((unsigned char)255);
}
/*
 * Launches CleanNoisePreparation on a rectangular slice of a GPU image.
 *
 *   Image      - device image; its bytes inside the slice are modified in place.
 *                The kernel addresses one byte per pixel, so this assumes an
 *                8-bit single-channel image (e.g. CV_8UC1) - confirm at the caller.
 *   ImageSlice - slice position and extent, in rows/columns
 *   dimGrid    - grid dimensions; together with dimBlock they should cover
 *                m_SizeCols x m_SizeRows threads (x = columns, y = rows)
 *   dimBlock   - block dimensions
 *   Stream     - stream the kernel is enqueued on; the launch is asynchronous
 *                and this function does not synchronize
 *
 * If the image is empty or the slice does not fit inside it, a message is
 * printed and nothing is launched. Launch errors (such as an invalid
 * configuration) are reported through CUDA_CHECK_ERROR.
 */
void PerformCleanNoisePreparationOnGPU(cv::cuda::GpuMat& Image,
                                       const sRect &ImageSlice,
                                       const dim3 &dimGrid,
                                       const dim3 &dimBlock,
                                       const cudaStream_t &Stream)
{
    /* Refuse slices that would make the kernel write outside the image.
       The subtraction form avoids unsigned overflow of start + size. */
    const unsigned int uImageRows = (unsigned int)Image.rows;
    const unsigned int uImageCols = (unsigned int)Image.cols;
    if (Image.empty() ||
        ImageSlice.m_StartRow > uImageRows || ImageSlice.m_SizeRows > uImageRows - ImageSlice.m_StartRow ||
        ImageSlice.m_StartCol > uImageCols || ImageSlice.m_SizeCols > uImageCols - ImageSlice.m_StartCol)
    {
        printf("PerformCleanNoisePreparationOnGPU: image slice does not fit inside the image\n");
        return;
    }

    /* Calculate the required start address based on the required image slice characteristics */
    unsigned char * pImageData = (unsigned char*)(Image.data + ImageSlice.m_StartRow * Image.step + ImageSlice.m_StartCol);

    CleanNoisePreparation<<<dimGrid, dimBlock, 0, Stream>>>(pImageData, Image.step, ImageSlice);

    /* Query the launch status exactly once. Wrapping cudaGetLastError() in CUDA(...)
       would reset the error before the macro's own check and hide the failure. */
    CUDA_CHECK_ERROR("CleanNoisePreparation<<<dimGrid, dimBlock, 0, Stream>>>(...)", __FILE__, __LINE__);
}
/*
 * Demo driver: allocates a 10000 x 7096 8-bit GPU image and runs the
 * threshold kernel on the slice that starts at row 0, column 4854 and spans
 * 7096 rows by 5146 columns, on a dedicated non-blocking stream.
 *
 * Returns 0 on success and 1 if a CUDA call made here fails.
 */
int main()
{
    sRect ResSliceParams;
    ResSliceParams.m_StartRow = 0;
    ResSliceParams.m_StartCol = 4854;
    ResSliceParams.m_SizeRows = 7096;
    ResSliceParams.m_SizeCols = 5146;

    /* Note from the original author: the image step size is 10240. */
    cv::cuda::GpuMat MyFrame(cv::Size(10000, 7096), CV_8U);

    /* One thread per pixel; the grid is the slice extent divided by the block
       size, rounded up (161 x 222 blocks for the slice above). */
    const dim3 dimBlock(32, 32, 1);
    const dim3 dimGrid((ResSliceParams.m_SizeCols + dimBlock.x - 1) / dimBlock.x,
                       (ResSliceParams.m_SizeRows + dimBlock.y - 1) / dimBlock.y,
                       1);

    cudaStream_t cudaStream;
    cudaError_t cudaStatus = cudaStreamCreateWithFlags(&cudaStream, cudaStreamNonBlocking);
    if (cudaStatus != cudaSuccess)
    {
        printf("cudaStreamCreateWithFlags failed: %s\n", cudaGetErrorString(cudaStatus));
        return 1;
    }

    PerformCleanNoisePreparationOnGPU(MyFrame,
                                      ResSliceParams,
                                      dimGrid,
                                      dimBlock,
                                      cudaStream);

    /* The launch is asynchronous: wait for it so that execution errors show up
       here and the kernel is finished before the image is released. */
    cudaStatus = cudaStreamSynchronize(cudaStream);
    if (cudaStatus != cudaSuccess)
    {
        printf("cudaStreamSynchronize failed: %s\n", cudaGetErrorString(cudaStatus));
    }

    cudaStreamDestroy(cudaStream);
    return (cudaStatus == cudaSuccess) ? 0 : 1;
}
The error is also raised when:
1. The kernel is completely empty (all lines commented out).
2. The kernel's parameter list is empty.
3. The default stream is used instead of a specific stream.