Description:
cudaMalloc crashes the process or produces the following error message:
all CUDA-capable devices are busy or unavailable
The bug is reproducible on Windows 10 with driver versions between 355.60 and 361.91, using CUDA Toolkit 7.0 and 7.5. I was unable to reproduce it on Windows 8.1.
The result (crash or error message) depends on the model of the GPU card. We tested several single- and multi-GPU configurations, including GTX 640, 750, TITAN BLACK, TITAN Z, 690, 980 and 980Ti cards.
TDR was disabled.
Duplication steps:
Compile a test app with Visual Studio 2012 that performs a simple device query and a cudaMalloc of a small amount of memory. Run the program a few hundred times from a batch file.
Product:
GTX 690, TITAN, TITAN Z, GTX 980, GTX 980TI
Toolkit versions:
7.5
7.0
Operating system:
Problem occurs on Windows 10. Works fine on Windows 8.1.
Note: I was unable to report this as a bug because the report page crashes every time.
Sample code:
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
// Reports a failed CUDA runtime call and optionally terminates the process.
//
// code  - status returned by the CUDA runtime call being checked.
// file  - source file name of the call site (printed in the message).
// line  - source line number of the call site (printed in the message).
// abort - when true (the default), the process exits with `code` as its
//         exit status after the message is printed; when false, the
//         function returns to the caller after printing.
//
// Nothing is printed and nothing else happens when code is cudaSuccess.
inline void gpuAssert( cudaError code, const char *file, int line, bool abort=true )
{
    // Success path: nothing to report.
    if ( code == cudaSuccess )
    {
        return;
    }

    const char *description = cudaGetErrorString( code );
    fprintf( stderr, "GPUassert: %s %s %d\n", description, file, line );

    // Caller asked to continue after a failure.
    if ( !abort )
    {
        return;
    }

    exit( code );
}
// Evaluates a CUDA runtime call and hands its status to gpuAssert together
// with the call site's file name and line number.
// Wrapped in do { } while ( 0 ) so the macro expands to exactly one statement
// and stays correct when used as the body of an unbraced if/else; the caller
// supplies the terminating semicolon.
#define CUDA_SAFE_CALL( ans ) do { gpuAssert( ( ans ), __FILE__, __LINE__ ); } while ( 0 )
int main( int argc, char *argv[] )
{
int numberOfDevices = 0;
size_t sizeInBytes = 16*1024*1024;
CUDA_SAFE_CALL( cudaGetDeviceCount ( &numberOfDevices ) );
for ( int i = 0; i < numberOfDevices; i++ )
{
CUDA_SAFE_CALL( cudaSetDevice( i ) );
{
int device;
CUDA_SAFE_CALL( cudaGetDevice( &device ) );
size_t mem_free, mem_tot;
CUDA_SAFE_CALL( cudaMemGetInfo(&mem_free, & mem_tot) );
fprintf( stdout, "before cudaMalloc #%d: %llu bytes from %llu / %llu\n", device, sizeInBytes, mem_free, mem_tot );
}
float *devPtr = nullptr;
CUDA_SAFE_CALL( cudaMalloc( (void**)&devPtr, sizeInBytes ) );
{
int device;
CUDA_SAFE_CALL( cudaGetDevice( &device ) );
size_t mem_free, mem_tot;
CUDA_SAFE_CALL( cudaMemGetInfo(&mem_free, & mem_tot) );
fprintf( stdout, "after cudaMalloc #%d: %llu bytes free\n", device, mem_free );
}
}
return 0;
}