Things seem to be just as bad under CUDA 7.5:
// TDR75.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <cuda.h>
int _tmain(int argc, _TCHAR* argv[])
{
while(1)
{
int ch = _getch();
printf("%c\n", ch);
switch(ch)
{
case 'i':
{
CUresult result;
result = cuInit(0);
printf("cuInit() return %i\n", result);
continue;
}
case 'r':
{
CUresult result;
CUdevice dev = 0;
result = cuDeviceGet(&dev, 0);
printf("cuDeviceGet() returned %i, dev = %i\n", result, dev);
result = cuDevicePrimaryCtxReset(dev);
printf("cuDevicePrimaryCtxReset() returned %i\n", result);
continue;
}
case 'p':
{
CUresult result;
CUdevice dev = 0;
result = cuDeviceGet(&dev, 0);
printf("cuDeviceGet() returned %i, dev = %i\n", result, dev);
CUcontext ctx = 0;
result = cuDevicePrimaryCtxRetain(&ctx, dev);
printf("cuDevicePrimaryCtxRetain() returned %i, ctx = %p\n", result, ctx);
continue;
}
case 'f':
{
CUresult result;
CUdevice dev = 0;
result = cuDeviceGet(&dev, 0);
printf("cuDeviceGet() returned %i, dev = %i\n", result, dev);
result = cuDevicePrimaryCtxRelease(dev);
printf("cuDevicePrimaryCtxRelease() returned %i\n", result);
continue;
}
case 'q':
{
CUresult result;
CUdevice dev = 0;
unsigned int flags = 0;
int active = 0;
result = cuDeviceGet(&dev, 0);
printf("cuDeviceGet() returned %i, dev = %i\n", result, dev);
result = cuDevicePrimaryCtxGetState(dev, &flags, &active);
printf("cuDevicePrimaryCtxGetState() returned %i, flags = %u, active = %i\n", result, dev, flags, active);
continue;
}
case 't':
{
CUresult result;
int count;
result = cuDeviceGetCount(&count);
printf("cuDeviceGetCount() returned %i, count = %i\n", result, count);
CUdevice dev = 0;
result = cuDeviceGet(&dev, 0);
printf("cuDeviceGet() returned %i, dev = %i\n", result, dev);
CUcontext ctx = 0;
result = cuCtxCreate(&ctx, CU_CTX_BLOCKING_SYNC, dev);
printf("cuCtxCreate() returned %i, ctx = %p\n", result, ctx);
result = cuCtxDestroy(ctx);
printf("cuCtxDestroy() returned %i\n", result);
continue;
}
case 'x':
{
break;
}
default:
{
continue;
}
}
break;
}
return 0;
}
The first attempt (with a TDR between ‘f’ and ‘r’):
i
cuInit() return 0
p
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxRetain() returned 0, ctx = 0000000000372A40
q
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxGetState() returned 0, flags = 0, active = 0
f
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxRelease() returned 0
r
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxReset() returned 0
p
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxRetain() returned 999, ctx = 0000000000000000
t
cuDeviceGetCount() returned 0, count = 2
cuDeviceGet() returned 0, dev = 0
cuCtxCreate() returned 999, ctx = 0000000000000000
cuCtxDestroy() returned 1
Another attempt (with a TDR between ‘i’ and ‘p’):
i
cuInit() return 0
p
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxRetain() returned 999, ctx = 0000000000000000
r
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxReset() returned 0
p
cuDeviceGet() returned 0, dev = 0
cuDevicePrimaryCtxRetain() returned 999, ctx = 0000000000000000
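So basically, once my process has called cuInit(), if another application causes a TDR (Timeout Detection and Recovery, i.e. a GPU driver reset), my process is unable to ever use CUDA again: every later attempt to retain the primary context or create a new context returns 999 (CUDA_ERROR_UNKNOWN), even after cuDevicePrimaryCtxReset() reports success.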