I found that when I use the cuBLAS single-precision vector norm function cublasSnrm2() and the vector contains any denormalized numbers, the result is NaN on Fermi cards. Is this a bug?
In the example below, the second case (value=1e-39) gives me NaN on a C2050. On a C1060, both results are zero.
— Example program —
#include <cublas_v2.h>
#include <stdio.h>
/* Reports a failed CUDA runtime call on stderr.
 * Returns 0 when `status` is cudaSuccess, 1 otherwise. */
static int cudaFailed(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return 0;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/* Reports a failed cuBLAS call on stderr.
 * Returns 0 when `status` is CUBLAS_STATUS_SUCCESS, 1 otherwise. */
static int cublasFailed(cublasStatus_t status, const char* what) {
    if (status == CUBLAS_STATUS_SUCCESS) {
        return 0;
    }
    fprintf(stderr, "%s failed with cuBLAS status %d\n", what, (int)status);
    return 1;
}

/* Fills the host vector with `value`, uploads it to the device, computes its
 * Euclidean norm with cublasSnrm2 and prints the outcome.
 * Returns 0 on success, 1 if the copy or the cuBLAS call failed. */
static int runNormCase(cublasHandle_t handle, float* hostVec, float* devVec,
                       int n, float value) {
    float result = 0.0f;

    for (int i = 0; i < n; i++) {
        hostVec[i] = value;
    }

    if (cudaFailed(cudaMemcpy(devVec, hostVec, (size_t)n * sizeof(float),
                              cudaMemcpyHostToDevice),
                   "cudaMemcpy")) {
        return 1;
    }

    /* `result` is a host pointer; this relies on the handle's pointer mode
     * being left at its default (host) setting. */
    if (cublasFailed(cublasSnrm2(handle, n, devVec, 1, &result),
                     "cublasSnrm2")) {
        return 1;
    }

    printf("value=%e vectorSize=%i result=%e\n", hostVec[0], n, result);
    return 0;
}

/* Reproduction program: computes the single-precision 2-norm of a constant
 * vector for two very small element values (1e-38f, then 1e-39f) and prints
 * each result so the outputs can be compared across GPUs.
 * Returns 0 on success, 1 if any CUDA or cuBLAS call failed. */
int main() {
    const int vectorSize = 1000;
    const size_t bytes = (size_t)vectorSize * sizeof(float);
    cublasHandle_t handle;
    float* vectorX_cpu = NULL;
    float* vectorX_gpu = NULL;
    int exitCode = 1;

    if (cublasFailed(cublasCreate(&handle), "cublasCreate")) {
        return 1;
    }

    /* Short-circuit chain: each step runs only if all earlier ones succeeded. */
    if (!cudaFailed(cudaMallocHost((void**)&vectorX_cpu, bytes), "cudaMallocHost") &&
        !cudaFailed(cudaMalloc((void**)&vectorX_gpu, bytes), "cudaMalloc") &&
        !runNormCase(handle, vectorX_cpu, vectorX_gpu, vectorSize, 1e-38f) &&
        !runNormCase(handle, vectorX_cpu, vectorX_gpu, vectorSize, 1e-39f)) {
        exitCode = 0;
    }

    /* Release everything that was acquired, including the pinned host buffer
     * (the original never freed it). */
    if (vectorX_gpu != NULL) {
        cudaFree(vectorX_gpu);
    }
    if (vectorX_cpu != NULL) {
        cudaFreeHost(vectorX_cpu);
    }
    cublasDestroy(handle);
    return exitCode;
}