I am very new to CUDA.
I have tried to execute the following program (from Udacity, lesson 1):
#include <iostream>
// Kernel: stores the cube of each input element in the output array.
//
// d_out : device pointer that receives the results.
// d_in  : device pointer to the input values.
//
// The element index is taken from threadIdx.x alone (blockIdx is not used),
// and there is no bounds check: both arrays must hold at least blockDim.x
// elements, and every block of a launch touches the same index range.
__global__ void cube(float * d_out, float * d_in) {
    const int tid = threadIdx.x;
    const float value = d_in[tid];
    d_out[tid] = value * value * value;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error during " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Cubes the values 0..63 on the GPU and prints them four per line.
//
// Every CUDA runtime call and the kernel launch are checked. On the first
// failure the error is reported on stderr, the results are NOT printed
// (h_out would still be uninitialized), and the process exits with status 1.
// Returns 0 on success.
int main(int argc, char *argv[]) {
    const int ARRAY_SIZE = 64;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // generate the input array on the host
    float h_in[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++) {
        h_in[i] = float(i);
    }
    float h_out[ARRAY_SIZE];

    // declare GPU memory pointers; start as null so the cudaFree calls below
    // are safe even when an allocation never happened
    float * d_in = 0;
    float * d_out = 0;

    // allocate GPU memory and transfer the input array to the GPU;
    // && short-circuits, so nothing runs after the first failure
    bool ok = checkCuda(cudaMalloc((void**)&d_in, ARRAY_BYTES), "cudaMalloc(d_in)")
           && checkCuda(cudaMalloc((void**)&d_out, ARRAY_BYTES), "cudaMalloc(d_out)")
           && checkCuda(cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice),
                        "cudaMemcpy(host to device)");

    if (ok) {
        // launch the kernel: one block, one thread per element
        cube <<<1, ARRAY_SIZE>>>(d_out, d_in);

        // A launch returns no status itself: cudaGetLastError() exposes launch
        // failures, cudaDeviceSynchronize() exposes errors raised while the
        // kernel was running.
        ok = checkCuda(cudaGetLastError(), "cube kernel launch")
          && checkCuda(cudaDeviceSynchronize(), "cube kernel execution")
          // copy back the result array to the CPU
          && checkCuda(cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost),
                       "cudaMemcpy(device to host)");
    }

    if (ok) {
        // print out the resulting array, four values per line
        for (int i = 0; i < ARRAY_SIZE; i++) {
            std::cout << h_out[i];
            std::cout << (((i%4)!=3)?"\t":"\n");
        }
    }

    // release device memory on both the success and the failure path
    cudaFree(d_in);
    cudaFree(d_out);
    return ok ? 0 : 1;
}
I don’t have CUDA installed on my own machine, so I have tried to run it on my university’s GPU clusters.
It runs OK on this cluster:
Model
HP SL390 G7
Processors
Two INTEL X5650 @ 2.66 GHz, 6 cores, 48GB
Two NVIDIA Tesla M2090 @ 650 MHz, 512 cores, 6GB
However, I couldn’t get it to work on another cluster. More precisely, it runs, but the output contains only zeros (or tiny numbers such as 8.68805e-44).
I have tried to identify the reason. But I couldn’t find it.
Any suggestion will be highly valued.
To help you assess what I am dealing with, here is the output of the commands from the following link, run on the target cluster node:
http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#axzz4YxAZvHxk
$ lspci | grep -i nvidia
02:00.0 3D controller: NVIDIA Corporation GK110GL [Tesla K20m] (rev a1)
84:00.0 3D controller: NVIDIA Corporation GK110GL [Tesla K20m] (rev a1)
$ uname -m && cat /etc/*release
x86_64
CentOS release 6.8 (Final)
LSB_VERSION=base-4.0-amd64:base-4.0-ia32:base-4.0-noarch:core-4.0-amd64:core-4.0-ia32:core-4.0-noarch:graphics-4.0-amd64:graphics-4.0-ia32:graphics-4.0-noarch:printing-4.0-amd64:printing-4.0-ia32:printing-4.0-noarch
CentOS release 6.8 (Final)
CentOS release 6.8 (Final)
$ gcc --version
gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-17)
Copyright (C) 2010 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ uname -r
2.6.32-642.13.1.el6.x86_64