I try to perform the Cuda C programming for the Cuda GPU properties as below
However I get some mistake for the CPU and GPU
Kindly please provide your opinion and suggestion thus I will be able to improve my computing skills
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <time.h>
#include <math.h>
#define DATA_SIZE 1048576
int data[DATA_SIZE];
bool InitCUDA()
{
int count;
cudaGetDeviceCount(&count);
if (count == 0)
{
fprintf(stderr, "There is no device.\n");
return false;
}
int i;
for (i = 0; i < count; i++)
{
cudaDeviceProp prop;
if (cudaGetDeviceProperties(&prop, i) == cudaSuccess)
{
if (prop.major >= 1)
{
cudaGetDeviceProperties(&prop, i);
printf("Device Name: %s\n", prop.name);
printf("Total global mem: %1u bytes\n", prop.totalGlobalMem);
printf("Max threads per block: %d\n", prop.maxThreadsPerBlock);
printf("Clock rate: %.2f GHz\n", prop.clockRate*1e-6f);
printf("\n");
break;
}
}
cudaSetDevice(i);
}
return true;
}
global static void sumOfSquares(int num, int result, clock_t* time)
{
int sum = 0;
int i;
clock_t start = clock();
for (i = 0; i < DATA_SIZE; i++)
{
sum += num[i] * num[i];
}
*result = sum;
*time = clock() - start;
}
int sumOfSquares_CPU(int *data)
{
int sum = 0;
for (int i = 0; i< DATA_SIZE; i++)
{
sum += data[i] * data[i];
}
return sum;
}
void GenerateNumbers(int *number, int size)
{
for (int i = 0; i< size; i++)
{
number[i] = rand() % 10;
}
}
int main()
{
if (!InitCUDA())
{
return 0;
}
printf("CUDA initialized.\n");
GenerateNumbers(data, DATA_SIZE);
int* gpudata, *result;
clock_t* time;
cudaMalloc((void**)&gpudata, sizeof(int)* DATA_SIZE);
cudaMalloc((void**)&result, sizeof(int));
cudaMalloc((void**)&time, sizeof(clock_t));
cudaMemcpy(gpudata, data, sizeof(int)* DATA_SIZE, cudaMemcpyHostToDevice);
sumOfSquares << <1, 1, 0 >> >(gpudata, result, time);
int sum, sum1;
clock_t time_used;
cudaMemcpy(&sum, result, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&time_used, time, sizeof(clock_t), cudaMemcpyDeviceToHost);
cudaFree(gpudata);
cudaFree(result);
clock_t cpu_start = clock();
int cpu_time;
sum = sumOfSquares_CPU(data);
cpu_time = clock() - cpu_start;
printf("\nAnswer 2\n");
printf("(CPU) sum : %d\n", sum);
printf("(GPU) sum : %d\n", sum1);
printf("(CPU) sum - (GPU) sum = %d\n", sum - sum1);
printf("\nAnswer 3\n");
printf("(CPU) time: %.0f us\n", ((double)cpu_time / CLOCKS_PER_SEC)*pow(10.0, 6) / 1000);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
printf("(GPU) time: %.0f us\n", (time_used / prop.clockRate)*pow(10.0, 6) / 1000);
system("pause");
return 0;
}