How to calculate CPU time and GPU time

I am trying to write a CUDA C program that prints the CUDA GPU properties and compares the CPU and GPU execution times, as shown below.

However, the CPU and GPU timing results I get look wrong.

Please share your opinions and suggestions so that I can improve my computing skills.

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <time.h>
#include <math.h>

#define DATA_SIZE 1048576

int data[DATA_SIZE];

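/* Query the CUDA devices, print basic properties of the first CUDA-capable one, and select it. */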
bool InitCUDA()
{
int count;

cudaGetDeviceCount(&count);
if (count == 0)
{
	fprintf(stderr, "There is no device.\n");
	return false;
}

int i;
for (i = 0; i < count; i++)
{
	cudaDeviceProp prop;
	if (cudaGetDeviceProperties(&prop, i) == cudaSuccess)
	{
		if (prop.major >= 1)
		{
		printf("Device Name: %s\n", prop.name);
		printf("Total global mem: %zu bytes\n", prop.totalGlobalMem);
		printf("Max threads per block: %d\n", prop.maxThreadsPerBlock);
		printf("Clock rate: %.2f GHz\n", prop.clockRate*1e-6f);
		printf("\n");
		cudaSetDevice(i);   /* use the first CUDA-capable device found */
		break;
		}
	}
}
return true;

}

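/* GPU kernel: computes the sum of squares of DATA_SIZE integers and records the elapsed device clock cycles. */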
__global__ static void sumOfSquares(int *num, int *result, clock_t *time)
{
int sum = 0;
int i;
clock_t start = clock();
for (i = 0; i < DATA_SIZE; i++)
{
sum += num[i] * num[i];
}

*result = sum;
*time = clock() - start;

}

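/* Reference implementation: the same sum of squares computed on the CPU. */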
int sumOfSquares_CPU(int *data)
{
int sum = 0;
for (int i = 0; i< DATA_SIZE; i++)
{
sum += data[i] * data[i];
}
return sum;
}

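/* Fill the input array with random single-digit integers. */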
void GenerateNumbers(int *number, int size)
{
for (int i = 0; i< size; i++)
{
number[i] = rand() % 10;
}
}

int main()
{
if (!InitCUDA())
{
return 0;
}

printf("CUDA initialized.\n");

GenerateNumbers(data, DATA_SIZE);
int* gpudata, *result;
clock_t* time;
cudaMalloc((void**)&gpudata, sizeof(int)* DATA_SIZE);
cudaMalloc((void**)&result, sizeof(int));
cudaMalloc((void**)&time, sizeof(clock_t));
cudaMemcpy(gpudata, data, sizeof(int)* DATA_SIZE, cudaMemcpyHostToDevice);

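/* Launch the kernel with a single block containing a single thread. */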
sumOfSquares<<<1, 1, 0>>>(gpudata, result, time);

int sum, sum1;
clock_t time_used;
cudaMemcpy(&sum1, result, sizeof(int), cudaMemcpyDeviceToHost);   /* GPU result */
cudaMemcpy(&time_used, time, sizeof(clock_t), cudaMemcpyDeviceToHost);
cudaFree(gpudata);
cudaFree(result);
cudaFree(time);

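/* Time the same computation on the CPU with clock(). */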
clock_t cpu_start = clock();
clock_t cpu_time;
sum = sumOfSquares_CPU(data);
cpu_time = clock() - cpu_start;

printf("\nAnswer 2\n");
printf("(CPU) sum : %d\n", sum);
printf("(GPU) sum : %d\n", sum1);
printf("(CPU) sum - (GPU) sum = %d\n", sum - sum1);
printf("\nAnswer 3\n");
printf("(CPU) time: %.0f us\n", ((double)cpu_time / CLOCKS_PER_SEC)*pow(10.0, 6) / 1000);

cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
/* clockRate is in kHz, so cycles / clockRate gives milliseconds; multiply by 1000 for microseconds. */
printf("(GPU) time: %.0f us\n", ((double)time_used / prop.clockRate) * 1000.0);

system("pause");
return 0;

}
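In case it helps, a more common way to time the kernel from the host is with CUDA events (cudaEventElapsedTime reports milliseconds) rather than the device-side clock() register. Below is a minimal sketch under that assumption; it reuses the same sum-of-squares computation as above, and the variable names are only for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda_runtime.h>

#define DATA_SIZE 1048576

/* Same sum-of-squares kernel as above, without the device-side clock(). */
__global__ static void sumOfSquares(int *num, int *result)
{
	int sum = 0;
	for (int i = 0; i < DATA_SIZE; i++)
		sum += num[i] * num[i];
	*result = sum;
}

int main()
{
	static int data[DATA_SIZE];
	for (int i = 0; i < DATA_SIZE; i++)
		data[i] = rand() % 10;

	int *gpudata, *result, sum_gpu;
	cudaMalloc((void**)&gpudata, sizeof(int) * DATA_SIZE);
	cudaMalloc((void**)&result, sizeof(int));
	cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice);

	/* GPU time: record events around the kernel launch. */
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord(start, 0);
	sumOfSquares<<<1, 1>>>(gpudata, result);
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);                  /* wait until the kernel has finished */
	float gpu_ms = 0.0f;
	cudaEventElapsedTime(&gpu_ms, start, stop);  /* elapsed time in milliseconds */
	cudaMemcpy(&sum_gpu, result, sizeof(int), cudaMemcpyDeviceToHost);

	/* CPU time: wall-clock of the same loop on the host. */
	clock_t cpu_start = clock();
	int sum_cpu = 0;
	for (int i = 0; i < DATA_SIZE; i++)
		sum_cpu += data[i] * data[i];
	double cpu_us = (double)(clock() - cpu_start) / CLOCKS_PER_SEC * 1.0e6;

	printf("(GPU) sum: %d  time: %.0f us\n", sum_gpu, gpu_ms * 1000.0);
	printf("(CPU) sum: %d  time: %.0f us\n", sum_cpu, cpu_us);

	cudaEventDestroy(start);
	cudaEventDestroy(stop);
	cudaFree(gpudata);
	cudaFree(result);
	return 0;
}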

This forum is for users of the NVIDIA GPU Cloud. Please re-post this question on the CUDA support forum.