__global__ kernel in CUDA C is not getting executed.

Hi all, I have created a program for vector addition in CUDA C and am running it in Visual Studio 2010. The program runs to completion without any visible error, but it never enters the `__global__ void add(int *a, int *b, int *c, int n)` function, and the statements inside this function are not executed.

Why is the program not entering the `__global__` function? Kindly reply.
The program is as follows:

//#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#define SIZE 10

/*
 * Element-wise vector addition kernel: c[i] = a[i] + b[i] for 0 <= i < count.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles at most one
 * element, so the launch must supply at least `count` threads in total
 * (gridDim.x * blockDim.x >= count). Surplus threads are masked off by the
 * bounds check below.
 *
 * a, b  - device pointers to the input vectors (at least `count` ints each)
 * c     - device pointer to the output vector (at least `count` ints)
 * count - number of elements to add
 */
__global__
void add(int *a, int *b, int *c, int count)
{
    /* Flat global index, so the kernel stays correct for multi-block launches. */
    int n = blockIdx.x * blockDim.x + threadIdx.x;

    /* The grid rarely divides the data evenly; skip threads past the end. */
    if (n < count)
        c[n] = a[n] + b[n];
}

/*
 * Abort with a readable message when a CUDA runtime call fails.
 * Without this, failures (including a bad kernel launch configuration)
 * pass silently and the program appears to "run fine" with wrong output.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(err_));                   \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Host driver: fills two SIZE-element vectors, adds them on the GPU with
 * the `add` kernel, copies the result back and prints it.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if a host allocation or
 * any CUDA call fails.
 */
int main()
{
    int i, *a, *b, *c;
    int *d_a, *d_b, *d_c;
    size_t t = SIZE * sizeof(int);   /* byte size of one vector */

    a = (int *)malloc(t);
    b = (int *)malloc(t);
    c = (int *)malloc(t);
    if (a == NULL || b == NULL || c == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    CUDA_CHECK(cudaMalloc((void **)&d_a, t));
    CUDA_CHECK(cudaMalloc((void **)&d_b, t));
    CUDA_CHECK(cudaMalloc((void **)&d_c, t));

    for (i = 0; i < SIZE; i++)
    {
        a[i] = 4;
        b[i] = 5;
        c[i] = 0;
    }

    CUDA_CHECK(cudaMemcpy(d_a, a, t, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, t, cudaMemcpyHostToDevice));
    //cudaMemcpy(d_c,c,SIZE*sizeof(int),cudaMemcpyHostToDevice);

    int block_size = 128;
    /*
     * Round up: plain SIZE / block_size is 10 / 128 == 0 with integer
     * division, which launches zero blocks (an invalid configuration),
     * so the kernel would never execute.
     */
    int grid_size = (SIZE + block_size - 1) / block_size;

    add<<<grid_size, block_size>>>(d_a, d_b, d_c, SIZE);
    CUDA_CHECK(cudaGetLastError());        /* launch-configuration errors */
    CUDA_CHECK(cudaDeviceSynchronize());   /* errors raised while the kernel ran */

    CUDA_CHECK(cudaMemcpy(c, d_c, t, cudaMemcpyDeviceToHost));

    for (i = 0; i < SIZE; i++)
    {
        printf("c[%d]=%d\n", i, c[i]);
    }

    free(a);
    free(b);
    free(c);

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    // system("pause");

    return 0;
}

Add proper status checking to all CUDA API calls and kernel launches.

#define SIZE 10

int block_size = 128;

int grid_size = SIZE / block_size;

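??? (With integer division, 10 / 128 evaluates to 0, so the kernel is launched with zero blocks. That is an invalid launch configuration, so the kernel never runs. Round up instead, e.g. `(SIZE + block_size - 1) / block_size`.)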