Why is the gpuGenerateVerletList kernel not running on the GPU? Thanks.

// Host-side setup and launch of gpuGenerateVerletList: one thread per particle,
// N particles in total, with launch and execution errors reported explicitly.

// Launch geometry: ceil-div so a partial final block covers the tail when N is
// not a multiple of threadsPerBlock.
int threadsPerBlock = 256;
int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

int *vCount; //holds number of neighbor list entries for each particle
int *vLists; //holds in each row the verlet list for one particle
size_t vListsPitch; //receives pitch value from cudaMallocPitch call
// static_cast needs an explicit target type; the bare `static_cast(...)` form
// does not compile.
int NVList = static_cast<int>(0.75 * pow(2.0 * rVer, 3.)); //maximum number of entries in each verlet list
float *dMax; //holds displacement of each particle since last verlet list update
bool *dListUpdate; //flag for list update on device
bool hListUpdate = false; //flag for list update on host

// NOTE(review): no allocation of vCount, vLists, dMax or dListUpdate (and no
// assignment of vListsPitch) is visible in this excerpt. They must refer to
// device memory obtained from cudaMalloc / cudaMallocPitch before the launch
// below -- confirm this happens elsewhere.

// The grid dimension is the number of blocks and the block dimension is the
// number of threads per block. The two were previously swapped, which yields
// an invalid configuration as soon as blocksPerGrid exceeds the per-block
// thread limit.
dim3 dimGrid(blocksPerGrid);
dim3 dimBlock(threadsPerBlock);

gpuGenerateVerletList<<<dimGrid, dimBlock>>>(dPos, vLists, vListsPitch, vCount, dMax, rVerSq, dListUpdate);

// A kernel launch returns no status by itself. Configuration errors are
// reported by cudaGetLastError(); faults raised while the kernel executes are
// reported by the next synchronizing call. Without these checks a kernel that
// never ran fails silently. (printf needs <cstdio> if it is not already
// included by this file.)
cudaError_t launchErr = cudaGetLastError();
if (launchErr != cudaSuccess) {
    printf("gpuGenerateVerletList launch failed: %s\n", cudaGetErrorString(launchErr));
}
// Blocking sync is for diagnosis; drop it from hot paths once the kernel is
// known to run.
cudaError_t execErr = cudaDeviceSynchronize();
if (execErr != cudaSuccess) {
    printf("gpuGenerateVerletList execution failed: %s\n", cudaGetErrorString(execErr));
}