What is the actual error at the variable declaration? Variable error
So, I have the following code:

[code]int main(void){
// Host driver: launches __solve_trap, copies its output back to the host,
// sums the first numberofBlocks values, subtracts trap_error and prints the result.
// numberofBlocks, numberofThreads, totalofThreads, A, B and trap_error are
// not declared in this snippet; they must come from elsewhere in the file.
// NOTE(review): none of the CUDA runtime return codes are checked, and there is
// no cudaGetLastError() after the kernel launch.

// Host-side result buffer: room for numberofBlocks floats.
float host_sums[numberofBlocks];
float *dev_sums;
float totalSum = 0.0f;
// Device-side buffer: room for totalofThreads floats.
cudaMalloc((void**)&dev_sums, sizeof(float) * totalofThreads);
// Launch with numberofBlocks blocks of numberofThreads threads each.
// presumably the kernel writes one partial sum per block into dev_sums -- TODO confirm
__solve_trap<<<numberofBlocks, numberofThreads>>>(A,B,dev_sums);
// NOTE(review): this copies totalofThreads floats into host_sums, which holds
// only numberofBlocks floats. If totalofThreads > numberofBlocks the copy writes
// past the end of host_sums and can overwrite neighbouring stack variables
// (totalSum is declared right next to it). The byte count should match the
// destination size, i.e. sizeof(float) * numberofBlocks.
cudaMemcpy(host_sums, dev_sums, sizeof(float) * totalofThreads, cudaMemcpyDeviceToHost);
// Accumulate the first numberofBlocks entries of host_sums into totalSum.
for(int i = 0; i < numberofBlocks; i++) totalSum += host_sums[i]; //Error->*
totalSum -= trap_error;
printf("%f",totalSum);
cudaFree(dev_sums);
return 0;
}[/code]

When I run the program, I get a wrong result: the line marked '*' adds host_sums[i] to a totalSum that is already nonzero. I debugged the program and found that after the cudaMemcpy, totalSum != 0. If I put float totalSum = 0 after that memcpy, I don't get the error. Immediately before the call to cudaMemcpy I have totalSum == 0, but immediately after it I have totalSum != 0.

So, why do I have to initialize the variable after the cudaMemcpy? What is the real problem, and what causes this error?

Edit: I made an error in the code above; [code]cudaMemcpy(host_sums, dev_sums, sizeof(float) * numberofBlocks, cudaMemcpyDeviceToHost);[/code] is the correct line.
So, I have the following code:



// Host driver: launches __solve_trap, copies its output back to the host,
// sums the first numberofBlocks values, subtracts trap_error and prints the result.
// numberofBlocks, numberofThreads, totalofThreads, A, B and trap_error are
// not declared in this snippet; they must come from elsewhere in the file.
// NOTE(review): none of the CUDA runtime return codes are checked, and there is
// no cudaGetLastError() after the kernel launch.
int main(void){

// Host-side result buffer: room for numberofBlocks floats.
float host_sums[numberofBlocks];

float *dev_sums;

float totalSum = 0.0f;

// Device-side buffer: room for totalofThreads floats.
cudaMalloc((void**)&dev_sums, sizeof(float) * totalofThreads);

// Launch with numberofBlocks blocks of numberofThreads threads each.
// presumably the kernel writes one partial sum per block into dev_sums -- TODO confirm
__solve_trap<<<numberofBlocks, numberofThreads>>>(A,B,dev_sums);

// NOTE(review): this copies totalofThreads floats into host_sums, which holds
// only numberofBlocks floats. If totalofThreads > numberofBlocks the copy writes
// past the end of host_sums and can overwrite neighbouring stack variables
// (totalSum is declared right next to it). The byte count should match the
// destination size, i.e. sizeof(float) * numberofBlocks.
cudaMemcpy(host_sums, dev_sums, sizeof(float) * totalofThreads, cudaMemcpyDeviceToHost);

// Accumulate the first numberofBlocks entries of host_sums into totalSum.
for(int i = 0; i < numberofBlocks; i++) totalSum += host_sums[i]; //Error->*

totalSum -= trap_error;

printf("%f",totalSum);

cudaFree(dev_sums);

return 0;

}




When I run the program, I get a wrong result: the line marked '*' adds host_sums[i] to a totalSum that is already nonzero. I debugged the program and found that after the cudaMemcpy, totalSum != 0. If I put float totalSum = 0 after that memcpy, I don't get the error. Immediately before the call to cudaMemcpy I have totalSum == 0, but immediately after it I have totalSum != 0.



So, why do I have to initialize the variable after the cudaMemcpy? What is the real problem, and what causes this error?



Edit: I made an error in the code above;
cudaMemcpy(host_sums, dev_sums, sizeof(float) * numberofBlocks,    cudaMemcpyDeviceToHost);
is the correct line

#1
Posted 05/01/2012 07:07 PM   
Hi,
My bet is that "numberofBlocks" != "totalofThreads". "host_sums" has a size of "numberofBlocks" while "dev_sums" has a size of "totalofThreads" (which I suspect is an error). On top of that, you copy "totalofThreads" results back from the device into this poor little "host_sums", so it simply overflows and overwrites whatever sits next to it in memory.
Hi,

My bet is that "numberofBlocks" != "totalofThreads". "host_sums" has a size of "numberofBlocks" while "dev_sums" has a size of "totalofThreads" (which I suspect is an error). On top of that, you copy "totalofThreads" results back from the device into this poor little "host_sums", so it simply overflows and overwrites whatever sits next to it in memory.

#2
Posted 05/02/2012 05:08 AM   
Scroll To Top