Hi all!
I am new in CUDA programming.
Wrote program to sum ​​two arrays in the third array.
For some reason, the target array C is always zero, even after adding … do not tell what I’m doing wrong?
The source code Cuda.cu:
#include <iostream>
#include <cuda_runtime.h>
__global__ void sum(float *A, float *B, float *C)
{
int n = blockDim.x * blockIdx.x + threadIdx.x;
C[n] = A[n] + B[n];
}
void StartSum(float *A, float *B, float *C, int N)
{
sum<<< N/64, 64 >>>(A, B, C) ;
}
The source code to initialize arrays and calling summation:
#include <windows.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <iostream>
#define N 5
void StartSum(float *A, float *B, float *C, int n);
int main()
{
float a[N] = {1,2,3,4,5}, b[N]={-2,-4,5,7,1}, c[N] = {0,0,0,0,0};
cudaError_t err;
float *dev_a , *dev_b , *dev_c ;
cudaSetDevice(0);
cudaMalloc((void**)&dev_a , sizeof (float)*N);
cudaMalloc((void**)&dev_b , sizeof (float)*N);
cudaMalloc((void**)&dev_c , sizeof (float)*N);
err = cudaMemcpy(dev_a, a, sizeof(float)*N, cudaMemcpyHostToDevice);
err = cudaMemcpy(dev_b, b, sizeof(float)*N, cudaMemcpyHostToDevice);
err = cudaMemcpy(dev_c, c, sizeof(float)*N, cudaMemcpyHostToDevice);
StartSum(dev_a, dev_b, dev_c, N);
err = cudaMemcpy(c, dev_c, sizeof(float), cudaMemcpyDeviceToHost);
for (int i = 0; i<N; i++)
std::cout<<c[i]<<" ";
std::cout<<std::endl;
system("PAUSE");
}
In deriving the results always get zero … std::cout<<c[i]<<" ";
Thank you in advance for your help.