I can't quite figure out the best way to sum up all the elements of a buffer.
I have tried the following kernels, but neither sums up the buffer properly:
/*
 * Accumulate every element of A into sum[0].
 *
 * Each work-item adds the single element A[get_global_id(0)], so the
 * global work size must match the number of elements to be summed.
 * The kernel only adds to sum[0]; the host has to set it to 0 before
 * enqueueing the kernel.
 *
 * A plain `sum[0] = A[i] + sum[0]` is a data race: work-items read the
 * same old value of sum[0] concurrently and overwrite each other's
 * results. atomic_add makes the read-modify-write indivisible.
 *
 * NOTE(review): atomic_add on __global int is core from OpenCL 1.1; on
 * 1.0 devices the equivalent is atom_add with the
 * cl_khr_global_int32_base_atomics extension enabled. For large buffers
 * a per-work-group reduction in local memory followed by one atomic per
 * group is usually faster than one atomic per element.
 */
__kernel void vector_sum(__global int *A, __global int *sum) {
    // Get the index of the current element to be processed
    int i = get_global_id(0);
    // Atomically fold this element into the shared accumulator
    atomic_add(&sum[0], A[i]);
}
/*
 * Adds this work-item's element of A to the `sum` argument.
 *
 * `sum` is passed by value, so every work-item updates its own private
 * copy; the updated value is never written to memory the host or other
 * work-items can observe, and it is discarded when the kernel returns.
 */
__kernel void vector_sum(__global int *A, int sum) {
    // Fetch the element that belongs to this work-item
    const int element = A[get_global_id(0)];
    // Fold it into the private copy of sum
    sum += element;
}
Does anyone have an idea for the best way to do this?