// Adds this work-item's element of A into the single accumulator sum[0].
//
// A   - input buffer, read at the work-item's global index in dimension 0.
// sum - output buffer; only element 0 is read and written.
//
// NOTE(review): the update of sum[0] is a plain load/add/store with no atomic
// operation or barrier, so when more than one work-item runs, updates can
// overwrite each other and the total comes out wrong. atomic_add(&sum[0], A[i])
// or a work-group reduction would avoid that.
// NOTE(review): sum[0] is not initialised here; presumably the host zeroes it
// before the launch - verify.
__kernel void vector_sum(__global int *A,__global int *sum) {
    // Get the index of the current element to be processed
    int i = get_global_id(0);

    // Do the operation
    sum[0] = A[i] + sum[0];
}

// Adds this work-item's element of A to the scalar argument sum.
//
// A   - input buffer, read at the work-item's global index in dimension 0.
// sum - scalar passed by value.
//
// NOTE(review): sum is received by value, so the assignment below only changes
// this work-item's own copy. The kernel never writes through a pointer, so no
// result is visible once it finishes. Returning a total needs a __global
// output buffer.
__kernel void vector_sum(__global int *A,int sum) {
    // Get the index of the current element to be processed
    int i = get_global_id(0);

    // Do the operation
    sum = A[i] + sum;
}

You must log in to send a PM.

Add Reply

Authorization Required

Not a member? Register Now

I have tried the following kernels but neither sums up the buffers properly.

[code]

// Adds this work-item's element of A into the single accumulator sum[0].
// A is read at the work-item's global index; only element 0 of sum is touched.
// NOTE(review): the update of sum[0] below is a plain load/add/store with no
// atomic operation or barrier. With more than one work-item, updates can
// overwrite each other, which is presumably why the sum comes out wrong.
// atomic_add(&sum[0], A[i]) or a work-group reduction would avoid that.
// NOTE(review): sum[0] is not initialised here; presumably the host zeroes it
// before the launch - verify.
__kernel void vector_sum(__global int *A,__global int *sum) {

// Get the index of the current element to be processed

int i = get_global_id(0);

// Unsynchronised read-modify-write of the shared accumulator sum[0]

sum[0] = A[i] + sum[0];

}

[/code]

[code]

// Adds this work-item's element of A to the scalar argument sum.
// NOTE(review): sum is received by value, so the assignment below only changes
// this work-item's own copy. The kernel never writes through a pointer, so no
// result is visible once it finishes. Returning a total needs a __global
// output buffer.
__kernel void vector_sum(__global int *A,int sum) {

// Get the index of the current element to be processed

int i = get_global_id(0);

// Updates only the by-value copy of sum; the new value is never stored anywhere

sum = A[i] + sum;

}

[/code]

Does anyone have an idea for the best way to do this?

I have tried the following kernels but neither sums up the buffers properly.

Does anyone have an idea for the best way to do this?

I found this resource on the AMD site:

http://developer.amd.com/documentation/articles/pages/opencl-optimization-case-study-simple-reductions.aspx

I found this resource on the AMD site:

http://developer.amd.com/documentation/articles/pages/opencl-optimization-case-study-simple-reductions.aspx