porting of struct and methods
Hello Community.

I have to port a code from C++ to Cuda. The code use an object that use its internal function to do a simulation. This object use mostly simple mathematical function but it use also a lot a random number generator function. I though that I could replace the random function with an internal random function of the object and then use it in the kernel. To show you what I need I wrote a toy model that is an extreme simplification of what I need.

[code]#include <cuda.h>
#include <iostream>
#include <curand_kernel.h>
using namespace std;

struct test{

float value1;
float value2;
curandState B;

float __device__ rand(){return curand_uniform(&B);}

};

__global__ void setup_kernel(curandState *state)
{
const int id=blockIdx.x;
curand_init(id, id, 0, &state[id]);
}

__global__ void fill_mat(struct test *anobj, curandState *state)
{
const int Idx=blockIdx.x;
curandState localState = state[Idx];
anobj[Idx].B=localState;
anobj[Idx].value1=anobj[Idx].rand();
anobj[Idx].value2=anobj[Idx].rand();
}

int main()
{
int num=10;
curandState *devStates;
cudaMalloc( (void **)&devStates, num*sizeof(curandState) );

struct test *results = (struct test*)malloc(num*sizeof(struct test));
struct test *to_device;
cudaMalloc ( (void **)&to_device, num*sizeof(to_device));

setup_kernel<<<num, 1>>>(devStates);

fill_mat<<<num,1>>>(to_device, devStates);

cudaMemcpy(results,to_device,num*sizeof(struct test),cudaMemcpyDeviceToHost);

for(int i=0;i<num;i++)
cout<<results[i].value1<<endl;
cout<<results[i].value2<<endl;
return 0;
}
[/code]


I need also that at each call of rand() I get a different random number (so there may be some error in the passage of the B state).
Unfortunately the code that I have put doesn't work, it give a lot of zeros for for the results.

Could somebody help me to correct this code please?
Hello Community.



I have to port a code from C++ to Cuda. The code use an object that use its internal function to do a simulation. This object use mostly simple mathematical function but it use also a lot a random number generator function. I though that I could replace the random function with an internal random function of the object and then use it in the kernel. To show you what I need I wrote a toy model that is an extreme simplification of what I need.



#include <cuda.h>

#include <iostream>

#include <curand_kernel.h>

using namespace std;



struct test{



float value1;

float value2;

curandState B;



float __device__ rand(){return curand_uniform(&B);}



};



__global__ void setup_kernel(curandState *state)

{

const int id=blockIdx.x;

curand_init(id, id, 0, &state[id]);

}



__global__ void fill_mat(struct test *anobj, curandState *state)

{

const int Idx=blockIdx.x;

curandState localState = state[Idx];

anobj[Idx].B=localState;

anobj[Idx].value1=anobj[Idx].rand();

anobj[Idx].value2=anobj[Idx].rand();

}



int main()

{

int num=10;

curandState *devStates;

cudaMalloc( (void **)&devStates, num*sizeof(curandState) );



struct test *results = (struct test*)malloc(num*sizeof(struct test));

struct test *to_device;

cudaMalloc ( (void **)&to_device, num*sizeof(to_device));



setup_kernel<<<num, 1>>>(devStates);



fill_mat<<<num,1>>>(to_device, devStates);



cudaMemcpy(results,to_device,num*sizeof(struct test),cudaMemcpyDeviceToHost);



for(int i=0;i<num;i++)

cout<<results[i].value1<<endl;

cout<<results[i].value2<<endl;

return 0;

}






I need also that at each call of rand() I get a different random number (so there may be some error in the passage of the B state).

Unfortunately the code that I have put doesn't work, it give a lot of zeros for for the results.



Could somebody help me to correct this code please?

#1
Posted 02/04/2012 09:10 AM   
Problem found (but I still need to run some tests to see if it work completely as I want...).

In practice you need to add a cudaMemcpy from host to device before launch the kernel.

cudaMemcpy(to_device,results,num*sizeof(struct test),cudaMemcpyHostToDevice);
Problem found (but I still need to run some tests to see if it work completely as I want...).



In practice you need to add a cudaMemcpy from host to device before launch the kernel.



cudaMemcpy(to_device,results,num*sizeof(struct test),cudaMemcpyHostToDevice);

#2
Posted 02/04/2012 02:29 PM   
Scroll To Top