Struct

How should I send a struct to the GPU?

If you pass it by value as a kernel parameter, you can send a struct the same way you send ordinary types.

If you want to copy the struct using cudaMemcpy, you can do so the same way as ordinary types.

If the struct contains pointers to dynamically allocated areas, things become more complicated: you will probably have to construct a “deep copy” or reorganize your data.

I do need a struct that contains dynamically allocated areas.
Can you help me?

// Small aggregate that the usage snippet below passes to a kernel by value.
struct data {
  int size;      // set to 100 in the usage snippet; presumably the element count of `array` (confirm)
  float* array;  // buffer pointer; the usage snippet fills it in via cudaMalloc, so it holds a device address there
};

// Host-side setup: the struct itself lives on the host, only `array` refers to device memory.
data d;
d.size = 100;
// cudaMalloc stores the new device address in d.array; its return status is not checked here.
cudaMalloc(&d.array, 100*sizeof(float));
...
// The struct is passed by value, so the kernel receives a copy of `size` and of the pointer value.
kernel_func<<<1,1>>>(d);

…any problem?

This is valid, but not useful as written if OP intends to read or modify any data pointed to by d.array in host code. Then a deep copy will be required.

Thank you, very much! But it is not working in my program.

I have the following struct:

// Container posted by the asker: two dimensions, an element count, an inline
// byte buffer and two pointer members.
//
// The trailing `typedef struct Array_t;` of the original post had no declarator
// and therefore never created an `Array_t` alias; the typedef now wraps the
// struct so that both `Array` and `Array_t` name this type.
//
// A plain cudaMemcpy of this struct copies the inline `array` bytes, but for
// `fileName` and `file` it copies only the pointer values, not what they
// point to.
typedef struct Array
{
  unsigned int rows;
  unsigned int cols;
  long int elementNumber;
  unsigned char array[700000];  // inline storage, part of sizeof(Array)
  char *fileName[20];           // NOTE(review): this is 20 char pointers; `char fileName[20]` may have been intended - confirm
  File *file;                   // NOTE(review): `File` is not declared in this snippet; FILE from <cstdio> may be intended - confirm
} Array_t;

When I want to allocate and copy the above struct I do:

// Harray and Darray are both received as pointers to Array.
// NOTE(review): cudaMalloc writes the new device address through its first argument, so it
// needs the address of the pointer variable (&Darray). Casting the pointer's current value
// to Array** does not provide that.
cudaMalloc ((Array**) Darray , sizeof(Array));
// NOTE(review): if Harray is already a pointer to the host struct, &Harray is the address of
// the pointer variable rather than of the struct. Together with the line above, this is the
// likely source of the reported "invalid argument" - confirm against the full program.
cudaMemcpy (Darray , &Harray , sizeof(Array) , cudaMemcpyHostToDevice);

The call cudaMemcpy (Darray , &Harray , sizeof(Array) , cudaMemcpyHostToDevice); returns "invalid argument".
Do you know what is wrong?

Additionally, I want to make it dynamic, with unsigned char *array in the struct, but that isn’t working for me!

You’ll need to use either a cuda deep copy, or else unified memory.

You can google “cuda deep copy” or “cuda unified memory” and start reading.

#include <cuda_runtime.h>
#include <cassert> 

// Plain header struct: three integer fields followed by a pointer to byte
// storage that lives outside the struct.
struct Array {
  unsigned rows;
  unsigned cols;
  long elementNumber;
  unsigned char *array;  // main() below fills this in with cudaMalloc, so it holds a device address
};

// Minimal demonstration: allocate device storage for Harray.array, copy the
// Array header to the device, then release both allocations.
//
// Harray itself is a host object; only its `array` member holds a device
// address, so the header copied to Darray already refers to device memory.
// Each CUDA call's status is checked with assert (note that assert is
// compiled out when NDEBUG is defined).
int main() {
  const int kElementCount = 12345;

  cudaError_t status;
  // Value-initialize so rows/cols/elementNumber are zero instead of
  // indeterminate when the struct is copied to the device below.
  Array Harray = {};
  Array* Darray;

  // Device buffer that the struct's pointer member will refer to.
  status = cudaMalloc(&Harray.array, kElementCount*sizeof(unsigned char));
  assert( status == cudaSuccess );

  // Device copy of the struct header (scalar fields + pointer value).
  status = cudaMalloc(&Darray , sizeof(Array));
  assert( status == cudaSuccess );
  status = cudaMemcpy(Darray , &Harray , sizeof(Array) , cudaMemcpyHostToDevice);
  assert( status == cudaSuccess );

  status = cudaFree(Darray);
  assert( status == cudaSuccess );
  status = cudaFree(Harray.array);
  assert( status == cudaSuccess );

  return 0;
}

This runs without any problems.

Why do you allocate Harray.array using cudaMalloc? Harray belongs to the host.
I need to allocate Darray and copy Harray into it.

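Darray is used on the device, so the array member of the struct it points to must hold an address that is accessible from the device.
Otherwise (if Harray.array has to be host memory), copy the data to the device separately and patch the pointer: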

// Deep copy of an Array whose payload starts out in pinned host memory.
// Return codes are not checked in this fragment; each call returns a cudaError_t.
cudaMallocHost(&Harray.array, 12345*sizeof(unsigned char)); // pinned host buffer for the payload
/* after above, fill Harray.array */
cudaMalloc(&Darray , sizeof(Array)); // allocate the struct on the device
// copy the struct host->device; its `array` member still holds the host address at this point
cudaMemcpy(Darray , &Harray , sizeof(Array) , cudaMemcpyHostToDevice);
unsigned char* tmp; // will receive the device storage for Array::array
cudaMalloc(&tmp, 12345*sizeof(unsigned char));
// copy the array contents host->device
cudaMemcpy(tmp, Harray.array, 12345*sizeof(unsigned char), cudaMemcpyHostToDevice);
// overwrite the device struct's `array` member with the device address held in tmp;
// &Darray->array only computes the member's address, the host never reads the device struct
cudaMemcpy(&Darray->array, &tmp, sizeof(unsigned char*), cudaMemcpyHostToDevice);

Ok, after you did the above example:

Where do you fill Harray.array?

Whole sample here:

#include <cuda_runtime.h>
#include <cstdio>
#include <cassert> 

// Header struct handed to kernel_fun by pointer: three integer fields plus a
// pointer to byte storage held outside the struct.
struct Array {
  unsigned rows;
  unsigned cols;
  long elementNumber;
  unsigned char *array;  // in main() this receives a cudaMalloc'ed device address
};

// Debug kernel: prints the fields of the Array that `p` points to, formatting
// `array` with %s (so the bytes are expected to be NUL-terminated).
// Both `p` and `p->array` are dereferenced in device code and must therefore
// be device-accessible addresses. Every launched thread executes the printf;
// the sample launches it as <<<1,1>>>.
__global__ void kernel_fun(const Array* p) {
  // rows/cols are unsigned int, so %u is the matching conversion; %s takes a
  // char pointer, hence the cast from unsigned char*.
  printf("%u,%u %ld [%s]\n", p->rows, p->cols, p->elementNumber,
         reinterpret_cast<const char*>(p->array));
}

// Complete example: put a short string into device memory, describe it with
// an Array header, copy that header to the device and let kernel_fun print it.
//
// Harray is a host object whose `array` member holds a device address, so
// after the header is copied to Darray the kernel can follow the pointer.
// Each CUDA call's status is checked with assert (compiled out under NDEBUG).
int main() {
  cudaError_t status;
  Array Harray;
  Array* Darray;

  // A real 16-byte array (zero-padded by the initializer) rather than a
  // pointer to the 13-byte literal, so the 16-byte copy below never reads
  // past the end of its source.
  const char str[16] = "Hello, world";
  status = cudaMalloc(&Harray.array, sizeof(str));
  assert( status == cudaSuccess );
  status = cudaMemcpy(Harray.array, str, sizeof(str), cudaMemcpyHostToDevice);
  assert( status == cudaSuccess );
  Harray.rows = 123;
  Harray.cols = 456;
  Harray.elementNumber = 789L;

  // Device copy of the header (scalar fields + device pointer value).
  status = cudaMalloc(&Darray , sizeof(Array));
  assert( status == cudaSuccess );
  status = cudaMemcpy(Darray , &Harray , sizeof(Array) , cudaMemcpyHostToDevice);
  assert( status == cudaSuccess );

  kernel_fun<<<1,1>>>(Darray);
  // A launch does not return a status: launch errors are reported by
  // cudaGetLastError, execution errors by the following synchronization.
  status = cudaGetLastError();
  assert( status == cudaSuccess );
  status = cudaDeviceSynchronize();
  assert( status == cudaSuccess );

  status = cudaFree(Darray);
  assert( status == cudaSuccess );
  status = cudaFree(Harray.array);
  assert( status == cudaSuccess );

  return 0;
}