Hi everyone,
I have a problem with CUDA, actually i’m working on optimizing a program (that do a lot of calculation).
I’ve got the following error code :
Error: cannot pass an argument with a user-provided copy-constructor to a device-side kernel launch
on the following line :
alphaAngle <<< 1, 1 >>> (positionEarthStation, positionSat, positionGeoSat, CUDA_result, CUDA_tid, AngleIteration);
The error occured 3 time on this line, i guess for the 1st, 2nd and 3rd parameters.
Their type are Cartesian which is describe as follow :
class Cartesian
{
public:
double m_X;
double m_Y;
double m_Z;
Global global;
private:
double m_m;
public:
__host__ __device__ Cartesian(void);
__host__ __device__ Cartesian(const Cartesian &c);
__host__ __device__ Cartesian(double x, double y, double z);
/*__host__ __device__ Cartesian& operator=(const Cartesian& c);*/
__host__ __device__ void set(double x, double y, double z);
__host__ __device__ double magnitude(void);
__host__ __device__ LatLonAlt ToLLA(void);
};
What i’m doing is call a kernel from another kernel and pass arguments received and/or modified as parameter of the function called.
Function calling :
__global__ void SatPositionKernel(Cartesian positionEarthStation, Propagator *CUDA_sat, Cartesian positionGeoSat, int timeIndex, int AngleIteration)
{
/* Calculer la position du satellite défilant */
int tid, CUDA_tid[BLOCKS_ANGLE], index;
double CUDA_result[BLOCKS_ANGLE];
Cartesian positionSat, output;
Global global;
if ((tid = threadIdx.x + blockIdx.x * blockDim.x) < NB_SAT)
{
positionSat = CUDA_sat[tid].evaluate(timeIndex * STEP, SIMULATION_DURATION, 0, true);
/* Pour chaque position sur l'orbite geostationnaire */
alphaAngle <<< 1, 1 >>> (positionEarthStation, positionSat, positionGeoSat, CUDA_result, CUDA_tid, AngleIteration);
cudaDeviceSynchronize();
index = getIndex(CUDA_result);
output.set(positionGeoSat.m_X * cos(STEP * index) + positionGeoSat.m_Y * -sin(STEP * index), positionGeoSat.m_X * sin(STEP * index) + positionGeoSat.m_Y * cos(STEP * index), positionGeoSat.m_Z);
printf("The sat %s at Time %f is as position (%.3f, %.3f, %.3f)\nThe Base at position (%.3f, %.3f; %.3f)\nThe Geo-orbital position is (%.3f, %.3f, %.3f)\nThe angle formed is %.15f",
CUDA_sat[tid].m_name, timeIndex * STEP, positionSat.m_X, positionSat.m_Y, positionSat.m_Z, positionEarthStation.m_X, positionEarthStation.m_Y, positionEarthStation.m_Z,
output.m_X, output.m_Y, output.m_Z, global.radToDeg(CUDA_result[index]));
}
}
Function called
__global__ void alphaAngle(Cartesian EarthStation, Cartesian Sat, Cartesian GeoSat, double *CUDA_result, int *CUDA_tid, int iteration)
{
__shared__ double tmp[THREADS_ANGLE][2];
Cartesian new_pos, vecU, vecV;
Global global;
int tid, idx, i;
idx = threadIdx.x;
if ((tid = threadIdx.x + blockIdx.x * blockDim.x) < iteration)
{
new_pos = global.rotationZAxis(GeoSat, STEP_ANGLE * tid);
vecU.set(Sat.m_X - EarthStation.m_X, Sat.m_Y - EarthStation.m_Y, Sat.m_Z - EarthStation.m_Z);
vecV.set(new_pos.m_X - EarthStation.m_X, new_pos.m_Y - EarthStation.m_Y, new_pos.m_Z - EarthStation.m_Z);
tmp[idx][0] = global.dotProduct(vecU, vecV);
tmp[idx][1] = (double)tid;
}
__syncthreads();
i = THREADS_ANGLE / 2;
while (i != 0)
{
(idx < i && tmp[idx][0] > tmp[idx + i][0]) ? (tmp[idx][0] = tmp[idx + i][0], tmp[idx][1] = tmp[idx + i][1]) : (0);
__syncthreads();
i /= 2;
}
(idx == 0) ? (CUDA_result[blockIdx.x] = tmp[0][0], CUDA_tid[blockIdx.x] = (int)tmp[0][1]) : (0);
}
PositionEarthStation defined as follow in previous function :
Cartesian positionEarthStation(1597885.53777688, 1253552.16551859, 6046164.27311665);
position sat defined in function calling.
positionGeoSat defined as follow in previous function :
positionDebutArcGeo = findStartGeo(positionEarthStation);
I tried to do something with approximatly the same operation/affectation in another code. Here it is :
__global__ void kernel2(Cartesian *titi)
{
printf("fils titi[0](%f, %f, %f)\n", titi->m_X, titi->m_Y, titi->m_Z);
}
__global__ void kernel(Cartesian *toto)
{
printf("parent\n");
printf("pere titi[0](%f, %f, %f)\n", toto->m_X, toto->m_Y, toto->m_Z);
toto->set(3, 2, 1);
kernel2 << < 1, 1 >> > (toto);
cudaDeviceSynchronize();
}
__global__ void test2(Cartesian titi)
{
printf("fils titi(%f, %f, %f)\n", titi.m_X, titi.m_Y, titi.m_Z);
}
__device__ Cartesian get_momo(void)
{
return (Cartesian(1, 1, 1));
}
__global__ void test(Cartesian titi)
{
Cartesian momo(1, 2, 6);
momo = get_momo();
printf("pere titi(%f, %f, %f)\n", titi.m_X, titi.m_Y, titi.m_Z);
titi.set(1, 2, 3);
test2 << < 1, 1 >> > (momo);
cudaDeviceSynchronize();
}
int main(void)
{
Cartesian titi, *toto;
titi.m_X = 1;
titi.m_Y = 1;
titi.m_Z = 2;
cudaMalloc((void **)&toto, sizeof(Cartesian));
cudaMemcpy(toto, &titi, sizeof(Cartesian), cudaMemcpyHostToDevice);
test << < 1, 1 >> > (titi);
cudaDeviceSynchronize();
printf("fin\n");
}
Cartesian type : same definition.
For the constructor / set function :
__host__ __device__ Cartesian::Cartesian(void) : m_X(0), m_Y(0), m_Z(0) {}
__host__ __device__ Cartesian::Cartesian(double x, double y, double z) : m_X(x), m_Y(y), m_Z(z) {}
__host__ __device__ void Cartesian::set(double x, double y, double z)
{
m_X = x;
m_Y = y;
m_Z = z;
m_m = -1;
}
But on that code it work perfectly, if someone have an idea, need mode information/code/something tell me !
Thanks for your help !