User-provided copy-constructor

Kasbrick · August 29, 2016, 12:46pm

Hi everyone,

I have a problem with CUDA. I’m currently working on optimizing a program that does a lot of calculations.
I get the following compile error:

Error: cannot pass an argument with a user-provided copy-constructor to a device-side kernel launch

on the following line :

alphaAngle <<< 1, 1 >>> (positionEarthStation, positionSat, positionGeoSat, CUDA_result, CUDA_tid, AngleIteration);

The error occurred 3 times on this line, I guess once each for the 1st, 2nd and 3rd parameters.

Their type is Cartesian, which is declared as follows:

/*
 * Triple of double-precision Cartesian coordinates whose member functions are
 * all declared for both host and device code.
 *
 * NOTE(review): the copy constructor below is user-declared. nvcc refuses to
 * pass an object whose class has a user-provided copy constructor by value to
 * a kernel launched from device code, which is the error quoted in this post.
 */
class								Cartesian
{
public:
	double							m_X;	/* x coordinate */
	double							m_Y;	/* y coordinate */
	double							m_Z;	/* z coordinate */
	Global							global;	/* helper object; Global is declared elsewhere */

private:
	/* Reset to -1 by set(); presumably a cached magnitude with -1 meaning
	 * "not computed yet" -- TODO confirm against magnitude(). */
	double							m_m;

public:
	/* Default constructor; its definition zeroes m_X, m_Y and m_Z. */
	__host__ __device__				Cartesian(void);
	/* User-declared copy constructor (definition not shown here). */
	__host__ __device__				Cartesian(const Cartesian &c);
	/* Builds the object from the three coordinates. */
	__host__ __device__				Cartesian(double x, double y, double z);
	/*__host__ __device__	Cartesian&	operator=(const Cartesian& c);*/
	/* Overwrites the three coordinates and resets m_m to -1. */
	__host__ __device__ void		set(double x, double y, double z);
	/* Definition not shown; presumably returns the vector length -- TODO confirm. */
	__host__ __device__	double		magnitude(void);
	/* Definition not shown; returns a LatLonAlt (type declared elsewhere). */
	__host__ __device__ LatLonAlt	ToLLA(void);
};

What I’m doing is calling a kernel from another kernel, passing arguments that were received and/or modified by the calling kernel as parameters of the called kernel.
Calling function:

/*
 * Parent kernel: each thread whose global index is below NB_SAT evaluates the
 * position of one satellite, launches alphaAngle as a child kernel, picks an
 * index with getIndex() and prints the resulting positions.
 *
 * positionEarthStation  position passed through to alphaAngle and printed
 * CUDA_sat              array of propagators, indexed by the global thread index
 * positionGeoSat        position passed to alphaAngle and rotated for the output
 * timeIndex             multiplied by STEP to obtain the evaluation time
 * AngleIteration        forwarded to alphaAngle as its iteration bound
 *
 * The buffers handed to the child kernel are taken from the device heap: a
 * pointer to a thread-local array must not be passed as a child-launch
 * argument, because local memory is not visible outside the owning thread.
 *
 * NOTE(review): the Cartesian arguments are still passed by value because that
 * is what alphaAngle's signature requires; this is the construct nvcc rejects
 * while Cartesian has a user-provided copy constructor.
 * NOTE(review): the child is launched with <<<1, 1>>>, so at most entry 0 of
 * each buffer is written before getIndex() reads CUDA_result -- confirm the
 * intended launch configuration.
 */
__global__ void SatPositionKernel(Cartesian positionEarthStation, Propagator *CUDA_sat, Cartesian positionGeoSat, int timeIndex, int AngleIteration)
{
	int			tid, index;
	double		*CUDA_result;
	int			*CUDA_tid;
	Cartesian	positionSat, output;
	Global		global;
	cudaError_t	err;

	if ((tid = threadIdx.x + blockIdx.x * blockDim.x) < NB_SAT)
	{
		/* Device-heap buffers are global memory and therefore visible to the child grid. */
		CUDA_result = (double *)malloc(BLOCKS_ANGLE * sizeof(double));
		CUDA_tid = (int *)malloc(BLOCKS_ANGLE * sizeof(int));
		if (!CUDA_result || !CUDA_tid)
		{
			printf("SatPositionKernel: device heap allocation failed for thread %d\n", tid);
			free(CUDA_result);
			free(CUDA_tid);
			return;
		}
		/* Compute the position of the moving satellite. */
		positionSat = CUDA_sat[tid].evaluate(timeIndex * STEP, SIMULATION_DURATION, 0, true);
		/* For each position on the geostationary orbit. */
		alphaAngle <<< 1, 1 >>> (positionEarthStation, positionSat, positionGeoSat, CUDA_result, CUDA_tid, AngleIteration);
		/* A failed launch is only reported through cudaGetLastError(). */
		err = cudaGetLastError();
		if (err == cudaSuccess)
			err = cudaDeviceSynchronize();
		if (err == cudaSuccess)
		{
			index = getIndex(CUDA_result);
			output.set(positionGeoSat.m_X * cos(STEP * index) + positionGeoSat.m_Y * -sin(STEP * index), positionGeoSat.m_X * sin(STEP * index) + positionGeoSat.m_Y * cos(STEP * index), positionGeoSat.m_Z);
			printf("The sat %s at Time %f is as position (%.3f, %.3f, %.3f)\nThe Base at position (%.3f, %.3f; %.3f)\nThe Geo-orbital position is (%.3f, %.3f, %.3f)\nThe angle formed is %.15f",
				CUDA_sat[tid].m_name, timeIndex * STEP, positionSat.m_X, positionSat.m_Y, positionSat.m_Z, positionEarthStation.m_X, positionEarthStation.m_Y, positionEarthStation.m_Z,
				output.m_X, output.m_Y, output.m_Z, global.radToDeg(CUDA_result[index]));
		}
		else
			printf("SatPositionKernel: alphaAngle failed for thread %d: %s\n", tid, cudaGetErrorString(err));
		free(CUDA_result);
		free(CUDA_tid);
	}
}

Called function:

/*
 * Child kernel: every thread whose global index is below `iteration` rotates
 * GeoSat about the Z axis by STEP_ANGLE * index, forms the vectors from
 * EarthStation to Sat and to the rotated position, and stores their dot
 * product. The block then reduces to the smallest dot product and writes it,
 * with the global index that produced it, to entry blockIdx.x of the outputs.
 *
 * EarthStation, Sat, GeoSat  input positions
 * CUDA_result                one double per block: smallest dot product found
 * CUDA_tid                   one int per block: global index of that candidate
 * iteration                  number of candidate rotations to evaluate
 *
 * Every shared slot starts as "empty" (+infinity, index -1), so the reduction
 * never reads uninitialised shared memory when a thread has no candidate or
 * when the block has fewer than THREADS_ANGLE threads. A block without any
 * candidate reports +infinity and index -1.
 *
 * Preconditions: THREADS_ANGLE is a power of two (the halving reduction relies
 * on it). Threads with threadIdx.x >= THREADS_ANGLE contribute nothing.
 */
__global__ void			alphaAngle(Cartesian EarthStation, Cartesian Sat, Cartesian GeoSat, double *CUDA_result, int *CUDA_tid, int iteration)
{
	__shared__ double	tmp[THREADS_ANGLE][2];
	Cartesian			new_pos, vecU, vecV;
	Global				global;
	int					tid, idx, i, slot;
	/* Bit pattern of IEEE-754 +infinity: loses every "smaller than" comparison. */
	const double		empty = __longlong_as_double(0x7ff0000000000000LL);

	idx = threadIdx.x;
	/* Strided fill so that slots beyond blockDim.x are initialised as well. */
	for (slot = idx; slot < THREADS_ANGLE; slot += blockDim.x)
	{
		tmp[slot][0] = empty;
		tmp[slot][1] = -1.0;
	}
	tid = threadIdx.x + blockIdx.x * blockDim.x;
	/* Slot idx was filled by this same thread above, so no barrier is needed yet. */
	if (idx < THREADS_ANGLE && tid < iteration)
	{
		new_pos = global.rotationZAxis(GeoSat, STEP_ANGLE * tid);
		vecU.set(Sat.m_X - EarthStation.m_X, Sat.m_Y - EarthStation.m_Y, Sat.m_Z - EarthStation.m_Z);
		vecV.set(new_pos.m_X - EarthStation.m_X, new_pos.m_Y - EarthStation.m_Y, new_pos.m_Z - EarthStation.m_Z);
		tmp[idx][0] = global.dotProduct(vecU, vecV);
		tmp[idx][1] = (double)tid;
	}
	__syncthreads();
	/* Halving min-reduction; the loop bound is uniform, so every thread reaches each barrier. */
	for (i = THREADS_ANGLE / 2; i != 0; i /= 2)
	{
		if (idx < i && tmp[idx][0] > tmp[idx + i][0])
		{
			tmp[idx][0] = tmp[idx + i][0];
			tmp[idx][1] = tmp[idx + i][1];
		}
		__syncthreads();
	}
	if (idx == 0)
	{
		CUDA_result[blockIdx.x] = tmp[0][0];
		CUDA_tid[blockIdx.x] = (int)tmp[0][1];
	}
}

positionEarthStation is defined as follows in the previous function:

Cartesian		positionEarthStation(1597885.53777688, 1253552.16551859, 6046164.27311665);

positionSat is defined in the calling function.

positionGeoSat is defined as follows in the previous function:

positionDebutArcGeo = findStartGeo(positionEarthStation);

I tried to do something with approximately the same operations/assignments in another piece of code. Here it is:

/* Child kernel of the pointer experiment: prints the three coordinates of the
 * Cartesian object that `titi` points to. */
__global__ void kernel2(Cartesian *titi)
{
	const Cartesian	&point = *titi;

	printf("fils titi[0](%f, %f, %f)\n", point.m_X, point.m_Y, point.m_Z);
}

/*
 * Parent kernel of the pointer experiment: prints the object `toto` points to,
 * overwrites its coordinates with (3, 2, 1), then launches kernel2 on the same
 * pointer and waits for it.
 *
 * The child launch and the synchronisation are both checked, so a failure is
 * reported instead of being silently ignored.
 *
 * NOTE(review): device-side cudaDeviceSynchronize() is deprecated in recent
 * CUDA toolkits -- confirm against the toolkit version in use.
 */
__global__ void kernel(Cartesian *toto)
{
	cudaError_t	err;

	printf("parent\n");
	printf("pere titi[0](%f, %f, %f)\n", toto->m_X, toto->m_Y, toto->m_Z);
	toto->set(3, 2, 1);
	kernel2 << < 1, 1 >> > (toto);
	/* A failed launch is only reported through cudaGetLastError(). */
	err = cudaGetLastError();
	if (err == cudaSuccess)
		err = cudaDeviceSynchronize();
	if (err != cudaSuccess)
		printf("kernel: child kernel2 failed: %s\n", cudaGetErrorString(err));
}

/* Child kernel of the by-value experiment: prints the three coordinates of the
 * Cartesian it received as a copy. */
__global__ void test2(Cartesian titi)
{
	const double	x = titi.m_X;
	const double	y = titi.m_Y;
	const double	z = titi.m_Z;

	printf("fils titi(%f, %f, %f)\n", x, y, z);
}

/* Device helper: returns a temporary Cartesian built from (1, 1, 1). */
__device__ Cartesian get_momo(void)
{
	return Cartesian(1.0, 1.0, 1.0);
}

/*
 * Parent kernel of the by-value experiment: prints its own copy `titi`, then
 * launches test2 with a locally built Cartesian passed by value and waits for
 * it.
 *
 * The child launch and the synchronisation are both checked, so a failure is
 * reported instead of being silently ignored.
 *
 * NOTE(review): device-side cudaDeviceSynchronize() is deprecated in recent
 * CUDA toolkits -- confirm against the toolkit version in use.
 */
__global__ void test(Cartesian titi)
{
	Cartesian		momo(1, 2, 6);
	cudaError_t		err;

	/* The initial (1, 2, 6) value is replaced right away by the helper's result. */
	momo = get_momo();
	printf("pere titi(%f, %f, %f)\n", titi.m_X, titi.m_Y, titi.m_Z);
	/* Only changes this kernel's private copy; the child receives momo, not titi. */
	titi.set(1, 2, 3);
	test2 << < 1, 1 >> > (momo);
	/* A failed launch is only reported through cudaGetLastError(). */
	err = cudaGetLastError();
	if (err == cudaSuccess)
		err = cudaDeviceSynchronize();
	if (err != cudaSuccess)
		printf("test: child test2 failed: %s\n", cudaGetErrorString(err));
}

/* Reports a failed CUDA runtime call on stderr; returns true when `err` is cudaSuccess. */
static bool	cudaCallOk(cudaError_t err, const char *what)
{
	if (err == cudaSuccess)
		return (true);
	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
	return (false);
}

/*
 * Host driver of the experiment: builds a Cartesian, copies it into a device
 * allocation, launches `test` with the host object passed by value and waits
 * for the device to finish.
 *
 * Every CUDA call and the kernel launch are checked, and the device allocation
 * is released before returning. Returns 0 on success and 1 if any CUDA call
 * failed ("fin" is printed only on success).
 */
int		main(void)
{
	Cartesian	titi, *toto = 0;
	int			status = 1;

	titi.m_X = 1;
	titi.m_Y = 1;
	titi.m_Z = 2;
	if (cudaCallOk(cudaMalloc((void **)&toto, sizeof(Cartesian)), "cudaMalloc")
		&& cudaCallOk(cudaMemcpy(toto, &titi, sizeof(Cartesian), cudaMemcpyHostToDevice), "cudaMemcpy"))
	{
		test << < 1, 1 >> > (titi);
		/* Launch errors surface through cudaGetLastError(), execution errors at the sync. */
		if (cudaCallOk(cudaGetLastError(), "test launch")
			&& cudaCallOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
		{
			printf("fin\n");
			status = 0;
		}
	}
	/* cudaFree accepts a null pointer, so this is safe even if cudaMalloc failed. */
	cudaFree(toto);
	return (status);
}

Cartesian type : same definition.

For the constructor / set function :

/* Default constructor: zeroes the three coordinates and sets m_m to -1, the
 * same value set() resets it to, so the member is never left uninitialised. */
__host__ __device__		Cartesian::Cartesian(void) : m_X(0), m_Y(0), m_Z(0), m_m(-1) {}

/* Coordinate constructor: stores x, y and z and sets m_m to -1, the same value
 * set() resets it to, so the member is never left uninitialised. */
__host__ __device__		Cartesian::Cartesian(double x, double y, double z) : m_X(x), m_Y(y), m_Z(z), m_m(-1) {}

/*
 * Replaces the three coordinates with x, y and z and resets m_m to -1.
 * NOTE(review): -1 presumably marks a cached magnitude as stale -- confirm
 * against magnitude().
 */
__host__ __device__	void	Cartesian::set(double x, double y, double z)
{
	this->m_m = -1.0;
	this->m_Z = z;
	this->m_Y = y;
	this->m_X = x;
}

But that code works perfectly. If someone has an idea, or needs more information/code/anything else, tell me!

Thanks for your help !

Robert_Crovella · August 29, 2016, 7:08pm

As suggested to you on your cross posting on stack overflow, the error message is quite clear.

Modify your kernel parameters and arguments so that you are not passing objects with non-default constructors.

In some of the examples you have shown, you are passing the object by pointer. This method should be workable.