CUDA vector type reference

I overloaded an operator for CUDA vector types such as “float4” like this:

// Declares an operator+ overload for float4 that is callable from device code.
// Both operands are taken by non-const reference; the result is returned by value.
__device__ float4 operator+(float4&,float4&)

It works well.

But it produced an error when I changed it to this:

// Generic wrapper class holding a single value of type T.
// Only member declarations appear here; no definitions for the primary
// template are visible, and part of the class body is elided ("...").
template<typename T>

class CUvector{

public:

    // Default constructor, declared for both host and device code.
    __device__ __host__ CUvector();

    // Constructs from a T passed by non-const reference.
    __device__ __host__  CUvector(T&);

    // Copy constructor; the source object is taken by non-const reference.
    __device__  __host__ CUvector(CUvector&);

public:

    // Compound addition with another CUvector; declared to return a
    // reference to a CUvector.
    __device__ __host__ CUvector& operator+=(CUvector&);

    ...

private:

    // The wrapped value.
    T iVECTOR;

};

// Specialization of CUvector for CUDA's float4: stores one float4 and offers
// component-wise operations that can be called from host and device code.
template<>
class CUvector<float4>{
public:
    // Default constructor: sets all four components to zero.
    __device__ __host__ CUvector(){
        // float literals avoid an implicit double -> float conversion.
        iVECTOR.x=0.0f;
        iVECTOR.y=0.0f;
        iVECTOR.z=0.0f;
        iVECTOR.w=0.0f;
    }

    // Wraps an existing float4. The argument is taken by const reference so
    // that temporaries can be passed as well as named variables.
    __device__ __host__ CUvector(const float4& v){
        iVECTOR.x=v.x;
        iVECTOR.y=v.y;
        iVECTOR.z=v.z;
        iVECTOR.w=v.w;
    }

    // Copy constructor. Reads the source's private member directly, which is
    // permitted because both objects are of the same class; the const
    // reference also allows copying from temporaries.
    __device__ __host__ CUvector(const CUvector<float4>& v){
        iVECTOR.x=v.iVECTOR.x;
        iVECTOR.y=v.iVECTOR.y;
        iVECTOR.z=v.iVECTOR.z;
        iVECTOR.w=v.iVECTOR.w;
    }

    // Component-wise compound addition: adds v's components to this object's.
    // Returns a reference to this object so that calls can be chained.
    __device__ __host__ CUvector<float4>& operator+=(const CUvector<float4>& v)
    {
        iVECTOR.x+=v.iVECTOR.x;
        iVECTOR.y+=v.iVECTOR.y;
        iVECTOR.z+=v.iVECTOR.z;
        iVECTOR.w+=v.iVECTOR.w;

        // The return type is CUvector<float4>&, so the object itself must be
        // returned; returning the float4 member does not match that type and
        // fails to compile.
        return *this;
    }

    // ... (further members elided in the original post)

private:
    // The wrapped float4 value.
    float4 iVECTOR;
};

Are references to class types, or the “this” pointer, supported in CUDA?

Thanks!

Hi,
Be aware that the current versions of CUDA only support a very small subset of C++ features for device code… I’ve also used operator overloading and templates within CUDA kernels, but you have to stick with the subset of functionality that’s currently implemented.

Cheers,
John Stone

Thanks, tachyon_john! It seems that CUDA does not support the this-pointer.

I changed the code to the version below and it was OK:

// Primary CUvector template (device-only variant): wraps one value of type T.
// Members are only declared in this class body.
template<typename T>
class CUvector{
public:
    // Default constructor.
    __device__ CUvector();

    // Builds a CUvector from a T given by non-const reference.
    __device__ CUvector(T& value);

    // Copy constructor; the source is given by non-const reference.
    __device__ CUvector(CUvector<T>& other);

    // Binary addition declared as a friend so it can reach the private member
    // of both operands; the result is returned by value.
    __device__ friend CUvector<T> operator+(CUvector<T>& lhs,CUvector<T>& rhs);

private:
    // The wrapped value.
    T iVECTOR;
};

// Specialization of CUvector for CUDA's float4 (device-only variant): stores
// one float4 and provides component-wise addition through a friend operator+.
template<>
class CUvector<float4>{
public:
    // Default constructor: sets all four components to zero.
    __device__ CUvector(){
        // float literals avoid an implicit double -> float conversion.
        iVECTOR.x=0.0f;
        iVECTOR.y=0.0f;
        iVECTOR.z=0.0f;
        iVECTOR.w=0.0f;
    }

    // Wraps an existing float4. The argument is taken by const reference so
    // that temporaries can be passed as well as named variables.
    __device__ CUvector(const float4& v){
        iVECTOR.x=v.x;
        iVECTOR.y=v.y;
        iVECTOR.z=v.z;
        iVECTOR.w=v.w;
    }

    // Copy constructor. Taking a const reference lets the result of
    // operator+ (a temporary) be copied, e.g. `CUvector<float4> c = a + b;`.
    __device__ CUvector(const CUvector<float4>& v){
        iVECTOR.x=v.iVECTOR.x;
        iVECTOR.y=v.iVECTOR.y;
        iVECTOR.z=v.iVECTOR.z;
        iVECTOR.w=v.iVECTOR.w;
    }

    // Component-wise sum of v0 and v1, returned by value.
    // Operands are const references so that neither is modified and so that
    // expressions such as `a + b + c` compile (the intermediate `a + b` is a
    // temporary, which cannot bind to a non-const reference).
    __device__ friend CUvector<float4> operator+(const CUvector<float4>& v0,const CUvector<float4>& v1)
    {
        CUvector<float4> sum;
        sum.iVECTOR.x=v0.iVECTOR.x+v1.iVECTOR.x;
        sum.iVECTOR.y=v0.iVECTOR.y+v1.iVECTOR.y;
        sum.iVECTOR.z=v0.iVECTOR.z+v1.iVECTOR.z;
        sum.iVECTOR.w=v0.iVECTOR.w+v1.iVECTOR.w;
        return sum;
    }

private:
    // The wrapped float4 value.
    float4 iVECTOR;
};