I overload the operater of the CUDA types such as “float4” as this:
__device__ float4 operator+(float4&,float4&)
It works well.
but it errored when i changed to this:
template<typename T>
class CUvector{
public:
__device__ __host__ CUvector();
__device__ __host__ CUvector(T&);
__device__ __host__ CUvector(CUvector&);
public:
__device__ __host__ CUvector& operator+=(CUvector&);
...
private:
T iVECTOR;
};
template<>
class CUvector<float4>{
public:
__device__ __host__ CUvector(){
iVECTOR.x=0.0;
iVECTOR.y=0.0;
iVECTOR.z=0.0;
iVECTOR.w=0.0;
}
__device__ __host__ CUvector(float4& v){
iVECTOR.x=v.x;
iVECTOR.y=v.y;
iVECTOR.z=v.z;
iVECTOR.w=v.w;
}
__device__ __host__ CUvector(CUvector<float4>& v){
iVECTOR.x=v.kernel().x;
iVECTOR.y=v.kernel().y;
iVECTOR.z=v.kernel().z;
iVECTOR.w=v.kernel().w;
}
public:
__device__ __host__ CUvector<float4>& operator+=(CUvector<float4>& v)
{
iVECTOR.x+=v.kernel().x;
iVECTOR.y+=v.kernel().y;
iVECTOR.z+=v.kernel().z;
iVECTOR.w+=v.kernel().w;
return iVECTOR;
}
...
private:
float4 iVECTOR;
};
Wether the type reference or “this pointer” supported in CUDA?
Thanks!