Hi,
Be aware that the current versions of CUDA only support a very small subset of C++ features for device code... I've also used operator overloading and templates within CUDA kernels, but you have to stick with the subset of functionality that's currently implemented.
Be aware that the current versions of CUDA only support a very small subset of C++ features for device code... I've also used operator overloading and templates within CUDA kernels, but you have to stick with the subset of functionality that's currently implemented.
Thanks tachyon_john! It seems that CUDA does not support the this-pointer.
I changed the code to the version below and it was OK:
[code]
// Generic device-side wrapper around a single value of type T.
// Declarations only in this post; the definitions live elsewhere
// (e.g. the float4 specialization shown further down).
template<typename T>
class CUvector{
public:
// Default constructor (presumably zero-initializes — TODO confirm against definition).
__device__ CUvector();
// Construct from a raw T value.
__device__ CUvector(T&);
// Copy constructor.
__device__ CUvector(CUvector&);
// NOTE(review): a friend declared inside a class template declares a distinct
// NON-template friend per instantiation; the matching definition must be an
// ordinary (non-template) function, or name lookup / link errors follow.
__device__ friend CUvector operator+(CUvector&,CUvector&);
private:
T iVECTOR;
};
[/code]
[code]
// Non-member overload matching the friend declaration above; presumably adds
// two float4 values component-wise (definition not shown in the post).
__device__ float4 operator+(float4&,float4&)
[/code]
It works well.
but it produced an error when I changed it to this:
[code]
// Host+device variant of CUvector: same interface as the __device__-only
// version, but the members are also callable from host code.
// Declarations only; `...` marks members elided in the original post.
template<typename T>
class CUvector{
public:
__device__ __host__ CUvector();
__device__ __host__ CUvector(T&);
__device__ __host__ CUvector(CUvector&);
public:
// Compound add; by convention must return *this so calls can be chained.
__device__ __host__ CUvector& operator+=(CUvector&);
...
private:
T iVECTOR;
};
// Specialization of CUvector for CUDA's built-in float4 vector type.
// Every member is __device__ __host__ so the wrapper is usable from both
// host and device code.
template<>
class CUvector<float4>{
public:
    // Zero-initializes all four components (a raw float4 is not zeroed by default).
    __device__ __host__ CUvector(){
        iVECTOR.x=0.0f;  // 'f' suffix keeps the literals single precision on the device
        iVECTOR.y=0.0f;
        iVECTOR.z=0.0f;
        iVECTOR.w=0.0f;
    }
    // Wraps an existing float4 value (component-wise copy).
    __device__ __host__ CUvector(float4& v){
        iVECTOR.x=v.x;
        iVECTOR.y=v.y;
        iVECTOR.z=v.z;
        iVECTOR.w=v.w;
    }
    // Copy constructor. Members of the same class are directly accessible,
    // so we read v.iVECTOR instead of calling a kernel() accessor that is
    // never declared in this class (one source of the reported error).
    __device__ __host__ CUvector(CUvector<float4>& v){
        iVECTOR.x=v.iVECTOR.x;
        iVECTOR.y=v.iVECTOR.y;
        iVECTOR.z=v.iVECTOR.z;
        iVECTOR.w=v.iVECTOR.w;
    }
public:
    // Component-wise compound addition.
    // BUG FIX: must return *this (a CUvector<float4>&), NOT iVECTOR — the
    // member is a float4 and does not convert to the declared reference
    // return type, which is exactly the compile error the original code hit.
    __device__ __host__ CUvector<float4>& operator+=(CUvector<float4>& v)
    {
        iVECTOR.x+=v.iVECTOR.x;
        iVECTOR.y+=v.iVECTOR.y;
        iVECTOR.z+=v.iVECTOR.z;
        iVECTOR.w+=v.iVECTOR.w;
        return *this;
    }
    ...  // (remaining members elided in the original post)
private:
    float4 iVECTOR;
};
[/code]
Are reference types or the "this pointer" supported in CUDA?
Thanks!
It works well.
but it produced an error when I changed it to this:
Are reference types or the "this pointer" supported in CUDA?
Thanks!
Be aware that the current versions of CUDA only support a very small subset of C++ features for device code... I've also used operator overloading and templates within CUDA kernels, but you have to stick with the subset of functionality that's currently implemented.
Cheers,
John Stone
Be aware that the current versions of CUDA only support a very small subset of C++ features for device code... I've also used operator overloading and templates within CUDA kernels, but you have to stick with the subset of functionality that's currently implemented.
Cheers,
John Stone
I changed the code to the version below and it was OK:
[code]
// Generic device-side wrapper around a single value of type T
// (declarations only; the float4 specialization follows).
template<typename T>
class CUvector{
public:
// Default constructor (presumably zero-initializes — TODO confirm against definition).
__device__ CUvector();
// Construct from a raw T value.
__device__ CUvector(T&);
// Copy constructor.
__device__ CUvector(CUvector&);
// NOTE(review): a friend declared inside a class template is a distinct
// NON-template friend per instantiation; its definition must be an ordinary
// function (as done in the specialization below).
__device__ friend CUvector operator+(CUvector&,CUvector&);
private:
T iVECTOR;
};
// Device-only specialization of CUvector for the built-in float4 type.
template<>
class CUvector<float4>{
public:
    // Default constructor: clear all four components.
    __device__ CUvector(){
        iVECTOR.x = iVECTOR.y = iVECTOR.z = iVECTOR.w = 0.0;
    }
    // Wrap an existing float4 by copying it wholesale
    // (struct copy assignment is a member-wise copy of x, y, z, w).
    __device__ CUvector(float4& v){
        iVECTOR = v;
    }
    // Copy constructor: duplicate the wrapped value of the other vector.
    __device__ CUvector(CUvector<float4>& v){
        iVECTOR = v.iVECTOR;
    }
    // Component-wise addition: build the sum in a local float4, then wrap
    // it via the converting constructor above.
    __device__ friend CUvector<float4> operator+(CUvector<float4>& v0,CUvector<float4>& v1)
    {
        float4 sum;
        sum.x = v0.iVECTOR.x + v1.iVECTOR.x;
        sum.y = v0.iVECTOR.y + v1.iVECTOR.y;
        sum.z = v0.iVECTOR.z + v1.iVECTOR.z;
        sum.w = v0.iVECTOR.w + v1.iVECTOR.w;
        return CUvector<float4>(sum);
    }
private:
    float4 iVECTOR;
};
[/code]
I changed the code to the version below and it was OK: