No device side linker? How do you include other files?

Hello,

I just found out that NVCC doesn’t have a device-side linker… so if I have a function f() in file a.cu and I want files x.cu, y.cu and z.cu to call that device-side function in a.cu, how do I do that? Including the .cu instead of the .h will just give me multiple declaration errors, and I can’t use it the normal way. What do I do?

Does the “extern” specifier work?

I added that, but I may be doing it wrong. Anyway, here’s the code:

Code that needs it:

// Kernel: one thread per grid vertex (x, y), addressed with a 2D launch
// (blockIdx/blockDim/threadIdx in both x and y).
//
// For its vertex the thread collects up to six indices into `list` -- the
// triangle corners that share the vertex, in the order
// {SW_A, SW_B, SE, NE_A, NE_B, NW} -- and writes `height` into the mB
// component of each of them. A slot stays -1 when the vertex lies on a grid
// border and the corresponding triangle does not exist.
//
// Parameters:
//   list - triangle vertex list whose mB components are updated
//   norm - not used by the code below
//   size - not used by the code below
//   w, d - last valid vertex index in x and in y; the border checks treat
//          x == w and y == d as the far edges
//   h    - not used by the code below
//
// NOTE(review): `height` is never assigned and `vec` is never filled in
// before cu_vecNorm(&vec); the height calculation is still missing, so the
// values written to `list` are indeterminate until that is implemented.
__global__ void cu_calcWaveHeight(TriVal *list, TriVal *norm, int size,
                                int w, int d, float h)
{
   int x = (blockIdx.x*blockDim.x) + threadIdx.x;
   int y = (blockIdx.y*blockDim.y) + threadIdx.y;

   float height;
   TriVal vec;

   //need to use vector function

   //{SW_A, SW_B, SE, NE_A, NE_B, NW}
   int triVec[6] = {-1, -1, -1, -1, -1, -1};

   //threads launched past the far edges own no vertex; without this guard
   //they would pass every "!=" border test below and write to entries that
   //belong to other vertices
   if((x > w) || (y > d))
   {
      return;
   }

   //find given index(s) per vertex

   //SW A+B pt
   if((x != w) && (y != d))
   {
      triVec[0] = (6 * x) + (6 * w * y);
      triVec[1] = triVec[0] + 5;
   }

   //SE
   if((x != 0) && (y != d))
   {
      triVec[2] = (6 * x) + (6 * w * y) + 1;
   }

   //NE A+B
   if((x != 0) && (y != 0))
   {
      triVec[3] = (6 * x) + (6 * w * y) + 2;
      triVec[4] = triVec[3] + 1;
   }

   //NW -- belongs in slot 5; storing it in slot 4 would overwrite NE_B and
   //leave the NW entry permanently at -1
   if((x != w) && (y != 0))
   {
      triVec[5] = (6 * x) + (6 * w * y) + 4;
   }

   //calculate height
   cu_vecNorm(&vec);

   //apply to vertex
   if(triVec[0] >= 0)
   {
      list[triVec[0]].mB = height;
      list[triVec[1]].mB = height;
   }

   if(triVec[2] >= 0)
   {
      list[triVec[2]].mB = height;
   }

   if(triVec[3] >= 0)
   {
      list[triVec[3]].mB = height;
      list[triVec[4]].mB = height;
   }

   if(triVec[5] >= 0)
   {
      list[triVec[5]].mB = height;
   }
}

code I need to include:

cudaVec.h:

// Declarations of the device-side vector helpers implemented in cudaVec.cu.
#ifndef CUDA_VEC_H

#define CUDA_VEC_H

// NOTE(review): presumably provides the TriVal and QuadVal types used below -- confirm.
#include "../Types.h"

// Returns the magnitude sqrt(mA^2 + mB^2 + mC^2) of *vec.
__device__ float cu_vecMagn(TriVal *vec);

// Computes the same magnitude from mA, mB, mC and stores it in vec->mD.
__device__ void cu_vecMagn(QuadVal *vec); //fourth value is magnitude

// Divides mA, mB and mC of *vec by the vector's magnitude, in place.
// The vector is left unchanged when the magnitude is not greater than zero.
__device__ void cu_vecNorm(TriVal *vec);

// Writes the cross product a x b into *out.
__device__ void cu_vecCross(TriVal *a, TriVal *b, TriVal *out);

// Returns the dot product of *a and *b over the mA, mB, mC components.
__device__ float cu_vecDot(TriVal *a, TriVal *b);

#endif

cudaVec.cu:

#include "cudaVec.h"

#include <stdlib.h>

#ifndef CUDA_VEC_CU

#define CUDA_VEC_CU

// Returns the Euclidean length of the 3-component vector *vec:
// sqrt(mA^2 + mB^2 + mC^2).
//
// vec is only read. NOTE(review): assumes the components are float --
// confirm against Types.h.
__device__ float cu_vecMagn(TriVal *vec)
{
   // Square by plain multiplication and call the single-precision sqrtf
   // explicitly: no general-purpose pow() call is needed for x*x, and no
   // double-precision sqrt/pow overload can be selected by accident.
   return sqrtf((vec->mA * vec->mA) +
                (vec->mB * vec->mB) +
                (vec->mC * vec->mC));
}

// Computes the Euclidean length sqrt(mA^2 + mB^2 + mC^2) of the first three
// components of *vec and stores it in the fourth component, vec->mD.
//
// mA, mB and mC are left untouched. NOTE(review): assumes the components
// are float -- confirm against Types.h.
__device__ void cu_vecMagn(QuadVal *vec)
{
   // Plain multiplication plus explicit single-precision sqrtf: avoids a
   // general pow() call for squaring and rules out double-precision
   // sqrt/pow overloads.
   vec->mD = sqrtf((vec->mA * vec->mA) +
                   (vec->mB * vec->mB) +
                   (vec->mC * vec->mC));
}

// Normalizes *vec in place: divides mA, mB and mC by the vector's length
// sqrt(mA^2 + mB^2 + mC^2).
//
// When the length is not greater than zero (a zero vector, or a NaN
// length) the vector is left unchanged, so no division by zero occurs.
// NOTE(review): assumes the components are float -- confirm against Types.h.
__device__ void cu_vecNorm(TriVal *vec)
{
   // Plain multiplication plus explicit single-precision sqrtf: avoids a
   // general pow() call for squaring and rules out double-precision
   // sqrt/pow overloads.
   float mag = sqrtf((vec->mA * vec->mA) +
                     (vec->mB * vec->mB) +
                     (vec->mC * vec->mC));

   // Float literal keeps the comparison in single precision.
   if(mag > 0.0f)
   {
      vec->mA /= mag;
      vec->mB /= mag;
      vec->mC /= mag;
   }
}

// Writes the cross product a x b into the mA, mB, mC components of *out:
//   out.mA = a.mB*b.mC - a.mC*b.mB
//   out.mB = a.mC*b.mA - a.mA*b.mC
//   out.mC = a.mA*b.mB - a.mB*b.mA
//
// out may point to the same object as a or b (e.g. cu_vecCross(&v, &n, &v)).
__device__ void cu_vecCross(TriVal *a, TriVal *b, TriVal *out)
{
   // Snapshot both operands before the first store. Writing out->mA
   // straight away would change an operand that aliases `out`, and the
   // remaining two components would then be computed from the new value.
   const TriVal lhs = *a;
   const TriVal rhs = *b;

   out->mA = (lhs.mB*rhs.mC) - (lhs.mC*rhs.mB);
   out->mB = (lhs.mC*rhs.mA) - (lhs.mA*rhs.mC);
   out->mC = (lhs.mA*rhs.mB) - (lhs.mB*rhs.mA);
}

// Returns the dot product of *a and *b, i.e. the sum of the products of
// their mA, mB and mC components. Neither operand is modified.
__device__ float cu_vecDot(TriVal *a, TriVal *b)
{
   const TriVal &lhs = *a;
   const TriVal &rhs = *b;

   return lhs.mA*rhs.mA + lhs.mB*rhs.mB + lhs.mC*rhs.mC;
}

#endif

Given the above, what is the “proper” way to do it?

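The absence of a device-side linker implies that device functions in separate compilation units cannot call each other. Functions that are called from various source files can be placed in files that are incorporated via #include, for example as header files. This is the way CUDA implements the standard math library functions, by the way: as a collection of header files. For the code above, that means moving the function bodies from cudaVec.cu into cudaVec.h (marking them `static` or `inline` so that each compilation unit can carry its own copy) and including that header in every .cu file whose kernels call them.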