Having seen recommendations that performance will be better when using RTP_CONTEXT_TYPE_CUDA and RTP_BUFFER_TYPE_CUDA_LINEAR, I decided to try it out. But I only get crashes, and the SDK/prime*/ examples were of no help because they are all CPU/HOST based. See complete code below that involves just 7 triangles and the crash that results. Any ideas what I am doing wrong? Thanks.
#include <cstdio>
#include <exception>

#include <cuda_runtime.h>
#include <optix_prime/optix_primepp.h>
int main(int argc, const char *argv)
{
int3 tri[7] = {make_int3(0, 1, 2),
make_int3(0, 2, 3),
make_int3(4, 0, 3),
make_int3(0, 4, 5),
make_int3(0, 5, 1),
make_int3(3, 2, 6),
make_int3(1, 5, 6),
};
float3 vert[7] = {make_float3(0, 0.127, 0),
make_float3(0.127, 0.127, 0),
make_float3(0.127, 0, 0),
make_float3(0, 0, 0),
make_float3(0, 0.127, 0.127),
make_float3(0.127, 0.127, 0.127),
make_float3(0.127, 0, 0.127),
};
RTPcontexttype contextType = RTP_CONTEXT_TYPE_CUDA;
optix::prime::Context context = optix::prime::Context::create(contextType);
optix::prime::Model model = context->createModel();
int numTri = 7;
int numVert = 7;
void *cudaTri = NULL;
cudaMalloc((void **) &cudaTri, sizeof(int3) * numTri);
cudaMemcpy(cudaTri, &tri[0], sizeof(int3) * numTri, cudaMemcpyHostToDevice);
void *cudaVert = NULL;
cudaMalloc((void **) &cudaVert, sizeof(float3) * numVert);
cudaMemcpy(cudaVert, &vert[0], sizeof(float3) * numVert, cudaMemcpyHostToDevice);
model->setTriangles(numTri, RTP_BUFFER_TYPE_CUDA_LINEAR, cudaTri,
numVert, RTP_BUFFER_TYPE_CUDA_LINEAR, cudaVert);
model->update(0);
return 0;
}
libc++abi.dylib: terminating with uncaught exception of type optix::prime::Exception: Function “RTPresult _rtpModelUpdate(RTPmodel, unsigned int)” caught exception: Encountered a CUDA error: cudaEventSynchronize( m_eventEnd ) returned (4): unspecified launch failure
Abort