Well… joy was short again. Simply shooting bunch of rays reflected back to origin worked. But when I put back cosine weighted hemisphere sampling it blew up again. Computation of values and debug output works (with issues explained later), but when they are assigned to PRD it hangs (error 702). I verified that vector components are valid numbers and length equal to 1.
I changed sampling to match code use in OptiX samples - hangs (error 702). Works when simply negate ray.direction
newDir = - oldDir
or reflect
newDir = 2*normal + oldDir
- In all cases vectors are normalized. How could this be an issue ?
Closes hit and sampling code:
RT_PROGRAM void closestHitLightDbg()
{
lightPrd.depth++;
float3 worldShadingNormal = normalize( rtTransformNormal( RT_OBJECT_TO_WORLD, shadingNormal ) );
float3 hitPoint = ray.origin + tHit*ray.direction;
// Russian Roulette
float rrSample = rnd(lightPrd.seed); // using random numbers same as path_trace sample
if (0.5f < rrSample)
{
lightPrd.done = 1;
return;
}
float2 sample = make_float2(rnd(lightPrd.seed),rnd(lightPrd.seed));
float3 dir = sampleUnitHemisphereCos(worldShadingNormal, sample); // doesn't work
dir = sampleUnitHemisphereCosOptix(worldShadingNormal, sample); // doesn't work
dir = normalize(2*worldShadingNormal + ray.direction); // works
dir = -ray.direction; // works
lightPrd.direction = normalize(dir);
lightPrd.origin = hitPoint;
}
// --------- Original hemisphere sampling version
static __device__ __inline__ optix::float3 sampleUnitHemisphereCos(
const optix::float3 & normal, const optix::float2& sample)
{
using namespace optix;
float theta = acosf(sqrtf(sample.x));
float phi = 2.0f * M_PIf *sample.y;
float xs = sinf(theta) * cosf(phi);
float ys = cosf(theta);
float zs = sinf(theta) * sinf(phi);
float3 U, V;
createCoordinateSystem(normal, U, V);
return optix::normalize(xs*U + ys*normal + zs*V);
}
// Create ONB from normalized normal (code: Physically Based Rendering, Pharr & Humphreys pg. 63)
static __device__ __inline__ void createCoordinateSystem(
const optix::float3& N, optix::float3& U, optix::float3& V )
{
using namespace optix;
if(fabs(N.x) > fabs(N.y))
{
float invLength = 1.f/sqrtf(N.x*N.x + N.z*N.z);
U = make_float3(-N.z*invLength, 0.f, N.x*invLength);
}
else
{
float invLength = 1.f/sqrtf(N.y*N.y + N.z*N.z);
U = make_float3(0.f, N.z*invLength, -N.y*invLength);
}
V = cross(N, U);
}
// ------- Optix samples based hemisphere sampling
float3 __device__ __inline__ sampleUnitHemisphereCosOptix(float3 normal, float2 rnd)
{
float3 p;
cosine_sample_hemisphere(rnd.x, rnd.y, p);
float3 v1, v2;
createONB(normal, v1, v2);
return v1 * p.x + v2 * p.y + normal * p.z;
}
// Create ONB from normalaized vector
static __device__ __inline__ void createONB(
const optix::float3& n, optix::float3& U, optix::float3& V)
{
using namespace optix;
U = cross( n, make_float3( 0.0f, 1.0f, 0.0f ) );
if ( dot(U, U) < 1.e-3f )
U = cross( n, make_float3( 1.0f, 0.0f, 0.0f ) );
U = normalize( U );
V = cross( n, U );
}
Then there also still are issues with rtPrintf(), independent from issues with ray dirrections. The regular printf() seems to work fine always. It somehow seems to depend on the way fields of launchIndex are used. Below I identified all the funky cases when rtPrinft causes hang or format exception. There are four cases and their behavior depending if line marked #2 is commented or not.
RT_PROGRAM void generatorDbg()
{
SubpathPRD lightPrd;
lightPrd.depth = 0;
lightPrd.done = 0;
lightPrd.seed = tea<16>(1200*launchIndex.y+launchIndex.x, 1);
float3 rayOrigin = make_float3( 343.0f, 548.0f, 227.0f);
float3 rayDirection = make_float3( .0f, -1.0f, .0f);
Ray lightRay = Ray(rayOrigin, rayDirection, RayType::LIGHT_VCM, 0.0001, RT_DEFAULT_MAX );
int a = launchIndex.x; // #1 using launchIndex.x in the loop doesn't produce same effect
for (int i=0;;i++)
{
// Example 1
// Without #2 - output in first iteration, then hang
// With #2 - works
rtPrintf("Output\n");
// Example 2
// Without #2 - output in first iteration, then "Error ir rtPrintf format string"
// With #2 - works
if (launchIndex.x == 0 && launchIndex.y == 0)
{
rtPrintf("Outputs\n");
}
// Example 3
// Without #2 - works
// With #2 - works
//if (launchIndex.x == 0 && launchIndex.y == 0)
{
rtPrintf("i %d", launchIndex.x);
rtPrintf("Outputs\n");
}
rtTrace( sceneRootObject, lightRay, lightPrd );
if (lightPrd.done)
{
lightPrd.done += a; // #2
break;
}
lightRay.origin = lightPrd.origin;
lightRay.direction = lightPrd.direction;
// Example 4
// Without #2 - output in first iteration, then "Error ir rtPrintf format string"
// With #2 - works
rtPrintf("Output\n");
}
}
I couldn’t reproduce this in OptiX sample projects. User GL_Kyle said in the thread below he has seen rtPrintf to cover up unrelated memory corruption issues. He didn’t specify though if he meant host or device memory.
https://devtalk.nvidia.com/default/topic/734914/optix/optix-bug-crash-with-cuda-error-kernel-ret-700-when-not-rtprinting-anything-small-demo-code-/
-
How that could happen before the first kernel launch? Setting some context buffers/variables multiple times ?
-
I’d really appreciate if someone could look through context manipulation code below and see if there is anything suspicious.
I collected all context affecting code except (anyhit program loading for given material / render methods). Everything is common to all methods, except parts marked METHOD SPECIFIC (less than 10 lines). Also call to initializeRandomStates() that initializes curandStates in GPU memory is commented out eliminate possibility it corrupts something, I changed kernels to use simple random numbers as in kernels.
Cornell::getSceneRootGroup() creates the scene geometry exactly the same as path_trace sample hence most of it is omitted, only the last part where GeometryGroup is added as a child to Group which is then returned and used as top level object.
void OptixRenderer::initialize(const ComputeDevice & device, RenderMethod::E renderMethod)
{
initDevice(device); // Sets OptiX device, simple hence won't include
m_context["localIterationNumber"]->setUint(0); // context created in OptixRenderer constructor
// An empty scene root node
optix::Group group = m_context->createGroup();
m_context["sceneRootObject"]->set(group);
// Output Buffer
m_outputBuffer = m_context->createBuffer( RT_BUFFER_OUTPUT, RT_FORMAT_FLOAT3, m_width, m_height );
m_context["outputBuffer"]->set(m_outputBuffer);
// Random state buffer (must be large enough to give states to both photons and image pixels)
m_randomStatesBuffer = m_context->createBuffer(RT_BUFFER_INPUT_OUTPUT|RT_BUFFER_GPU_LOCAL);
m_randomStatesBuffer->setFormat( RT_FORMAT_USER );
m_randomStatesBuffer->setElementSize( sizeof( RandomState ) ); // typedef curandState RandomState
m_randomStatesBuffer->setSize( PHOTON_LAUNCH_WIDTH, PHOTON_LAUNCH_HEIGHT ); // use common size for all render methods
m_context["randomStates"]->set(m_randomStatesBuffer);
// Light sources buffer
m_lightBuffer = m_context->createBuffer(RT_BUFFER_INPUT);
m_lightBuffer->setFormat(RT_FORMAT_USER);
m_lightBuffer->setElementSize(sizeof(Light));
m_lightBuffer->setSize(1);
m_context["lights"]->set( m_lightBuffer );
m_context->setRayTypeCount(RayType::NUM_RAY_TYPES);
m_context->setStackSize(ENABLE_PARTICIPATING_MEDIA ? 3000 : 1596);
m_initialized = true;
m_contextCompiled = false;
initializeRenderMethod(renderMethod); // specific to each method
}
void OptixRenderer::initializeRenderMethod(RenderMethod::E renderMethod)
{
// METHOD SPECIFIC
Program generatorProgram = m_context->createProgramFromPTXFile( "LightPathGeneratorVCMDbg.cu.ptx", "generatorDbg" );
Program exceptionProgram = m_context->createProgramFromPTXFile( "LightPathGeneratorVCMDbg.cu.ptx", "exception" );
Program missProgram = m_context->createProgramFromPTXFile( "LightPathGeneratorVCMDbg.cu.ptx", "miss");
m_context->setRayGenerationProgram(OptixEntryPointVCM::LIGHT_ESTIMATE_PASS, generatorProgram);
m_context->setMissProgram(OptixEntryPointVCM::LIGHT_ESTIMATE_PASS, missProgram);
m_context->setExceptionProgram(OptixEntryPointVCM::LIGHT_ESTIMATE_PASS, exceptionProgram);
}
// Called after OptixRenderer::initialize()
void OptixRenderer::initScene( IScene & scene )
{
m_sceneRootGroup = scene.getSceneRootGroup(m_context, m_initializedRenderMethod); // code for Cornell::getSceneRootGroup below
m_context["sceneRootObject"]->set(m_sceneRootGroup);
// Add the lights from the scene to the light buffer
m_lightBuffer->setSize(lights.size());
Light* lights_host = (Light*)m_lightBuffer->map();
memcpy(lights_host, scene.getSceneLights().constData(), sizeof(Light)*lights.size());
m_lightBuffer->unmap();
m_contextCompiled = false;
}
optix::Group Cornell::getSceneRootGroup(optix::Context & context, RenderMethod::E renderMethod)
{
// GeometryGroup built exactly as in in path_trace sample except it is added
// as a single child to a Group which is then returned (by conventions that
// loaded scenes contains root group and multiple child GeometryGroups)
geometry_group->setAcceleration(context->createAcceleration("Trbvh", "Bvh"));
optix::Group gro = context->createGroup();
gro->setChildCount(1);
gro->setChild(0, geometry_group);
optix::Acceleration acceleration = context->createAcceleration("Trbvh", "Bvh");
gro->setAcceleration(acceleration);
return gro;
}
// called before rendering a frame if m_contextCompiled is false
void OptixRenderer::compile()
{
m_context->validate();
m_context->compile();
m_contextCompiled = true;
}
void OptixRenderer::renderNextIteration(unsigned long long localIterationNumber, const RenderServerRenderRequestDetails & details)
{
if( details.getWidth() != m_width || details.getHeight() != m_height)
{
this->resizeBuffers(details.getWidth(), details.getHeight());
}
// Omitted logging and branching to entry point for current render method
// METHOD SPECIFIC
m_context->launch( OptixEntryPointVCM::LIGHT_ESTIMATE_PASS,
static_cast<unsigned int>(SUBPATH_LENGHT_ESTIMATE_LAUNCH_WIDTH),
static_cast<unsigned int>(SUBPATH_LENGHT_ESTIMATE_LAUNCH_HEIGHT) );
}
void OptixRenderer::resizeBuffers(unsigned int width, unsigned int height)
{
m_outputBuffer->setSize( width, height );
m_randomStatesBuffer->setSize(max(PHOTON_LAUNCH_WIDTH, (unsigned int)1280),
max(PHOTON_LAUNCH_HEIGHT, (unsigned int)768));
// Calls cuda kernel and initializes curandStates in GPU memory,
// commented out to eliminate as possible cause of memory corruption.
// Using simple random numbers as in OptiX path_trace sample
//initializeRandomStates();
m_width = width;
m_height = height;
}
By the way this the renderer that I’m trying to extend:
I guess I’ll try to revert to Optix 3.0 that was used developing this project and see if that works better.
EDIT: Apparently it is not available for download. There seems to be fresh 3.6 release, will try that.