I would recommend crosschecking with the stable OptiX 3.9.0 release as well.
Rays missing triangles can simply happen due to the non-watertight default triangle intersection routines.
In that case your blue dots would appear on the edges of triangles.
If it’s not that, you might have some self-intersection problem. Hard to say without a reproducer.
If you’re under Windows, you might also want to limit the number of rays per launch so that a single launch stays below the WDDM timeout of 2 seconds for kernel drivers; exceeding it triggers Windows timeout detection and recovery (TDR), which restarts the display driver.
In case that is happening, there isn’t really a need to shoot many ambient occlusion rays per launch.
A progressive implementation approach for ambient occlusion is really simple. I have one in about 100-150 lines of CUDA code overall. I can post that when I’m back in the office.
It looks like this when putting the raygeneration, closesthit, and anyhit programs plus some of the headers into one file.
I have not tested it combined this way, so I recommend separating the programs again.
// Copyright NVIDIA Corporation 2013-2016
// TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
// *AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
// OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS
// BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES
// WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
// BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS)
// ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS
// BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
#include <optix.h>
#include <optixu/optixu_math_namespace.h>
//#include "rt_function.h"
#ifndef RT_FUNCTION
#define RT_FUNCTION __forceinline__ __device__
#endif
// #include "per_ray_data.h"
// #include "random_number_generators.h" // Implement your favorite RNG here.
// rng.seed(uint iteration, uint dimension, uint scramble) initializes it.
// Calling float rng() must return a float in range [0.0f, 1.0f).
// A simple Linear Congruential Generator as an example implementation.
// Note: an LCG does not refine nicely; there are much better samplers for AO.
class RNG_LCG
{
public:
  // Initializes the generator state from the iteration index and a per-pixel scramble.
  // The dimension argument is only part of the generic RNG interface and is ignored here.
  RT_FUNCTION void seed(unsigned int index, unsigned int dimension, unsigned int scramble)
  {
    m_state = index + scramble;
    //m_dimension = dimension;
    //m_scramble = scramble;
  }

  // Advances the state and returns a float in the range [0.0f, 1.0f).
  RT_FUNCTION float operator()()
  {
    m_state = m_state * 1664525u + 1013904223u;
    // Use only the upper 24 bits of the state; a 24-bit integer fits a float mantissa exactly.
    return float(m_state >> 8) / float(0x01000000u);
  }

private:
  unsigned int m_state; // Current LCG state.
  //unsigned int m_dimension; // Unused
  //unsigned int m_scramble; // Unused
};
typedef RNG_LCG RNG; // Active RNG implementation used by the payloads below.
// Payload of the primary (radiance) ray type 0.
struct PerRayData
{
RNG rng; // Sampler state, seeded per pixel in the ray generation program.
float radiance; // AO result: initialized to 1.0f for the miss case, overwritten by the closesthit program.
};
// Payload of the ambient occlusion (shadow) ray type 1.
struct PerRayData_shadow
{
float radiance; // 1.0f when unoccluded (miss), set to 0.0f by the anyhit program on any hit.
};
// Implement a function to do cosine weighted hemispherical sampling.
// There should be one in the path_tracer example.
#include "mappings_impl.h"
// Just because the rest of the framework was using RGBA32F.
// This should be just float in the AO case for better performance.
rtBuffer<float4, 2> sys_OutputBuffer; // RGBA32F accumulation target, written in the raygeneration program.
// Renderer system variables, set from the host side.
rtDeclareVariable(float3, sys_CameraPosition, , ); // Pinhole camera origin (primary ray origin).
rtDeclareVariable(float3, sys_CameraU, , ); // Camera basis vectors used to build primary ray directions.
rtDeclareVariable(float3, sys_CameraV, , );
rtDeclareVariable(float3, sys_CameraW, , );
rtDeclareVariable(rtObject, sys_TopObject, , ); // Scene root traversed by rtTrace().
rtDeclareVariable(float, sys_SceneEpsilon, , ); // tmin of the AO rays, avoids immediate self-intersection.
rtDeclareVariable(float, sys_Distance, , ); // tmax of the AO rays (user defined maximum AO distance).
rtDeclareVariable(unsigned int, sys_IterationIndex, , ); // Zero-based accumulation frame index.
rtDeclareVariable(uint2, launchIndex, rtLaunchIndex, ); // Current pixel coordinate of this launch.
rtDeclareVariable(uint2, launchDim, rtLaunchDim, ); // Launch (image) dimensions.
// Semantic variables provided by OptiX during ray traversal.
rtDeclareVariable(optix::Ray, theRay, rtCurrentRay, );
rtDeclareVariable(float, theIntersectionDistance, rtIntersectionDistance, );
rtDeclareVariable(PerRayData, thePrd, rtPayload, ); // Payload of ray type 0 (radiance).
rtDeclareVariable(PerRayData_shadow, thePrdShadow, rtPayload, ); // Payload of ray type 1 (shadow).
// Attributes set by the intersection program.
rtDeclareVariable(optix::float4, varGeometricNormal, attribute GEOMETRIC_NORMAL, );
//rtDeclareVariable(optix::float4, varNormal, attribute NORMAL, );
// Traces one primary (radiance) ray through the pinhole camera for the current
// launch index and returns the resulting AO value (always 0.0f or 1.0f).
RT_FUNCTION float integrator_ao(const unsigned int iterationIndex)
{
  PerRayData prd;
  // Seed arguments: index, dimension, scramble. The scramble hashes the linear pixel index.
  prd.rng.seed(iterationIndex, 0, hash(launchIndex.y * launchDim.x + launchIndex.x));
  prd.radiance = 1.0f; // Miss default; no miss program is required for this ray type.

  // Jittered sample position inside the current pixel, in [pixel, pixel + 1).
  const float2 jitter = make_float2(prd.rng(), prd.rng());
  const float2 pixel  = make_float2(launchIndex) + jitter;
  // Map to normalized device coordinates in [-1, 1] over the full output resolution.
  const float2 ndc = pixel / make_float2(sys_OutputBuffer.size()) * 2.0f - 1.0f;
  // Primary ray direction (== -wo for the primary rays).
  const float3 direction = optix::normalize(ndc.x * sys_CameraU + ndc.y * sys_CameraV + sys_CameraW);

  optix::Ray ray = optix::make_Ray(sys_CameraPosition, direction, 0, 0.0f, RT_DEFAULT_MAX); // Ray type 0 == radiance.
  rtTrace(sys_TopObject, ray, prd);

  return prd.radiance; // Either 0.0f (occluded) or 1.0f (unoccluded).
}
// Entry point for the pinhole camera with manual accumulation.
// (Not used on the VCA, which needs to generate full images per launch.)
RT_PROGRAM void raygeneration()
{
  const float radiance = integrator_ao(sys_IterationIndex);

  // NaN values would stick in the accumulator forever; drop them before they
  // can reach the output buffer.
  if (isnan(radiance))
  {
    return;
  }

  if (sys_IterationIndex == 0)
  {
    // The very first iteration fills the buffer. Doing this separately avoids
    // lerping against undefined destination contents (dst could be NaN).
    sys_OutputBuffer[launchIndex] = make_float4(radiance, radiance, radiance, 1.0f);
  }
  else
  {
    // Running average: blend the new sample into the accumulated result.
    const float previous = sys_OutputBuffer[launchIndex].x; // RGBA32F, but the result is greyscale so .x suffices.
    const float accu = optix::lerp(previous, radiance, 1.0f / (float) (sys_IterationIndex + 1));
    sys_OutputBuffer[launchIndex] = make_float4(accu, accu, accu, 1.0f);
  }
}
// Closest hit program for the radiance ray type 0.
// (This is the primary ray shot inside the ray generation program.)
RT_PROGRAM void closesthit()
{
  const float3 hitPoint = theRay.origin + theRay.direction * theIntersectionDistance;

  //const float3 normal = optix::normalize(rtTransformNormal(RT_OBJECT_TO_WORLD, make_float3(varNormal)));
  const float3 geometry_normal = optix::normalize(rtTransformNormal(RT_OBJECT_TO_WORLD, make_float3(varGeometricNormal)));

  // Cosine weighted direction in the hemisphere above the geometric normal.
  float3 wi;
  float pdf;
  Mappings<float3>::unitSquareToCosineHemisphere(thePrd.rng(), thePrd.rng(), geometry_normal, wi, pdf);

  PerRayData_shadow prdShadow;
  prdShadow.radiance = 1.0f; // Miss default; no miss program required for this ray type either.

  // Ambient occlusion ray with a user defined maximum distance to not get black results
  // in closed rooms. The default for sys_Distance is RT_DEFAULT_MAX, which works for outdoor scenes.
  optix::Ray ray = optix::make_Ray(hitPoint, wi, 1, sys_SceneEpsilon, sys_Distance); // Ray type 1 == shadow.
  rtTrace(sys_TopObject, ray, prdShadow);

  thePrd.radiance = prdShadow.radiance;
}
// For the ambient occlusion (shadow) ray type 1. (This is the secondary ray shot inside the radiance ray's closesthit program.)
// Any hit along the shadow ray means the sample point is occluded in that direction.
RT_PROGRAM void anyhit_shadow()
{
thePrdShadow.radiance = 0.0f; // Occluded: contributes no ambient light.
rtTerminateRay(); // The first hit is sufficient; stop traversal immediately.
}
Launch this multiple times with increasing sys_IterationIndex starting at 0 to get an arbitrarily nice ambient occlusion result:
for (unsigned int i = 0; i < your_number_of_AO_rays_per_pixel; ++i)
{
  m_context["sys_IterationIndex"]->setUint(m_iterationIndex); // Iteration index is zero-based!
  m_context->launch(0, m_width, m_height);
  m_iterationIndex++;
}
Mind that above is just rendering with ambient occlusion.
If you want to bake it, you’d just need to change the raygeneration program to start from uniformly distributed sample positions on your geometry and shoot ambient occlusion rays into the upper hemisphere.
That is, move the code from the closesthit program into the raygeneration program and use only one raytype, the ambient occlusion (shadow) ray with the simple anyhit program. Can’t get any simpler than that.
When using the shading normal to define the upper hemisphere, make sure that the rays do not penetrate the geometry by checking against the geometric normal as well. This will happen with coarse geometry and smoothed shading normals or bump maps.
What you do with the results is your responsibility.
https://github.com/nvpro-samples/optix_prime_baking contains some vertex baking algorithms.