Hello,
I am working on an application where I am using OptiX to perform many successive ray tracing operations against dynamic geometry. I have set up my group structure as follows:
One top group, with its own acceleration structure. It has several (~10) transform objects as child nodes, each of which contains a geometry group with its own acceleration structure. The geometry and texture information in those child nodes comes from an OptiXMesh loaded from a .obj file.
The behavior that I notice is a gradual slowdown of the application over many successive calls to rtTrace. The slowdown is roughly 50% over the course of 200 seconds where I rebuild the acceleration structure roughly ten times per second.
I suspect that the gradual slowdown is due to the increasing time needed to rebuild the acceleration structure, since I do not notice the slowdown if I never mark the top-level acceleration structure as dirty (but trace all the same rays I otherwise would).
Some other notes, or things I have explored while debugging:
- My OS is Ubuntu 14.04 64 bit. My OptiX version is 4.0.0
- My video card is K2100M. My driver is version 361.45.18.
- I never mark the lower level acceleration structures as dirty, since their geometry never changes
- The device and host memory usage of the program does not seem to be increasing (checked with nvidia-smi)
- My program uses ~500MiB of device memory (checked with nvidia-smi)
- I don’t think I have any memory leaks on the device or host (checked with valgrind and cuda-memcheck)
- My program spends ~90% of its time in “Megakernel_CUDA_0” according to nvprof
- The program slows down by a factor of ~4 over the course of 25 minutes.
- The slowdown does not seem to plateau even when run for a long time.
- My scene has a total of ~1 million polygons, split up over ~10 children.
- I have tried many combinations of acceleration structure builders and traversers, including attempting to leverage the refit functionality of the Bvh acceleration structure, but have not noticed any significant performance difference between any two combinations.
- I can run a very similar setup with OptiX Prime, and the slowdown does not occur.
I have copied and pasted some of my code below. I would be happy to create an OAC trace to email.
// Creates the root group of the scene graph and binds it to the context
// variables "top_object" and "top_shadower". No children are attached here.
void scene::createGeometry( )
{
    // Root node of the scene graph.
    top_group_ = context_->createGroup( );

    // Both context variables are bound to the same root group.
    context_["top_object"]->set( top_group_ );
    context_["top_shadower"]->set( top_group_ );
}
void scene::add_model( std::string& objfilename)
{
int model_id = models_.size();
models_.push_back(new model( objfilename, DefualtMat, accel_desc_, BlankTS, context_ , static_cast<GeometryGroup>(NULL), model_id));
}
void scene::set_model_transform(int model_id, const SimpleMatrix4x3& transform, const SimpleMatrix4x3& inv_transform)
{
float transform_matrix[4*4] = {transform.f0, transform.f1, transform.f2, transform.f3,
transform.f4, transform.f5, transform.f6, transform.f7,
transform.f8, transform.f9, transform.f10, transform.f11,
0.0f, 0.0f, 0.0f, 1.0f};
float inv_transform_matrix[4*4] = {inv_transform.f0, inv_transform.f1, inv_transform.f2, inv_transform.f3,
inv_transform.f4, inv_transform.f5, inv_transform.f6, inv_transform.f7,
inv_transform.f8, inv_transform.f9, inv_transform.f10, inv_transform.f11,
0.0f, 0.0f, 0.0f, 1.0f};
models_[model_id]->transform_->setMatrix(false, transform_matrix, inv_transform_matrix);
if(!top_group_->getAcceleration()->isDirty())
{
top_group_->getAcceleration()->markDirty();
}
}
// Runs one simulation pass: uploads the rays, sizes the hit buffer to the
// ray grid, traces, and copies the results out to the caller's arrays.
//
//   input_rays           rays to trace
//   output_hits          destination passed to copy_hits_out
//   output_hit_points    destination passed to copy_hits_out
//   num_horizontal_rays  ray-grid width, forwarded to every helper
//   num_vertical_rays    ray-grid height, forwarded to every helper
void scene::simulate( const PrimeRay* input_rays,
                      const HitInstanceColor* output_hits,
                      const float* output_hit_points,
                      unsigned long num_horizontal_rays,
                      unsigned long num_vertical_rays)
{
    // Stage inputs and make room for the results.
    set_rays( input_rays, num_horizontal_rays, num_vertical_rays );
    resizeHitsBuffer( num_horizontal_rays, num_vertical_rays );

    // Launch.
    trace();

    // Hand the results back to the caller.
    copy_hits_out( output_hits,
                   output_hit_points,
                   num_horizontal_rays,
                   num_vertical_rays );
}
void scene::finalize(void)
{
std::cout<<"\nFinalizing Scene";
unsigned int numModels = models_.size();
top_group_->setChildCount( numModels );
for( unsigned int i=0; i<numModels; i++ )
top_group_->setChild( i, models_[i]->transform_ );
top_group_->setAcceleration( context_->createAcceleration( "Bvh", "Bvh" ) );
rtAccelerationSetProperty( top_group_->getAcceleration()->get(), "refit", "1" );
rtAccelerationSetProperty( top_group_->getAcceleration()->get(), "refine", "0" );
context_->validate( );
context_->compile( );
}
void scene::initScene( void )
{
// Setup context
cudaDeviceReset();
context_->setRayTypeCount( 2 );
context_->setEntryPointCount( 1 );
context_->setStackSize( 1350 );
context_->setPrintEnabled( false );
context_[ "radiance_ray_type" ]->setUint( 0u );
context_[ "shadow_ray_type" ]->setUint( 1u );
context_[ "max_depth" ]->setInt( 1 );
accel_desc_.builder = "Sbvh";
accel_desc_.traverser = "Sbvh";
// Output buffer
hits_buffer_ = context_->createBuffer(RT_BUFFER_OUTPUT);
hits_buffer_->setFormat(RT_FORMAT_USER);
hits_buffer_->setElementSize(sizeof(HitInstanceColor));
hits_buffer_->setSize(RTsize(0),RTsize(0));
context_["hits_buffer"]->set( hits_buffer_);
// Output points buffer
hit_points_buffer_ = context_->createBuffer(RT_BUFFER_OUTPUT);
hit_points_buffer_->setFormat(RT_FORMAT_FLOAT);
hit_points_buffer_->setSize(RTsize(0));
context_["hit_points_buffer"]->set( hit_points_buffer_);
// Ray generation program
context_->setRayGenerationProgram( 0, context_->createProgramFromPTXFile( ptxpath( "avSim", "ray_sensor.cu" ), "ray_sensor" ) );
// Exception / miss programs
context_->setExceptionProgram( 0, context_->createProgramFromPTXFile( ptxpath( "avSim", "ray_sensor.cu" ), "exception" ) );
context_[ "bad_color" ]->setFloat( 0, 1.0f, 0 );
context_->setExceptionEnabled(RT_EXCEPTION_ALL ,false );
context_["envmap"]->setTextureSampler( loadTexture( context_, std::string( sutilSamplesDir() ) + "/envmaps/above_the_sea.ppm", make_float3(1.0f, 1.0f, 1.0f)) );
context_->setMissProgram( 0, context_->createProgramFromPTXFile( ptxpath( "avSim", "default_material.cu" ), "envmap_miss" ) );
context_[ "bg_color" ]->setFloat( BLUE_SKY );
rays_buffer_ = context_->createBuffer(RT_BUFFER_INPUT);
rays_buffer_->setFormat(RT_FORMAT_USER);
rays_buffer_->setElementSize(sizeof(PrimeRay));
rays_buffer_->setSize(RTsize(0),RTsize(0));
context_["rays"]->set(rays_buffer_);
float m[4*4] = {
1,0,0,0,
0,1,0,0,
0,0,1,0,
0,0,0,1
};
context_["sensor_to_world_transform"]->setMatrix4x4fv(false, m);
// Set up the default material
DefualtMat = context_->createMaterial( );
DefualtMat->setClosestHitProgram( 0, context_->createProgramFromPTXFile( ptxpath( "avSim", "default_material.cu" ), "closest_hit_radiance" ) );
DefualtMat->setAnyHitProgram( 1, context_->createProgramFromPTXFile( ptxpath( "avSim", "default_material.cu" ), "any_hit_shadow" ) );
DefualtMat["ambient_light_color"]->setFloat( BLUE_SKY );
DefualtMat["attenuation_color"]->setFloat( BLUE_SKY );
DefualtMat["attenuation_density"]->setFloat( -0.002f ); // Must be < 0.
DefualtMat["light_dir"]->setFloat( 0, 1.0f, 0 );
BlankTS = loadTexture( context_, "", make_float3( 1, 1, 1 ) );
// Set up geometry
createGeometry( );
}
// Builds one scene model: loads an OBJ mesh into a geometry group, applies
// per-instance material variables, tags every geometry instance with the
// model's ID, and wraps the group in a transform node.
//
//   objfilename       path of the OBJ file to load
//   matl              material handed to the mesh loader
//   accel_desc        acceleration descriptor handed to the mesh loader
//   projectedTexSamp  sampler bound to each instance's "caustic_map"
//   context           OptiX context used to create the nodes
//   geometry_group    group to load into; a null handle creates a new group
//   model_id          value stored in each instance's "geometryInstanceID"
model::model(std::string &objfilename,
             optix::Material matl,
             AccelDescriptor& accel_desc,
             optix::TextureSampler projectedTexSamp,
             optix::Context context,
             optix::GeometryGroup geometry_group,
             int model_id)
{
    // Reuse the caller's geometry group when one is supplied.
    if(geometry_group) geometry_group_ = geometry_group;
    else geometry_group_ = context->createGeometryGroup();

    OptiXMesh model_loader( context, geometry_group_, matl, accel_desc );

    // Identity matrix: used as the loading transform and as the initial
    // matrix (and inverse) of the transform node.
    float m[4*4] = {
        1,0,0,0,
        0,1,0,0,
        0,0,1,0,
        0,0,0,1
    };
    Matrix4x4 XForm( m );
    model_loader.setLoadingTransform( XForm );
    model_loader.loadBegin_Geometry( objfilename );
    model_loader.loadFinish_Materials();

    // Set the per-instance variables on EVERY geometry instance the loader
    // created. The model ID is set inside the loop: declaring it after the
    // loop tagged only the last instance, dereferenced a null handle when
    // the group had no children, and used an uninitialized variable handle
    // if the declaration failed. operator[] declares the variable when it
    // is missing and reuses it when it already exists.
    const unsigned int num_instances = geometry_group_->getChildCount();
    for (unsigned int i = 0; i < num_instances; ++i)
    {
        GeometryInstance instance = geometry_group_->getChild( i );
        instance["caustic_map"]->setTextureSampler( projectedTexSamp );
        instance["diffuse_map_scale"]->setFloat( 1.0f );
        instance["emission_color"]->setFloat( make_float3(0) );
        instance["Kr"]->setFloat( 0 );
        instance["geometryInstanceID"]->setInt( model_id );
    }

    // Wrap the geometry group in a transform node that starts as identity.
    transform_ = context->createTransform();
    transform_->setChild( geometry_group_ );
    transform_->setMatrix(false,m,m);
}
Am I setting up my group structure in a logical, application-appropriate way? What could be the cause of my progressive slowdown issue? I’d appreciate any help.
Thank you!
Pete