3.3.1 faster than 3.3.3?!

Hello,

I have been working with PhysX 3.3.1 (17938824) and VS2013, and everything was fine.
When I compiled the recent version 3.3.3 (19456754) from the git repository and used it, the simulation was suddenly very slow. The same code example with 300 stacked boxes runs 10 times faster in 3.3.1 (an average of 1.8 ms per 2.5 ms simulation step) than in 3.3.3 (an average of 18.7 ms per 2.5 ms simulation step).

Here is the code:

#include <PxActor.h>
#include <extensions\PxDefaultErrorCallback.h>
#include <extensions/PxExtensionsAPI.h> 
#include <foundation\PxMat44.h>
#include <foundation\PxTransform.h>
#include <PxMaterial.h>
#include <PxPhysics.h>
#include <PxRigidDynamic.h>
#include <PxRigidStatic.h>
#include <PxScene.h>

#include <map>
#include <chrono>
#include <iostream>

physx::PxPhysics* _physicsSDK;
physx::PxScene* _scene;
physx::PxMaterial* _material;
typedef std::map<int, physx::PxRigidActor*> ActorMap;
ActorMap _actors;
float width;
int max_i, max_j;

/// Error callback that prints every reported PhysX error to standard output.
class ErrorCallback : public physx::PxErrorCallback
{
public:
	/// Prints the error code, source location and message of one reported error.
	/// @param code    error category reported by the SDK (printed as its numeric value)
	/// @param message human-readable description; may be null
	/// @param file    source file that raised the error; may be null
	/// @param line    line number inside @p file
	virtual void reportError(physx::PxErrorCode::Enum code, const char* message, const char* file, int line) override
	{
		// Streaming a null char pointer is undefined behaviour and can put std::cout
		// into a failed state, which would hide all later output (including the timings).
		const char* safeFile = file ? file : "<unknown file>";
		const char* safeMessage = message ? message : "<no message>";
		std::cout << "Error " << code << " was found in (" << safeFile << ", " << line << "): " << safeMessage << std::endl;
	}
};

// Error callback and allocator instances handed to PxCreateFoundation() in main().
ErrorCallback errorHandler;
physx::PxDefaultAllocator defaultAllocator;

void createBox(int id, physx::PxVec3 dim, double density)
{
	physx::PxBoxGeometry geometry(dim);
	physx::PxRigidDynamic* actor = PxCreateDynamic(*_physicsSDK, physx::PxTransform::createIdentity(), geometry, *_material, density);

	if (actor)
	{
		_scene->addActor(*actor);
		_actors[id] = actor;
	}
}

void setMatrix(int id, physx::PxMat44 matrix)
{
	physx::PxRigidActor* actor = _actors[id];
	if (actor) actor->setGlobalPose(physx::PxTransform(matrix));
}

void resetBoxes()
{
	int i = 0; // rows
	int j = 0; // columns

	for (int id = 0; id < _actors.size(); id++)
	{
		setMatrix(id, physx::PxMat44(physx::PxMat33(1.0), physx::PxVec3(-width*(float)max_j / 2 + width*(float)j, width / 2 + width*(float)i, 0.0f)));
		if (j == max_j - 1)
		{
			j = 0;
			i++;
		}
		else
		{
			j++;
		}
	}
}


void main() {
	// setup the scene
	physx::PxFoundation* foundation = PxCreateFoundation(PX_PHYSICS_VERSION, defaultAllocator, errorHandler);

	_physicsSDK = PxCreatePhysics(PX_PHYSICS_VERSION, *foundation, physx::PxTolerancesScale());

	if (!PxInitExtensions(*_physicsSDK))
	{
		return;
	}

#ifdef _DEBUG
	if (_physicsSDK->getPvdConnectionManager())
	{
		physx::PxVisualDebuggerExt::createConnection(_physicsSDK->getPvdConnectionManager(), "localhost", 5425, 10000);
	}
	else
	{
		return;
	}
#endif

	physx::PxSceneDesc sceneDesc(_physicsSDK->getTolerancesScale());
	sceneDesc.gravity = physx::PxVec3(0.0f, -9.81f, 0.0f);
	sceneDesc.filterShader = &physx::PxDefaultSimulationFilterShader;

	if (!sceneDesc.cpuDispatcher)
	{
		physx::PxDefaultCpuDispatcher* mCpuDispatcher = physx::PxDefaultCpuDispatcherCreate(1);
		if (!mCpuDispatcher)
			return;
		sceneDesc.cpuDispatcher = mCpuDispatcher;
	}

	_scene = _physicsSDK->createScene(sceneDesc);
	if (!_scene)
	{
		return;
	}

	// material
	_material = _physicsSDK->createMaterial(10.0f, 10.0f, 0.00010f); // restitution - Stosszahl

	// Create the ground
	physx::PxTransform pose(physx::PxVec3(0.0f, 0.0f, 0.0f), physx::PxQuat(physx::PxPi / 2.0, physx::PxVec3(0.0, 0.0, 1.0)));
	physx::PxRigidStatic* actor = _physicsSDK->createRigidStatic(pose);
	if (!actor)
	{
		return;
	}

	physx::PxShape* shape = actor->createShape(physx::PxPlaneGeometry(), *_material);
	if (!shape)
	{
		return;
	}
	_scene->addActor(*actor);

	// create the boxes
	width = 0.04f; // edge length of the boxes
	max_i = 30; // rows
	max_j = 10; // columns
	for (int i = 0; i < max_i; i++)
	{
		for (int j = 0; j < max_j; j++)
		{
			createBox(i * 10 + j, physx::PxVec3(width / 2, width / 2, width / 2), 0.1f);
		}
	}

	// reset the boxes
	resetBoxes();

	// simulate
	int counter = 0;
	float counter_limit = 1000.0; // number of steps to compute
	float dt = 0.0025f; // time step for simulation

	auto t_start = std::chrono::high_resolution_clock::now();

	while (counter < counter_limit) {
		_scene->simulate(dt);
		while (!_scene->fetchResults()) { /* do nothing but wait */ }
		counter++;
	}

	auto t_end = std::chrono::high_resolution_clock::now();

	std::cout << "Time passed: " << std::chrono::duration<double, std::milli>(t_end - t_start).count() << " ms\nAverage time per frame: " << std::chrono::duration<double, std::milli>(t_end - t_start).count() / counter_limit << " ms\n";
	
	// wait for a key press
	std::cout << "..." << std::endl;
	std::cin.clear();
	std::cin.ignore(std::cin.rdbuf()->in_avail());
	std::cin.get();

	return;

}

Can anybody reproduce or even bring light into this strange behavior?

Thanks!

I thought NVIDIA open-sourced only the CPU part of PhysX? That would explain it: the PhysX SDK 3.3.1 binaries have GPU acceleration included, while the open-sourced 3.3.3 can run only on the CPU, so there you go. I thought about using 3.3.3 too, but then I remembered that it was only the useless CPU implementation.

That’s the explanation, thank you!

It is certainly not an explanation, as the PhysX SDK has never supported GPU acceleration for rigid body physics
(only experimentally, and only as part of APEX Destruction).

Thank you for your answer. Hmm, I thought this was a good explanation, because I use the binary version of 3.3.1.
Isn't at least the collision detection processing happening on remote processors like the GPU?
This question came up, because in the “NVIDIA PhysX SDK Documentation » User’s Guide » Callbacks and Customization” it says: “[…] Unfortunately this quickly becomes too slow if done for a very large game world, especially if the collision detection processing happens on a remote processor like the GPU or an other kind of vector processor with local memory, …”