nvinfer1::IBuilder* builder = createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetwork();
builder->setDebugSync(false);
builder->setMinFindIterations(3); // allow time for TX1 GPU to spin up
builder->setAverageFindIterations(2);
builder->setMaxWorkspaceSize(1 << 30);
// parse the caffe model to populate the network, then set the outputs
nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
mEnableFP16 = true;
nvinfer1::DataType modelDataType = mEnableFP16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; // create a 16-bit model if it's natively supported
const nvcaffeparser1::IBlobNameToTensor *blobNameToTensor =
parser->parse(deployFile.c_str(), // caffe deploy file
modelFile.c_str(), // caffe model file
*network, // network definition that the parser will populate
modelDataType);
// the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate
const size_t num_outputs = outputs.size();
for( size_t n=0; n < num_outputs; n++ )
{
nvinfer1::ITensor* tensor = blobNameToTensor->find(outputs[n].c_str());
if( !tensor )
printf(LOG_GIE "failed to retrieve tensor for output '%s'\n", outputs[n].c_str());
else
printf(LOG_GIE "retrieved output tensor '%s'\n", tensor->getName());
network->markOutput(*tensor);
}
// Build the engine
printf(LOG_GIE "configuring CUDA engine\n");
builder->setMaxBatchSize(maxBatchSize);
//builder->setMaxWorkspaceSize(16 << 20); //from tensorNet.cpp
builder->setMaxWorkspaceSize(1 << 30); //isn't this better?
// set up the network for paired-fp16 format
if(mEnableFP16)
builder->setHalf2Mode(true);
printf(LOG_GIE "building CUDA engine\n");
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
The call to buildCudaEngine() crashes at runtime.
Edit: I also want to point out that only TensorRT 3.0 fails at this step — the same code builds the engine successfully on earlier TensorRT releases.