Hi,
I’m using a Caffe trained network in my application for classifying patterns when I use OpenCV (which only use CPU) I get 26ms for each patch, but when I use Caffe(GPU mode) to improve the time, Unexpectedly, the time increase to 32ms! I couldn’t check GPU usage with gpustat [GitHub - wookayin/gpustat: 📊 A simple command-line utility for querying and monitoring GPU status] but by checking CPU usage it’s obvious I’m still using CPU with Caffe even when I set the mode to GPU!
jetpack 3.0 is installed on my jetsonTX2,
here is part of my code
void Network::load() throw (utility::Exception)
{
int gpuCount = 0;
cudaDeviceProp deviceProperties;
try {
if (useGPU_) {
cudaGetDeviceCount(&gpuCount);
cudaGetDeviceProperties(&deviceProperties, 0);
std::cout << "GPUs: " << gpuCount << "\nGPU device name: " << deviceProperties.name << std::endl;
caffeNetwork_ = std::make_shared< caffe::Net<float> >(
prototypeFile_,
caffe::TEST
);
caffeNetwork_->CopyTrainedLayersFrom(modelFile_);
} else
network_ = cv::dnn::readNetFromCaffe(prototypeFile_, modelFile_);
loadMeanFile();
loadLabelsFile();
} catch (cv::Exception& exception) {
throw utility::Exception(exception.what());
}
}
const Result Network::classifier(
const cv::Mat& image,
const size_t imageSize
) throw (utility::Exception)
{
caffe::Blob<float>* caffeInput;
caffe::Blob<float>* caffeOutput;
cv::Mat blob;
cv::Mat caffeInputMatrix;
cv::Mat probabilities;
Class _class;
Result result;
caffe::Timer forwardTimer;
// Convert image to batch of images
blob = cv::dnn::blobFromImage(
image,
1.0f,
cv::Size(imageSize, imageSize),
cv::Scalar(meanPixels[0], meanPixels[1], meanPixels[2]),
false
);
if (useGPU_) {
// Run Caffe model using Caffe
caffeInput = caffeNetwork_->input_blobs()[0];
// Wrap Caffe's input blob to cv::Mat
caffeInputMatrix = cv::Mat(
caffeInput->shape(),
CV_32F,
(char*) caffeInput->cpu_data()
);
blob.copyTo(caffeInputMatrix);
// forwardTimer.Start();
caffeOutput = caffeNetwork_->Forward()[0];
// std::cout << "Forward Time: " << forwardTimer.MilliSeconds() << std::endl;
probabilities = cv::Mat(
caffeOutput->shape(),
CV_32F,
(char*) caffeOutput->cpu_data()
);
} else {
network_.setInput(blob, "data");
probabilities = network_.forward("softmax");
}
_class = getClass(probabilities);
result.label(labels[_class.first]);
result.probability(_class.second);
return result;
}