4.3 C++ Inference Example
This article uses an object detection task as an example to show how to perform model inference in a C++ environment. Object detection is a core computer vision task that identifies the location and category of objects in an image or video. This example deploys the YOLOv5 model and runs inference with the ONNX Runtime framework and the hardware acceleration libraries provided by SpaceMIT.
Clone the Code
git clone https://gitee.com/bianbu/spacemit-demo.git ~/spacemit-demo
Build and Run
cd ~/spacemit-demo/examples/CV/yolov5/cpp
mkdir build
cd build
cmake ..
make -j$(nproc)
./yolov5_demo --model ../../model/yolov5_n.q.onnx --image ../../data/test.jpg
By default, the inference results are saved as result.jpg.
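If you want to adapt this demo to your own project, the build can be reproduced with a short CMakeLists.txt. The sketch below is a hypothetical minimal configuration, assuming onnxruntime and the SpaceMIT EP libraries are discoverable on the system; the demo's actual CMakeLists.txt may differ in library names and paths.

cmake_minimum_required(VERSION 3.10)
project(yolov5_demo CXX)

set(CMAKE_CXX_STANDARD 17)

# OpenCV is used for image I/O, preprocessing, and drawing
find_package(OpenCV REQUIRED)

add_executable(yolov5_demo main.cpp)

target_include_directories(yolov5_demo PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link ONNX Runtime; adjust the library name/path to match your install
target_link_libraries(yolov5_demo PRIVATE ${OpenCV_LIBS} onnxruntime)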
Inference Process and Code Analysis
Header File Imports
#include <opencv2/opencv.hpp>     // OpenCV: image loading, preprocessing, drawing
#include <onnxruntime_cxx_api.h>  // ONNX Runtime C++ API
#include "spacemit_ort_env.h"     // SpaceMIT execution provider (EP) helpers
Open Camera
// Open the camera; the device ID defaults to 1
cv::VideoCapture cap(1);
if (!cap.isOpened()) {
std::cerr << "Failed to open camera!" << std::endl;
return -1;
}
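The demo command shown earlier ran on a static image; when using the camera, a frame must be read before preprocessing. A minimal capture sketch, using the cap object from above (frame is the cv::Mat that the preprocessing step consumes later):

cv::Mat frame;
// Grab a single frame from the camera; in a live demo this runs once per loop iteration
cap >> frame;
if (frame.empty()) {
    std::cerr << "Failed to read frame from camera!" << std::endl;
    return -1;
}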
Image Preprocess
// Image Preprocessing Function
std::vector<float> preprocess(const cv::Mat& image, int inputWidth = 320, int inputHeight = 320) {
cv::Mat resizedImage;
// Image Resize to Fixed Size
cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
// Convert channels from BGR to RGB
cv::cvtColor(resizedImage, resizedImage, cv::COLOR_BGR2RGB);
// Image Normalization
resizedImage.convertTo(resizedImage, CV_32F, 1.0 / 255.0);
std::vector<float> inputTensor;
// Convert Image Channel Order to [C, H, W]
for (int c = 0; c < 3; ++c) {
for (int h = 0; h < inputHeight; ++h) {
for (int w = 0; w < inputWidth; ++w) {
inputTensor.push_back(resizedImage.at<cv::Vec3f>(h, w)[c]);
}
}
}
return inputTensor;
}
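The manual HWC-to-CHW loop above is easy to follow, but it is not the only option. As an alternative sketch (not part of the original demo), OpenCV's dnn module can produce the same normalized NCHW blob in a single call:

#include <opencv2/dnn.hpp>  // also pulled in by <opencv2/opencv.hpp>

// Equivalent preprocessing in one call: resize to inputWidth x inputHeight,
// scale by 1/255, and swap BGR->RGB (swapRB = true), without center cropping.
cv::Mat blob = cv::dnn::blobFromImage(
    image, 1.0 / 255.0, cv::Size(inputWidth, inputHeight),
    cv::Scalar(), /*swapRB=*/true, /*crop=*/false);
// blob is a 4D CV_32F Mat with shape [1, 3, inputHeight, inputWidth];
// its data pointer can be passed to Ort::Value::CreateTensor<float> directly.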
Session Initialization and Pre-configuration
// Initialize ONNX Runtime Environment
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv5Inference");
Ort::SessionOptions session_options;
// Set the intra-op thread count (4 threads here)
session_options.SetIntraOpNumThreads(4);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
// Load SpaceMIT EP
SessionOptionsSpaceMITEnvInit(session_options);
// Load ONNX Model
Ort::Session session_(env, modelPath.c_str(), session_options);
// Get Input Node Names
Ort::AllocatorWithDefaultOptions allocator;
std::vector<const char*> input_node_names_;
std::vector<std::string> input_names_;
size_t num_inputs_;
num_inputs_ = session_.GetInputCount();
input_node_names_.resize(num_inputs_);
input_names_.resize(num_inputs_, "");
for (size_t i = 0; i < num_inputs_; ++i) {
auto input_name = session_.GetInputNameAllocated(i, allocator);
input_names_[i].append(input_name.get());
input_node_names_[i] = input_names_[i].c_str();
}
// Get Input Width and Height
Ort::TypeInfo input_type_info = session_.GetInputTypeInfo(0);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
std::vector<int64_t> input_dims = input_tensor_info.GetShape();
int inputWidth = input_dims[3];
int inputHeight = input_dims[2];
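// Note (assumption, not in the original demo): models exported with dynamic
// spatial dimensions report -1 from GetShape(); fall back to the 320x320
// default used by this demo's preprocessing in that case.
if (inputWidth <= 0) inputWidth = 320;
if (inputHeight <= 0) inputHeight = 320;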
// Get Output Node Names
std::vector<const char*> output_node_names_;
std::vector<std::string> output_names_;
size_t num_outputs_;
num_outputs_ = session_.GetOutputCount();
output_node_names_.resize(num_outputs_);
output_names_.resize(num_outputs_, "");
for (size_t i = 0; i < num_outputs_; ++i) {
auto output_name = session_.GetOutputNameAllocated(i, allocator);
output_names_[i].append(output_name.get());
output_node_names_[i] = output_names_[i].c_str();
}
// Run image preprocessing ("frame" is the image captured or loaded earlier)
std::vector<float> inputTensor = preprocess(frame, inputWidth, inputHeight);
// Create the input tensor with NCHW layout [1, 3, H, W]
std::vector<int64_t> input_shape = {1, 3, inputHeight, inputWidth};
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, inputTensor.data(), inputTensor.size(), input_shape.data(), input_shape.size());
Run Inference
std::vector<Ort::Value> outputs = session_.Run(Ort::RunOptions{nullptr}, input_node_names_.data(), &input_tensor, 1, output_node_names_.data(), output_node_names_.size());
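Before indexing into the outputs, it is worth checking their count: the postprocessing below assumes two outputs, detections and class indices. A small sanity check, not in the original demo:

// The postprocess step expects two outputs: detections [1, N, 5] and labels [1, N]
if (outputs.size() < 2) {
    std::cerr << "Unexpected number of model outputs: " << outputs.size() << std::endl;
    return -1;
}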
Get Output and Postprocess
// Get Output Data
float* dets_data = outputs[0].GetTensorMutableData<float>();
float* labels_pred_data = outputs[1].GetTensorMutableData<float>();
// Get Number of Detected Objects
auto dets_tensor_info = outputs[0].GetTensorTypeAndShapeInfo();
std::vector<int64_t> dets_dims = dets_tensor_info.GetShape();
size_t num_detections = dets_dims[1];
// Get Coordinates, Scores, and Labels Separately
std::vector<std::vector<float>> dets(num_detections, std::vector<float>(4));
std::vector<float> scores(num_detections);
std::vector<int> labels_pred(num_detections);
for (size_t i = 0; i < num_detections; ++i) {
for (int j = 0; j < 4; ++j) {
dets[i][j] = dets_data[i * 5 + j];
}
scores[i] = dets_data[i * 5 + 4];
labels_pred[i] = static_cast<int>(labels_pred_data[i]);
}
// Adjust bounding boxes from the network input size back to the original image dimensions
cv::Size image_shape = frame.size();
float scale_x = static_cast<float>(image_shape.width) / inputWidth;
float scale_y = static_cast<float>(image_shape.height) / inputHeight;
for (auto& det : dets) {
det[0] *= scale_x;
det[1] *= scale_y;
det[2] *= scale_x;
det[3] *= scale_y;
}
Visualize Results
void visualizeResults(cv::Mat& image, const std::vector<std::vector<float>>& dets, const std::vector<float>& scores, const std::vector<int>& labels_pred, const std::vector<std::string>& labels, float conf_threshold = 0.4) {
for (size_t i = 0; i < dets.size(); ++i) {
const auto& det = dets[i];
float score = scores[i];
if (score > conf_threshold) {
int class_id = labels_pred[i];
int x1 = static_cast<int>(det[0]);
int y1 = static_cast<int>(det[1]);
int x2 = static_cast<int>(det[2]);
int y2 = static_cast<int>(det[3]);
std::string label = labels[class_id];
cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
cv::putText(image, label + ": " + std::to_string(score), cv::Point(x1, y1 - 10), cv::FONT_HERSHEY_SIMPLEX, 0.9, cv::Scalar(0, 255, 0), 2);
}
}
}
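Putting the pieces together, the final step draws the detections and writes the output file. The sketch below assumes a labels vector holding the class names for this model (shown here with a hypothetical truncated list; the actual demo ships its own label list):

// Hypothetical class-name list; replace with the label file shipped with the demo
std::vector<std::string> labels = {"person", "bicycle", "car" /* ... */};

// Draw boxes and labels onto the original frame
visualizeResults(frame, dets, scores, labels_pred, labels);
// Save the annotated frame; this matches the demo's default output file
cv::imwrite("result.jpg", frame);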