4.3 C++ Inference Example
This article uses an object detection task as an example to show how to perform model inference in a C++ environment. Object detection is a core computer vision task that identifies the location and category of objects in an image or video. This example deploys the YOLOv5 model and runs inference with the ONNX Runtime framework and the hardware acceleration libraries provided by SpaceMIT.
Clone the Code
git clone https://gitee.com/bianbu/spacemit-demo.git ~/spacemit-demo
Build and Run
cd ~/spacemit-demo/examples/CV/yolov5/cpp
mkdir build
cd build
cmake ..
make -j$(nproc)
./yolov5_demo --model ../../model/yolov5_n.q.onnx --image ../../data/test.jpg
By default, the inference results are saved as result.jpg.
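If you want to adapt this demo to your own project, the build can be reproduced with a short CMakeLists.txt. The sketch below is a hypothetical minimal configuration, assuming onnxruntime and the SpaceMIT EP libraries are discoverable on the system; the demo's actual CMakeLists.txt may differ in library names and paths.

cmake_minimum_required(VERSION 3.10)
project(yolov5_demo CXX)

set(CMAKE_CXX_STANDARD 17)

# OpenCV is used for image I/O, preprocessing, and drawing
find_package(OpenCV REQUIRED)

add_executable(yolov5_demo main.cpp)

target_include_directories(yolov5_demo PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link ONNX Runtime; adjust the library name/path to match your install
target_link_libraries(yolov5_demo PRIVATE ${OpenCV_LIBS} onnxruntime)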
Inference Process and Code Analysis
Header File Imports
#include <opencv2/opencv.hpp>     // OpenCV: image loading, preprocessing, drawing
#include <onnxruntime_cxx_api.h>  // ONNX Runtime C++ API
#include "spacemit_ort_env.h"     // SpaceMIT execution provider (EP) helpers
Open Camera
// Open the camera; the device ID defaults to 1
cv::VideoCapture cap(1);
if (!cap.isOpened()) {
std::cerr << "Failed to open camera!" << std::endl;
return -1;
}
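The demo command shown earlier ran on a static image; when using the camera, a frame must be read before preprocessing. A minimal capture sketch, using the cap object from above (frame is the cv::Mat that the preprocessing step consumes later):

cv::Mat frame;
// Grab a single frame from the camera; in a live demo this runs once per loop iteration
cap >> frame;
if (frame.empty()) {
    std::cerr << "Failed to read frame from camera!" << std::endl;
    return -1;
}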
Image Preprocess
// Image Preprocessing Function
std::vector<float> preprocess(const cv::Mat& image, int inputWidth = 320, int inputHeight = 320) {
cv::Mat resizedImage;
// Image Resize to Fixed Size
cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
// Convert channels from BGR to RGB
cv::cvtColor(resizedImage, resizedImage, cv::COLOR_BGR2RGB);
// Image Normalization
resizedImage.convertTo(resizedImage, CV_32F, 1.0 / 255.0);
std::vector<float> inputTensor;
// Convert Image Channel Order to [C, H, W]
for (int c = 0; c < 3; ++c) {
for (int h = 0; h < inputHeight; ++h) {
for (int w = 0; w < inputWidth; ++w) {
inputTensor.push_back(resizedImage.at<cv::Vec3f>(h, w)[c]);
}
}
}
return inputTensor;
}
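The manual HWC-to-CHW loop above is easy to follow, but it is not the only option. As an alternative sketch (not part of the original demo), OpenCV's dnn module can produce the same normalized NCHW blob in a single call:

#include <opencv2/dnn.hpp>  // also pulled in by <opencv2/opencv.hpp>

// Equivalent preprocessing in one call: resize to inputWidth x inputHeight,
// scale by 1/255, and swap BGR->RGB (swapRB = true), without center cropping.
cv::Mat blob = cv::dnn::blobFromImage(
    image, 1.0 / 255.0, cv::Size(inputWidth, inputHeight),
    cv::Scalar(), /*swapRB=*/true, /*crop=*/false);
// blob is a 4D CV_32F Mat with shape [1, 3, inputHeight, inputWidth];
// its data pointer can be passed to Ort::Value::CreateTensor<float> directly.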
Session Initialization and Pre-configuration
// Initialize ONNX Runtime Environment
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv5Inference");
Ort::SessionOptions session_options;
// Set the intra-op thread count (4 threads here)
session_options.SetIntraOpNumThreads(4);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
// Load SpaceMIT EP
SessionOptionsSpaceMITEnvInit(session_options);
// Load ONNX Model
Ort::Session session_(env, modelPath.c_str(), session_options);
// Get Input Node Names
Ort::AllocatorWithDefaultOptions allocator;
std::vector<const char*> input_node_names_;
std::vector<std::string> input_names_;
size_t num_inputs_;
num_inputs_ = session_.GetInputCount();
input_node_names_.resize(num_inputs_);
input_names_.resize(num_inputs_, "");
for (size_t i = 0; i < num_inputs_; ++i) {
auto input_name = session_.GetInputNameAllocated(i, allocator);
input_names_[i].append(input_name.get());
input_node_names_[i] = input_names_[i].c_str();
}
// Get Input Width and Height
Ort::TypeInfo input_type_info = session_.GetInputTypeInfo(0);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
std::vector<int64_t> input_dims = input_tensor_info.GetShape();
int inputWidth = input_dims[3];
int inputHeight = input_dims[2];
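// Note (assumption, not in the original demo): models exported with dynamic
// spatial dimensions report -1 from GetShape(); fall back to the 320x320
// default used by this demo's preprocessing in that case.
if (inputWidth <= 0) inputWidth = 320;
if (inputHeight <= 0) inputHeight = 320;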
// Get Output Node Names
std::vector<const char*> output_node_names_;
std::vector<std::string> output_names_;
size_t num_outputs_;
num_outputs_ = session_.GetOutputCount();
output_node_names_.resize(num_outputs_);
output_names_.resize(num_outputs_, "");
for (size_t i = 0; i < num_outputs_; ++i) {
auto output_name = session_.GetOutputNameAllocated(i, allocator);
output_names_[i].append(output_name.get());
output_node_names_[i] = output_names_[i].c_str();
}
// Run image preprocessing ("frame" is the image captured or loaded earlier)
std::vector<float> inputTensor = preprocess(frame, inputWidth, inputHeight);
// Create the input tensor with NCHW layout [1, 3, H, W]
std::vector<int64_t> input_shape = {1, 3, inputHeight, inputWidth};
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, inputTensor.data(), inputTensor.size(), input_shape.data(), input_shape.size());
Run Inference
std::vector<Ort::Value> outputs = session_.Run(Ort::RunOptions{nullptr}, input_node_names_.data(), &input_tensor, 1, output_node_names_.data(), output_node_names_.size());
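Before indexing into the outputs, it is worth checking their count: the postprocessing below assumes two outputs, detections and class indices. A small sanity check, not in the original demo:

// The postprocess step expects two outputs: detections [1, N, 5] and labels [1, N]
if (outputs.size() < 2) {
    std::cerr << "Unexpected number of model outputs: " << outputs.size() << std::endl;
    return -1;
}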
Get Output and Postprocess
// Get Output Data
float* dets_data = outputs[0].GetTensorMutableData<float>();
float* labels_pred_data = outputs[1].GetTensorMutableData<float>();
// Get Number of Detected Objects
auto dets_tensor_info = outputs[0].GetTensorTypeAndShapeInfo();
std::vector<int64_t> dets_dims = dets_tensor_info.GetShape();
size_t num_detections = dets_dims[1];
// Get Coordinates, Scores, and Labels Separately
std::vector<std::vector<float>> dets(num_detections, std::vector<float>(4));
std::vector<float> scores(num_detections);
std::vector<int> labels_pred(num_detections);
for (size_t i = 0; i < num_detections; ++i) {
for (int j = 0; j < 4; ++j) {
dets[i][j] = dets_data[i * 5 + j];
}
scores[i] = dets_data[i * 5 + 4];
labels_pred[i] = static_cast<int>(labels_pred_data[i]);
}
// Adjust bounding boxes from the network input size back to the original image dimensions
cv::Size image_shape = frame.size();
float scale_x = static_cast<float>(image_shape.width) / inputWidth;
float scale_y = static_cast<float>(image_shape.height) / inputHeight;
for (auto& det : dets) {
det[0] *= scale_x;
det[1] *= scale_y;
det[2] *= scale_x;
det[3] *= scale_y;
}
Visualize Results
void visualizeResults(cv::Mat& image, const std::vector<std::vector<float>>& dets, const std::vector<float>& scores, const std::vector<int>& labels_pred, const std::vector<std::string>& labels, float conf_threshold = 0.4) {
for (size_t i = 0; i < dets.size(); ++i) {
const auto& det = dets[i];
float score = scores[i];
if (score > conf_threshold) {
int class_id = labels_pred[i];
int x1 = static_cast<int>(det[0]);
int y1 = static_cast<int>(det[1]);
int x2 = static_cast<int>(det[2]);
int y2 = static_cast<int>(det[3]);
std::string label = labels[class_id];
cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
cv::putText(image, label + ": " + std::to_string(score), cv::Point(x1, y1 - 10), cv::FONT_HERSHEY_SIMPLEX, 0.9, cv::Scalar(0, 255, 0), 2);
}
}
}
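Putting the pieces together, the final step draws the detections and writes the output file. The sketch below assumes a labels vector holding the class names for this model (shown here with a hypothetical truncated list; the actual demo ships its own label list):

// Hypothetical class-name list; replace with the label file shipped with the demo
std::vector<std::string> labels = {"person", "bicycle", "car" /* ... */};

// Draw boxes and labels onto the original frame
visualizeResults(frame, dets, scores, labels_pred, labels);
// Save the annotated frame; this matches the demo's default output file
cv::imwrite("result.jpg", frame);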