#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#undef UNICODE

#include "ailia.h"
#include "ailia_detector.h"
#include "ailia_tracker.h"
#include "detector_utils.h"
#include "utils.h"
#include "webcamera_utils.h"
#define WEIGHT_PATH "yolox_s.opt.onnx"
#define MODEL_PATH "yolox_s.opt.onnx.prototxt"
#define MODEL_INPUT_WIDTH 640
#define MODEL_INPUT_HEIGHT 640
#define IMAGE_WIDTH 640
#define IMAGE_HEIGHT 640
#define TARGET_CATEGORY 0
#define THRESHOLD 0.4f
#define IOU 0.45f
#define IS_VERTICAL_THRESHOLD 1.6
#define RECTANGLE_BORDER_SIZE 2
#define TEXT_COLOR cv::Scalar(0, 255, 0)
#define TEXT_SIZE 1.0
#define TEXT_BORDER_SIZE 1
#define TEXT_FONT cv::FONT_HERSHEY_SIMPLEX
// Runtime options; argument parsing is omitted from this excerpt.
static bool useWebCamera(false);
static bool saveOutputVideo(false);
static std::string inputVideoPath;
static std::string outputVideoPath;
static int args_env_id = -1; // -1 lets ailia choose the execution environment
int main(void) {
    std::srand(static_cast<unsigned int>(std::time(nullptr))); // seed the per-id random colors
    std::map<unsigned int, cv::Scalar> id2Color;

    // Load the YOLOX model (error checking omitted for brevity).
    AILIANetwork *ailia = nullptr;
    ailiaCreate(&ailia, args_env_id, AILIA_MULTITHREAD_AUTO);
    ailiaOpenStreamFile(ailia, MODEL_PATH);
    ailiaOpenWeightFile(ailia, WEIGHT_PATH);

    // Create the detector. NOTE: the format/range/algorithm constants below
    // are this sketch's assumptions for the YOLOX model; consult the SDK
    // sample for the exact values.
    AILIADetector *detector = nullptr;
    ailiaCreateDetector(&detector, ailia,
                        AILIA_NETWORK_IMAGE_FORMAT_BGRA,
                        AILIA_NETWORK_IMAGE_CHANNEL_FIRST,
                        AILIA_NETWORK_IMAGE_RANGE_UNSIGNED_FP32,
                        AILIA_DETECTOR_ALGORITHM_YOLOX,
                        80 /* COCO category count */,
                        AILIA_DETECTOR_FLAG_NORMAL);

    // Create the ByteTrack tracker. The settings values here are the
    // reference ByteTrack defaults, used as an assumption.
    AILIATracker *ailiaTracker = nullptr;
    AILIATrackerSettings settings = {};
    settings.score_threshold = 0.1f;  // drop very low-score detections
    settings.nms_threshold   = 0.7f;  // duplicate suppression (IoU)
    settings.track_threshold = 0.5f;  // first-stage candidate cutoff
    settings.match_threshold = 0.8f;  // IoU required to associate
    settings.track_buffer    = 30;    // frames to keep a lost track
    ailiaTrackerCreate(&ailiaTracker, AILIA_TRACKER_ALGORITHM_BYTE_TRACK,
                       &settings, AILIA_TRACKER_SETTINGS_VERSION, AILIA_TRACKER_FLAG_NONE);
    cv::VideoCapture capture;
    if (useWebCamera) {
        capture = cv::VideoCapture(atoi(inputVideoPath.c_str()));
    } else {
        capture = cv::VideoCapture(inputVideoPath.c_str());
    }
    cv::VideoWriter writer;
    if (saveOutputVideo) {
        int fourcc = cv::VideoWriter::fourcc('M', 'P', '4', 'V');
        writer = cv::VideoWriter(outputVideoPath.c_str(), fourcc,
                                 capture.get(cv::CAP_PROP_FPS),
                                 cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT));
    }
    while (true) {
        cv::Mat frame, resized_img, img;
        capture >> frame;
        if ((char)cv::waitKey(1) == 'q' || frame.empty()) {
            break;
        }
        // Resize the frame to the model input size and convert it to BGRA,
        // the pixel format passed to the detector below.
        adjust_frame_size(frame, resized_img, IMAGE_WIDTH, IMAGE_HEIGHT);
        cv::cvtColor(resized_img, img, cv::COLOR_BGR2BGRA);
        ailiaDetectorCompute(detector, img.data, MODEL_INPUT_WIDTH * 4,
                             MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT,
                             AILIA_IMAGE_FORMAT_BGRA, THRESHOLD, IOU);
        // Retrieve this frame's detections.
        unsigned int objCounts = 0;
        ailiaDetectorGetObjectCount(detector, &objCounts);
        AILIADetectorObject *ailiaDetectorObject = new AILIADetectorObject[objCounts];
        for (unsigned int i = 0; i < objCounts; i++) {
            ailiaDetectorGetObject(detector, &ailiaDetectorObject[i], i,
                                   AILIA_DETECTOR_OBJECT_VERSION);
        }
        // Feed the target-category detections (persons) to the tracker.
        for (unsigned int i = 0; i < objCounts; i++) {
            if (ailiaDetectorObject[i].category == TARGET_CATEGORY) {
                ailiaTrackerAddTarget(ailiaTracker, &ailiaDetectorObject[i],
                                      AILIA_DETECTOR_OBJECT_VERSION);
            }
        }
        delete[] ailiaDetectorObject;

        // Run ByteTrack association and fetch the tracked objects.
        ailiaTrackerCompute(ailiaTracker);
        unsigned int onlineSize = 0;
        ailiaTrackerGetObjectCount(ailiaTracker, &onlineSize);
        AILIATrackerObject *ailiaTrackerObject = new AILIATrackerObject[onlineSize];

        cv::Point leftUpperPoint, rightBottomPoint;
        cv::Scalar color;
        for (unsigned int i = 0; i < onlineSize; i++) {
            AILIATrackerObject &obj = ailiaTrackerObject[i];
            if (ailiaTrackerGetObject(ailiaTracker, &obj, i,
                                      AILIA_TRACKER_OBJECT_VERSION) != AILIA_STATUS_SUCCESS) {
                continue; // skip objects that could not be retrieved
            }
            const unsigned int id = obj.id;
            if (id2Color.find(id) != id2Color.end()) {
                color = id2Color[id];
            } else {
                // First time this track id is seen: assign it a random color.
                int b = rand() % 256;
                int g = rand() % 256;
                int r = rand() % 256;
                color = cv::Scalar(b, g, r);
                id2Color.insert(std::make_pair(id, color));
            }
            // Tracker coordinates are normalized; scale to the displayed image.
            const unsigned int x = static_cast<unsigned int>(obj.x * IMAGE_WIDTH);
            const unsigned int y = static_cast<unsigned int>(obj.y * IMAGE_HEIGHT);
            const unsigned int width = static_cast<unsigned int>(obj.w * IMAGE_WIDTH);
            const unsigned int height = static_cast<unsigned int>(obj.h * IMAGE_HEIGHT);
            leftUpperPoint = cv::Point(x, y);
            rightBottomPoint = cv::Point(x + width, y + height);
            cv::rectangle(resized_img, leftUpperPoint, rightBottomPoint,
                          color, RECTANGLE_BORDER_SIZE);
            cv::putText(resized_img, std::to_string(obj.id),
                        leftUpperPoint, TEXT_FONT, TEXT_SIZE, TEXT_COLOR,
                        TEXT_BORDER_SIZE);
        }
        delete[] ailiaTrackerObject;
        cv::imshow("result frame", resized_img);
        if (saveOutputVideo) {
            writer.write(resized_img);
        }
    }
    capture.release();
    if (saveOutputVideo) {
        writer.release();
    }
    cv::destroyAllWindows();
    ailiaTrackerDestroy(ailiaTracker);
    ailiaDestroyDetector(detector);
    ailiaDestroy(ailia);
    return 0;
}
The tracker API used above is declared in `ailia_tracker.h`:

- `int ailiaTrackerCreate(struct AILIATracker **tracker, int algorithm, const AILIATrackerSettings *settings, int version, int flags)`: creates an AILIATracker instance.
- `int ailiaTrackerAddTarget(struct AILIATracker *tracker, const AILIADetectorObject *detector_object, int version)`: sets a tracking target.
- `int ailiaTrackerCompute(struct AILIATracker *tracker)`: performs tracking.
- `int ailiaTrackerGetObjectCount(struct AILIATracker *tracker, unsigned int *obj_count)`: gets the number of detection results.
- `int ailiaTrackerGetObject(struct AILIATracker *tracker, AILIATrackerObject *obj, unsigned int index, unsigned int version)`: gets the detection results.
- `int ailiaTrackerDestroy(struct AILIATracker *tracker)`: destroys the AILIATracker instance.
- Constants: `AILIA_TRACKER_ALGORITHM_BYTE_TRACK` (ByteTrack), `AILIA_TRACKER_FLAG_NONE` (default flag), `AILIA_TRACKER_SETTINGS_VERSION`.
- `AILIATrackerObject` fields: `unsigned int id`, `unsigned int category`, `float x`, `float y`, `float w`, `float h`.
- `AILIATrackerSettings` fields: `float score_threshold`, `float nms_threshold`, `float track_threshold`, `float match_threshold`, `int track_buffer`.
ByteTrack, the tracking algorithm used by ailia Tracker, performs tracking based solely on the shape of the bounding box, using a Kalman filter for motion prediction; image features are not used.
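Because no image features are involved, the per-track state is just box geometry and its velocity. Here is a minimal constant-velocity sketch of the prediction step; the state layout is this sketch's simplification (the reference ByteTrack filter tracks the box center, aspect ratio, and height, along with their covariances):

```cpp
// Simplified track state: box geometry and its frame-to-frame velocity.
struct KalmanBoxState {
    float x, y, w, h;      // box position and size
    float vx, vy, vw, vh;  // estimated velocities
};

// Prediction step: advance the box by its velocity. The process noise and
// covariance update of a full Kalman filter are omitted in this sketch.
void predict(KalmanBoxState &s) {
    s.x += s.vx;
    s.y += s.vy;
    s.w += s.vw;
    s.h += s.vh;
}
```

The full per-frame pipeline then proceeds as follows.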
1. Obtain detection results
↓
2. Filter out low‑score detections using **score_threshold**
↓
3. Remove duplicate detections using **nms_threshold** (Non‑Maximum Suppression)
↓
4. Select high‑confidence detections (scores ≥ **track_threshold**)
as candidates for the **first‑stage matching**
↓
5. Use **match_threshold** to perform IoU‑based matching
→ Associate tracks from the previous frame with detections in the current frame
↓
6. For tracks that were not matched in the first stage,
attempt **second‑stage matching** using lower‑score detections
(scores between **score_threshold** and **track_threshold**)
if their IoU is sufficiently high.
→ **match_threshold** is also applied in this step.
↓
7. Manage lost tracks using **track_buffer**,
which defines how long a track is retained after being lost
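To make the staged use of these thresholds concrete, here is a compact C++ sketch of the association logic. It is an illustration only: the `Detection`, `Track`, and `iou` definitions are invented for this sketch, the greedy matcher stands in for ByteTrack's Hungarian assignment, and NMS (step 3) and the Kalman prediction that fills `Track::predicted` are omitted.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct Box { float x, y, w, h; };
struct Detection { Box box; float score; };
struct Track { Box predicted; int id; bool matched; int lost_frames; };

// IoU of two axis-aligned boxes.
static float iou(const Box &a, const Box &b) {
    float x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
    float x2 = std::min(a.x + a.w, b.x + b.w);
    float y2 = std::min(a.y + a.h, b.y + b.h);
    float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
    float uni = a.w * a.h + b.w * b.h - inter;
    return uni > 0.0f ? inter / uni : 0.0f;
}

// Greedy stand-in for ByteTrack's Hungarian assignment: give each unmatched
// track the best remaining detection whose IoU clears match_threshold.
static void associate(std::vector<Track> &tracks, std::vector<Detection> &dets,
                      float match_threshold) {
    for (Track &t : tracks) {
        if (t.matched) continue;
        int best = -1;
        float best_iou = match_threshold;
        for (std::size_t j = 0; j < dets.size(); j++) {
            float v = iou(t.predicted, dets[j].box);
            if (v >= best_iou) { best_iou = v; best = static_cast<int>(j); }
        }
        if (best >= 0) {
            t.matched = true;                 // track continues with this detection
            dets.erase(dets.begin() + best);  // detection is consumed
        }
    }
}

// One frame of the two-stage association (steps 2 and 4 through 7 above).
void byteTrackStep(std::vector<Track> &tracks, std::vector<Detection> dets,
                   float score_threshold, float track_threshold,
                   float match_threshold, int track_buffer) {
    std::vector<Detection> high, low;
    for (const Detection &d : dets) {
        if (d.score < score_threshold) continue;                 // step 2
        (d.score >= track_threshold ? high : low).push_back(d);  // step 4
    }
    for (Track &t : tracks) t.matched = false;
    associate(tracks, high, match_threshold);  // step 5: first stage
    associate(tracks, low, match_threshold);   // step 6: second stage
    for (Track &t : tracks)                    // step 7: lost-track bookkeeping
        t.lost_frames = t.matched ? 0 : t.lost_frames + 1;
    tracks.erase(std::remove_if(tracks.begin(), tracks.end(),
                                [&](const Track &t) { return t.lost_frames > track_buffer; }),
                 tracks.end());
    // Leftover high-score detections would start new tracks here.
}
```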
These configurable parameters correspond to the fields of `AILIATrackerSettings` listed above.
By default, the tracker operates in a mode optimized for human detection: following the standard ByteTrack algorithm, bounding boxes with an aspect ratio (width/height) of 1.6 or greater, i.e. wider than tall, are excluded from tracking.
By specifying `AILIA_TRACKER_FLAG_ALLOW_WIDE_ASPECT_RATIO` instead of `AILIA_TRACKER_FLAG_NONE`, wide boxes can also be included in tracking. This is useful for tasks such as vehicle detection.
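As a minimal sketch, reusing the `settings` populated in the sample above, only the `flags` argument of `ailiaTrackerCreate` changes:

```cpp
// Same creation call as in the sample, but wide boxes (w/h >= 1.6) are kept,
// e.g. for vehicle tracking. `settings` is an AILIATrackerSettings configured
// as shown earlier.
AILIATracker *wideTracker = nullptr;
int status = ailiaTrackerCreate(&wideTracker, AILIA_TRACKER_ALGORITHM_BYTE_TRACK,
                                &settings, AILIA_TRACKER_SETTINGS_VERSION,
                                AILIA_TRACKER_FLAG_ALLOW_WIDE_ASPECT_RATIO);
if (status != AILIA_STATUS_SUCCESS) {
    // Handle the error (e.g. log and abort).
}
```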