ailia_tracker  1.1.0.0
How to use the API

Overview of ailia Tracker API

Basic usage

With ailia Tracker, you create an instance with ailiaTrackerCreate, register object-detection results as tracking targets with ailiaTrackerAddTarget, run ailiaTrackerCompute to track the objects, and retrieve the tracking results with ailiaTrackerGetObject.

#include <ctime>
#include <cstdlib>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <time.h>
#include <vector>
#include <map>
#undef UNICODE
#include "ailia.h"
#include "ailia_detector.h"
#include "ailia_tracker.h"
#include "detector_utils.h"
#include "utils.h"
#include "webcamera_utils.h"
// ======================
// Parameters
// ======================
#define WEIGHT_PATH "yolox_s.opt.onnx"            // YOLOX-S detector weights
#define MODEL_PATH "yolox_s.opt.onnx.prototxt"    // network structure for the weights above
#define MODEL_INPUT_WIDTH 640   // detector network input width (pixels)
#define MODEL_INPUT_HEIGHT 640  // detector network input height (pixels)
#define IMAGE_WIDTH 640 // for video mode
#define IMAGE_HEIGHT 640 // for video mode
#define TARGET_CATEGORY 0 // person
#define THRESHOLD 0.4f  // detector confidence threshold passed to ailiaDetectorCompute
#define IOU 0.45f       // detector NMS IoU threshold passed to ailiaDetectorCompute
#define IS_VERTICAL_THRESHOLD 1.6  // ByteTrack aspect-ratio cutoff (see tracker flag docs below)
#define RECTANGLE_BORDER_SIZE 2    // bounding-box line thickness (pixels)
#define TEXT_COLOR cv::Scalar(0, 255, 0)  // track-ID label color (BGR green)
#define TEXT_SIZE 1.0
#define TEXT_BORDER_SIZE 1
#define TEXT_FONT cv::FONT_HERSHEY_SIMPLEX
// Runtime options; presumably filled in by a command-line parser that is
// omitted from this excerpt — TODO confirm against the full sample.
static bool useWebCamera(false);       // true: inputVideoPath is a camera index
static bool saveOutputVideo(false);    // true: write annotated frames to outputVideoPath
static std::string inputVideoPath;     // video file path, or camera index as a string
static std::string outputVideoPath;    // destination for the annotated video
static int args_env_id = -1;           // ailia environment (device) id; -1 = auto
void main(void){
std::map<unsigned int, cv::Scalar> id2Color;
// Create AILIANetwork instance
AILIANetwork *ailia;
// Create AILIADetector instance
AILIADetector *detector;
AILIATracker *ailiaTracker = nullptr;
settings.score_threshold = 0.1f;
settings.nms_threshold = 0.7f;
settings.track_threshold = 0.5f;
settings.track_buffer = 30;
settings.match_threshold = 0.8f;
status = ailiaTrackerCreate(&ailiaTracker,
// Create cv::VideoCapture
cv::VideoCapture capture;
if (useWebCamera) {
capture = cv::VideoCapture(atoi(inputVideoPath.c_str()));
} else {
capture = cv::VideoCapture(inputVideoPath.c_str());
}
// Create cv::VideoWriter
cv::VideoWriter writer;
if(saveOutputVideo){
int fourcc = cv::VideoWriter::fourcc('M','P','4','V');
writer = cv::VideoWriter(
outputVideoPath.c_str(), fourcc, capture.get(cv::CAP_PROP_FPS), cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT)
);
}
while (1) {
// Read frame
cv::Mat frame, resized_img, img;
capture >> frame;
if ((char)cv::waitKey(1) == 'q' || frame.empty()) {
break;
}
adjust_frame_size(frame, resized_img, IMAGE_WIDTH, IMAGE_HEIGHT);
cv::cvtColor(resized_img, img, cv::COLOR_BGR2BGRA);
// Execute object detection
ailiaDetectorCompute(detector, img.data, MODEL_INPUT_WIDTH * 4,
MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT,
AILIA_IMAGE_FORMAT_BGRA, THRESHOLD, IOU);
unsigned int objCounts;
ailiaDetectorGetObjectCount(detector, &objCounts);
AILIADetectorObject *ailiaDetectorObject = new AILIADetectorObject[objCounts];
for (int i = 0; i < objCounts; i++) {
ailiaDetectorGetObject(detector, &ailiaDetectorObject[i], i,
AILIA_DETECTOR_OBJECT_VERSION);
}
// Set object detection result to ailiaTracker
for(int i=0; i<objCounts; i++){
ailiaTrackerAddTarget(ailiaTracker, &ailiaDetectorObject[i], AILIA_DETECTOR_OBJECT_VERSION);
}
delete[] ailiaDetectorObject;
// Execute tracking
ailiaTrackerCompute(ailiaTracker);
unsigned int onlineSize;
// Get the number of objects detected by tracker
ailiaTrackerGetObjectCount(ailiaTracker, &onlineSize);
AILIATrackerObject *ailiaTrackerObject = new AILIATrackerObject[onlineSize];
// Get trakcing result
ailiaTrackerGetObject(ailiaTracker, ailiaTrackerObject, 1);
cv::Point leftUpperPoint, rightBottomPoint;
// Draw bounding boxes
cv::Scalar color;
for (unsigned int i = 0; i < onlineSize; i++) {
AILIATrackerObject obj = ailiaTrackerObject[i];
if(obj.category != TARGET_CATEGORY){
continue;
}
const unsigned int id = obj.id;
if(id2Color.find(id) != id2Color.end()){
color = id2Color[id];
}else{
int b = rand() % 256;
int g = rand() % 256;
int r = rand() % 256;
color = cv::Scalar(b, g, r);
id2Color.insert(std::make_pair(id, color));
}
const unsigned int x = static_cast<unsigned int>(obj.x * IMAGE_WIDTH);
const unsigned int y = static_cast<unsigned int>(obj.y * IMAGE_HEIGHT);
const unsigned int width = static_cast<unsigned int>(obj.w * IMAGE_WIDTH);
const unsigned int height = static_cast<unsigned int>(obj.h * IMAGE_HEIGHT);
leftUpperPoint = cv::Point(x, y);
rightBottomPoint = cv::Point(x+width, y+height);
cv::rectangle(resized_img, leftUpperPoint, rightBottomPoint,
color, RECTANGLE_BORDER_SIZE);
cv::putText(resized_img, std::to_string(obj.id),
leftUpperPoint, TEXT_FONT, TEXT_SIZE, TEXT_COLOR,
TEXT_BORDER_SIZE);
}
delete[] ailiaTrackerObject;
// Show result
cv::imshow("result frame", resized_img);
if(saveOutputVideo){
writer.write(resized_img);
}
}
capture.release();
f(saveOutputVideo){
writer.release();
}
cv::destroyAllWindows();
// Destroy AILIATracker instance
ailiaTrackerDestroy(ailiaTracker);
ailiaDestroyDetector(detector);
ailiaDestroy(ailia);
}
#define AILIA_TRACKER_ALGORITHM_BYTE_TRACK
ByteTrack.
Definition: ailia_tracker.h:43
int AILIA_API ailiaTrackerAddTarget(struct AILIATracker *tracker, const AILIADetectorObject *detector_object, int version)
Set tracking target.
int AILIA_API ailiaTrackerGetObjectCount(struct AILIATracker *tracker, unsigned int *obj_count)
Gets the number of detection results.
#define AILIA_TRACKER_FLAG_NONE
Default flag.
Definition: ailia_tracker.h:202
int AILIA_API ailiaTrackerDestroy(struct AILIATracker *tracker)
It destroys the AILIATracker instance.
int AILIA_API ailiaTrackerCreate(struct AILIATracker **tracker, int algorithm, const AILIATrackerSettings *settings, int version, int flags)
Creates a AILIATracker instance.
int AILIA_API ailiaTrackerCompute(struct AILIATracker *tracker)
Perform tracking.
#define AILIA_TRACKER_SETTINGS_VERSION
Definition: ailia_tracker.h:112
int AILIA_API ailiaTrackerGetObject(struct AILIATracker *tracker, AILIATrackerObject *obj, unsigned int index, unsigned int version)
Gets the detection results.
Definition: ailia_tracker.h:50
float x
Definition: ailia_tracker.h:82
float h
Definition: ailia_tracker.h:106
unsigned int category
Definition: ailia_tracker.h:66
float w
Definition: ailia_tracker.h:98
float y
Definition: ailia_tracker.h:90
unsigned int id
Definition: ailia_tracker.h:58
Definition: ailia_tracker.h:114
float nms_threshold
Definition: ailia_tracker.h:139
float score_threshold
Definition: ailia_tracker.h:126
float match_threshold
Definition: ailia_tracker.h:186
int track_buffer
Definition: ailia_tracker.h:171
float track_threshold
Definition: ailia_tracker.h:158

ailia Tracker Parameters

Byte Track, the tracking algorithm used by ailia Tracker, performs tracking based solely on the shape of the bounding box using a Kalman filter. Image features are not used.

1. Obtain detection results
  ↓
2. Filter out low‑score detections using **score_threshold**
  ↓
3. Remove duplicate detections using **nms_threshold** (Non‑Maximum Suppression)
  ↓
4. Select high‑confidence detections (scores ≥ **track_threshold**)
  as candidates for the **first‑stage matching**
  ↓
5. Use **match_threshold** to perform IoU‑based matching
  → Associate tracks from the previous frame with detections in the current frame
  ↓
6. For tracks that were not matched in the first stage,
  attempt **second‑stage matching** using lower‑score detections
  (scores between **score_threshold** and **track_threshold**)
  if their IoU is sufficiently high.
  → **match_threshold** is also applied in this step.
  ↓
7. Manage lost tracks using **track_buffer**,
  which defines how long a track is retained after being lost

The configurable parameters are shown below.

Parameter Name Description
score_threshold The lower limit of the score (confidence) used to determine whether to accept a detection result. Objects with detection scores below this value are excluded from tracking. Increasing the value will handle only reliable detections, while decreasing it will retain detections that may include noise.
Default: 0.1
nms_threshold The NMS (Non-Maximum Suppression) threshold used to remove duplicate detections. Detections whose IoU (overlap between objects) exceeds this value are considered to be the same object, and only the one with the highest score will remain. A smaller value removes duplicate detections more strictly, while a larger value makes it more lenient.
Default: 0.7
track_threshold The confidence threshold used when updating active tracks. Normally, only detections with scores above this value are used to update tracks. However, in the ByteTrack algorithm, detections with lower scores (between score_threshold and this threshold) may also be used for updates if their IoU with an existing track is sufficiently high. A higher value makes tracking stricter, while a lower value allows temporarily low‑confidence objects to be maintained.
Default: 0.5
track_buffer The maximum number of frames to keep a lost object track. If the same object is detected again within the specified number of frames, tracking resumes. Increasing this value retains lost objects longer, while decreasing it ends tracking sooner.
Default: 30
match_threshold The IoU (Intersection over Union) threshold for associating objects between frames. When the IoU is equal to or greater than this value, the track from the previous frame and the detection from the current frame are considered to be the same object. Increasing the value makes matching stricter, while decreasing it makes it more lenient.
Default: 0.8

ailia Tracker Flags

By default, it operates in a mode optimized for human detection. Following the standard Byte Track algorithm, bounding boxes with an aspect ratio (width / height) of 1.6 or greater — i.e. wider than tall — are excluded from tracking.

By specifying AILIA_TRACKER_FLAG_ALLOW_WIDE_ASPECT_RATIO, wide bounding boxes can also be included in tracking. This is useful for tasks such as vehicle detection, where objects are typically wider than they are tall.