#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#undef UNICODE

#include "ailia.h"
#include "ailia_detector.h"
#include "ailia_tracker.h"
#include "detector_utils.h"
#include "utils.h"
#include "webcamera_utils.h"
#define WEIGHT_PATH "yolox_s.opt.onnx"
#define MODEL_PATH "yolox_s.opt.onnx.prototxt"
#define MODEL_INPUT_WIDTH 640
#define MODEL_INPUT_HEIGHT 640
#define IMAGE_WIDTH 640
#define IMAGE_HEIGHT 640
#define TARGET_CATEGORY 0
#define THRESHOLD 0.4f
#define IOU 0.45f
#define IS_VERTICAL_THRESHOLD 1.6
#define RECTANGLE_BORDER_SIZE 2
#define TEXT_COLOR cv::Scalar(0, 255, 0)
#define TEXT_SIZE 1.0
#define TEXT_BORDER_SIZE 1
#define TEXT_FONT cv::FONT_HERSHEY_SIMPLEX
// Runtime options; argument parsing is omitted from this excerpt.
static bool useWebCamera(false);
static bool saveOutputVideo(false);
static std::string inputVideoPath;
static std::string outputVideoPath;
static int args_env_id = -1; // -1 lets ailia choose the execution environment
int main(void) {
    std::srand(static_cast<unsigned int>(std::time(nullptr))); // seed the per-id random colors
    std::map<unsigned int, cv::Scalar> id2Color;

    // Load the YOLOX model (error checking omitted for brevity).
    AILIANetwork *ailia = nullptr;
    ailiaCreate(&ailia, args_env_id, AILIA_MULTITHREAD_AUTO);
    ailiaOpenStreamFile(ailia, MODEL_PATH);
    ailiaOpenWeightFile(ailia, WEIGHT_PATH);

    // Create the detector. NOTE: the format/range/algorithm constants below
    // are this sketch's assumptions for the YOLOX model; consult the SDK
    // sample for the exact values.
    AILIADetector *detector = nullptr;
    ailiaCreateDetector(&detector, ailia,
                        AILIA_NETWORK_IMAGE_FORMAT_BGRA,
                        AILIA_NETWORK_IMAGE_CHANNEL_FIRST,
                        AILIA_NETWORK_IMAGE_RANGE_UNSIGNED_FP32,
                        AILIA_DETECTOR_ALGORITHM_YOLOX,
                        80 /* COCO category count */,
                        AILIA_DETECTOR_FLAG_NORMAL);

    // Create the ByteTrack tracker. The settings values here are the
    // reference ByteTrack defaults, used as an assumption.
    AILIATracker *ailiaTracker = nullptr;
    AILIATrackerSettings settings = {};
    settings.score_threshold = 0.1f;  // drop very low-score detections
    settings.nms_threshold   = 0.7f;  // duplicate suppression (IoU)
    settings.track_threshold = 0.5f;  // first-stage candidate cutoff
    settings.match_threshold = 0.8f;  // IoU required to associate
    settings.track_buffer    = 30;    // frames to keep a lost track
    ailiaTrackerCreate(&ailiaTracker, AILIA_TRACKER_ALGORITHM_BYTE_TRACK,
                       &settings, AILIA_TRACKER_SETTINGS_VERSION, AILIA_TRACKER_FLAG_NONE);
    cv::VideoCapture capture;
    if (useWebCamera) {
        capture = cv::VideoCapture(atoi(inputVideoPath.c_str()));
    } else {
        capture = cv::VideoCapture(inputVideoPath.c_str());
    }
    cv::VideoWriter writer;
    if (saveOutputVideo) {
        int fourcc = cv::VideoWriter::fourcc('M', 'P', '4', 'V');
        writer = cv::VideoWriter(outputVideoPath.c_str(), fourcc,
                                 capture.get(cv::CAP_PROP_FPS),
                                 cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT));
    }
    while (true) {
        cv::Mat frame, resized_img, img;
        capture >> frame;
        if ((char)cv::waitKey(1) == 'q' || frame.empty()) {
            break;
        }
        // Resize the frame to the model input size and convert it to BGRA,
        // the pixel format passed to the detector below.
        adjust_frame_size(frame, resized_img, IMAGE_WIDTH, IMAGE_HEIGHT);
        cv::cvtColor(resized_img, img, cv::COLOR_BGR2BGRA);
        ailiaDetectorCompute(detector, img.data, MODEL_INPUT_WIDTH * 4,
                             MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT,
                             AILIA_IMAGE_FORMAT_BGRA, THRESHOLD, IOU);
        // Retrieve this frame's detections.
        unsigned int objCounts = 0;
        ailiaDetectorGetObjectCount(detector, &objCounts);
        AILIADetectorObject *ailiaDetectorObject = new AILIADetectorObject[objCounts];
        for (unsigned int i = 0; i < objCounts; i++) {
            ailiaDetectorGetObject(detector, &ailiaDetectorObject[i], i,
                                   AILIA_DETECTOR_OBJECT_VERSION);
        }
        // Feed the target-category detections (persons) to the tracker.
        for (unsigned int i = 0; i < objCounts; i++) {
            if (ailiaDetectorObject[i].category == TARGET_CATEGORY) {
                ailiaTrackerAddTarget(ailiaTracker, &ailiaDetectorObject[i],
                                      AILIA_DETECTOR_OBJECT_VERSION);
            }
        }
        delete[] ailiaDetectorObject;

        // Run ByteTrack association and fetch the tracked objects.
        ailiaTrackerCompute(ailiaTracker);
        unsigned int onlineSize = 0;
        ailiaTrackerGetObjectCount(ailiaTracker, &onlineSize);
        AILIATrackerObject *ailiaTrackerObject = new AILIATrackerObject[onlineSize];

        cv::Point leftUpperPoint, rightBottomPoint;
        cv::Scalar color;
        for (unsigned int i = 0; i < onlineSize; i++) {
            AILIATrackerObject &obj = ailiaTrackerObject[i];
            if (ailiaTrackerGetObject(ailiaTracker, &obj, i,
                                      AILIA_TRACKER_OBJECT_VERSION) != AILIA_STATUS_SUCCESS) {
                continue; // skip objects that could not be retrieved
            }
            const unsigned int id = obj.id;
            if (id2Color.find(id) != id2Color.end()) {
                color = id2Color[id];
            } else {
                // First time this track id is seen: assign it a random color.
                int b = rand() % 256;
                int g = rand() % 256;
                int r = rand() % 256;
                color = cv::Scalar(b, g, r);
                id2Color.insert(std::make_pair(id, color));
            }
            // Tracker coordinates are normalized; scale to the displayed image.
            const unsigned int x = static_cast<unsigned int>(obj.x * IMAGE_WIDTH);
            const unsigned int y = static_cast<unsigned int>(obj.y * IMAGE_HEIGHT);
            const unsigned int width = static_cast<unsigned int>(obj.w * IMAGE_WIDTH);
            const unsigned int height = static_cast<unsigned int>(obj.h * IMAGE_HEIGHT);
            leftUpperPoint = cv::Point(x, y);
            rightBottomPoint = cv::Point(x + width, y + height);
            cv::rectangle(resized_img, leftUpperPoint, rightBottomPoint,
                          color, RECTANGLE_BORDER_SIZE);
            cv::putText(resized_img, std::to_string(obj.id),
                        leftUpperPoint, TEXT_FONT, TEXT_SIZE, TEXT_COLOR,
                        TEXT_BORDER_SIZE);
        }
        delete[] ailiaTrackerObject;
        cv::imshow("result frame", resized_img);
        if (saveOutputVideo) {
            writer.write(resized_img);
        }
    }
    capture.release();
    if (saveOutputVideo) {
        writer.release();
    }
    cv::destroyAllWindows();
    ailiaTrackerDestroy(ailiaTracker);
    ailiaDestroyDetector(detector);
    ailiaDestroy(ailia);
    return 0;
}
The tracker API used above is declared in `ailia_tracker.h`:

- `int ailiaTrackerCreate(struct AILIATracker **tracker, int algorithm, const AILIATrackerSettings *settings, int version, int flags)`: creates an AILIATracker instance.
- `int ailiaTrackerAddTarget(struct AILIATracker *tracker, const AILIADetectorObject *detector_object, int version)`: sets a tracking target.
- `int ailiaTrackerCompute(struct AILIATracker *tracker)`: performs tracking.
- `int ailiaTrackerGetObjectCount(struct AILIATracker *tracker, unsigned int *obj_count)`: gets the number of detection results.
- `int ailiaTrackerGetObject(struct AILIATracker *tracker, AILIATrackerObject *obj, unsigned int index, unsigned int version)`: gets the detection results.
- `int ailiaTrackerDestroy(struct AILIATracker *tracker)`: destroys the AILIATracker instance.
- Constants: `AILIA_TRACKER_ALGORITHM_BYTE_TRACK` (ByteTrack), `AILIA_TRACKER_FLAG_NONE` (default flag), `AILIA_TRACKER_SETTINGS_VERSION`.
- `AILIATrackerObject` fields: `unsigned int id`, `unsigned int category`, `float x`, `float y`, `float w`, `float h`.
- `AILIATrackerSettings` fields: `float score_threshold`, `float nms_threshold`, `float track_threshold`, `float match_threshold`, `int track_buffer`.
ByteTrack, the tracking algorithm used by ailia Tracker, performs tracking based solely on the shape of the bounding box, using a Kalman filter for motion prediction; image features are not used.
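Because no image features are involved, the per-track state is just box geometry and its velocity. Here is a minimal constant-velocity sketch of the prediction step; the state layout is this sketch's simplification (the reference ByteTrack filter tracks the box center, aspect ratio, and height, along with their covariances):

```cpp
// Simplified track state: box geometry and its frame-to-frame velocity.
struct KalmanBoxState {
    float x, y, w, h;      // box position and size
    float vx, vy, vw, vh;  // estimated velocities
};

// Prediction step: advance the box by its velocity. The process noise and
// covariance update of a full Kalman filter are omitted in this sketch.
void predict(KalmanBoxState &s) {
    s.x += s.vx;
    s.y += s.vy;
    s.w += s.vw;
    s.h += s.vh;
}
```

The full per-frame pipeline then proceeds as follows.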
1. Obtain detection results
↓
2. Filter out low‑score detections using **score_threshold**
↓
3. Remove duplicate detections using **nms_threshold** (Non‑Maximum Suppression)
↓
4. Select high‑confidence detections (scores ≥ **track_threshold**)
as candidates for the **first‑stage matching**
↓
5. Use **match_threshold** to perform IoU‑based matching
→ Associate tracks from the previous frame with detections in the current frame
↓
6. For tracks that were not matched in the first stage,
attempt **second‑stage matching** using lower‑score detections
(scores between **score_threshold** and **track_threshold**)
if their IoU is sufficiently high.
→ **match_threshold** is also applied in this step.
↓
7. Manage lost tracks using **track_buffer**,
which defines how long a track is retained after being lost
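To make the staged use of these thresholds concrete, here is a compact C++ sketch of the association logic. It is an illustration only: the `Detection`, `Track`, and `iou` definitions are invented for this sketch, the greedy matcher stands in for ByteTrack's Hungarian assignment, and NMS (step 3) and the Kalman prediction that fills `Track::predicted` are omitted.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct Box { float x, y, w, h; };
struct Detection { Box box; float score; };
struct Track { Box predicted; int id; bool matched; int lost_frames; };

// IoU of two axis-aligned boxes.
static float iou(const Box &a, const Box &b) {
    float x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
    float x2 = std::min(a.x + a.w, b.x + b.w);
    float y2 = std::min(a.y + a.h, b.y + b.h);
    float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
    float uni = a.w * a.h + b.w * b.h - inter;
    return uni > 0.0f ? inter / uni : 0.0f;
}

// Greedy stand-in for ByteTrack's Hungarian assignment: give each unmatched
// track the best remaining detection whose IoU clears match_threshold.
static void associate(std::vector<Track> &tracks, std::vector<Detection> &dets,
                      float match_threshold) {
    for (Track &t : tracks) {
        if (t.matched) continue;
        int best = -1;
        float best_iou = match_threshold;
        for (std::size_t j = 0; j < dets.size(); j++) {
            float v = iou(t.predicted, dets[j].box);
            if (v >= best_iou) { best_iou = v; best = static_cast<int>(j); }
        }
        if (best >= 0) {
            t.matched = true;                 // track continues with this detection
            dets.erase(dets.begin() + best);  // detection is consumed
        }
    }
}

// One frame of the two-stage association (steps 2 and 4 through 7 above).
void byteTrackStep(std::vector<Track> &tracks, std::vector<Detection> dets,
                   float score_threshold, float track_threshold,
                   float match_threshold, int track_buffer) {
    std::vector<Detection> high, low;
    for (const Detection &d : dets) {
        if (d.score < score_threshold) continue;                 // step 2
        (d.score >= track_threshold ? high : low).push_back(d);  // step 4
    }
    for (Track &t : tracks) t.matched = false;
    associate(tracks, high, match_threshold);  // step 5: first stage
    associate(tracks, low, match_threshold);   // step 6: second stage
    for (Track &t : tracks)                    // step 7: lost-track bookkeeping
        t.lost_frames = t.matched ? 0 : t.lost_frames + 1;
    tracks.erase(std::remove_if(tracks.begin(), tracks.end(),
                                [&](const Track &t) { return t.lost_frames > track_buffer; }),
                 tracks.end());
    // Leftover high-score detections would start new tracks here.
}
```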
These configurable parameters correspond to the fields of `AILIATrackerSettings` listed above.
By default, the tracker operates in a mode optimized for human detection: following the standard ByteTrack algorithm, bounding boxes with an aspect ratio (width/height) of 1.6 or greater, i.e. wider than tall, are excluded from tracking.
By specifying `AILIA_TRACKER_FLAG_ALLOW_WIDE_ASPECT_RATIO` instead of `AILIA_TRACKER_FLAG_NONE`, wide boxes can also be included in tracking. This is useful for tasks such as vehicle detection.
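As a minimal sketch, reusing the `settings` populated in the sample above, only the `flags` argument of `ailiaTrackerCreate` changes:

```cpp
// Same creation call as in the sample, but wide boxes (w/h >= 1.6) are kept,
// e.g. for vehicle tracking. `settings` is an AILIATrackerSettings configured
// as shown earlier.
AILIATracker *wideTracker = nullptr;
int status = ailiaTrackerCreate(&wideTracker, AILIA_TRACKER_ALGORITHM_BYTE_TRACK,
                                &settings, AILIA_TRACKER_SETTINGS_VERSION,
                                AILIA_TRACKER_FLAG_ALLOW_WIDE_ASPECT_RATIO);
if (status != AILIA_STATUS_SUCCESS) {
    // Handle the error (e.g. log and abort).
}
```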