From cbda3a57f51ed7edee0fd84c19f05e64583085e4 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 1 Jan 2026 21:02:08 +0300 Subject: [PATCH 01/17] Add D-FINE seg model --- README.md | 1 + src/Detector/OCVDNNDetector.cpp | 16 ++ src/Detector/OCVDNNDetector.h | 4 +- src/Detector/ONNXTensorRTDetector.cpp | 1 + src/Detector/tensorrt_onnx/DFINE_is.hpp | 196 ++++++++++++++++++ src/Detector/tensorrt_onnx/RFDETR_is.hpp | 2 +- src/Detector/tensorrt_onnx/class_detector.cpp | 4 + src/Detector/tensorrt_onnx/class_detector.h | 3 +- 8 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 src/Detector/tensorrt_onnx/DFINE_is.hpp diff --git a/README.md b/README.md index 7a1e3b27..742ea7a2 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml) ## Latest Features +- Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) - Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) - Big code cleanup from old style algorithms and detectors: some bgfg detectors, some VOT trackes, Face and Pedestrin detectors, Darknet based backend for old YOLO etc - YOLOv13 detector works with TensorRT! 
Export pre-trained PyTorch models [here (iMoonLab/yolov13)](https://github.com/iMoonLab/yolov13) to ONNX format and run Multitarget-tracker with `-e=3` example diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index c6982e5d..461d18b5 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -172,6 +172,7 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS; dictNetType["DFINE"] = ModelType::DFINE; dictNetType["YOLOV13"] = ModelType::YOLOV13; + dictNetType["DFINE_IS"] = ModelType::DFINE_IS; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -428,6 +429,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseYOLOv11(crop, detections, tmpRegions); break; + case ModelType::DFINE_IS: + ParseDFINE_IS(crop, detections, tmpRegions); + break; + default: ParseOldYOLO(crop, detections, tmpRegions); break; @@ -1055,3 +1060,14 @@ void OCVDNNDetector::ParseDFINE(const cv::Rect& crop, std::vector& dete } } +/// +/// \brief OCVDNNDetector::ParseDFINE_IS +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + assert(0); +} + diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index bd76ce48..601241ff 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -52,7 +52,8 @@ class OCVDNNDetector final : public BaseDetector RFDETR, RFDETR_IS, DFINE, - YOLOV13 + YOLOV13, + DFINE_IS }; cv::dnn::Net m_net; @@ -89,6 +90,7 @@ class OCVDNNDetector final : public BaseDetector void ParseRFDETR(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void 
ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index d6b7fddd..f30e2444 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -101,6 +101,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS; dictNetType["DFINE"] = tensor_rt::DFINE; dictNetType["YOLOV13"] = tensor_rt::YOLOV13; + dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp new file mode 100644 index 00000000..d1ceebfa --- /dev/null +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -0,0 +1,196 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The DFINE_is_onnx class +/// +class DFINE_is_onnx : public YoloONNX +{ +public: + DFINE_is_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("logits"); + outputTensorNames.push_back("boxes"); + outputTensorNames.push_back("mask_probs"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size: 1x3x640x640 + //1: name: logits, size: 1x300x80 + //2: name: boxes, size: 1x300x4 + //3: name: mask_probs, size: 1x300x160x160 + + + //0: name: input, size: 1x3x432x432 + //1: name: dets, size: 1x200x4 + //2: name: labels, size: 1x200x91 + //3: name: 4245, size: 1x200x108x108 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + cv::Size inputSize(m_inputDims[0].d[3], 
m_inputDims[0].d[2]); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + int labelsInd = 0; + int detsInd = 1; + int segInd = 2; + + auto dets = outputs[detsInd]; + auto labels = outputs[labelsInd]; + + auto masks = outputs[segInd]; + + size_t ncInd = 2; + size_t lenInd = 1; + + + size_t nc = m_outpuDims[labelsInd].d[ncInd]; + size_t len = static_cast(m_outpuDims[detsInd].d[lenInd]) / m_params.m_explicitBatchSize; + auto volume0 = len * m_outpuDims[detsInd].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[labelsInd].d[ncInd]; // Volume(m_outpuDims[0]); + labels += volume1 * imgIdx; + + int segChannels = static_cast(m_outpuDims[segInd].d[1]); + int segWidth = static_cast(m_outpuDims[segInd].d[2]); + int segHeight = static_cast(m_outpuDims[segInd].d[3]); + masks += imgIdx * segChannels * segWidth * segHeight; + + cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1); + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.m_confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + classConf = conf; + classId = cli; + } + } + + if (classConf >= m_params.m_confThreshold) + { + float d0 = dets[0]; + float d1 = dets[1]; + float d2 = dets[2]; + float d3 = dets[3]; + + float x = fw * (inputSizef.width * (d0 - d2 / 2.f) - m_resizedROI.x); + float y = fh * (inputSizef.height * (d1 - d3 / 2.f) - m_resizedROI.y); + float 
width = fw * inputSizef.width * d2; + float height = fh * inputSizef.height * d3; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " << classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect2f(x, y, width, height) << std::endl; + // std::cout << "dets = " << d0 << ", " << d1 << ", " << d2 << ", " << d3 << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + double maskThreshold = 0.1; + for (int row = 0; row < segHeight; ++row) + { + const float* maskPtr = masks + row * segWidth; + uchar* binMaskPtr = binaryMask8U.ptr(row); + + for (int col = 0; col < segWidth; ++col) + { + binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 255 : 0; + } + } + + tensor_rt::Result& resObj = resBoxes.back(); + + cv::Rect smallRect; + smallRect.x = cvRound(segHeight * (d0 - d2 / 2.f)); + smallRect.y = cvRound(segHeight * (d1 - d3 / 2.f)); + smallRect.width = cvRound(segHeight * d2); + smallRect.height = cvRound(segHeight * d3); + smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight)); + + if (smallRect.area() > 0) + { + cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST); + +#if 0 + static int globalObjInd = 0; + SaveMat(mask, std::to_string(globalObjInd) + "_mask", ".png", "tmp", true); + SaveMat(binaryMask, std::to_string(globalObjInd) + "_bin_mask", ".png", "tmp", true); + SaveMat(binaryMask8U, std::to_string(globalObjInd) + "_bin_mask_8u", ".png", "tmp", true); + SaveMat(resObj.m_boxMask, std::to_string(globalObjInd++) + "_obj_mask", ".png", "tmp", true); + std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; +#endif + + std::vector> contours; + std::vector hierarchy; + cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + + for (const auto& contour : contours) + { + cv::Rect br = 
cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resObj.m_brect.x; + int dy = resObj.m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = rr.center.x * fw + dx; + rr.center.y = rr.center.y * fw + dy; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound(dx + br.x * fw); + br.y = cvRound(dy + br.y * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resObj.m_brect = br; + //resObj.m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } + } + else + { + resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255)); + } + } + + dets += m_outpuDims[detsInd].d[ncInd]; + labels += m_outpuDims[labelsInd].d[ncInd]; + masks += segWidth * segHeight; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp index cab24fdf..5f3811f2 100644 --- a/src/Detector/tensorrt_onnx/RFDETR_is.hpp +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -34,8 +34,8 @@ class RFDETR_is_onnx : public YoloONNX const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); - cv::Size2f inputSizef(m_inputDims[0].d[3], m_inputDims[0].d[2]); cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index 90d2536a..c12476ac 100644 --- 
a/src/Detector/tensorrt_onnx/class_detector.cpp +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -18,6 +18,7 @@ #include "RFDETR_is.hpp" #include "DFINE_bb.hpp" #include "YoloONNXv13_bb.hpp" +#include "DFINE_is.hpp" namespace tensor_rt { @@ -99,6 +100,9 @@ case ModelType::YOLOV13: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; + case ModelType::DFINE_IS: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; } // Threshold values diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index ccbacffd..de8af380 100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -62,7 +62,8 @@ namespace tensor_rt RFDETR, RFDETR_IS, DFINE, - YOLOV13 + YOLOV13, + DFINE_IS }; /// From 9a65e3cc2d815c12a5f2374da2dcf695f3cbd7fc Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 1 Jan 2026 21:04:46 +0300 Subject: [PATCH 02/17] Remove hierarchy from segmentation --- src/Detector/MotionDetector.cpp | 5 ++--- src/Detector/tensorrt_onnx/DFINE_is.hpp | 3 +-- src/Detector/tensorrt_onnx/RFDETR_is.hpp | 3 +-- src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp | 5 ++--- src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp | 5 ++--- src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp | 5 ++--- 6 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index 8b0d2d14..db6da7fe 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -45,11 +45,10 @@ void MotionDetector::DetectContour() { m_regions.clear(); std::vector> contours; - std::vector hierarchy; #if (CV_VERSION_MAJOR < 4) - cv::findContours(m_fg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(m_fg, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); #else - 
cv::findContours(m_fg, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (size_t i = 0; i < contours.size(); ++i) { diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp index d1ceebfa..1d79e913 100644 --- a/src/Detector/tensorrt_onnx/DFINE_is.hpp +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -147,8 +147,7 @@ class DFINE_is_onnx : public YoloONNX #endif std::vector> contours; - std::vector hierarchy; - cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); for (const auto& contour : contours) { diff --git a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp index 5f3811f2..3eb958d0 100644 --- a/src/Detector/tensorrt_onnx/RFDETR_is.hpp +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -135,8 +135,7 @@ class RFDETR_is_onnx : public YoloONNX #endif std::vector> contours; - std::vector hierarchy; - cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); for (const auto& contour : contours) { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index f65d75b3..ebbf8137 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -265,11 +265,10 @@ class YOLOv11_instance_onnx : public YoloONNX #endif std::vector> contours; - std::vector hierarchy; #if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); + 
cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); #else - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index ec73b7d6..23a0445f 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -217,11 +217,10 @@ class YOLOv7_instance_onnx : public YoloONNX #endif std::vector> contours; - std::vector hierarchy; #if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); #else - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 0bc2e598..1f94ae84 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -267,11 +267,10 @@ class YOLOv8_instance_onnx : public YoloONNX #endif std::vector> contours; - std::vector hierarchy; #if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); #else - cv::findContours(resBoxes[i].m_boxMask, 
contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { From d35c32ae1f55774233ebf51d77866a2952ad1e1d Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 1 Jan 2026 21:07:20 +0300 Subject: [PATCH 03/17] Remove OpenCV 3 support --- src/Detector/BackgroundSubtract.cpp | 8 -------- src/Detector/BaseDetector.h | 4 ---- src/Detector/MotionDetector.cpp | 5 +---- src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp | 5 +---- src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp | 5 +---- src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp | 5 +---- src/Tracker/BaseTracker.cpp | 4 ---- 7 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/Detector/BackgroundSubtract.cpp b/src/Detector/BackgroundSubtract.cpp index dff18952..a4fc689c 100644 --- a/src/Detector/BackgroundSubtract.cpp +++ b/src/Detector/BackgroundSubtract.cpp @@ -206,21 +206,13 @@ cv::UMat BackgroundSubtract::GetImg(const cv::UMat& image) if (image.channels() == 1) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_GRAY2BGR); -#else cv::cvtColor(image, newImg, cv::COLOR_GRAY2BGR); -#endif return newImg; } else if (image.channels() == 3) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_BGR2GRAY); -#else cv::cvtColor(image, newImg, cv::COLOR_BGR2GRAY); -#endif return newImg; } } diff --git a/src/Detector/BaseDetector.h b/src/Detector/BaseDetector.h index 7a24f336..bf8ae882 100644 --- a/src/Detector/BaseDetector.h +++ b/src/Detector/BaseDetector.h @@ -169,11 +169,7 @@ class BaseDetector { if (region.m_boxMask.empty()) { -#if (CV_VERSION_MAJOR < 4) - cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED); -#else cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); -#endif } else { diff --git 
a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index db6da7fe..6f20b352 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -45,11 +45,8 @@ void MotionDetector::DetectContour() { m_regions.clear(); std::vector> contours; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(m_fg, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); -#else cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); -#endif + for (size_t i = 0; i < contours.size(); ++i) { cv::Rect br = cv::boundingRect(contours[i]); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index ebbf8137..dff5d318 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -265,11 +265,8 @@ class YOLOv11_instance_onnx : public YoloONNX #endif std::vector> contours; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); -#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); -#endif + for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index 23a0445f..1182f6ca 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -217,11 +217,8 @@ class YOLOv7_instance_onnx : public YoloONNX #endif std::vector> contours; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); -#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); -#endif + for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); 
diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 1f94ae84..454516b3 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -267,11 +267,8 @@ class YOLOv8_instance_onnx : public YoloONNX #endif std::vector> contours; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); -#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); -#endif + for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); diff --git a/src/Tracker/BaseTracker.cpp b/src/Tracker/BaseTracker.cpp index 7fbae980..ec68b858 100644 --- a/src/Tracker/BaseTracker.cpp +++ b/src/Tracker/BaseTracker.cpp @@ -173,11 +173,7 @@ void CTracker::UpdateTrackingState(const regions_t& regions, cv::Mat foreground(dbgAssignment.size(), CV_8UC1, cv::Scalar(0, 0, 100)); for (const auto& track : m_tracks) { -#if (CV_VERSION_MAJOR < 4) - cv::ellipse(foreground, track->GetLastRect(), cv::Scalar(255, 255, 255), CV_FILLED); -#else cv::ellipse(foreground, track->GetLastRect(), cv::Scalar(255, 255, 255), cv::FILLED); -#endif } const int chans = dbgAssignment.channels(); From db2f535cb84ec09b353be88b048c01da39a6fc22 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 1 Jan 2026 21:39:20 +0300 Subject: [PATCH 04/17] findContoursLinkRuns instead of findContours --- src/Detector/MotionDetector.cpp | 7 +++++-- src/Detector/tensorrt_onnx/DFINE_is.hpp | 7 +++++-- src/Detector/tensorrt_onnx/RFDETR_is.hpp | 7 +++++-- src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp | 5 ++++- src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp | 5 ++++- src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp | 5 ++++- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index 
6f20b352..cf6f3c26 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -45,8 +45,11 @@ void MotionDetector::DetectContour() { m_regions.clear(); std::vector> contours; - cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(m_fg, contours); +#else + cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif for (size_t i = 0; i < contours.size(); ++i) { cv::Rect br = cv::boundingRect(contours[i]); diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp index 1d79e913..b11dde33 100644 --- a/src/Detector/tensorrt_onnx/DFINE_is.hpp +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -40,7 +40,7 @@ class DFINE_is_onnx : public YoloONNX const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); - cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]); + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; @@ -147,8 +147,11 @@ class DFINE_is_onnx : public YoloONNX #endif std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#endif for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); diff --git 
a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp index 3eb958d0..9460fb75 100644 --- a/src/Detector/tensorrt_onnx/RFDETR_is.hpp +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -34,7 +34,7 @@ class RFDETR_is_onnx : public YoloONNX const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); - cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]); + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; @@ -135,8 +135,11 @@ class RFDETR_is_onnx : public YoloONNX #endif std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#endif for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index dff5d318..f8bff35e 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -265,8 +265,11 @@ class YOLOv11_instance_onnx : public YoloONNX #endif std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#endif for (const auto& contour : contours) { 
cv::Rect br = cv::boundingRect(contour); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index 1182f6ca..eb1bf356 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -217,8 +217,11 @@ class YOLOv7_instance_onnx : public YoloONNX #endif std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#endif for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 454516b3..521b1e8c 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -267,8 +267,11 @@ class YOLOv8_instance_onnx : public YoloONNX #endif std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#endif for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); From 3cbecc6ae62a7341187aa585f8b99f0edbf34692 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 1 Jan 2026 21:46:46 +0300 Subject: [PATCH 05/17] Disable rotated rectangle from contour calculation in segmentation models --- src/Detector/tensorrt_onnx/DFINE_is.hpp | 2 ++ src/Detector/tensorrt_onnx/RFDETR_is.hpp | 2 ++ src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp | 2 ++ src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp | 2 ++ src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp | 2 ++ 5 files changed, 10 insertions(+) diff 
--git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp index b11dde33..afde0fc1 100644 --- a/src/Detector/tensorrt_onnx/DFINE_is.hpp +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -146,6 +146,7 @@ class DFINE_is_onnx : public YoloONNX std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; #endif +#if 0 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resObj.m_boxMask, contours); @@ -181,6 +182,7 @@ class DFINE_is_onnx : public YoloONNX break; } } +#endif } else { diff --git a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp index 9460fb75..d2f1988e 100644 --- a/src/Detector/tensorrt_onnx/RFDETR_is.hpp +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -134,6 +134,7 @@ class RFDETR_is_onnx : public YoloONNX std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; #endif +#if 0 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resObj.m_boxMask, contours); @@ -169,6 +170,7 @@ class RFDETR_is_onnx : public YoloONNX break; } } +#endif } else { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index f8bff35e..6348ae29 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -264,6 +264,7 @@ class YOLOv11_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 0 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); @@ -299,6 +300,7 @@ class YOLOv11_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; diff --git 
a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index eb1bf356..60a90a38 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -216,6 +216,7 @@ class YOLOv7_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 0 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); @@ -251,6 +252,7 @@ class YOLOv7_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 521b1e8c..944dc571 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -266,6 +266,7 @@ class YOLOv8_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 0 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); @@ -303,6 +304,7 @@ class YOLOv8_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; From 6b5ab80e9e940d1473d68c7971447dfb52c7428d Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sat, 3 Jan 2026 21:43:47 +0300 Subject: [PATCH 06/17] RT-DETRv4 works --- README.md | 2 + data/dota/DOTA_v1.0.names | 15 ++++ data/dota/DOTA_v1.5.names | 16 ++++ data/settings_dfine_seg.ini | 141 ++++++++++++++++++++++++++++++++++++ data/settings_rtdetrv4.ini | 141 ++++++++++++++++++++++++++++++++++++ 5 files changed, 315 insertions(+) create mode 100644 data/dota/DOTA_v1.0.names create mode 100644 data/dota/DOTA_v1.5.names create mode 100644 data/settings_dfine_seg.ini create mode 100644 data/settings_rtdetrv4.ini 
diff --git a/README.md b/README.md index 742ea7a2..2c010235 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ [![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml) ## Latest Features + +- Add RT-DETRv4 (API similar D-FINE) detection model [RT-DETRs/RT-DETRv4](https://github.com/RT-DETRs/RT-DETRv4) - Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) - Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) - Big code cleanup from old style algorithms and detectors: some bgfg detectors, some VOT trackes, Face and Pedestrin detectors, Darknet based backend for old YOLO etc diff --git a/data/dota/DOTA_v1.0.names b/data/dota/DOTA_v1.0.names new file mode 100644 index 00000000..adea7619 --- /dev/null +++ b/data/dota/DOTA_v1.0.names @@ -0,0 +1,15 @@ +plane +ship +storage_tank +baseball_diamond +tennis_court +basketball_court +ground_track_field +harbor +bridge +large_vehicle +small_vehicle +helicopter +roundabout +soccer_ball_field +swimming_pool \ No newline at end of file diff --git a/data/dota/DOTA_v1.5.names b/data/dota/DOTA_v1.5.names new file mode 100644 index 00000000..4d18c4f1 --- /dev/null +++ b/data/dota/DOTA_v1.5.names @@ -0,0 +1,16 @@ +baseball_diamond +basketball_court +bridge +container_crane +ground_track_field +harbor +helicopter +large_vehicle +plane +roundabout +ship +small_vehicle +soccer_ball_field +storage_tank +swimming_pool +tennis_court diff --git a/data/settings_dfine_seg.ini b/data/settings_dfine_seg.ini new file mode 100644 index 00000000..e82c504a --- /dev/null +++ b/data/settings_dfine_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# 
DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE_IS + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion 
model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_rtdetrv4.ini b/data/settings_rtdetrv4.ini new file mode 100644 index 00000000..4734cf9f --- /dev/null +++ b/data/settings_rtdetrv4.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + 
+#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 From 6ca6a58b70fa49a8816ae603e31106830c4e36cb Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 18 Jan 2026 13:17:44 +0300 Subject: [PATCH 07/17] Add yolov26 --- data/settings_yolov26m.ini | 141 +++++++++ example/main.cpp | 2 +- src/Detector/OCVDNNDetector.cpp | 55 ++++ src/Detector/OCVDNNDetector.h | 6 +- src/Detector/ONNXTensorRTDetector.cpp | 6 +- src/Detector/tensorrt_onnx/DFINE_is.hpp | 6 - src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp | 64 ++++ .../tensorrt_onnx/YoloONNXv26_instance.hpp | 292 ++++++++++++++++++ .../tensorrt_onnx/YoloONNXv26_obb.hpp | 131 ++++++++ src/Detector/tensorrt_onnx/class_detector.cpp | 12 + src/Detector/tensorrt_onnx/class_detector.h | 5 +- 11 files changed, 710 insertions(+), 10 deletions(-) create mode 100644 data/settings_yolov26m.ini create mode 100644 src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp create mode 100644 src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp create mode 100644 src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini new file mode 100644 index 00000000..625ce893 --- /dev/null +++ b/data/settings_yolov26m.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_INFERENCE_ENGINE + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx 
+nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size 
(3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/example/main.cpp b/example/main.cpp index 2ef4f32a..f266bbd6 100644 --- a/example/main.cpp +++ b/example/main.cpp @@ -15,7 +15,7 @@ int main(int argc, char** argv) const char* keys = { "{ @1 |../data/atrium.avi | movie file | }" - "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" + "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 2 - opencv_dnn detector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" "{ sf start_frame |0 | Start a video from this position | }" "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" "{ ed end_delay |0 | Delay in milliseconds after video ending | }" diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 461d18b5..70ff16d3 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -173,6 +173,9 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["DFINE"] = ModelType::DFINE; dictNetType["YOLOV13"] = ModelType::YOLOV13; dictNetType["DFINE_IS"] = ModelType::DFINE_IS; + dictNetType["YOLOV26"] = 
ModelType::YOLOV26; + dictNetType["YOLOV26_OBB"] = ModelType::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = ModelType::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -400,16 +403,21 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr case ModelType::YOLOV12: ParseYOLOv11(crop, detections, tmpRegions); break; + case ModelType::YOLOV26: + ParseYOLOv26(crop, detections, tmpRegions); + break; case ModelType::YOLOV5_OBB: case ModelType::YOLOV8_OBB: case ModelType::YOLOV11_OBB: + case ModelType::YOLOV26_OBB: ParseYOLOv5_8_11_obb(crop, detections, tmpRegions); break; case ModelType::YOLOV5Mask: case ModelType::YOLOV8Mask: case ModelType::YOLOV11Mask: + case ModelType::YOLOV26Mask: ParseYOLOv5_8_11_seg(crop, detections, tmpRegions); break; @@ -1071,3 +1079,50 @@ void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector& d assert(0); } +/// +/// \brief OCVDNNDetector::ParseYOLOv26 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 6 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", score = 
" << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 601241ff..7b035ca6 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -53,7 +53,10 @@ class OCVDNNDetector final : public BaseDetector RFDETR_IS, DFINE, YOLOV13, - DFINE_IS + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask }; cv::dnn::Net m_net; @@ -91,6 +94,7 @@ class OCVDNNDetector final : public BaseDetector void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index f30e2444..9d28da54 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -102,6 +102,9 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictNetType["DFINE"] = tensor_rt::DFINE; dictNetType["YOLOV13"] = tensor_rt::YOLOV13; dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS; + dictNetType["YOLOV26"] = tensor_rt::YOLOV26; + dictNetType["YOLOV26_OBB"] = tensor_rt::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = tensor_rt::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -304,7 +307,8 @@ void ONNXTensorRTDetector::CalcMotionMap(cv::Mat& frame) { if (m_localConfig.m_netType == tensor_rt::YOLOV7Mask || m_localConfig.m_netType == tensor_rt::YOLOV8Mask - 
|| m_localConfig.m_netType == tensor_rt::YOLOV11Mask) + || m_localConfig.m_netType == tensor_rt::YOLOV11Mask + || m_localConfig.m_netType == tensor_rt::YOLOV26Mask) { static std::vector color; if (color.empty()) diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp index afde0fc1..84c46ed9 100644 --- a/src/Detector/tensorrt_onnx/DFINE_is.hpp +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -31,12 +31,6 @@ class DFINE_is_onnx : public YoloONNX //2: name: boxes, size: 1x300x4 //3: name: mask_probs, size: 1x300x160x160 - - //0: name: input, size: 1x3x432x432 - //1: name: dets, size: 1x200x4 - //2: name: labels, size: 1x200x91 - //3: name: 4245, size: 1x200x108x108 - const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp new file mode 100644 index 00000000..7677244b --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_bb_onnx class +/// +class YOLOv26_bb_onnx : public YoloONNX +{ +public: + YOLOv26_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); 
+ auto volume = len * m_outpuDims[0].d[2]; + output += volume * imgIdx; + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int64_t classId = output[ind + 5]; + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * (output[ind + 2] - output[ind + 0]); + float height = fh * (output[ind + 3] - output[ind + 1]); + + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp new file mode 100644 index 00000000..88204f69 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -0,0 +1,292 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv26_instance_onnx class +/// +class YOLOv26_instance_onnx : public YoloONNX +{ +public: + YOLOv26_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const 
std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[outInd]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[outInd].d[ncInd] - 4 - 32); + int dimensions = nc + 32 + 4; + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "output1 nbDims: " << m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + //std::cout << "output nbDims: " << 
m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + std::vector> picked_proposals; + int net_width = nc + 4 + segChannels; +#endif + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4 + 32); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // std::cout << "without nms: mem" << i << ": "; + // for (size_t ii = 0; ii < 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = 4; ii < nc + 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k] - output[k + 2] / 2; + float y = output[k + 1] - output[k + 3] / 2; + float width = output[k + 2]; + float height = output[k + 3]; + + //auto ClampToFrame = [](float& v, float& size, int hi) -> int + //{ + // int res = 0; +// + // if (size < 1) + // size = 0; +// + // if (v < 0) + // { + // res = v; + // v = 0; + // return res; + // } + // else if (v + size > hi - 1) + // { + // res = v; + // v = hi - 1 
- size; + // if (v < 0) + // { + // size += v; + // v = 0; + // } + // res -= v; + // return res; + // } + // return res; + //}; + //ClampToFrame(x, width, frameSize.width); + //ClampToFrame(y, height, frameSize.height); + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (width > 4 && height > 4) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 4 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + } + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[segInd]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat 
mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 0 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp new file mode 100644 index 00000000..f651e5ff --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_obb_onnx class +/// +class YOLOv26_obb_onnx : public YoloONNX +{ +public: + YOLOv26_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - shapeDataSize); + int dimensions = nc + shapeDataSize; + size_t len = 
static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) + float cx = fw * (output[k] - m_resizedROI.x); + float cy = fh * (output[k + 1] - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + float angle = 180.f * output[k + nc + shapeDataSize - 1] / static_cast(M_PI); + 
rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); + + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) + { + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index c12476ac..70f2aa48 100644 --- a/src/Detector/tensorrt_onnx/class_detector.cpp +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -19,6 +19,9 @@ #include "DFINE_bb.hpp" #include "YoloONNXv13_bb.hpp" #include "DFINE_is.hpp" +#include "YoloONNXv26_bb.hpp" +#include "YoloONNXv26_obb.hpp" +#include "YoloONNXv26_instance.hpp" namespace tensor_rt { @@ -85,6 +88,15 @@ namespace tensor_rt case ModelType::YOLOV11Mask: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; + case ModelType::YOLOV26: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; case ModelType::YOLOV12: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index de8af380..4381f7c9 
100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -63,7 +63,10 @@ namespace tensor_rt RFDETR_IS, DFINE, YOLOV13, - DFINE_IS + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask, }; /// From 0c2c5ef07411ea8691fc7c270781daf754781d67 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 18 Jan 2026 17:38:01 +0300 Subject: [PATCH 08/17] Add yolov26-obb --- data/settings_yolov26m_obb.ini | 141 ++++++++++++++++++ src/Detector/OCVDNNDetector.cpp | 55 ++++++- src/Detector/OCVDNNDetector.h | 1 + .../tensorrt_onnx/YoloONNXv26_obb.hpp | 105 +++---------- 4 files changed, 212 insertions(+), 90 deletions(-) create mode 100644 data/settings_yolov26m_obb.ini diff --git a/data/settings_yolov26m_obb.ini b/data/settings_yolov26m_obb.ini new file mode 100644 index 00000000..d31e8425 --- /dev/null +++ b/data/settings_yolov26m_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26_OBB + 
+#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this number of seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of an object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 70ff16d3..3434385f 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -403,14 +403,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr case ModelType::YOLOV12: ParseYOLOv11(crop, detections, tmpRegions); break; - case ModelType::YOLOV26: - ParseYOLOv26(crop, detections, tmpRegions); - break; case ModelType::YOLOV5_OBB: case ModelType::YOLOV8_OBB: case ModelType::YOLOV11_OBB: - case ModelType::YOLOV26_OBB: ParseYOLOv5_8_11_obb(crop, detections, tmpRegions); break; @@ -441,6 +437,14 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseDFINE_IS(crop, detections, tmpRegions); break; + case ModelType::YOLOV26: + ParseYOLOv26(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26_OBB: + ParseYOLOv26_obb(crop, detections, tmpRegions); + break; + default: ParseOldYOLO(crop, detections, tmpRegions); break; @@ -1126,3 +1130,46 @@ void OCVDNNDetector::ParseYOLOv26(const cv::Rect& crop, std::vector& de } } } + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_obb +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& 
tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 7 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0] * x_factor; + float y = dets[ind + 1] * y_factor; + float w = dets[ind + 2] * x_factor; + float h = dets[ind + 3] * y_factor; + float angle = 180.f * dets[ind + 6] / static_cast(M_PI); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x + crop.x, y + crop.y), cv::Size2f(w, h), angle), T2T(classId), static_cast(maxClassScore)); + } + } +} diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 7b035ca6..3e1669ff 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -95,6 +95,7 @@ class OCVDNNDetector final : public BaseDetector void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp index f651e5ff..0b70b509 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp 
+++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -25,107 +25,40 @@ class YOLOv26_obb_onnx : public YoloONNX std::vector resBoxes; //0: name: images, size: 1x3x1024x1024 - //1: name: output0, size: 1x20x21504 - //20: 15 DOTA classes + x + y + w + h + a - constexpr int shapeDataSize = 5; - + //1: name: output0, size: 1x300x7 + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); auto output = outputs[0]; - size_t ncInd = 1; - size_t lenInd = 2; - int nc = static_cast(m_outpuDims[0].d[ncInd] - shapeDataSize); - int dimensions = nc + shapeDataSize; - size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; - //auto Volume = [](const nvinfer1::Dims& d) - //{ - // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); - //}; - auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; output += volume * imgIdx; - //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; - - cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); - rawMemory = rawMemory.reshape(1, dimensions); - cv::transpose(rawMemory, rawMemory); - output = (float*)rawMemory.data; - - //std::cout << "output[0] mem:\n"; - //for (size_t ii = 0; ii < 100; ++ii) - //{ - // std::cout << ii << ": "; - // for (size_t jj = 0; jj < 20; ++jj) - // { - // std::cout << output[ii * 20 + jj] << " "; - // } - // std::cout << ";" << std::endl; - //} - //std::cout << ";" << std::endl; - - std::vector classIds; - std::vector confidences; - std::vector rectBoxes; - classIds.reserve(len); - confidences.reserve(len); - rectBoxes.reserve(len); + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", 
volume = " << volume << std::endl; for (size_t i = 0; i < len; ++i) { - // Box - size_t k = i * (nc + shapeDataSize); - - int classId = -1; - float objectConf = 0.f; - for (int j = 0; j < nc; ++j) - { - const float classConf = output[k + 4 + j]; - if (classConf > objectConf) - { - classId = j; - objectConf = classConf; - } - } + auto ind = i * m_outpuDims[0].d[2]; - //if (i == 0) - //{ - // for (int jj = 0; jj < 20; ++jj) - // { - // std::cout << output[jj] << " "; - // } - // std::cout << std::endl; - //} + float classConf = output[ind + 4]; + int64_t classId = output[ind + 5]; - if (objectConf >= m_params.m_confThreshold) + if (classConf >= m_params.m_confThreshold) { - classIds.push_back(classId); - confidences.push_back(objectConf); - - // (center x, center y, width, height) - float cx = fw * (output[k] - m_resizedROI.x); - float cy = fh * (output[k + 1] - m_resizedROI.y); - float width = fw * output[k + 2]; - float height = fh * output[k + 3]; - float angle = 180.f * output[k + nc + shapeDataSize - 1] / static_cast(M_PI); - rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); - - //if (rectBoxes.size() == 1) - // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * output[ind + 2]; + float height = fh * output[ind + 3]; + float angle = 180.f * output[ind + 6] / static_cast(M_PI); + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << ", angle = " << angle << std::endl; + 
+ resBoxes.emplace_back(classId, classConf, cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(width, height), angle)); } } - // Non-maximum suppression to eliminate redudant overlapping boxes - //std::vector indices; - //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); - //resBoxes.reserve(indices.size()); - - resBoxes.reserve(rectBoxes.size()); - for (size_t bi = 0; bi < rectBoxes.size(); ++bi) - { - resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); - } - return resBoxes; } }; From 55ce5cea05d3c6a6d63a0ab29b8295da114cf63f Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 18 Jan 2026 21:56:46 +0300 Subject: [PATCH 09/17] Add yolov26-seg --- data/settings_yolov26m_seg.ini | 141 +++++++++++++++ src/Detector/OCVDNNDetector.cpp | 54 +++++- src/Detector/OCVDNNDetector.h | 1 + .../tensorrt_onnx/YoloONNXv11_instance.hpp | 2 +- .../tensorrt_onnx/YoloONNXv26_instance.hpp | 167 +++--------------- .../tensorrt_onnx/YoloONNXv7_instance.hpp | 2 +- .../tensorrt_onnx/YoloONNXv8_instance.hpp | 2 +- src/Detector/tensorrt_onnx/class_detector.h | 2 +- 8 files changed, 224 insertions(+), 147 deletions(-) create mode 100644 data/settings_yolov26m_seg.ini diff --git a/data/settings_yolov26m_seg.ini b/data/settings_yolov26m_seg.ini new file mode 100644 index 00000000..3a4ed1d0 --- /dev/null +++ b/data/settings_yolov26m_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 
+ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 
0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this number of seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of an object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 3434385f..82f4a23d 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -413,7 +413,6 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr case ModelType::YOLOV5Mask: case ModelType::YOLOV8Mask: case ModelType::YOLOV11Mask: - case ModelType::YOLOV26Mask: ParseYOLOv5_8_11_seg(crop, detections, tmpRegions); break; @@ -445,6 +444,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseYOLOv26_obb(crop, detections, tmpRegions); break; + case ModelType::YOLOV26Mask: + ParseYOLOv26_seg(crop, detections, tmpRegions); + break; + default: ParseOldYOLO(crop, detections, tmpRegions); break; @@ -1173,3 +1176,52 @@ void OCVDNNDetector::ParseYOLOv26_obb(const cv::Rect& crop, std::vector } } } + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_seg +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void 
OCVDNNDetector::ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 38 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 3e1669ff..3a55dd67 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -96,6 +96,7 @@ class OCVDNNDetector final : public BaseDetector void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git 
a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index 6348ae29..641c5c7d 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -264,7 +264,7 @@ class YOLOv11_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp index 88204f69..abcb3b4f 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -38,177 +38,54 @@ class YOLOv26_instance_onnx : public YoloONNX //1: name: output0, size: 1x300x38 //2: name: output1, size: 1x32x160x160 - size_t ncInd = 1; - size_t lenInd = 2; - int nc = static_cast(m_outpuDims[outInd].d[ncInd] - 4 - 32); - int dimensions = nc + 32 + 4; - size_t len = static_cast(m_outpuDims[outInd].d[lenInd]);// / m_params.explicitBatchSize; - //auto Volume = [](const nvinfer1::Dims& d) - //{ - // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); - //}; - auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + size_t dimInd = 2; + size_t lenInd = 1; + int dimensions = static_cast(m_outpuDims[outInd].d[dimInd]); + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]); + auto volume = len * dimensions; output += volume * imgIdx; //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; - cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); - rawMemory = rawMemory.reshape(1, dimensions); - cv::transpose(rawMemory, rawMemory); - output = 
(float*)rawMemory.data; - - //std::cout << "output[0] mem:\n"; - //for (size_t ii = 0; ii < 100; ++ii) - //{ - // std::cout << ii << ": "; - // for (size_t jj = 0; jj < 20; ++jj) - // { - // std::cout << output[ii * 20 + jj] << " "; - // } - // std::cout << ";" << std::endl; - //} - //std::cout << ";" << std::endl; - -#if 1 int segWidth = 160; int segHeight = 160; int segChannels = 32; if (outputs.size() > 1) { - //std::cout << "output1 nbDims: " << m_outpuDims[segInd].nbDims << ", "; - //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) - //{ - // std::cout << m_outpuDims[segInd].d[i]; - // if (i + 1 != m_outpuDims[segInd].nbDims) - // std::cout << "x"; - //} - //std::cout << std::endl; - //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; - //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) - //{ - // std::cout << m_outpuDims[outInd].d[i]; - // if (i + 1 != m_outpuDims[outInd].nbDims) - // std::cout << "x"; - //} - //std::cout << std::endl; - segChannels = static_cast(m_outpuDims[segInd].d[1]); segWidth = static_cast(m_outpuDims[segInd].d[2]); segHeight = static_cast(m_outpuDims[segInd].d[3]); } cv::Mat maskProposals; - std::vector> picked_proposals; - int net_width = nc + 4 + segChannels; -#endif - - std::vector classIds; - std::vector confidences; - std::vector rectBoxes; - classIds.reserve(len); - confidences.reserve(len); - rectBoxes.reserve(len); + int netWidth = 6 + segChannels; for (size_t i = 0; i < len; ++i) { // Box - size_t k = i * (nc + 4 + 32); - - int classId = -1; - float objectConf = 0.f; - for (int j = 0; j < nc; ++j) - { - const float classConf = output[k + 4 + j]; - if (classConf > objectConf) - { - classId = j; - objectConf = classConf; - } - } - - //if (i == 0) - //{ - // std::cout << "without nms: mem" << i << ": "; - // for (size_t ii = 0; ii < 4; ++ii) - // { - // std::cout << output[k + ii] << " "; - // } - // std::cout << ";" << std::endl; - // for (size_t ii = 4; ii < nc + 4; ++ii) - // { - // std::cout 
<< output[k + ii] << " "; - // } - // std::cout << ";" << std::endl; - // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) - // { - // std::cout << output[k + ii] << " "; - // } - // std::cout << ";" << std::endl; - //} + size_t k = i * dimensions; + + float objectConf = output[k + 4]; + int classId = output[k + 5]; if (objectConf >= m_params.m_confThreshold) { // (center x, center y, width, height) to (x, y, w, h) - float x = output[k] - output[k + 2] / 2; - float y = output[k + 1] - output[k + 3] / 2; - float width = output[k + 2]; - float height = output[k + 3]; - - //auto ClampToFrame = [](float& v, float& size, int hi) -> int - //{ - // int res = 0; -// - // if (size < 1) - // size = 0; -// - // if (v < 0) - // { - // res = v; - // v = 0; - // return res; - // } - // else if (v + size > hi - 1) - // { - // res = v; - // v = hi - 1 - size; - // if (v < 0) - // { - // size += v; - // v = 0; - // } - // res -= v; - // return res; - // } - // return res; - //}; - //ClampToFrame(x, width, frameSize.width); - //ClampToFrame(y, height, frameSize.height); - - //if (i == 0) - // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + float x = output[k]; + float y = output[k + 1]; + float width = output[k + 2] - output[k]; + float height = output[k + 3] - output[k + 1]; if (width > 4 && height > 4) { - classIds.push_back(classId); - confidences.push_back(objectConf); - rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + resBoxes.emplace_back(classId, objectConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); - std::vector temp_proto(output + k + 4 + nc, output + k + net_width); - picked_proposals.push_back(temp_proto); + std::vector tempProto(output + k + 6, output + k + netWidth); + maskProposals.push_back(cv::Mat(tempProto).t()); } } } - // Non-maximum 
suppression to eliminate redudant overlapping boxes - std::vector indices; - cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); - resBoxes.reserve(indices.size()); - - for (size_t bi = 0; bi < indices.size(); ++bi) - { - resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); - maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); - } - + //std::cout << "maskProposals.size = " << maskProposals.size() << std::endl; if (!maskProposals.empty()) { // Mask processing @@ -241,14 +118,18 @@ class YOLOv26_instance_onnx : public YoloONNX cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + //std::cout << "m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + //std::cout << "m_boxMask = " << resBoxes[i].m_boxMask.size() << ", m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + #if 0 static int globalObjInd = 0; SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); @@ -279,6 +160,8 @@ class YOLOv26_instance_onnx : public YoloONNX resBoxes[i].m_brect = br; resBoxes[i].m_rrect = rr; + cv::resize(resBoxes[i].m_boxMask, resBoxes[i].m_boxMask, resBoxes[i].m_brect.size(), 0, 0, cv::INTER_NEAREST); + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; break; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index 60a90a38..247e352c 100644 --- 
a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -216,7 +216,7 @@ class YOLOv7_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 944dc571..6422cc47 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -266,7 +266,7 @@ class YOLOv8_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index 4381f7c9..29780685 100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -66,7 +66,7 @@ namespace tensor_rt DFINE_IS, YOLOV26, YOLOV26_OBB, - YOLOV26Mask, + YOLOV26Mask }; /// From c517cf5ce41f395eab1a4261bd3f7f973cbcf9e6 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 18 Jan 2026 21:59:00 +0300 Subject: [PATCH 10/17] Update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2c010235..a7850596 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ ## Latest Features +- Add new SOTA: YOLOv26, YOLOv26-obb and YOLOv26-seg models from [ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) - Add RT-DETRv4 (API similar D-FINE) detection model [RT-DETRs/RT-DETRv4](https://github.com/RT-DETRs/RT-DETRv4) - Add D-FINE 
seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) - Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) From cf6070897fdfe8c543de3db4e3829bb3b5fd6633 Mon Sep 17 00:00:00 2001 From: snuzhny Date: Mon, 9 Feb 2026 21:48:08 +0300 Subject: [PATCH 11/17] Fix include --- thirdparty/ruclip/ClipAPI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/ruclip/ClipAPI.cpp b/thirdparty/ruclip/ClipAPI.cpp index 7c374cc8..663230ae 100644 --- a/thirdparty/ruclip/ClipAPI.cpp +++ b/thirdparty/ruclip/ClipAPI.cpp @@ -4,7 +4,7 @@ #include "RuCLIP.h" #include "RuCLIPProcessor.h" -#include "../../src/common/defines.h" +#include "../../src/mtracking/defines.h" /// class ClassificationCLIP::ClassificationCLIPImpl From 435c2794c2b44d3bab523d1ea2b44ccd49f4af6c Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Mon, 2 Mar 2026 06:59:55 +0300 Subject: [PATCH 12/17] Fix debug build with CLIP --- src/Detector/OCVDNNDetector.cpp | 2 +- thirdparty/ruclip/RuCLIPProcessor.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 82f4a23d..8151c6be 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -369,7 +369,7 @@ void OCVDNNDetector::Detect(const cv::UMat& colorFrame) void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) { //Convert Mat to batch of images - cv::dnn::blobFromImage(cv::UMat(colorFrame, crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); + cv::dnn::blobFromImage(colorFrame(crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); m_net.setInput(m_inputBlob, "", m_inScaleFactor, m_meanVal); //set the network input diff --git a/thirdparty/ruclip/RuCLIPProcessor.cpp b/thirdparty/ruclip/RuCLIPProcessor.cpp index 
3bb7242d..1167df80 100644 --- a/thirdparty/ruclip/RuCLIPProcessor.cpp +++ b/thirdparty/ruclip/RuCLIPProcessor.cpp @@ -84,8 +84,8 @@ cv::Mat RuCLIPProcessor::ResizeToInput(const cv::Mat& img, bool saveAspectRatio) int xOffset = (ImageSize - newWidth) / 2; int yOffset = (ImageSize - newHeight) / 2; - assert(2 * m_XOffset + newWidth == ImageSize); - assert(2 * m_YOffset + newHeight == ImageSize); + assert(2 * xOffset + newWidth == ImageSize); + assert(2 * yOffset + newHeight == ImageSize); cv::resize(img, newImg(cv::Rect(xOffset, yOffset, newWidth, newHeight)), cv::Size(newWidth, newHeight), 0, 0, cv::INTER_CUBIC); } From 71cdbf8601059221757af0880caeab79e9cecd00 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Mon, 6 Apr 2026 22:09:53 +0300 Subject: [PATCH 13/17] Try to add fp8 Precision # Conflicts: # data/settings_yoloe_seg.ini --- data/settings_yolov26m.ini | 1 + src/Detector/ONNXTensorRTDetector.cpp | 1 + src/Detector/tensorrt_onnx/YoloONNX.cpp | 19 ++++++++++++----- src/Detector/tensorrt_onnx/YoloONNX.hpp | 1 + src/Detector/tensorrt_onnx/class_detector.cpp | 2 ++ src/Detector/tensorrt_onnx/class_detector.h | 3 ++- .../tensorrt_onnx/common/sampleInference.cpp | 21 +++++++++++++++++-- .../tensorrt_onnx/common/sampleUtils.cpp | 11 +++++++++- 8 files changed, 50 insertions(+), 9 deletions(-) diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini index 625ce893..23dd24f4 100644 --- a/data/settings_yolov26m.ini +++ b/data/settings_yolov26m.ini @@ -47,6 +47,7 @@ net_type = YOLOV26 # INT8 # FP16 # FP32 +# FP8 inference_precision = FP16 diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index 9d28da54..b0a734a7 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictPrecision["INT8"] = tensor_rt::INT8; dictPrecision["FP16"] = tensor_rt::FP16; dictPrecision["FP32"] = tensor_rt::FP32; + dictPrecision["FP8"] 
= tensor_rt::FP8; auto precision = dictPrecision.find(inference_precision->second); if (precision != dictPrecision.end()) m_localConfig.m_inferencePrecision = precision->second; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.cpp b/src/Detector/tensorrt_onnx/YoloONNX.cpp index aa4d23a6..bec31df9 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.cpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.cpp @@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params) m_params = params; + sample::setReportableSeverity(sample::Logger::Severity::kINFO); initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), ""); auto GetBindings = [&]() @@ -79,15 +80,16 @@ bool YoloONNX::Init(const SampleYoloParams& params) file.close(); } - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger); + m_inferRuntime = std::shared_ptr(nvinfer1::createInferRuntime(sample::gLogger)); if (m_params.m_dlaCore >= 0) - infer->setDLACore(m_params.m_dlaCore); + m_inferRuntime->setDLACore(m_params.m_dlaCore); - m_engine = std::shared_ptr(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); + m_engine = std::shared_ptr(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); #if (NV_TENSORRT_MAJOR < 8) - infer->destroy(); + m_inferRuntime->destroy(); + m_inferRuntime.reset(); #else - //delete infer; + //m_inferRuntime.reset(); #endif if (m_engine) @@ -233,6 +235,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, { case tensor_rt::Precision::FP16: config->setFlag(nvinfer1::BuilderFlag::kFP16); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl; + break; + + case tensor_rt::Precision::FP8: + config->setFlag(nvinfer1::BuilderFlag::kFP8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl; break; case tensor_rt::Precision::INT8: @@ -243,6 +251,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, BatchStream 
calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs); calibrator.reset(new Int8EntropyCalibrator2(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str())); config->setFlag(nvinfer1::BuilderFlag::kINT8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl; config->setInt8Calibrator(calibrator.get()); } break; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.hpp b/src/Detector/tensorrt_onnx/YoloONNX.hpp index 2452f61d..cae188c5 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.hpp @@ -86,6 +86,7 @@ class YoloONNX private: std::shared_ptr m_engine; //!< The TensorRT engine used to run the network + std::shared_ptr m_inferRuntime; cv::Mat m_resized; std::vector m_resizedBatch; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index 70f2aa48..f5f4fb66 100644 --- a/src/Detector/tensorrt_onnx/class_detector.cpp +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -134,6 +134,8 @@ namespace tensor_rt dictprecision[tensor_rt::INT8] = "kINT8"; dictprecision[tensor_rt::FP16] = "kHALF"; dictprecision[tensor_rt::FP32] = "kFLOAT"; + dictprecision[tensor_rt::FP8] = "kFP8"; + auto precision = dictprecision.find(m_params.m_precision); if (precision != dictprecision.end()) precisionStr = precision->second; diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index 29780685..7ea989bc 100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -76,7 +76,8 @@ namespace tensor_rt { INT8 = 0, FP16, - FP32 + FP32, + FP8 }; /// diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.cpp b/src/Detector/tensorrt_onnx/common/sampleInference.cpp index f0470bf7..b131ca32 100644 --- a/src/Detector/tensorrt_onnx/common/sampleInference.cpp +++ 
b/src/Detector/tensorrt_onnx/common/sampleInference.cpp @@ -46,6 +46,7 @@ #include "sampleOptions.h" #include "sampleReporting.h" #include "sampleUtils.h" +#include using namespace nvinfer1; namespace sample { @@ -1320,7 +1321,15 @@ void Binding::fill() fillBuffer(buffer->getHostBuffer(), volume, 0, 255); break; } - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f)); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif @@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, break; } #endif - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp index 8f172afe..89a128ee 100644 --- a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp @@ -18,6 +18,7 @@ #include "sampleUtils.h" #include "bfloat16.h" #include "half.h" +#include using namespace nvinfer1; @@ -433,6 +434,11 @@ void print(std::ostream& os, __half v) os << static_cast(v); } +void print(std::ostream& os, __nv_fp8_e4m3 v) +{ + os << static_cast(v); +} + template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv) @@ -482,6 +488,8 @@ template void dumpBuffer(void const* buffer, 
std::string const& separat Dims const& strides, int32_t vectorDim, int32_t spv); template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) @@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max) { T* typedBuffer = static_cast(buffer); std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); + std::uniform_real_distribution distribution((float)min, (float)max); auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; std::generate(typedBuffer, typedBuffer + volume, generator); } @@ -580,6 +588,7 @@ template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_ template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); +template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max); bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) { From 0e935b0b49edfdef8ef8737c4c27b36f8b9b7d87 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Sun, 19 Apr 2026 06:39:31 +0300 Subject: [PATCH 14/17] Change default params for very small objects --- example/MotionDetectorExample.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h index 7136b758..76afa05e 100644 --- a/example/MotionDetectorExample.h +++ b/example/MotionDetectorExample.h @@ 
-17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample { public: MotionDetectorExample(const cv::CommandLineParser& parser) - : VideoExample(parser), m_minObjWidth(10) + : VideoExample(parser) { #ifdef USE_CLIP std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336"; @@ -38,8 +38,7 @@ class MotionDetectorExample final : public VideoExample { m_logger->info("MotionDetectorExample::InitDetector"); - //m_minObjWidth = frame.cols / 20; - m_minObjWidth = 4; + m_minObjWidth = 2; config_t config; config.emplace("useRotatedRect", "0"); @@ -97,7 +96,7 @@ class MotionDetectorExample final : public VideoExample if (!m_trackerSettingsLoaded) { - m_trackerSettings.SetDistance(tracking::DistJaccard); + m_trackerSettings.SetDistance(tracking::DistCenters); m_trackerSettings.m_kalmanType = tracking::KalmanLinear; m_trackerSettings.m_filterGoal = tracking::FilterCenter; m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. 
Used if m_filterGoal == tracking::FilterRect From 6eb553291cd46989bb31c4d6be34575d32f1cfe1 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Wed, 29 Apr 2026 06:28:07 +0300 Subject: [PATCH 15/17] Fixed some warnings --- src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp | 2 +- src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp | 2 +- src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp | 2 +- src/Tracker/TrackerSettings.cpp | 8 ++++---- src/Tracker/byte_track/BYTETracker.cpp | 13 +++---------- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp index 7677244b..19cdd67a 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp @@ -43,7 +43,7 @@ class YOLOv26_bb_onnx : public YoloONNX auto ind = i * m_outpuDims[0].d[2]; float classConf = output[ind + 4]; - int64_t classId = output[ind + 5]; + int classId = static_cast(output[ind + 5]); if (classConf >= m_params.m_confThreshold) { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp index abcb3b4f..9ec2d27e 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -65,7 +65,7 @@ class YOLOv26_instance_onnx : public YoloONNX size_t k = i * dimensions; float objectConf = output[k + 4]; - int classId = output[k + 5]; + int classId = static_cast(output[k + 5]); if (objectConf >= m_params.m_confThreshold) { diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp index 0b70b509..8a097f2f 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -43,7 +43,7 @@ class YOLOv26_obb_onnx : public YoloONNX auto ind = i * m_outpuDims[0].d[2]; float classConf = output[ind + 4]; - int64_t classId = output[ind + 5]; + int classId = 
static_cast(output[ind + 5]); if (classConf >= m_params.m_confThreshold) { diff --git a/src/Tracker/TrackerSettings.cpp b/src/Tracker/TrackerSettings.cpp index e76dd72e..e06f2311 100644 --- a/src/Tracker/TrackerSettings.cpp +++ b/src/Tracker/TrackerSettings.cpp @@ -57,12 +57,12 @@ bool ParseTrackerSettings(const std::string& settingsFile, TrackerSettings& trac trackerSettings.m_useAbandonedDetection = reader.GetInteger("tracking", "detect_abandoned", 0) != 0; trackerSettings.m_minStaticTime = reader.GetInteger("tracking", "min_static_time", 5); trackerSettings.m_maxStaticTime = reader.GetInteger("tracking", "max_static_time", 25); - trackerSettings.m_maxSpeedForStatic = reader.GetInteger("tracking", "max_speed_for_static", 10); + trackerSettings.m_maxSpeedForStatic = static_cast(reader.GetReal("tracking", "max_speed_for_static", 0.5)); trackerSettings.m_byteTrackSettings.m_trackBuffer = reader.GetInteger("tracking", "bytetrack_track_buffer", 30); - trackerSettings.m_byteTrackSettings.m_trackThresh = reader.GetReal("tracking", "bytetrack_track_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_highThresh = reader.GetReal("tracking", "bytetrack_high_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_matchThresh = reader.GetReal("tracking", "bytetrack_match_thresh", 0.8); + trackerSettings.m_byteTrackSettings.m_trackThresh = static_cast(reader.GetReal("tracking", "bytetrack_track_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_highThresh = static_cast(reader.GetReal("tracking", "bytetrack_high_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_matchThresh = static_cast(reader.GetReal("tracking", "bytetrack_match_thresh", 0.8)); // Read detection settings trackerSettings.m_nnWeights = reader.GetString("detection", "nn_weights", "data/yolov4-tiny_best.weights"); diff --git a/src/Tracker/byte_track/BYTETracker.cpp b/src/Tracker/byte_track/BYTETracker.cpp index 8a63cf56..144490b1 100644 --- a/src/Tracker/byte_track/BYTETracker.cpp +++ 
b/src/Tracker/byte_track/BYTETracker.cpp @@ -310,8 +310,8 @@ void byte_track::BYTETracker::removeDuplicateStracks(const std::vector a_overlapping(a_stracks.size(), false), b_overlapping(b_stracks.size(), false); for (const auto &[a_idx, b_idx] : overlapping_combinations) { - const int timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); - const int timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); + const size_t timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); + const size_t timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); if (timep > timeq) b_overlapping[b_idx] = true; else @@ -359,16 +359,9 @@ void byte_track::BYTETracker::linearAssignment(const std::vector= 0) - { - std::vector match; - match.push_back(i); - match.push_back(rowsol[i]); - matches.push_back(match); - } + matches.push_back({ (int)i, rowsol[i] }); else - { a_unmatched.push_back(i); - } } for (size_t i = 0; i < colsol.size(); i++) From 2a5550fb92c23a35b3733c00399313e12eef7788 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 30 Apr 2026 19:11:18 +0300 Subject: [PATCH 16/17] More debug logs --- example/CarsCounting.cpp | 2 +- example/MotionDetectorExample.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/example/CarsCounting.cpp b/example/CarsCounting.cpp index cf801901..5ff8aca5 100644 --- a/example/CarsCounting.cpp +++ b/example/CarsCounting.cpp @@ -230,7 +230,7 @@ bool CarsCounting::InitTracker(cv::UMat frame) /// void CarsCounting::DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) { - m_logger->info("Frame ({1}): tracks = {2}, time = {3}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #if 1 // Debug output if (!m_geoParams.Empty()) diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h index 
76afa05e..7319c407 100644 --- a/example/MotionDetectorExample.h +++ b/example/MotionDetectorExample.h @@ -140,7 +140,7 @@ class MotionDetectorExample final : public VideoExample /// void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override { - m_logger->info("Frame ({0}): tracks = {1}, time = {2}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #ifdef USE_CLIP std::vector clipResult; From 08503429a4b250418f29f16dc6c982e40d169d65 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 30 Apr 2026 22:26:03 +0300 Subject: [PATCH 17/17] Build with CUDA 13 --- src/Detector/tensorrt_onnx/common/sampleDevice.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp index 7964aeb5..1e7ee17a 100644 --- a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp @@ -107,8 +107,17 @@ void setCudaDevice(int32_t device, std::ostream& os) os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" << " (ECC " << (properties.ECCEnabled != 0 ? 
"enabled" : "disabled") << ")" << std::endl; +#if (CUDA_VERSION < 13000) os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; +#else + int clockRateKHz = 0; + cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, device); + int memoryClockRateKHz = 0; + cudaDeviceGetAttribute(&memoryClockRateKHz, cudaDevAttrMemoryClockRate, device); + os << "Application Compute Clock Rate: " << clockRateKHz / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << memoryClockRateKHz / 1000000.0F << " GHz" << std::endl; +#endif os << std::endl; os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." << std::endl;