diff --git a/README.md b/README.md index 2c010235..b99a0cc4 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ ## Latest Features +- Add new SOTA: YOLOv26, YOLOv26-obb and YOLOv26-seg models from [ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) - Add RT-DETRv4 (API similar D-FINE) detection model [RT-DETRs/RT-DETRv4](https://github.com/RT-DETRs/RT-DETRv4) - Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) - Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) @@ -21,7 +22,7 @@ ### Detection & Tracking -[![RF-DETR: detection vs instance segmentation](https://img.youtube.com/vi/oKy7jEKT83c/0.jpg)](https://youtu.be/oKy7jEKT83c) +[![WALDO30 YOLOv8l model for UAV Traffic monitoring](https://img.youtube.com/vi/RBou0eFDbrM/0.jpg)](https://youtu.be/RBou0eFDbrM) [![Satellite planes detection and tracking with YOLOv11-obb](https://img.youtube.com/vi/gTpWnkMF7Lg/0.jpg)](https://youtu.be/gTpWnkMF7Lg) [![4-in-1 latest SOTA detectors](https://img.youtube.com/vi/Pb_HnejRpY4/0.jpg)](https://youtu.be/Pb_HnejRpY4) [![YOLOv8-obb detection with rotated boxes](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs) diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini new file mode 100644 index 00000000..23dd24f4 --- /dev/null +++ b/data/settings_yolov26m.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_INFERENCE_ENGINE + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26 + +#----------------------------- +# INT8 +# FP16 +# FP32 +# FP8 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold 
between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse will be used with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of an object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_obb.ini b/data/settings_yolov26m_obb.ini new file mode 100644 index 00000000..d31e8425 --- /dev/null +++ b/data/settings_yolov26m_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +nn_config = 
C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26_OBB + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse will be used with size 
(3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of an object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_seg.ini b/data/settings_yolov26m_seg.ini new file mode 100644 index 00000000..3a4ed1d0 --- /dev/null +++ b/data/settings_yolov26m_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 
0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse will be used with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of an object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/example/CarsCounting.cpp b/example/CarsCounting.cpp index cf801901..5ff8aca5 100644 --- a/example/CarsCounting.cpp +++ b/example/CarsCounting.cpp @@ -230,7 +230,7 @@ bool CarsCounting::InitTracker(cv::UMat frame) /// void CarsCounting::DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) { - m_logger->info("Frame ({1}): tracks = {2}, time = {3}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #if 1 // Debug output if (!m_geoParams.Empty()) diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h index 7136b758..7319c407 100644 --- a/example/MotionDetectorExample.h +++ b/example/MotionDetectorExample.h @@ -17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample { public: MotionDetectorExample(const cv::CommandLineParser& parser) - : VideoExample(parser), m_minObjWidth(10) + : VideoExample(parser) { #ifdef USE_CLIP std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336"; @@ -38,8 +38,7 @@ class MotionDetectorExample final : public VideoExample { m_logger->info("MotionDetectorExample::InitDetector"); - //m_minObjWidth = frame.cols / 20; - m_minObjWidth = 4; + m_minObjWidth = 2; config_t config; 
config.emplace("useRotatedRect", "0"); @@ -97,7 +96,7 @@ class MotionDetectorExample final : public VideoExample if (!m_trackerSettingsLoaded) { - m_trackerSettings.SetDistance(tracking::DistJaccard); + m_trackerSettings.SetDistance(tracking::DistCenters); m_trackerSettings.m_kalmanType = tracking::KalmanLinear; m_trackerSettings.m_filterGoal = tracking::FilterCenter; m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect @@ -141,7 +140,7 @@ class MotionDetectorExample final : public VideoExample /// void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override { - m_logger->info("Frame ({0}): tracks = {1}, time = {2}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #ifdef USE_CLIP std::vector clipResult; diff --git a/example/main.cpp b/example/main.cpp index 2ef4f32a..f266bbd6 100644 --- a/example/main.cpp +++ b/example/main.cpp @@ -15,7 +15,7 @@ int main(int argc, char** argv) const char* keys = { "{ @1 |../data/atrium.avi | movie file | }" - "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" + "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 2 - opencv_dnn detector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" "{ sf start_frame |0 | Start a video from this position | }" "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" "{ ed end_delay |0 | Delay in milliseconds after video ending | }" diff --git a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index cf6f3c26..a97ad063 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -1,4 +1,7 @@ #include "MotionDetector.h" +#if (CV_VERSION_MAJOR > 4) +#include +#endif /// /// 
\brief MotionDetector::MotionDetector diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 461d18b5..8151c6be 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -173,6 +173,9 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["DFINE"] = ModelType::DFINE; dictNetType["YOLOV13"] = ModelType::YOLOV13; dictNetType["DFINE_IS"] = ModelType::DFINE_IS; + dictNetType["YOLOV26"] = ModelType::YOLOV26; + dictNetType["YOLOV26_OBB"] = ModelType::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = ModelType::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -366,7 +369,7 @@ void OCVDNNDetector::Detect(const cv::UMat& colorFrame) void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) { //Convert Mat to batch of images - cv::dnn::blobFromImage(cv::UMat(colorFrame, crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); + cv::dnn::blobFromImage(colorFrame(crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); m_net.setInput(m_inputBlob, "", m_inScaleFactor, m_meanVal); //set the network input @@ -433,6 +436,18 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseDFINE_IS(crop, detections, tmpRegions); break; + case ModelType::YOLOV26: + ParseYOLOv26(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26_OBB: + ParseYOLOv26_obb(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26Mask: + ParseYOLOv26_seg(crop, detections, tmpRegions); + break; + default: ParseOldYOLO(crop, detections, tmpRegions); break; @@ -1071,3 +1086,123 @@ void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector& d assert(0); }
+///
+/// \brief OCVDNNDetector::ParseYOLOv26
+/// Turns rows of output0 laid out as [x1, y1, x2, y2, score, class, ...] (box corners
+/// on the network-input scale) into axis-aligned regions in frame coordinates.
+/// \param crop Frame rectangle the network was run on; boxes are rescaled to its size and shifted by its origin
+/// \param detections Network outputs; only detections[0] is read
+/// \param tmpRegions Receives one region per row whose score is above m_confidenceThreshold
+///
+void OCVDNNDetector::ParseYOLOv26(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions)
+{
+    //0: name: images, size: 1x3x640x640
+    //1: name: output0, size: 1x300x6
+
+    if (detections.empty() || detections[0].dims < 2)
+        return;
+
+    const cv::Mat& out = detections[0];
+
+    // Row count and row stride come from the two innermost dimensions instead of a hard-coded 6,
+    // so rows that carry extra trailing values (the 1x300x38 seg output) are walked correctly
+    const int rows = out.size[out.dims - 2];
+    const int cols = out.size[out.dims - 1];
+    if (cols < 6)
+        return;
+
+    const float* dets = reinterpret_cast<const float*>(out.data);
+
+    const float x_factor = crop.width / static_cast<float>(m_inWidth);
+    const float y_factor = crop.height / static_cast<float>(m_inHeight);
+
+    for (int i = 0; i < rows; ++i)
+    {
+        const float* row = dets + static_cast<size_t>(i) * cols;
+
+        const float maxClassScore = row[4];
+        if (maxClassScore > m_confidenceThreshold)
+        {
+            const size_t classId = static_cast<size_t>(row[5]);
+
+            // Corners -> left / top / width / height, scaled from network input to crop size
+            const int left = cvRound(row[0] * x_factor);
+            const int top = cvRound(row[1] * y_factor);
+            const int width = cvRound((row[2] - row[0]) * x_factor);
+            const int height = cvRound((row[3] - row[1]) * y_factor);
+
+            if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList))
+                tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast<float>(maxClassScore));
+        }
+    }
+}
+
+///
+/// \brief OCVDNNDetector::ParseYOLOv26_obb
+/// Turns rows of output0 laid out as [x, y, w, h, score, class, angle] into rotated regions:
+/// (x, y) is used as the rectangle centre and the angle is multiplied by 180/pi, i.e. it is
+/// treated as radians and handed to cv::RotatedRect in degrees.
+/// \param crop Frame rectangle the network was run on; boxes are rescaled to its size and shifted by its origin
+/// \param detections Network outputs; only detections[0] is read
+/// \param tmpRegions Receives one region per row whose score is above m_confidenceThreshold
+///
+void OCVDNNDetector::ParseYOLOv26_obb(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions)
+{
+    //0: name: images, size: 1x3x1024x1024
+    //1: name: output0, size: 1x300x7
+
+    if (detections.empty() || detections[0].dims < 2)
+        return;
+
+    const cv::Mat& out = detections[0];
+
+    // Row count and row stride come from the two innermost dimensions instead of a hard-coded 7
+    const int rows = out.size[out.dims - 2];
+    const int cols = out.size[out.dims - 1];
+    if (cols < 7)
+        return;
+
+    const float* dets = reinterpret_cast<const float*>(out.data);
+
+    const float x_factor = crop.width / static_cast<float>(m_inWidth);
+    const float y_factor = crop.height / static_cast<float>(m_inHeight);
+
+    for (int i = 0; i < rows; ++i)
+    {
+        const float* row = dets + static_cast<size_t>(i) * cols;
+
+        const float maxClassScore = row[4];
+        if (maxClassScore > m_confidenceThreshold)
+        {
+            const size_t classId = static_cast<size_t>(row[5]);
+
+            const float x = row[0] * x_factor;
+            const float y = row[1] * y_factor;
+            const float w = row[2] * x_factor;
+            const float h = row[3] * y_factor;
+            const float angle = 180.f * row[6] / static_cast<float>(M_PI);
+
+            if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList))
+                tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x + crop.x, y + crop.y), cv::Size2f(w, h), angle), T2T(classId), static_cast<float>(maxClassScore));
+        }
+    }
+}
+
+///
+/// \brief OCVDNNDetector::ParseYOLOv26_seg
+/// Produces axis-aligned regions from the first six values of every output0 row; masks are not decoded.
+/// \param crop Frame rectangle the network was run on; boxes are rescaled to its size and shifted by its origin
+/// \param detections Network outputs; only detections[0] is read
+/// \param tmpRegions Receives one region per row whose score is above m_confidenceThreshold
+///
+void OCVDNNDetector::ParseYOLOv26_seg(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions)
+{
+    //0: name: images, size: 1x3x640x640
+    //1: name: output0, size: 1x300x38
+    //2: name: output1, size: 1x32x160x160
+
+    // The box fields are read exactly as in the plain detection output, and ParseYOLOv26 takes
+    // the row stride from the tensor shape, so it is reused here. The rest of each row and
+    // output1 (presumably mask coefficients and prototypes - TODO confirm) are not consumed.
+    ParseYOLOv26(crop, detections, tmpRegions);
+}
diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 601241ff..3a55dd67 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -53,7 +53,10 @@ class OCVDNNDetector final : public BaseDetector RFDETR_IS, DFINE, YOLOV13, - DFINE_IS + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask }; cv::dnn::Net m_net; @@ -91,6 +94,9 @@ class OCVDNNDetector final : public BaseDetector void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index f30e2444..b0a734a7 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictPrecision["INT8"] = tensor_rt::INT8; dictPrecision["FP16"] = tensor_rt::FP16; dictPrecision["FP32"] = tensor_rt::FP32; + dictPrecision["FP8"] = tensor_rt::FP8; auto precision = dictPrecision.find(inference_precision->second); if (precision != dictPrecision.end()) m_localConfig.m_inferencePrecision = precision->second; @@ -102,6 +103,9 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictNetType["DFINE"] = tensor_rt::DFINE; dictNetType["YOLOV13"] = tensor_rt::YOLOV13; dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS; + dictNetType["YOLOV26"] = 
tensor_rt::YOLOV26; + dictNetType["YOLOV26_OBB"] = tensor_rt::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = tensor_rt::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -304,7 +308,8 @@ void ONNXTensorRTDetector::CalcMotionMap(cv::Mat& frame) { if (m_localConfig.m_netType == tensor_rt::YOLOV7Mask || m_localConfig.m_netType == tensor_rt::YOLOV8Mask - || m_localConfig.m_netType == tensor_rt::YOLOV11Mask) + || m_localConfig.m_netType == tensor_rt::YOLOV11Mask + || m_localConfig.m_netType == tensor_rt::YOLOV26Mask) { static std::vector color; if (color.empty()) diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp index afde0fc1..84c46ed9 100644 --- a/src/Detector/tensorrt_onnx/DFINE_is.hpp +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -31,12 +31,6 @@ class DFINE_is_onnx : public YoloONNX //2: name: boxes, size: 1x300x4 //3: name: mask_probs, size: 1x300x160x160 - - //0: name: input, size: 1x3x432x432 - //1: name: dets, size: 1x200x4 - //2: name: labels, size: 1x200x91 - //3: name: 4245, size: 1x200x108x108 - const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); diff --git a/src/Detector/tensorrt_onnx/YoloONNX.cpp b/src/Detector/tensorrt_onnx/YoloONNX.cpp index aa4d23a6..bec31df9 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.cpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.cpp @@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params) m_params = params; + sample::setReportableSeverity(sample::Logger::Severity::kINFO); initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), ""); auto GetBindings = [&]() @@ -79,15 +80,15 @@ bool YoloONNX::Init(const SampleYoloParams& params) file.close(); } - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger); + m_inferRuntime = std::shared_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger), samplesCommon::InferDeleter()); // released through the same deleter as m_engine if 
(m_params.m_dlaCore >= 0) - infer->setDLACore(m_params.m_dlaCore); + m_inferRuntime->setDLACore(m_params.m_dlaCore); - m_engine = std::shared_ptr(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); + m_engine = std::shared_ptr(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); #if (NV_TENSORRT_MAJOR < 8) - infer->destroy(); + m_inferRuntime.reset(); // the deleter releases the runtime; calling destroy() before reset() would release it twice #else - //delete infer; + //m_inferRuntime.reset(); #endif if (m_engine) @@ -233,6 +234,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, { case tensor_rt::Precision::FP16: config->setFlag(nvinfer1::BuilderFlag::kFP16); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl; + break; + + case tensor_rt::Precision::FP8: + config->setFlag(nvinfer1::BuilderFlag::kFP8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl; break; case tensor_rt::Precision::INT8: @@ -243,6 +250,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs); calibrator.reset(new Int8EntropyCalibrator2(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str())); config->setFlag(nvinfer1::BuilderFlag::kINT8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl; config->setInt8Calibrator(calibrator.get()); } break; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.hpp b/src/Detector/tensorrt_onnx/YoloONNX.hpp index 2452f61d..874892c0 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.hpp @@ -16,6 +16,10 @@ #include #include +#if (CV_VERSION_MAJOR > 4) +#include +#endif + #include "class_detector.h" //! 
@@ -86,6 +90,7 @@ class YoloONNX private: std::shared_ptr m_engine; //!< The TensorRT engine used to run the network + std::shared_ptr m_inferRuntime; //!< The TensorRT runtime m_engine is deserialized from (NOTE(review): declared after m_engine, so it is destroyed first - confirm this order is intended) cv::Mat m_resized; std::vector m_resizedBatch; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index 6348ae29..641c5c7d 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -264,7 +264,7 @@ class YOLOv11_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp new file mode 100644 index 00000000..19cdd67a --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_bb_onnx class: box parser for a model with one input tensor "images" and one output tensor "output0" +/// +class YOLOv26_bb_onnx : public YoloONNX +{ +public: + YOLOv26_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult: converts the output0 rows of one batch image into boxes rescaled to frameSize + /// \param imgIdx,outputs,frameSize index of the image in the batch, raw output buffers (only outputs[0] is read), size the boxes are rescaled to + /// \return boxes whose confidence is at least m_params.m_confThreshold + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); // rows per image + auto volume = len * m_outpuDims[0].d[2]; // values per image: rows * values per row + output += volume * imgIdx; // move to the rows of image imgIdx + 
//std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; // offset of row i + + float classConf = output[ind + 4]; // a row is read as [x1, y1, x2, y2, confidence, class] + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); // shift by the resized ROI origin, then scale to frameSize + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * (output[ind + 2] - output[ind + 0]); + float height = fh * (output[ind + 3] - output[ind + 1]); + + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp new file mode 100644 index 00000000..9ec2d27e --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -0,0 +1,175 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv26_instance_onnx class +/// +class YOLOv26_instance_onnx : public YoloONNX +{ +public: + YOLOv26_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + 
+ const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[outInd]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + size_t dimInd = 2; + size_t lenInd = 1; + int dimensions = static_cast(m_outpuDims[outInd].d[dimInd]); + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]); + auto volume = len * dimensions; + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + int netWidth = 6 + segChannels; + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * dimensions; + + float objectConf = output[k + 4]; + int classId = static_cast(output[k + 5]); + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k]; + float y = output[k + 1]; + float width = output[k + 2] - output[k]; + float height = output[k + 3] - output[k + 1]; + + if (width > 4 && height > 4) + { + resBoxes.emplace_back(classId, objectConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + std::vector tempProto(output + k + 6, output + k + netWidth); + maskProposals.push_back(cv::Mat(tempProto).t()); + } + } + } + + //std::cout << "maskProposals.size = " << maskProposals.size() << std::endl; + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[segInd]; + std::vector maskFloat(pdata, pdata + segChannels * 
segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR); /* fx = fy = 0: scale comes from dsize. The old call passed cv::INTER_NEAREST in the fx slot, so INTER_LINEAR has always been the effective mode; it is now spelled out. */ + + //std::cout << "m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + + //std::cout << "m_boxMask = " << resBoxes[i].m_boxMask.size() << ", m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + +
cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fh; /* vertical coordinate uses the vertical factor fh (was fw), matching br.y below */ + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + cv::resize(resBoxes[i].m_boxMask, resBoxes[i].m_boxMask, resBoxes[i].m_brect.size(), 0, 0, cv::INTER_NEAREST); + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp new file mode 100644 index 00000000..8a097f2f --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_obb_onnx class +/// +class YOLOv26_obb_onnx : public YoloONNX +{ +public: + YOLOv26_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; +
output += volume * imgIdx; + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * output[ind + 2]; + float height = fh * output[ind + 3]; + float angle = 180.f * output[ind + 6] / static_cast(M_PI); + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << ", angle = " << angle << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(width, height), angle)); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index 60a90a38..247e352c 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -216,7 +216,7 @@ class YOLOv7_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 944dc571..6422cc47 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ 
b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -266,7 +266,7 @@ class YOLOv8_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif -#if 0 +#if 1 std::vector> contours; #if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index c12476ac..f5f4fb66 100644 --- a/src/Detector/tensorrt_onnx/class_detector.cpp +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -19,6 +19,9 @@ #include "DFINE_bb.hpp" #include "YoloONNXv13_bb.hpp" #include "DFINE_is.hpp" +#include "YoloONNXv26_bb.hpp" +#include "YoloONNXv26_obb.hpp" +#include "YoloONNXv26_instance.hpp" namespace tensor_rt { @@ -85,6 +88,15 @@ namespace tensor_rt case ModelType::YOLOV11Mask: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; + case ModelType::YOLOV26: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; case ModelType::YOLOV12: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; @@ -122,6 +134,8 @@ namespace tensor_rt dictprecision[tensor_rt::INT8] = "kINT8"; dictprecision[tensor_rt::FP16] = "kHALF"; dictprecision[tensor_rt::FP32] = "kFLOAT"; + dictprecision[tensor_rt::FP8] = "kFP8"; + auto precision = dictprecision.find(m_params.m_precision); if (precision != dictprecision.end()) precisionStr = precision->second; diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index de8af380..7ea989bc 
100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -63,7 +63,10 @@ namespace tensor_rt RFDETR_IS, DFINE, YOLOV13, - DFINE_IS + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask }; /// @@ -73,7 +76,8 @@ namespace tensor_rt { INT8 = 0, FP16, - FP32 + FP32, + FP8 }; /// diff --git a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp index 7964aeb5..1e7ee17a 100644 --- a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp @@ -107,8 +107,17 @@ void setCudaDevice(int32_t device, std::ostream& os) os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; +#if (CUDA_VERSION < 13000) os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; +#else + int clockRateKHz = 0; + cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, device); + int memoryClockRateKHz = 0; + cudaDeviceGetAttribute(&memoryClockRateKHz, cudaDevAttrMemoryClockRate, device); + os << "Application Compute Clock Rate: " << clockRateKHz / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << memoryClockRateKHz / 1000000.0F << " GHz" << std::endl; +#endif os << std::endl; os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." 
<< std::endl; diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.cpp b/src/Detector/tensorrt_onnx/common/sampleInference.cpp index f0470bf7..b131ca32 100644 --- a/src/Detector/tensorrt_onnx/common/sampleInference.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleInference.cpp @@ -46,6 +46,7 @@ #include "sampleOptions.h" #include "sampleReporting.h" #include "sampleUtils.h" +#include using namespace nvinfer1; namespace sample { @@ -1320,7 +1321,15 @@ void Binding::fill() fillBuffer(buffer->getHostBuffer(), volume, 0, 255); break; } - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f)); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif @@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, break; } #endif - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp index 8f172afe..89a128ee 100644 --- a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp @@ -18,6 +18,7 @@ #include "sampleUtils.h" #include "bfloat16.h" #include "half.h" +#include using namespace nvinfer1; @@ -433,6 +434,11 @@ void print(std::ostream& os, __half v) os << static_cast(v); } +void print(std::ostream& os, __nv_fp8_e4m3 v) +{ + os << static_cast(v); +} 
+ template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv) @@ -482,6 +488,8 @@ template void dumpBuffer(void const* buffer, std::string const& separat Dims const& strides, int32_t vectorDim, int32_t spv); template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) @@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max) { T* typedBuffer = static_cast(buffer); std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); + std::uniform_real_distribution distribution((float)min, (float)max); auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; std::generate(typedBuffer, typedBuffer + volume, generator); } @@ -580,6 +588,7 @@ template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_ template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); +template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max); bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) { diff --git a/src/Tracker/TrackerSettings.cpp b/src/Tracker/TrackerSettings.cpp index e76dd72e..e06f2311 100644 --- a/src/Tracker/TrackerSettings.cpp +++ b/src/Tracker/TrackerSettings.cpp @@ -57,12 +57,12 @@ bool ParseTrackerSettings(const 
std::string& settingsFile, TrackerSettings& trac trackerSettings.m_useAbandonedDetection = reader.GetInteger("tracking", "detect_abandoned", 0) != 0; trackerSettings.m_minStaticTime = reader.GetInteger("tracking", "min_static_time", 5); trackerSettings.m_maxStaticTime = reader.GetInteger("tracking", "max_static_time", 25); - trackerSettings.m_maxSpeedForStatic = reader.GetInteger("tracking", "max_speed_for_static", 10); + trackerSettings.m_maxSpeedForStatic = static_cast(reader.GetReal("tracking", "max_speed_for_static", 0.5)); trackerSettings.m_byteTrackSettings.m_trackBuffer = reader.GetInteger("tracking", "bytetrack_track_buffer", 30); - trackerSettings.m_byteTrackSettings.m_trackThresh = reader.GetReal("tracking", "bytetrack_track_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_highThresh = reader.GetReal("tracking", "bytetrack_high_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_matchThresh = reader.GetReal("tracking", "bytetrack_match_thresh", 0.8); + trackerSettings.m_byteTrackSettings.m_trackThresh = static_cast(reader.GetReal("tracking", "bytetrack_track_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_highThresh = static_cast(reader.GetReal("tracking", "bytetrack_high_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_matchThresh = static_cast(reader.GetReal("tracking", "bytetrack_match_thresh", 0.8)); // Read detection settings trackerSettings.m_nnWeights = reader.GetString("detection", "nn_weights", "data/yolov4-tiny_best.weights"); diff --git a/src/Tracker/byte_track/BYTETracker.cpp b/src/Tracker/byte_track/BYTETracker.cpp index 8a63cf56..144490b1 100644 --- a/src/Tracker/byte_track/BYTETracker.cpp +++ b/src/Tracker/byte_track/BYTETracker.cpp @@ -310,8 +310,8 @@ void byte_track::BYTETracker::removeDuplicateStracks(const std::vector a_overlapping(a_stracks.size(), false), b_overlapping(b_stracks.size(), false); for (const auto &[a_idx, b_idx] : overlapping_combinations) { - const int timep = a_stracks[a_idx]->getFrameId() - 
a_stracks[a_idx]->getStartFrameId(); - const int timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); + const size_t timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); + const size_t timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); if (timep > timeq) b_overlapping[b_idx] = true; else @@ -359,16 +359,9 @@ void byte_track::BYTETracker::linearAssignment(const std::vector= 0) - { - std::vector match; - match.push_back(i); - match.push_back(rowsol[i]); - matches.push_back(match); - } + matches.push_back({ (int)i, rowsol[i] }); else - { a_unmatched.push_back(i); - } } for (size_t i = 0; i < colsol.size(); i++) diff --git a/thirdparty/ruclip/ClipAPI.cpp b/thirdparty/ruclip/ClipAPI.cpp index 7c374cc8..663230ae 100644 --- a/thirdparty/ruclip/ClipAPI.cpp +++ b/thirdparty/ruclip/ClipAPI.cpp @@ -4,7 +4,7 @@ #include "RuCLIP.h" #include "RuCLIPProcessor.h" -#include "../../src/common/defines.h" +#include "../../src/mtracking/defines.h" /// class ClassificationCLIP::ClassificationCLIPImpl diff --git a/thirdparty/ruclip/RuCLIPProcessor.cpp b/thirdparty/ruclip/RuCLIPProcessor.cpp index 3bb7242d..1167df80 100644 --- a/thirdparty/ruclip/RuCLIPProcessor.cpp +++ b/thirdparty/ruclip/RuCLIPProcessor.cpp @@ -84,8 +84,8 @@ cv::Mat RuCLIPProcessor::ResizeToInput(const cv::Mat& img, bool saveAspectRatio) int xOffset = (ImageSize - newWidth) / 2; int yOffset = (ImageSize - newHeight) / 2; - assert(2 * m_XOffset + newWidth == ImageSize); - assert(2 * m_YOffset + newHeight == ImageSize); + assert(2 * xOffset + newWidth == ImageSize); + assert(2 * yOffset + newHeight == ImageSize); cv::resize(img, newImg(cv::Rect(xOffset, yOffset, newWidth, newHeight)), cv::Size(newWidth, newHeight), 0, 0, cv::INTER_CUBIC); }