diff --git a/README.md b/README.md index 04938d4b..55bd8051 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml) ## Latest Features +- Instance segmentation model from RF-DETR detector works with TensorRT! Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=6` example - New linear assignment algorithm - [Jonker-Volgenant / LAPJV algorithm](https://github.com/yongyanghz/LAPJV-algorithm-c) used in [scipy](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) as alternative for Hungarian algorithm - D-FINE detector works with TensorRT! Export pre-trained PyTorch models [here (Peterande/D-FINE)](https://github.com/Peterande/D-FINE) to ONNX format and run Multitarget-tracker with `-e=6` example - RF-DETR detector works with TensorRT! 
Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=6` example @@ -20,6 +21,8 @@ ## Demo Videos ### Detection & Tracking + +[![RF-DETR: detection vs instance segmentation](https://img.youtube.com/vi/oKy7jEKT83c/0.jpg)](https://youtu.be/oKy7jEKT83c) [![Satellite planes detection and tracking with YOLOv11-obb](https://img.youtube.com/vi/gTpWnkMF7Lg/0.jpg)](https://youtu.be/gTpWnkMF7Lg) [![4-in-1 latest SOTA detectors](https://img.youtube.com/vi/Pb_HnejRpY4/0.jpg)](https://youtu.be/Pb_HnejRpY4) [![YOLOv8-obb detection with rotated boxes](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs) diff --git a/data/settings_rfdetr_seg.ini b/data/settings_rfdetr_seg.ini new file mode 100644 index 00000000..453691c0 --- /dev/null +++ b/data/settings_rfdetr_seg.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 11 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco_91.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# 
YOLOV5 +net_type = RFDETR_IS + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then will be used circle with this radius +# If this value <= 0 then will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ratio for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more than this value then object is non static +max_speed_for_static = 10 diff --git a/src/Detector/BaseDetector.h b/src/Detector/BaseDetector.h index c585426e..b3c4c5d3 100644 --- a/src/Detector/BaseDetector.h +++ b/src/Detector/BaseDetector.h @@ -167,17 +167,25 @@ class BaseDetector cv::Mat foreground(m_motionMap.size(), CV_8UC1, cv::Scalar(0, 0, 0)); for (const auto& region : m_regions) { + if (region.m_boxMask.empty()) + { #if (CV_VERSION_MAJOR < 4) - cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED); + cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED); #else - cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); + cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); #endif + } + else + { + cv::Rect brect = Clamp(cv::Rect(region.m_brect.x, region.m_brect.y, region.m_boxMask.cols, region.m_boxMask.rows), foreground.size()); + region.m_boxMask.copyTo(foreground(brect)); + } } if (!m_ignoreMask.empty()) cv::bitwise_and(foreground, m_ignoreMask, foreground); cv::normalize(foreground, m_normFor, 255, 0, cv::NORM_MINMAX, m_motionMap.type()); - double alpha = 0.95; + double alpha = 0.9; cv::addWeighted(m_motionMap, alpha, m_normFor, 1 - alpha, 0, m_motionMap); const int chans = frame.channels(); diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 
807f7b54..62dde116 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -169,6 +169,7 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["YOLOV11Mask"] = ModelType::YOLOV11Mask; dictNetType["YOLOV12"] = ModelType::YOLOV12; dictNetType["RFDETR"] = ModelType::RFDETR; + dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS; dictNetType["DFINE"] = ModelType::DFINE; auto netType = dictNetType.find(net_type->second); @@ -414,6 +415,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseRFDETR(crop, detections, tmpRegions); break; + case ModelType::RFDETR_IS: + ParseRFDETR_IS(crop, detections, tmpRegions); + break; + case ModelType::DFINE: ParseDFINE(crop, detections, tmpRegions); break; @@ -934,6 +939,70 @@ void OCVDNNDetector::ParseRFDETR(const cv::Rect& crop, std::vector& det } } +/// +/// \brief OCVDNNDetector::ParseRFDETR_IS +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensionsDets = detections[0].size[2]; + int dimensionsLabels = detections[1].size[2]; + + //0: name: input, size : 1x3x560x560 + //1: name: dets, size : 1x300x4 + //2: name: labels, size : 1x300x91 + + float* dets = (float*)detections[0].data; + float* labels = (float*)detections[1].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (int i = 0; i < rows; ++i) + { + float maxClassScore = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < static_cast(dimensionsLabels); ++cli) + { + auto conf = L2Conf(labels[cli]); + if (maxClassScore < conf) + { + maxClassScore = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (maxClassScore > m_confidenceThreshold) + { + float 
x = dets[0]; + float y = dets[1]; + float w = dets[2]; + float h = dets[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + dets += dimensionsDets; + labels += dimensionsLabels; + } +} + /// /// \brief OCVDNNDetector::ParseDFINE /// \param crop diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 74647ce3..a8d16e8c 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -50,6 +50,7 @@ class OCVDNNDetector final : public BaseDetector YOLOV11Mask, YOLOV12, RFDETR, + RFDETR_IS, DFINE }; @@ -85,6 +86,7 @@ class OCVDNNDetector final : public BaseDetector void ParseYOLOv5_8_11_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseYOLOv5_8_11_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseRFDETR(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp index d857f88f..5d02c778 100644 --- a/src/Detector/YoloTensorRTDetector.cpp +++ b/src/Detector/YoloTensorRTDetector.cpp @@ -112,6 +112,7 @@ bool YoloTensorRTDetector::Init(const config_t& config) dictNetType["YOLOV11Mask"] = tensor_rt::YOLOV11Mask; dictNetType["YOLOV12"] = tensor_rt::YOLOV12; dictNetType["RFDETR"] = tensor_rt::RFDETR; + dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS; dictNetType["DFINE"] = tensor_rt::DFINE; auto netType = 
dictNetType.find(net_type->second); diff --git a/src/Detector/tensorrt_yolo/RFDETR_is.hpp b/src/Detector/tensorrt_yolo/RFDETR_is.hpp new file mode 100644 index 00000000..6b1a959d --- /dev/null +++ b/src/Detector/tensorrt_yolo/RFDETR_is.hpp @@ -0,0 +1,184 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The RFDETR_is_onnx class +/// +class RFDETR_is_onnx : public YoloONNX +{ +public: + RFDETR_is_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("dets"); + outputTensorNames.push_back("labels"); + outputTensorNames.push_back("4245"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size: 1x3x432x432 + //1: name: dets, size: 1x200x4 + //2: name: labels, size: 1x200x91 + //3: name: 4245, size: 1x200x108x108 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + cv::Size2f inputSizef(m_inputDims[0].d[3], m_inputDims[0].d[2]); + cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + auto dets = outputs[0]; + auto labels = outputs[1]; + + int segInd = 2; + auto masks = outputs[segInd]; + + size_t ncInd = 2; + size_t lenInd = 1; + + + size_t nc = m_outpuDims[1].d[ncInd]; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; + auto volume0 = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[1].d[ncInd]; // Volume(m_outpuDims[0]); + 
labels += volume1 * imgIdx; + + int segChannels = static_cast(m_outpuDims[segInd].d[1]); + int segWidth = static_cast(m_outpuDims[segInd].d[2]); + int segHeight = static_cast(m_outpuDims[segInd].d[3]); + masks += imgIdx * segChannels * segWidth * segHeight; + + cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1); + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + classConf = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (classConf >= m_params.confThreshold) + { + float x = fw * (inputSizef.width * (dets[0] - dets[2] / 2.f) - m_resizedROI.x); + float y = fh * (inputSizef.height * (dets[1] - dets[3] / 2.f) - m_resizedROI.y); + float width = fw * inputSizef.width * dets[2]; + float height = fh * inputSizef.height * dets[3]; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " << classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + // std::cout << "dets = " << dets[0] << ", " << dets[1] << ", " << dets[2] << ", " << dets[3] << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + double maskThreshold = 0.1; + for (int row = 0; row < segHeight; ++row) + { + const float* maskPtr = masks + row * segWidth; + uchar* binMaskPtr = binaryMask8U.ptr(row); + + for (int col = 0; col < segWidth; ++col) + { + binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 
255 : 0; + } + } + + tensor_rt::Result& resObj = resBoxes.back(); + + cv::Rect smallRect; + smallRect.x = cvRound(segHeight * (dets[0] - dets[2] / 2.f)); + smallRect.y = cvRound(segHeight * (dets[1] - dets[3] / 2.f)); + smallRect.width = cvRound(segHeight * dets[2]); + smallRect.height = cvRound(segHeight * dets[3]); + smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight)); + + if (smallRect.area() > 0) + { + cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST); + +#if 0 + static int globalObjInd = 0; + SaveMat(mask, std::to_string(globalObjInd) + "_mask", ".png", "tmp", true); + SaveMat(binaryMask, std::to_string(globalObjInd) + "_bin_mask", ".png", "tmp", true); + SaveMat(binaryMask8U, std::to_string(globalObjInd) + "_bin_mask_8u", ".png", "tmp", true); + SaveMat(resObj.m_boxMask, std::to_string(globalObjInd++) + "_obj_mask", ".png", "tmp", true); + std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; +#endif + + std::vector> contours; + std::vector hierarchy; + cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resObj.m_brect.x; + int dy = resObj.m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = rr.center.x * fw + dx; + rr.center.y = rr.center.y * fw + dy; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound(dx + br.x * fw); + br.y = cvRound(dy + br.y * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resObj.m_brect = br; + //resObj.m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } + } + else + { + resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255)); + } + } + + dets += m_outpuDims[0].d[ncInd]; + labels += m_outpuDims[1].d[ncInd]; + masks += segWidth * segHeight; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp index e9ba10e1..5ca057c9 100644 --- a/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp @@ -32,7 +32,7 @@ class YOLOv11_instance_onnx : public YoloONNX size_t outInd = 0; size_t segInd = 1; - auto output = outputs[0]; + auto output = outputs[outInd]; //std::cout << "output[1] mem:\n"; //auto output1 = outputs[1]; @@ -228,7 +228,7 @@ class YOLOv11_instance_onnx : public YoloONNX if (!maskProposals.empty()) { // Mask processing - const float* pdata = outputs[1]; + const float* pdata = outputs[segInd]; std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); int INPUT_W = static_cast(m_inputDims[0].d[3]); diff --git a/src/Detector/tensorrt_yolo/class_detector.cpp b/src/Detector/tensorrt_yolo/class_detector.cpp index 1b3d81a5..b2d86f7b 100644 --- a/src/Detector/tensorrt_yolo/class_detector.cpp +++ b/src/Detector/tensorrt_yolo/class_detector.cpp @@ -16,6 +16,7 @@ #include "YoloONNXv11_instance.hpp" #include "YoloONNXv12_bb.hpp" #include "RFDETR_bb.hpp" +#include "RFDETR_is.hpp" #include "DFINE_bb.hpp" namespace 
tensor_rt @@ -113,6 +114,9 @@ namespace tensor_rt case ModelType::RFDETR: m_detector = std::make_unique(m_params.inputTensorNames, m_params.outputTensorNames); break; + case ModelType::RFDETR_IS: + m_detector = std::make_unique(m_params.inputTensorNames, m_params.outputTensorNames); + break; case ModelType::DFINE: m_detector = std::make_unique(m_params.inputTensorNames, m_params.outputTensorNames); break; diff --git a/src/Detector/tensorrt_yolo/class_detector.h b/src/Detector/tensorrt_yolo/class_detector.h index 62ed857e..ebfcba9b 100644 --- a/src/Detector/tensorrt_yolo/class_detector.h +++ b/src/Detector/tensorrt_yolo/class_detector.h @@ -60,6 +60,7 @@ namespace tensor_rt YOLOV11Mask, YOLOV12, RFDETR, + RFDETR_IS, DFINE }; diff --git a/src/Detector/tensorrt_yolo/ds_image.cpp b/src/Detector/tensorrt_yolo/ds_image.cpp index 28628df0..7651aa2c 100644 --- a/src/Detector/tensorrt_yolo/ds_image.cpp +++ b/src/Detector/tensorrt_yolo/ds_image.cpp @@ -52,7 +52,9 @@ DsImage::DsImage(const cv::Mat& mat_image_, tensor_rt::ModelType net_type, const tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type || tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type || tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type || - tensor_rt::ModelType::YOLOV12 == net_type || tensor_rt::ModelType::RFDETR == net_type || tensor_rt::ModelType::DFINE == net_type) + tensor_rt::ModelType::YOLOV12 == net_type || + tensor_rt::ModelType::RFDETR == net_type || tensor_rt::ModelType::RFDETR_IS == net_type || + tensor_rt::ModelType::DFINE == net_type) { // resize the DsImage with scale float r = std::min(static_cast(inputH) / static_cast(m_Height), static_cast(inputW) / static_cast(m_Width)); @@ -105,7 +107,9 @@ DsImage::DsImage(const std::string& path, tensor_rt::ModelType net_type, const i 
tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type || tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type || tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type || - tensor_rt::ModelType::YOLOV12 == net_type || tensor_rt::ModelType::RFDETR == net_type || tensor_rt::ModelType::DFINE == net_type) + tensor_rt::ModelType::YOLOV12 == net_type || + tensor_rt::ModelType::RFDETR == net_type || tensor_rt::ModelType::RFDETR_IS == net_type || + tensor_rt::ModelType::DFINE == net_type) { // resize the DsImage with scale float dim = std::max(m_Height, m_Width); diff --git a/src/common/defines.h b/src/common/defines.h index 098b11bb..5c9708d2 100644 --- a/src/common/defines.h +++ b/src/common/defines.h @@ -351,10 +351,10 @@ inline void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, const int alpha_1 = 255 - alpha; const int nchans = frame.channels(); int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; - for (int y = rect.y; y < rect.y + rect.height; ++y) + for (int y = std::max(0, rect.y); y < std::min(rect.y + rect.height, frame.rows - 1); ++y) { uchar* ptr = frame.ptr(y) + nchans * rect.x; - for (int x = rect.x; x < rect.x + rect.width; ++x) + for (int x = std::max(0, rect.x); x < std::min(rect.x + rect.width, frame.cols - 1); ++x) { for (int i = 0; i < nchans; ++i) {