diff --git a/CMakeLists.txt b/CMakeLists.txt index a9bdcc8c..fed4b1ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,9 +44,9 @@ endif(SILENT_WORK) include(CheckIncludeFileCXX) check_include_file_cxx(filesystem HAVE_FILESYSTEM) if(HAVE_FILESYSTEM) - add_definitions(-DHAVE_FILESYSTEM) message("Founded filesystem header") else(HAVE_FILESYSTEM) + add_definitions(-DHAVE_EXPERIMENTAL_FILESYSTEM) message("Do not found filesystem header") endif(HAVE_FILESYSTEM) diff --git a/README.md b/README.md index 7a1e3b27..a7850596 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,10 @@ [![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml) ## Latest Features + +- Add new SOTA: YOLOv26, YOLOv26-obb and YOLOv26-seg models from [ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) +- Add RT-DETRv4 (API similar to D-FINE) detection model [RT-DETRs/RT-DETRv4](https://github.com/RT-DETRs/RT-DETRv4) +- Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) - Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) - Big code cleanup from old style algorithms and detectors: some bgfg detectors, some VOT trackes, Face and Pedestrin detectors, Darknet based backend for old YOLO etc - YOLOv13 detector works with TensorRT! 
Export pre-trained PyTorch models [here (iMoonLab/yolov13)](https://github.com/iMoonLab/yolov13) to ONNX format and run Multitarget-tracker with `-e=3` example diff --git a/async_detector/CMakeLists.txt b/async_detector/CMakeLists.txt index 06b97ebe..2a552a62 100644 --- a/async_detector/CMakeLists.txt +++ b/async_detector/CMakeLists.txt @@ -12,7 +12,7 @@ set(HEADERS AsyncDetector.h # добавляем include директории # ---------------------------------------------------------------------------- INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../src - ${PROJECT_SOURCE_DIR}/../src/common + ${PROJECT_SOURCE_DIR}/../src/mtracking ${PROJECT_SOURCE_DIR}/../src/Detector ${PROJECT_SOURCE_DIR}/../src/Detector/vibe_src ${PROJECT_SOURCE_DIR}/../src/Detector/Subsense diff --git a/data/dota/DOTA_v1.0.names b/data/dota/DOTA_v1.0.names new file mode 100644 index 00000000..adea7619 --- /dev/null +++ b/data/dota/DOTA_v1.0.names @@ -0,0 +1,15 @@ +plane +ship +storage_tank +baseball_diamond +tennis_court +basketball_court +ground_track_field +harbor +bridge +large_vehicle +small_vehicle +helicopter +roundabout +soccer_ball_field +swimming_pool \ No newline at end of file diff --git a/data/dota/DOTA_v1.5.names b/data/dota/DOTA_v1.5.names new file mode 100644 index 00000000..4d18c4f1 --- /dev/null +++ b/data/dota/DOTA_v1.5.names @@ -0,0 +1,16 @@ +baseball_diamond +basketball_court +bridge +container_crane +ground_track_field +harbor +helicopter +large_vehicle +plane +roundabout +ship +small_vehicle +soccer_ball_field +storage_tank +swimming_pool +tennis_court diff --git a/data/settings_dfine_seg.ini b/data/settings_dfine_seg.ini new file mode 100644 index 00000000..e82c504a --- /dev/null +++ b/data/settings_dfine_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# 
DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE_IS + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman 
filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_rtdetrv4.ini b/data/settings_rtdetrv4.ini new file mode 100644 index 00000000..4734cf9f --- /dev/null +++ b/data/settings_rtdetrv4.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + 
+#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini new file mode 100644 index 00000000..23dd24f4 --- /dev/null +++ b/data/settings_yolov26m.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_INFERENCE_ENGINE + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26 + +#----------------------------- +# INT8 +# FP16 +# FP32 +# FP8 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + 
+#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_obb.ini b/data/settings_yolov26m_obb.ini new file mode 100644 index 00000000..d31e8425 --- /dev/null +++ b/data/settings_yolov26m_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26_OBB + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + 
+filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_seg.ini b/data/settings_yolov26m_seg.ini new file mode 100644 index 00000000..3a4ed1d0 --- /dev/null +++ b/data/settings_yolov26m_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + 
+filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 then a circle with this radius will be used +# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this many seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection of abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. 
If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 5674e9be..163680cb 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -20,7 +20,7 @@ endif(BUILD_CARS_COUNTING) # добавляем include директории # ---------------------------------------------------------------------------- INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../src - ${PROJECT_SOURCE_DIR}/../src/common + ${PROJECT_SOURCE_DIR}/../src/mtracking ${PROJECT_SOURCE_DIR}/../src/Detector ${PROJECT_SOURCE_DIR}/../src/Detector/vibe_src ${PROJECT_SOURCE_DIR}/../src/Detector/Subsense diff --git a/example/CarsCounting.cpp b/example/CarsCounting.cpp index cf801901..5ff8aca5 100644 --- a/example/CarsCounting.cpp +++ b/example/CarsCounting.cpp @@ -230,7 +230,7 @@ bool CarsCounting::InitTracker(cv::UMat frame) /// void CarsCounting::DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) { - m_logger->info("Frame ({1}): tracks = {2}, time = {3}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #if 1 // Debug output if (!m_geoParams.Empty()) diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h index ac4e5409..7319c407 100644 --- a/example/MotionDetectorExample.h +++ b/example/MotionDetectorExample.h @@ -17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample { public: MotionDetectorExample(const cv::CommandLineParser& parser) - : VideoExample(parser), m_minObjWidth(10) + : VideoExample(parser) { #ifdef USE_CLIP std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336"; @@ -38,7 +38,6 @@ class MotionDetectorExample final : public VideoExample { m_logger->info("MotionDetectorExample::InitDetector"); - //m_minObjWidth = frame.cols / 20; m_minObjWidth = 2; config_t config; @@ -56,7 +55,7 @@ class 
MotionDetectorExample final : public VideoExample config.emplace("updateFactor", "16"); break; case tracking::Detectors::Motion_MOG: - config.emplace("history", std::to_string(cvRound(50 * m_fps))); + config.emplace("history", std::to_string(cvRound(5000 * m_fps))); config.emplace("nmixtures", "3"); config.emplace("backgroundRatio", "0.7"); config.emplace("noiseSigma", "0"); @@ -141,7 +140,7 @@ class MotionDetectorExample final : public VideoExample /// void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override { - m_logger->info("Frame ({0}): tracks = {1}, time = {2}", framesCounter, tracks.size(), currTime); + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); #ifdef USE_CLIP std::vector clipResult; @@ -197,8 +196,9 @@ class MotionDetectorExample final : public VideoExample auto velocity = sqrt(sqr(track.m_velocity[0]) + sqr(track.m_velocity[1])); if (track.IsRobust(4, // Minimal trajectory size 0.3f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.2f, 5.0f))) // Min and max ratio: width / height - //velocity > 30 // Velocity more than 30 pixels per second + cv::Size2f(0.2f, 5.0f), // Min and max ratio: width / height + 2)) + //velocity > 30 // Velocity more than 30 pixels per second { //track_t mean = 0; //track_t stddev = 0; diff --git a/example/VideoExample.cpp b/example/VideoExample.cpp index 323cde52..4c41e003 100644 --- a/example/VideoExample.cpp +++ b/example/VideoExample.cpp @@ -153,8 +153,14 @@ void VideoExample::SyncProcess() int64 startLoopTime = cv::getTickCount(); + //double fps = capture.get(cv::CAP_PROP_FPS); + //double readPeriodSeconds = 2.; + //int readPeriodFrames = cvRound(readPeriodSeconds * fps); + for (;;) { + //int currFramesPos = cvRound(capture.get(cv::CAP_PROP_POS_FRAMES)); + size_t i = 0; for (; i < m_batchSize; ++i) { @@ -177,6 +183,8 @@ void VideoExample::SyncProcess() if (i < m_batchSize) break; + 
//capture.set(cv::CAP_PROP_POS_FRAMES, currFramesPos + readPeriodFrames); + if (!m_isDetectorInitialized || !m_isTrackerInitialized) { cv::UMat ufirst = frameInfo.m_frames[0].GetUMatBGR(); @@ -202,7 +210,6 @@ void VideoExample::SyncProcess() int64 t1 = cv::getTickCount(); - regions_t regions; Detection(frameInfo); Tracking(frameInfo); int64 t2 = cv::getTickCount(); diff --git a/example/VideoExample.h b/example/VideoExample.h index b6b65918..b5e0fc69 100644 --- a/example/VideoExample.h +++ b/example/VideoExample.h @@ -281,7 +281,11 @@ class VideoExample bool m_isDetectorInitialized = false; std::string m_inFile; std::string m_outFile; - int m_fourcc = cv::VideoWriter::fourcc('h', '2', '6', '4'); //cv::VideoWriter::fourcc('M', 'J', 'P', 'G'); +#if 0 + int m_fourcc = cv::VideoWriter::fourcc('h', '2', '6', '4'); +#else + int m_fourcc = cv::VideoWriter::fourcc('M', 'J', 'P', 'G'); +#endif int m_startFrame = 0; int m_endFrame = 0; int m_finishDelay = 0; diff --git a/example/main.cpp b/example/main.cpp index 34e413da..f266bbd6 100644 --- a/example/main.cpp +++ b/example/main.cpp @@ -9,47 +9,39 @@ #include #include -// ---------------------------------------------------------------------- - -static void Help() -{ - printf("\nExamples of the Multitarget tracking algorithm\n" - "Usage: \n" - " ./MultitargetTracker [--example]= [--start_frame]= [--end_frame]= [--end_delay]= [--out]= [--show_logs]= [--async]= [--res]= [--settings]= [--batch_size=] \n\n" - "Press:\n" - "\'m\' key for change mode: play|pause. When video is paused you can press any key for get next frame. 
\n\n" - "Press Esc to exit from video \n\n" - ); -} - -const char* keys = -{ - "{ @1 |../data/atrium.avi | movie file | }" - "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" - "{ sf start_frame |0 | Start a video from this position | }" - "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" - "{ ed end_delay |0 | Delay in milliseconds after video ending | }" - "{ o out | | Name of result video file | }" - "{ show_logs |info | Show Trackers logs: trace, debug, info, warning, error, critical, off | }" - "{ g gpu |0 | Use OpenCL acceleration | }" - "{ a async |1 | Use 2 theads for processing pipeline | }" - "{ r log_res | | Path to the csv file with tracking result | }" - "{ cvat_res | | Path to the xml file in cvat format with tracking result | }" - "{ s settings | | Path to the ini file with tracking settings | }" - "{ bs batch_size |1 | Batch size - frames count for processing | }" - "{ wf write_n_frame |1 | Write logs on each N frame: 1 for writing each frame | }" - "{ hm heat_map |0 | For CarsCounting: Draw heat map | }" - "{ geo_bind |geo_bind.ini | For CarsCounting: ini file with geographical binding | }" - "{ contrast_adjustment |0 | Use contrast adjustment for frames before detection | }" -}; - -// ---------------------------------------------------------------------- - +///---------------------------------------------------------------------- int main(int argc, char** argv) { + const char* keys = + { + "{ @1 |../data/atrium.avi | movie file | }" + "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 2 - opencv_dnn detector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" + "{ sf start_frame |0 | Start a video from this position | }" + "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" + "{ ed end_delay |0 | Delay in milliseconds after video ending | }" + "{ o out | | Name of 
result video file | }" + "{ show_logs |info | Show Trackers logs: trace, debug, info, warning, error, critical, off | }" + "{ g gpu |0 | Use OpenCL acceleration | }" + "{ a async |1 | Use 2 theads for processing pipeline | }" + "{ r log_res | | Path to the csv file with tracking result | }" + "{ cvat_res | | Path to the xml file in cvat format with tracking result | }" + "{ s settings | | Path to the ini file with tracking settings | }" + "{ bs batch_size |1 | Batch size - frames count for processing | }" + "{ wf write_n_frame |1 | Write logs on each N frame: 1 for writing each frame | }" + "{ hm heat_map |0 | For CarsCounting: Draw heat map | }" + "{ geo_bind |geo_bind.ini | For CarsCounting: ini file with geographical binding | }" + "{ contrast_adjustment |0 | Use contrast adjustment for frames before detection | }" + }; + cv::CommandLineParser parser(argc, argv, keys); - Help(); + std::cout << "\nExamples of the Multitarget tracking algorithm\n" + "Usage: \n" + " ./MultitargetTracker [--example]= [--start_frame]= [--end_frame]= [--end_delay]= [--out]= [--show_logs]= [--async]= [--res]= [--settings]= [--batch_size=] \n\n" + "Press:\n" + "\'m\' key for change mode: play|pause. When video is paused you can press any key for get next frame. 
\n\n" + "Press Esc to exit from video \n" << std::endl; + parser.printMessage(); bool useOCL = parser.get("gpu") != 0; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5a017769..5aa83f30 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,7 @@ else() set(LIB_PTHREAD pthread) endif() - include_directories(common) + include_directories(mtracking) pybind11_add_module(pymtracking ${mtracker_python_src} ${mtracker_python_inc}) target_link_libraries(pymtracking PRIVATE mtracking mdetection ${OpenCV_LIBS} ${PYTHON_LIBRARY} pybind11::module) diff --git a/src/Detector/BackgroundSubtract.cpp b/src/Detector/BackgroundSubtract.cpp index dff18952..a4fc689c 100644 --- a/src/Detector/BackgroundSubtract.cpp +++ b/src/Detector/BackgroundSubtract.cpp @@ -206,21 +206,13 @@ cv::UMat BackgroundSubtract::GetImg(const cv::UMat& image) if (image.channels() == 1) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_GRAY2BGR); -#else cv::cvtColor(image, newImg, cv::COLOR_GRAY2BGR); -#endif return newImg; } else if (image.channels() == 3) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_BGR2GRAY); -#else cv::cvtColor(image, newImg, cv::COLOR_BGR2GRAY); -#endif return newImg; } } diff --git a/src/Detector/BackgroundSubtract.h b/src/Detector/BackgroundSubtract.h index c815be08..31dfe5d9 100644 --- a/src/Detector/BackgroundSubtract.h +++ b/src/Detector/BackgroundSubtract.h @@ -1,6 +1,6 @@ #pragma once -#include "defines.h" +#include "mtracking/defines.h" #include "vibe_src/vibe.hpp" #ifdef USE_OCV_BGFG diff --git a/src/Detector/BaseDetector.h b/src/Detector/BaseDetector.h index b3c4c5d3..bf8ae882 100644 --- a/src/Detector/BaseDetector.h +++ b/src/Detector/BaseDetector.h @@ -1,7 +1,7 @@ #pragma once #include -#include "defines.h" +#include "mtracking/defines.h" /// /// \brief The KeyVal struct @@ -169,11 +169,7 @@ class BaseDetector { if (region.m_boxMask.empty()) { -#if (CV_VERSION_MAJOR < 4) - 
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED); -#else cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); -#endif } else { diff --git a/src/Detector/CMakeLists.txt b/src/Detector/CMakeLists.txt index 8cf3f2b0..ffc36707 100644 --- a/src/Detector/CMakeLists.txt +++ b/src/Detector/CMakeLists.txt @@ -50,7 +50,7 @@ endif(USE_OCV_BGFG) include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/../src) -include_directories(${PROJECT_SOURCE_DIR}/../common) +include_directories(${PROJECT_SOURCE_DIR}/..) if (CMAKE_COMPILER_IS_GNUCXX) add_library(${PROJECT_NAME} SHARED @@ -76,8 +76,8 @@ target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBS}) set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${detector_headers}") install(TARGETS ${PROJECT_NAME} EXPORT MTTrackingExports - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index 8b0d2d14..cf6f3c26 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -45,11 +45,10 @@ void MotionDetector::DetectContour() { m_regions.clear(); std::vector> contours; - std::vector hierarchy; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(m_fg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(m_fg, contours); #else - cv::findContours(m_fg, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for 
(size_t i = 0; i < contours.size(); ++i) { diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 4239059c..8151c6be 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -1,6 +1,6 @@ #include #include "OCVDNNDetector.h" -#include "nms.h" +#include "mtracking/nms.h" /// /// \brief OCVDNNDetector::OCVDNNDetector @@ -172,6 +172,10 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS; dictNetType["DFINE"] = ModelType::DFINE; dictNetType["YOLOV13"] = ModelType::YOLOV13; + dictNetType["DFINE_IS"] = ModelType::DFINE_IS; + dictNetType["YOLOV26"] = ModelType::YOLOV26; + dictNetType["YOLOV26_OBB"] = ModelType::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = ModelType::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -365,7 +369,7 @@ void OCVDNNDetector::Detect(const cv::UMat& colorFrame) void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) { //Convert Mat to batch of images - cv::dnn::blobFromImage(cv::UMat(colorFrame, crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); + cv::dnn::blobFromImage(colorFrame(crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); m_net.setInput(m_inputBlob, "", m_inScaleFactor, m_meanVal); //set the network input @@ -428,6 +432,22 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr ParseYOLOv11(crop, detections, tmpRegions); break; + case ModelType::DFINE_IS: + ParseDFINE_IS(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26: + ParseYOLOv26(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26_OBB: + ParseYOLOv26_obb(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26Mask: + ParseYOLOv26_seg(crop, detections, tmpRegions); + break; + default: ParseOldYOLO(crop, detections, 
tmpRegions); break; @@ -1055,3 +1075,153 @@ void OCVDNNDetector::ParseDFINE(const cv::Rect& crop, std::vector& dete } } +/// +/// \brief OCVDNNDetector::ParseDFINE_IS +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + assert(0); +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 6 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_obb +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& 
tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 7 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0] * x_factor; + float y = dets[ind + 1] * y_factor; + float w = dets[ind + 2] * x_factor; + float h = dets[ind + 3] * y_factor; + float angle = 180.f * dets[ind + 6] / static_cast(M_PI); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x + crop.x, y + crop.y), cv::Size2f(w, h), angle), T2T(classId), static_cast(maxClassScore)); + } + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_seg +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 38 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x 
= dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index bd76ce48..3a55dd67 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -52,7 +52,11 @@ class OCVDNNDetector final : public BaseDetector RFDETR, RFDETR_IS, DFINE, - YOLOV13 + YOLOV13, + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask }; cv::dnn::Net m_net; @@ -89,6 +93,10 @@ class OCVDNNDetector final : public BaseDetector void ParseRFDETR(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); }; #endif diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index 582a2e7f..b0a734a7 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -1,6 +1,6 @@ #include #include 
"ONNXTensorRTDetector.h" -#include "nms.h" +#include "mtracking/nms.h" /// /// \brief ONNXTensorRTDetector::ONNXTensorRTDetector @@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictPrecision["INT8"] = tensor_rt::INT8; dictPrecision["FP16"] = tensor_rt::FP16; dictPrecision["FP32"] = tensor_rt::FP32; + dictPrecision["FP8"] = tensor_rt::FP8; auto precision = dictPrecision.find(inference_precision->second); if (precision != dictPrecision.end()) m_localConfig.m_inferencePrecision = precision->second; @@ -101,6 +102,10 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS; dictNetType["DFINE"] = tensor_rt::DFINE; dictNetType["YOLOV13"] = tensor_rt::YOLOV13; + dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS; + dictNetType["YOLOV26"] = tensor_rt::YOLOV26; + dictNetType["YOLOV26_OBB"] = tensor_rt::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = tensor_rt::YOLOV26Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -303,7 +308,8 @@ void ONNXTensorRTDetector::CalcMotionMap(cv::Mat& frame) { if (m_localConfig.m_netType == tensor_rt::YOLOV7Mask || m_localConfig.m_netType == tensor_rt::YOLOV8Mask - || m_localConfig.m_netType == tensor_rt::YOLOV11Mask) + || m_localConfig.m_netType == tensor_rt::YOLOV11Mask + || m_localConfig.m_netType == tensor_rt::YOLOV26Mask) { static std::vector color; if (color.empty()) diff --git a/src/Detector/tensorrt_onnx/CMakeLists.txt b/src/Detector/tensorrt_onnx/CMakeLists.txt index 1de484a9..257dd3e2 100644 --- a/src/Detector/tensorrt_onnx/CMakeLists.txt +++ b/src/Detector/tensorrt_onnx/CMakeLists.txt @@ -49,7 +49,7 @@ include_directories(${OpenCV_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS}) include_directories(${CUDNN_INCLUDE_DIR}) include_directories(${TensorRT_INCLUDE_DIRS}) -include_directories(${PROJECT_SOURCE_DIR}/../../common) +include_directories(${PROJECT_SOURCE_DIR}/../../mtracking) file(GLOB TENSORRT_SOURCE_FILES 
*.cpp common/*.cpp) file(GLOB TENSORRT_HEADER_FILES *.h* common/*.h*) @@ -91,9 +91,9 @@ target_link_libraries(${libname_rt} ${TENSORRT_LIBS}) install(TARGETS ${libname_rt} EXPORT MTTrackingExports - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp new file mode 100644 index 00000000..84c46ed9 --- /dev/null +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -0,0 +1,194 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The DFINE_is_onnx class +/// +class DFINE_is_onnx : public YoloONNX +{ +public: + DFINE_is_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("logits"); + outputTensorNames.push_back("boxes"); + outputTensorNames.push_back("mask_probs"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size: 1x3x640x640 + //1: name: logits, size: 1x300x80 + //2: name: boxes, size: 1x300x4 + //3: name: mask_probs, size: 1x300x160x160 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); + + //std::cout << "m_resizedROI: " << 
m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + int labelsInd = 0; + int detsInd = 1; + int segInd = 2; + + auto dets = outputs[detsInd]; + auto labels = outputs[labelsInd]; + + auto masks = outputs[segInd]; + + size_t ncInd = 2; + size_t lenInd = 1; + + + size_t nc = m_outpuDims[labelsInd].d[ncInd]; + size_t len = static_cast(m_outpuDims[detsInd].d[lenInd]) / m_params.m_explicitBatchSize; + auto volume0 = len * m_outpuDims[detsInd].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[labelsInd].d[ncInd]; // Volume(m_outpuDims[0]); + labels += volume1 * imgIdx; + + int segChannels = static_cast(m_outpuDims[segInd].d[1]); + int segWidth = static_cast(m_outpuDims[segInd].d[2]); + int segHeight = static_cast(m_outpuDims[segInd].d[3]); + masks += imgIdx * segChannels * segWidth * segHeight; + + cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1); + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.m_confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + classConf = conf; + classId = cli; + } + } + + if (classConf >= m_params.m_confThreshold) + { + float d0 = dets[0]; + float d1 = dets[1]; + float d2 = dets[2]; + float d3 = dets[3]; + + float x = fw * (inputSizef.width * (d0 - d2 / 2.f) - m_resizedROI.x); + float y = fh * (inputSizef.height * (d1 - d3 / 2.f) - m_resizedROI.y); + float width = fw * inputSizef.width * d2; + float height = fh * inputSizef.height * d3; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " 
<< classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect2f(x, y, width, height) << std::endl; + // std::cout << "dets = " << d0 << ", " << d1 << ", " << d2 << ", " << d3 << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + double maskThreshold = 0.1; + for (int row = 0; row < segHeight; ++row) + { + const float* maskPtr = masks + row * segWidth; + uchar* binMaskPtr = binaryMask8U.ptr(row); + + for (int col = 0; col < segWidth; ++col) + { + binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 255 : 0; + } + } + + tensor_rt::Result& resObj = resBoxes.back(); + + cv::Rect smallRect; + smallRect.x = cvRound(segHeight * (d0 - d2 / 2.f)); + smallRect.y = cvRound(segHeight * (d1 - d3 / 2.f)); + smallRect.width = cvRound(segHeight * d2); + smallRect.height = cvRound(segHeight * d3); + smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight)); + + if (smallRect.area() > 0) + { + cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST); + +#if 0 + static int globalObjInd = 0; + SaveMat(mask, std::to_string(globalObjInd) + "_mask", ".png", "tmp", true); + SaveMat(binaryMask, std::to_string(globalObjInd) + "_bin_mask", ".png", "tmp", true); + SaveMat(binaryMask8U, std::to_string(globalObjInd) + "_bin_mask_8u", ".png", "tmp", true); + SaveMat(resObj.m_boxMask, std::to_string(globalObjInd++) + "_obj_mask", ".png", "tmp", true); + std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; +#endif + +#if 0 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width 
>= 4 && + br.height >= 4) + { + int dx = resObj.m_brect.x; + int dy = resObj.m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = rr.center.x * fw + dx; + rr.center.y = rr.center.y * fw + dy; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound(dx + br.x * fw); + br.y = cvRound(dy + br.y * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resObj.m_brect = br; + //resObj.m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + else + { + resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255)); + } + } + + dets += m_outpuDims[detsInd].d[ncInd]; + labels += m_outpuDims[labelsInd].d[ncInd]; + masks += segWidth * segHeight; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp index cab24fdf..d2f1988e 100644 --- a/src/Detector/tensorrt_onnx/RFDETR_is.hpp +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -34,8 +34,8 @@ class RFDETR_is_onnx : public YoloONNX const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); - cv::Size2f inputSizef(m_inputDims[0].d[3], m_inputDims[0].d[2]); - cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]); + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; @@ -134,10 +134,13 @@ class RFDETR_is_onnx : public YoloONNX std::cout << "inputSize: " << 
inputSize << ", localRect: " << localRect << std::endl; #endif +#if 0 std::vector> contours; - std::vector hierarchy; - cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); - +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif for (const auto& contour : contours) { cv::Rect br = cv::boundingRect(contour); @@ -167,6 +170,7 @@ class RFDETR_is_onnx : public YoloONNX break; } } +#endif } else { diff --git a/src/Detector/tensorrt_onnx/YoloONNX.cpp b/src/Detector/tensorrt_onnx/YoloONNX.cpp index 4e241d5a..bec31df9 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.cpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.cpp @@ -3,7 +3,7 @@ #define DEFINE_TRT_ENTRYPOINTS 1 #include "YoloONNX.hpp" -#include "../../common/defines.h" +#include "../../mtracking/defines.h" //! //! 
\brief Creates the network, configures the builder and creates the network engine @@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params) m_params = params; + sample::setReportableSeverity(sample::Logger::Severity::kINFO); initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), ""); auto GetBindings = [&]() @@ -79,15 +80,16 @@ bool YoloONNX::Init(const SampleYoloParams& params) file.close(); } - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger); + m_inferRuntime = std::shared_ptr(nvinfer1::createInferRuntime(sample::gLogger)); if (m_params.m_dlaCore >= 0) - infer->setDLACore(m_params.m_dlaCore); + m_inferRuntime->setDLACore(m_params.m_dlaCore); - m_engine = std::shared_ptr(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); + m_engine = std::shared_ptr(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); #if (NV_TENSORRT_MAJOR < 8) - infer->destroy(); + m_inferRuntime->destroy(); + m_inferRuntime.reset(); #else - //delete infer; + //m_inferRuntime.reset(); #endif if (m_engine) @@ -233,6 +235,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, { case tensor_rt::Precision::FP16: config->setFlag(nvinfer1::BuilderFlag::kFP16); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl; + break; + + case tensor_rt::Precision::FP8: + config->setFlag(nvinfer1::BuilderFlag::kFP8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl; break; case tensor_rt::Precision::INT8: @@ -243,6 +251,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs); calibrator.reset(new Int8EntropyCalibrator2(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str())); config->setFlag(nvinfer1::BuilderFlag::kINT8); + sample::gLogInfo << 
"config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl; config->setInt8Calibrator(calibrator.get()); } break; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.hpp b/src/Detector/tensorrt_onnx/YoloONNX.hpp index 2452f61d..cae188c5 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.hpp @@ -86,6 +86,7 @@ class YoloONNX private: std::shared_ptr m_engine; //!< The TensorRT engine used to run the network + std::shared_ptr m_inferRuntime; cv::Mat m_resized; std::vector m_resizedBatch; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp index b1a0e11c..641c5c7d 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -1,7 +1,7 @@ #pragma once #include "YoloONNX.hpp" -#include "../../common/defines.h" +#include "../../mtracking/defines.h" /// /// \brief The YOLOv11_instance_onnx class @@ -264,12 +264,12 @@ class YOLOv11_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 1 std::vector> contours; - std::vector hierarchy; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); #else - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { @@ -300,6 +300,7 @@ class YOLOv11_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp new file mode 100644 
index 00000000..19cdd67a --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_bb_onnx class +/// +class YOLOv26_bb_onnx : public YoloONNX +{ +public: + YOLOv26_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; + output += volume * imgIdx; + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * (output[ind + 2] - output[ind + 0]); + float height = fh * (output[ind + 3] - output[ind + 1]); + + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", 
height = " << height << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp new file mode 100644 index 00000000..9ec2d27e --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -0,0 +1,175 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv26_instance_onnx class +/// +class YOLOv26_instance_onnx : public YoloONNX +{ +public: + YOLOv26_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[outInd]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + size_t dimInd = 2; + size_t lenInd = 1; + int dimensions = static_cast(m_outpuDims[outInd].d[dimInd]); + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]); + auto volume = len * dimensions; + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = 
static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + int netWidth = 6 + segChannels; + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * dimensions; + + float objectConf = output[k + 4]; + int classId = static_cast(output[k + 5]); + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k]; + float y = output[k + 1]; + float width = output[k + 2] - output[k]; + float height = output[k + 3] - output[k + 1]; + + if (width > 4 && height > 4) + { + resBoxes.emplace_back(classId, objectConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + std::vector tempProto(output + k + 6, output + k + netWidth); + maskProposals.push_back(cv::Mat(tempProto).t()); + } + } + } + + //std::cout << "maskProposals.size = " << maskProposals.size() << std::endl; + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[segInd]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, 
cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR); // fx/fy spelled out: a flag in the 4th slot is read as fx, so this call was already linear + + //std::cout << "m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + + //std::cout << "m_boxMask = " << resBoxes[i].m_boxMask.size() << ", m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + cv::resize(resBoxes[i].m_boxMask, resBoxes[i].m_boxMask, resBoxes[i].m_brect.size(), 0, 0, cv::INTER_NEAREST); + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp new file mode 100644 index 00000000..8a097f2f --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_obb_onnx class +/// +class YOLOv26_obb_onnx : public YoloONNX +{ +public: + YOLOv26_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; + output += volume * imgIdx; + //std::cout << 
"len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * output[ind + 2]; + float height = fh * output[ind + 3]; + float angle = 180.f * output[ind + 6] / static_cast(M_PI); + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << ", angle = " << angle << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(width, height), angle)); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp index 09a38fab..247e352c 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -1,7 +1,7 @@ #pragma once #include "YoloONNX.hpp" -#include "../../common/defines.h" +#include "../../mtracking/defines.h" /// /// \brief The YOLOv7_instance_onnx class @@ -216,12 +216,12 @@ class YOLOv7_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 1 std::vector> contours; - std::vector hierarchy; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) 
&& (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); #else - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { @@ -252,6 +252,7 @@ class YOLOv7_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp index 56fe03f1..6422cc47 100644 --- a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -1,7 +1,7 @@ #pragma once #include "YoloONNX.hpp" -#include "../../common/defines.h" +#include "../../mtracking/defines.h" /// /// \brief The YOLOv8_instance_onnx class @@ -266,12 +266,12 @@ class YOLOv8_instance_onnx : public YoloONNX SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); #endif +#if 1 std::vector> contours; - std::vector hierarchy; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); #else - cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif for (const auto& contour : contours) { @@ -304,6 +304,7 @@ class YOLOv8_instance_onnx : public YoloONNX break; } } +#endif } } return resBoxes; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index 90d2536a..f5f4fb66 100644 --- a/src/Detector/tensorrt_onnx/class_detector.cpp 
+++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -18,6 +18,10 @@ #include "RFDETR_is.hpp" #include "DFINE_bb.hpp" #include "YoloONNXv13_bb.hpp" +#include "DFINE_is.hpp" +#include "YoloONNXv26_bb.hpp" +#include "YoloONNXv26_obb.hpp" +#include "YoloONNXv26_instance.hpp" namespace tensor_rt { @@ -84,6 +88,15 @@ namespace tensor_rt case ModelType::YOLOV11Mask: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; + case ModelType::YOLOV26: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; case ModelType::YOLOV12: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; @@ -99,6 +112,9 @@ namespace tensor_rt case ModelType::YOLOV13: m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); break; + case ModelType::DFINE_IS: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; } // Threshold values @@ -118,6 +134,8 @@ namespace tensor_rt dictprecision[tensor_rt::INT8] = "kINT8"; dictprecision[tensor_rt::FP16] = "kHALF"; dictprecision[tensor_rt::FP32] = "kFLOAT"; + dictprecision[tensor_rt::FP8] = "kFP8"; + auto precision = dictprecision.find(m_params.m_precision); if (precision != dictprecision.end()) precisionStr = precision->second; diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index ccbacffd..7ea989bc 100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -62,7 +62,11 @@ namespace tensor_rt RFDETR, RFDETR_IS, DFINE, - YOLOV13 + YOLOV13, + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + 
YOLOV26Mask }; /// @@ -72,7 +76,8 @@ namespace tensor_rt { INT8 = 0, FP16, - FP32 + FP32, + FP8 }; /// diff --git a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp index 7964aeb5..1e7ee17a 100644 --- a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp @@ -107,8 +107,17 @@ void setCudaDevice(int32_t device, std::ostream& os) os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; +#if (CUDA_VERSION < 13000) os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; +#else + int clockRateKHz = 0; + cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, device); + int memoryClockRateKHz = 0; + cudaDeviceGetAttribute(&memoryClockRateKHz, cudaDevAttrMemoryClockRate, device); + os << "Application Compute Clock Rate: " << clockRateKHz / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << memoryClockRateKHz / 1000000.0F << " GHz" << std::endl; +#endif os << std::endl; os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." 
<< std::endl; diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.cpp b/src/Detector/tensorrt_onnx/common/sampleInference.cpp index f0470bf7..b131ca32 100644 --- a/src/Detector/tensorrt_onnx/common/sampleInference.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleInference.cpp @@ -46,6 +46,7 @@ #include "sampleOptions.h" #include "sampleReporting.h" #include "sampleUtils.h" +#include using namespace nvinfer1; namespace sample { @@ -1320,7 +1321,15 @@ void Binding::fill() fillBuffer(buffer->getHostBuffer(), volume, 0, 255); break; } - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f)); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif @@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, break; } #endif - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp index 8f172afe..89a128ee 100644 --- a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp @@ -18,6 +18,7 @@ #include "sampleUtils.h" #include "bfloat16.h" #include "half.h" +#include using namespace nvinfer1; @@ -433,6 +434,11 @@ void print(std::ostream& os, __half v) os << static_cast(v); } +void print(std::ostream& os, __nv_fp8_e4m3 v) +{ + os << static_cast(v); +} 
+ template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv) @@ -482,6 +488,8 @@ template void dumpBuffer(void const* buffer, std::string const& separat Dims const& strides, int32_t vectorDim, int32_t spv); template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) @@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max) { T* typedBuffer = static_cast(buffer); std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); + std::uniform_real_distribution distribution((float)min, (float)max); auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; std::generate(typedBuffer, typedBuffer + volume, generator); } @@ -580,6 +588,7 @@ template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_ template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); +template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max); bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) { diff --git a/src/Tracker/BaseTracker.cpp b/src/Tracker/BaseTracker.cpp index db6a9573..ec68b858 100644 --- a/src/Tracker/BaseTracker.cpp +++ b/src/Tracker/BaseTracker.cpp @@ -149,8 +149,8 @@ void CTracker::UpdateTrackingState(const regions_t& 
regions, std::cout << "CTracker::UpdateTrackingState: m_tracks = " << colsTracks << ", regions = " << rowsRegions << std::endl; int fontType = cv::FONT_HERSHEY_TRIPLEX; - double fontSize = 0.6; - cv::Scalar colorRegionEllow(0, 255, 255); + double fontSize = (currFrame.cols < 1000) ? 0.4 : 0.6; + cv::Scalar colorRegionEllow(100, 100, 100); cv::Scalar colorMatchedAboveThreshRed(0, 0, 255); cv::Scalar colorMatchedGreen(0, 255, 0); cv::Scalar colorMatchedNearMargenta(255, 0, 255); @@ -173,11 +173,7 @@ void CTracker::UpdateTrackingState(const regions_t& regions, cv::Mat foreground(dbgAssignment.size(), CV_8UC1, cv::Scalar(0, 0, 100)); for (const auto& track : m_tracks) { -#if (CV_VERSION_MAJOR < 4) - cv::ellipse(foreground, track->GetLastRect(), cv::Scalar(255, 255, 255), CV_FILLED); -#else cv::ellipse(foreground, track->GetLastRect(), cv::Scalar(255, 255, 255), cv::FILLED); -#endif } const int chans = dbgAssignment.channels(); @@ -217,12 +213,7 @@ void CTracker::UpdateTrackingState(const regions_t& regions, CreateDistaceMatrix(regions, regionEmbeddings, costMatrix, maxPossibleCost, maxCost); #if DRAW_DBG_ASSIGNMENT std::cout << "CTracker::UpdateTrackingState: maxPossibleCost = " << maxPossibleCost << ", maxCost = " << maxCost << std::endl; - std::cout << "costMatrix: "; - for (auto costv : costMatrix) - { - std::cout << costv << " "; - } - std::cout << std::endl; + std::cout << "costMatrix: " << cv::Mat_(rowsRegions, colsTracks, costMatrix.data()) << std::endl; #endif // Solving assignment problem (shortest paths) @@ -332,7 +323,9 @@ void CTracker::UpdateTrackingState(const regions_t& regions, m_tracks[i]->IsStaticTimeout(frameTime, m_settings.m_maxStaticTime - m_settings.m_minStaticTime)) { m_removedObjects.push_back(m_tracks[i]->GetID()); - //std::cout << "Remove: " << m_tracks[i]->GetID().ID2Str() << ": lost = " << m_tracks[i]->GetLostPeriod(frameTime) << ", maximumAllowedLostTime = " << m_settings.m_maximumAllowedLostTime << ", out of frame " << 
m_tracks[i]->IsOutOfTheFrame() << std::endl; +#if DRAW_DBG_ASSIGNMENT + std::cout << "Remove: " << m_tracks[i]->GetID().ID2Str() << ": lost = " << m_tracks[i]->GetLostPeriod(frameTime) << ", maximumAllowedLostTime = " << m_settings.m_maximumAllowedLostTime << ", out of frame " << m_tracks[i]->IsOutOfTheFrame() << std::endl; +#endif m_tracks.erase(m_tracks.begin() + i); assignmentT2R.erase(assignmentT2R.begin() + i); } @@ -349,8 +342,9 @@ void CTracker::UpdateTrackingState(const regions_t& regions, #endif for (size_t i = 0; i < regions.size(); ++i) { - //std::cout << "CTracker::update: regions[" << i << "].m_rrect: " << regions[i].m_rrect.center << ", " << regions[i].m_rrect.angle << ", " << regions[i].m_rrect.size << std::endl; - +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::update: regions[" << i << "].m_rrect: " << regions[i].m_rrect.center << ", " << regions[i].m_rrect.angle << ", " << regions[i].m_rrect.size << std::endl; +#endif if (std::find(assignmentT2R.begin(), assignmentT2R.end(), i) == assignmentT2R.end()) { if (regionEmbeddings.empty()) @@ -391,7 +385,9 @@ void CTracker::UpdateTrackingState(const regions_t& regions, if (assignmentT2R[i] != -1) // If we have assigned detect, then update using its coordinates, { m_tracks[i]->ResetLostTime(frameTime); - // std::cout << "Update track " << i << " for " << assignment[i] << " region, regionEmbeddings.size = " << regionEmbeddings.size() << std::endl; +#if DRAW_DBG_ASSIGNMENT + std::cout << "Update track " << i << " for " << assignmentT2R[i] << " region, regionEmbeddings.size = " << regionEmbeddings.size() << std::endl; +#endif if (regionEmbeddings.empty()) m_tracks[i]->Update(regions[assignmentT2R[i]], true, m_settings.m_maxTraceLength, diff --git a/src/Tracker/CMakeLists.txt b/src/Tracker/CMakeLists.txt index 0141e5e4..fbb355a8 100644 --- a/src/Tracker/CMakeLists.txt +++ b/src/Tracker/CMakeLists.txt @@ -3,10 +3,10 @@ cmake_minimum_required(VERSION 3.9) project(mtracking) set(main_sources - ../common/nms.h 
- ../common/defines.h - ../common/object_types.h - ../common/object_types.cpp) + ../mtracking/nms.h + ../mtracking/defines.h + ../mtracking/object_types.h + ../mtracking/object_types.cpp) set(tracker_sources BaseTracker.cpp @@ -76,7 +76,7 @@ endif(USE_OCV_EMBEDDINGS) include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/../src) -include_directories(${PROJECT_SOURCE_DIR}/../common) +include_directories(${PROJECT_SOURCE_DIR}/../mtracking) include_directories(${PROJECT_SOURCE_DIR}/../../thirdparty) if (CMAKE_COMPILER_IS_GNUCXX) @@ -98,11 +98,11 @@ endif() target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBS}) -set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${tracker_headers};../common/defines.h;../common/object_types.h") +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${tracker_headers};../mtracking/defines.h;../mtracking/object_types.h") install(TARGETS ${PROJECT_NAME} EXPORT MTTrackingExports - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Tracker/TrackerSettings.cpp b/src/Tracker/TrackerSettings.cpp index e76dd72e..e06f2311 100644 --- a/src/Tracker/TrackerSettings.cpp +++ b/src/Tracker/TrackerSettings.cpp @@ -57,12 +57,12 @@ bool ParseTrackerSettings(const std::string& settingsFile, TrackerSettings& trac trackerSettings.m_useAbandonedDetection = reader.GetInteger("tracking", "detect_abandoned", 0) != 0; trackerSettings.m_minStaticTime = reader.GetInteger("tracking", "min_static_time", 5); trackerSettings.m_maxStaticTime = reader.GetInteger("tracking", "max_static_time", 25); - trackerSettings.m_maxSpeedForStatic = reader.GetInteger("tracking", "max_speed_for_static", 10); + 
trackerSettings.m_maxSpeedForStatic = static_cast(reader.GetReal("tracking", "max_speed_for_static", 0.5)); trackerSettings.m_byteTrackSettings.m_trackBuffer = reader.GetInteger("tracking", "bytetrack_track_buffer", 30); - trackerSettings.m_byteTrackSettings.m_trackThresh = reader.GetReal("tracking", "bytetrack_track_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_highThresh = reader.GetReal("tracking", "bytetrack_high_thresh", 0.5); - trackerSettings.m_byteTrackSettings.m_matchThresh = reader.GetReal("tracking", "bytetrack_match_thresh", 0.8); + trackerSettings.m_byteTrackSettings.m_trackThresh = static_cast(reader.GetReal("tracking", "bytetrack_track_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_highThresh = static_cast(reader.GetReal("tracking", "bytetrack_high_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_matchThresh = static_cast(reader.GetReal("tracking", "bytetrack_match_thresh", 0.8)); // Read detection settings trackerSettings.m_nnWeights = reader.GetString("detection", "nn_weights", "data/yolov4-tiny_best.weights"); diff --git a/src/Tracker/TrackerSettings.h b/src/Tracker/TrackerSettings.h index ba862f5f..8e68b4bf 100644 --- a/src/Tracker/TrackerSettings.h +++ b/src/Tracker/TrackerSettings.h @@ -20,7 +20,7 @@ struct TrackerSettings tracking::KalmanType m_kalmanType = tracking::KalmanLinear; tracking::FilterGoal m_filterGoal = tracking::FilterCenter; tracking::LostTrackType m_lostTrackType = tracking::TrackKCF; // Used if m_filterGoal == tracking::FilterRect - tracking::MatchType m_matchType = tracking::MatchHungrian; + tracking::MatchType m_matchType = tracking::MatchLAPJV; std::array m_distType; diff --git a/src/Tracker/byte_track/BYTETracker.cpp b/src/Tracker/byte_track/BYTETracker.cpp index 57227535..144490b1 100644 --- a/src/Tracker/byte_track/BYTETracker.cpp +++ b/src/Tracker/byte_track/BYTETracker.cpp @@ -24,18 +24,37 @@ byte_track::BYTETracker::BYTETracker(const int& frame_rate, /// void 
byte_track::BYTETracker::GetTracks(std::vector& tracks) const { + tracks.clear(); + if (output_stracks_.size() > tracks.capacity()) + tracks.reserve(output_stracks_.size()); + for (const auto& track : output_stracks_) + { + std::chrono::duration period = m_lastFrameTime - m_lastFrameTime; + cv::RotatedRect rr(track->getRect().tl(), cv::Point2f(static_cast(track->getRect().x + track->getRect().width), static_cast(track->getRect().y)), track->getRect().br()); + TrackingObject to(rr, track->getTrackId(), track->getTrace(), false, cvRound(period.count()), false, + track->getType(), track->getScore(), track->getVelocity()); + + tracks.emplace_back(to); + } } /// void byte_track::BYTETracker::GetRemovedTracks(std::vector& trackIDs) const { - + if (removed_stracks_.size() > trackIDs.capacity()) + trackIDs.reserve(removed_stracks_.size()); + for (const auto& remTrack : removed_stracks_) + { + trackIDs.emplace_back(remTrack->getTrackId()); + } } /// -void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime) +void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat /*currFrame*/, time_point_t frameTime) { + m_lastFrameTime = frameTime; + ////////////////// Step 1: Get detections ////////////////// frame_id_++; @@ -45,7 +64,7 @@ void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFram for (const auto ®ion : regions) { - const auto strack = std::make_shared(region.m_brect, region.m_confidence, frameTime); + const auto strack = std::make_shared(region.m_brect, region.m_confidence, region.m_type, frameTime); if (region.m_confidence >= track_thresh_) det_stracks.push_back(strack); else @@ -81,7 +100,7 @@ void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFram { std::vector> matches_idx; - std::vector unmatch_detection_idx, unmatch_track_idx; + std::vector unmatch_detection_idx, unmatch_track_idx; const auto dists = calcIouDistance(strack_pool, det_stracks); 
linearAssignment(dists, strack_pool.size(), det_stracks.size(), match_thresh_, @@ -120,7 +139,7 @@ void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFram { std::vector> matches_idx; - std::vector unmatch_track_idx, unmatch_detection_idx; + std::vector unmatch_track_idx, unmatch_detection_idx; const auto dists = calcIouDistance(remain_tracked_stracks, det_low_stracks); linearAssignment(dists, remain_tracked_stracks.size(), det_low_stracks.size(), 0.5, @@ -157,8 +176,8 @@ void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFram std::vector current_removed_stracks; { - std::vector unmatch_detection_idx; - std::vector unmatch_unconfirmed_idx; + std::vector unmatch_detection_idx; + std::vector unmatch_unconfirmed_idx; std::vector> matches_idx; // Deal with unconfirmed tracks, usually tracks with only one beginning frame @@ -224,7 +243,7 @@ void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat currFram std::vector byte_track::BYTETracker::jointStracks(const std::vector &a_tlist, const std::vector &b_tlist) const { - std::map exists; + std::map exists; std::vector res; for (size_t i = 0; i < a_tlist.size(); i++) { @@ -233,7 +252,7 @@ std::vector byte_track::BYTETracker::jointSt } for (size_t i = 0; i < b_tlist.size(); i++) { - const int &tid = b_tlist[i]->getTrackId(); + const size_t &tid = b_tlist[i]->getTrackId(); if (!exists[tid] || exists.count(tid) == 0) { exists[tid] = 1; @@ -247,7 +266,7 @@ std::vector byte_track::BYTETracker::jointSt std::vector byte_track::BYTETracker::subStracks(const std::vector &a_tlist, const std::vector &b_tlist) const { - std::map stracks; + std::map stracks; for (size_t i = 0; i < a_tlist.size(); i++) { stracks.emplace(a_tlist[i]->getTrackId(), a_tlist[i]); @@ -255,13 +274,13 @@ std::vector byte_track::BYTETracker::subStra for (size_t i = 0; i < b_tlist.size(); i++) { - const int &tid = b_tlist[i]->getTrackId(); + const size_t&tid = b_tlist[i]->getTrackId(); if 
(stracks.count(tid) != 0) stracks.erase(tid); } std::vector res; - std::map::iterator it; + std::map::iterator it; for (it = stracks.begin(); it != stracks.end(); ++it) { res.push_back(it->second); @@ -291,8 +310,8 @@ void byte_track::BYTETracker::removeDuplicateStracks(const std::vector a_overlapping(a_stracks.size(), false), b_overlapping(b_stracks.size(), false); for (const auto &[a_idx, b_idx] : overlapping_combinations) { - const int timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); - const int timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); + const size_t timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); + const size_t timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); if (timep > timeq) b_overlapping[b_idx] = true; else @@ -314,41 +333,35 @@ void byte_track::BYTETracker::removeDuplicateStracks(const std::vector> &cost_matrix, - const int &cost_matrix_size, - const int &cost_matrix_size_size, + const size_t &cost_matrix_size, + const size_t &cost_matrix_size_size, const float &thresh, std::vector> &matches, - std::vector &a_unmatched, - std::vector &b_unmatched) const + std::vector &a_unmatched, + std::vector &b_unmatched) const { if (cost_matrix.size() == 0) { - for (int i = 0; i < cost_matrix_size; i++) + for (size_t i = 0; i < cost_matrix_size; i++) { a_unmatched.push_back(i); } - for (int i = 0; i < cost_matrix_size_size; i++) + for (size_t i = 0; i < cost_matrix_size_size; i++) { b_unmatched.push_back(i); } return; } - std::vector rowsol; std::vector colsol; + std::vector rowsol; + std::vector colsol; execLapjv(cost_matrix, rowsol, colsol, true, thresh); for (size_t i = 0; i < rowsol.size(); i++) { if (rowsol[i] >= 0) - { - std::vector match; - match.push_back(i); - match.push_back(rowsol[i]); - matches.push_back(match); - } + matches.push_back({ (int)i, rowsol[i] }); else - { a_unmatched.push_back(i); - } } for (size_t i = 0; i < 
colsol.size(); i++) @@ -443,12 +456,12 @@ double byte_track::BYTETracker::execLapjv(const std::vector> std::vector > cost_c_extended; - int n_rows = cost.size(); - int n_cols = cost[0].size(); + size_t n_rows = cost.size(); + size_t n_cols = cost[0].size(); rowsol.resize(n_rows); colsol.resize(n_cols); - int n = 0; + size_t n = 0; if (n_rows == n_cols) { n = n_rows; @@ -472,7 +485,7 @@ double byte_track::BYTETracker::execLapjv(const std::vector> { for (size_t j = 0; j < cost_c_extended[i].size(); j++) { - cost_c_extended[i][j] = cost_limit / 2.0; + cost_c_extended[i][j] = cost_limit / 2.0f; } } } @@ -503,9 +516,9 @@ double byte_track::BYTETracker::execLapjv(const std::vector> cost_c_extended[i][j] = 0; } } - for (int i = 0; i < n_rows; i++) + for (size_t i = 0; i < n_rows; i++) { - for (int j = 0; j < n_cols; j++) + for (size_t j = 0; j < n_cols; j++) { cost_c_extended[i][j] = cost_c[i][j]; } @@ -526,18 +539,18 @@ double byte_track::BYTETracker::execLapjv(const std::vector> if (n != n_rows) { - for (int i = 0; i < n; i++) + for (size_t i = 0; i < n; i++) { if (x_c[i] >= n_cols) x_c[i] = -1; if (y_c[i] >= n_rows) y_c[i] = -1; } - for (int i = 0; i < n_rows; i++) + for (size_t i = 0; i < n_rows; i++) { rowsol[i] = x_c[i]; } - for (int i = 0; i < n_cols; i++) + for (size_t i = 0; i < n_cols; i++) { colsol[i] = y_c[i]; } diff --git a/src/Tracker/byte_track/BYTETracker.h b/src/Tracker/byte_track/BYTETracker.h index c2b16b93..95146a7b 100644 --- a/src/Tracker/byte_track/BYTETracker.h +++ b/src/Tracker/byte_track/BYTETracker.h @@ -37,12 +37,12 @@ class BYTETracker final : public BaseTracker std::vector &b_res) const; void linearAssignment(const std::vector> &cost_matrix, - const int &cost_matrix_size, - const int &cost_matrix_size_size, + const size_t &cost_matrix_size, + const size_t &cost_matrix_size_size, const float &thresh, std::vector> &matches, - std::vector &b_unmatched, - std::vector &a_unmatched) const; + std::vector &b_unmatched, + std::vector &a_unmatched) 
const; std::vector> calcIouDistance(const std::vector &a_tracks, const std::vector &b_tracks) const; @@ -63,6 +63,7 @@ class BYTETracker final : public BaseTracker const float match_thresh_ = 0.8f; const size_t max_time_lost_ = 30; + time_point_t m_lastFrameTime; size_t frame_id_ = 0; size_t track_id_count_ = 0; diff --git a/src/Tracker/byte_track/STrack.cpp b/src/Tracker/byte_track/STrack.cpp index decaaa25..728b4e6e 100644 --- a/src/Tracker/byte_track/STrack.cpp +++ b/src/Tracker/byte_track/STrack.cpp @@ -2,10 +2,11 @@ #include -byte_track::STrack::STrack(const cv::Rect2f& rect, const float& score, time_point_t currTime) : +byte_track::STrack::STrack(const cv::Rect2f& rect, const float& score, objtype_t type, time_point_t currTime) : kalman_filter_(), mean_(), covariance_(), + type_(type), rect_(rect), state_(STrackState::New), is_activated_(false), @@ -58,6 +59,21 @@ const size_t& byte_track::STrack::getTrackletLength() const return tracklet_len_; } +objtype_t byte_track::STrack::getType() const +{ + return type_; +} + +const Trace& byte_track::STrack::getTrace() const +{ + return trace_; +} + +cv::Vec byte_track::STrack::getVelocity() const +{ + return cv::Vec(mean_(4), mean_(5)); +} + byte_track::KalmanFilter::DetectBox GetXyah(const cv::Rect2f& rect) { return byte_track::KalmanFilter::DetectBox( diff --git a/src/Tracker/byte_track/STrack.h b/src/Tracker/byte_track/STrack.h index ae2d6503..65607f78 100644 --- a/src/Tracker/byte_track/STrack.h +++ b/src/Tracker/byte_track/STrack.h @@ -18,7 +18,7 @@ enum class STrackState { class STrack { public: - STrack(const cv::Rect2f& rect, const float& score, time_point_t currTime); + STrack(const cv::Rect2f& rect, const float& score, objtype_t type, time_point_t currTime); ~STrack() = default; const cv::Rect2f& getRect() const; @@ -30,6 +30,9 @@ class STrack const size_t& getFrameId() const; const size_t& getStartFrameId() const; const size_t& getTrackletLength() const; + objtype_t getType() const; + const Trace& 
getTrace() const; + cv::Vec getVelocity() const; void activate(const size_t& frame_id, const size_t& track_id, time_point_t currTime); void reActivate(const STrack &new_track, const size_t &frame_id, const int &new_track_id, time_point_t currTime); // new_track_id = -1 @@ -45,6 +48,7 @@ class STrack KalmanFilter::StateMean mean_; KalmanFilter::StateCov covariance_; + objtype_t type_ = bad_type; cv::Rect2f rect_; STrackState state_{ STrackState::New }; diff --git a/src/Tracker/trajectory.h b/src/Tracker/trajectory.h index 323d9343..9b4dd22a 100644 --- a/src/Tracker/trajectory.h +++ b/src/Tracker/trajectory.h @@ -152,6 +152,14 @@ class Trace m_trace.clear(); } + /// + /// \brief pop_back + /// + void pop_back() + { + m_trace.pop_back(); + } + /// /// \brief GetRawCount /// \param lastPeriod diff --git a/src/common/defines.h b/src/mtracking/defines.h similarity index 95% rename from src/common/defines.h rename to src/mtracking/defines.h index e67fd142..245fc0a2 100644 --- a/src/common/defines.h +++ b/src/mtracking/defines.h @@ -1,461 +1,461 @@ -#pragma once - -#include -#include -#include - -#ifdef HAVE_FILESYSTEM -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif - -#include -#include "object_types.h" - -// --------------------------------------------------------------------------- -// -// --------------------------------------------------------------------------- -typedef float track_t; -typedef cv::Point_ Point_t; -#define El_t CV_32F -#define Mat_t CV_32FC - -typedef std::vector assignments_t; -typedef std::vector distMatrix_t; - -typedef std::chrono::time_point time_point_t; - -/// -template -class TrackID -{ -public: - typedef T value_type; - - TrackID() = default; - TrackID(value_type val) - : m_val(val) - { - } - - bool operator==(const TrackID& id) const - { - return m_val == id.m_val; - } - - std::string ID2Str() const - { - return std::to_string(m_val); - } - static TrackID Str2ID(const 
std::string& id) - { - return TrackID(std::stoi(id)); - } - TrackID NextID() const - { - return TrackID(m_val + 1); - } - size_t ID2Module(size_t module) const - { - return m_val % module; - } - - value_type m_val{ 0 }; -}; - -typedef TrackID track_id_t; -namespace std -{ - template <> - struct hash - { - std::size_t operator()(const track_id_t& k) const - { - return std::hash()(k.m_val); - } - }; - -} - -/// -/// \brief config_t -/// -typedef std::multimap config_t; - -/// -/// \brief The CRegion class -/// -class CRegion -{ -public: - /// - CRegion() = default; - - /// - CRegion(const cv::Rect& rect) noexcept - : m_brect(rect) - { - B2RRect(); - } - - /// - CRegion(const cv::RotatedRect& rrect) noexcept - : m_rrect(rrect) - { - if (m_rrect.size.width < 1) - m_rrect.size.width = 1; - if (m_rrect.size.height < 1) - m_rrect.size.height = 1; - R2BRect(); - } - - /// - CRegion(const cv::RotatedRect& rrect, objtype_t type, float confidence) noexcept - : m_type(type), m_rrect(rrect), m_confidence(confidence) - { - if (m_rrect.size.width < 1) - m_rrect.size.width = 1; - if (m_rrect.size.height < 1) - m_rrect.size.height = 1; - R2BRect(); - } - - /// - CRegion(const cv::RotatedRect& rrect, const cv::Rect& brect, objtype_t type, float confidence, const cv::Mat& boxMask) noexcept - : m_type(type), m_rrect(rrect), m_brect(brect), m_confidence(confidence) - { - m_boxMask = boxMask; - - if (m_rrect.size.width < 1) - m_rrect.size.width = 1; - if (m_rrect.size.height < 1) - m_rrect.size.height = 1; - - if (!m_boxMask.empty() && m_boxMask.size() != m_brect.size()) - { - m_brect.width = m_boxMask.cols; - m_brect.height = m_boxMask.rows; - } - } - - /// - CRegion(const cv::Rect& brect, objtype_t type, float confidence) noexcept - : m_type(type), m_brect(brect), m_confidence(confidence) - { - B2RRect(); - } - - objtype_t m_type = bad_type; - cv::RotatedRect m_rrect; - cv::Rect m_brect; - track_t m_confidence = -1; - cv::Mat m_boxMask; - -private: - /// - /// \brief R2BRect - /// 
\return - /// - cv::Rect R2BRect() noexcept - { - m_brect = m_rrect.boundingRect(); - return m_brect; - } - /// - /// \brief B2RRect - /// \return - /// - cv::RotatedRect B2RRect() noexcept - { - m_rrect = cv::RotatedRect(m_brect.tl(), cv::Point2f(static_cast(m_brect.x + m_brect.width), static_cast(m_brect.y)), m_brect.br()); - if (m_rrect.size.width < 1) - m_rrect.size.width = 1; - if (m_rrect.size.height < 1) - m_rrect.size.height = 1; - return m_rrect; - } -}; - -typedef std::vector regions_t; - -/// -/// \brief sqr -/// \param val -/// \return -/// -template inline -T sqr(T val) -{ - return val * val; -} - -/// -/// \brief get_lin_regress_params -/// \param in_data -/// \param start_pos -/// \param in_data_size -/// \param kx -/// \param bx -/// \param ky -/// \param by -/// -template -void get_lin_regress_params( - const CONT& in_data, - size_t start_pos, - size_t in_data_size, - T& kx, T& bx, T& ky, T& by) -{ - T m1(0.), m2(0.); - T m3_x(0.), m4_x(0.); - T m3_y(0.), m4_y(0.); - - const T el_count = static_cast(in_data_size - start_pos); - for (size_t i = start_pos; i < in_data_size; ++i) - { - m1 += i; - m2 += sqr(i); - - m3_x += in_data[i].x; - m4_x += i * in_data[i].x; - - m3_y += in_data[i].y; - m4_y += i * in_data[i].y; - } - T det_1 = 1 / (el_count * m2 - sqr(m1)); - - m1 *= -1; - - kx = det_1 * (m1 * m3_x + el_count * m4_x); - bx = det_1 * (m2 * m3_x + m1 * m4_x); - - ky = det_1 * (m1 * m3_y + el_count * m4_y); - by = det_1 * (m2 * m3_y + m1 * m4_y); -} - -/// -/// \brief sqr: Euclid distance between two points -/// \param val -/// \return -/// -template inline -T distance(const POINT_TYPE& p1, const POINT_TYPE& p2) -{ - return sqrt((T)(sqr(p2.x - p1.x) + sqr(p2.y - p1.y))); -} - -/// -/// \brief Clamp: Fit rectangle to frame -/// \param rect -/// \param size -/// \return -/// -inline cv::Rect Clamp(cv::Rect rect, const cv::Size& size) -{ - if (rect.x < 0) - { - rect.width = std::min(rect.width, size.width - 1); - rect.x = 0; - } - else if (rect.x + 
rect.width >= size.width) - { - rect.x = std::max(0, size.width - rect.width - 1); - rect.width = std::min(rect.width, size.width - 1); - } - if (rect.y < 0) - { - rect.height = std::min(rect.height, size.height - 1); - rect.y = 0; - } - else if (rect.y + rect.height >= size.height) - { - rect.y = std::max(0, size.height - rect.height - 1); - rect.height = std::min(rect.height, size.height - 1); - } - return rect; -} - -/// -/// \brief SaveMat -/// \param m -/// \param name -/// \param path -/// -inline bool SaveMat(const cv::Mat& m, std::string prefix, const std::string& ext, const std::string& savePath, bool compressToImage) -{ - bool res = true; - - std::map depthDict; - depthDict.emplace(CV_8U, "uint8"); - depthDict.emplace(CV_8S, "int8"); - depthDict.emplace(CV_16U, "uint16"); - depthDict.emplace(CV_16S, "int16"); - depthDict.emplace(CV_32S, "int32"); - depthDict.emplace(CV_32F, "float32"); - depthDict.emplace(CV_64F, "float64"); - depthDict.emplace(CV_16F, "float16"); - - auto depth = depthDict.find(m.depth()); - if (depth == std::end(depthDict)) - { - std::cout << "File " << prefix << " has a unknown depth: " << m.depth() << std::endl; - res = false; - return res; - } - assert(depth != std::end(depthDict)); - - fs::path fullPath(savePath); - fullPath.append(prefix + "_" + std::to_string(m.cols) + "x" + std::to_string(m.rows) + "_" + depth->second + "_C" + std::to_string(m.channels()) + ext); - prefix = fullPath.generic_string(); - - if (compressToImage) - { - res = cv::imwrite(prefix, m); - } - else - { - FILE* f = 0; -#ifdef _WIN32 - fopen_s(&f, prefix.c_str(), "wb"); -#else - f = fopen(prefix.c_str(), "wb"); -#endif // _WIN32 - res = f != 0; - if (res) - { - for (int y = 0; y < m.rows; ++y) - { - fwrite(m.ptr(y), 1, m.cols * m.elemSize(), f); - } - fclose(f); - std::cout << "File " << prefix << " was writed" << std::endl; - } - } - if (res) - std::cout << "File " << prefix << " was writed" << std::endl; - else - std::cout << "File " << prefix << " can not 
be opened!" << std::endl; - return res; -} - -/// -/// \brief DrawFilledRect -/// -inline void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, int alpha) -{ - if (alpha) - { - const int alpha_1 = 255 - alpha; - const int nchans = frame.channels(); - int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; - for (int y = std::max(0, rect.y); y < std::min(rect.y + rect.height, frame.rows - 1); ++y) - { - uchar* ptr = frame.ptr(y) + nchans * rect.x; - for (int x = std::max(0, rect.x); x < std::min(rect.x + rect.width, frame.cols - 1); ++x) - { - for (int i = 0; i < nchans; ++i) - { - ptr[i] = cv::saturate_cast((alpha_1 * ptr[i] + alpha * color[i]) / 255); - } - ptr += nchans; - } - } - } - else - { - cv::rectangle(frame, rect, cl, cv::FILLED); - } -} - -/// -/// -/// -namespace tracking -{ -/// -/// \brief The Detectors enum -/// -enum Detectors -{ - Motion_VIBE = 0, - Motion_MOG = 1, - Motion_GMG = 2, - Motion_CNT = 3, - Motion_MOG2 = 4, - ONNX_TensorRT = 5, - DNN_OCV = 6, - DetectorsCount -}; - -/// -/// \brief The TrackerTemplate enum -/// -enum TrackerTemplate -{ - UniversalTracker = 0, - ByteTrack = 1 -}; - -/// -/// \brief The DistType enum -/// -enum DistType -{ - DistCenters, // Euclidean distance between centers, [0, 1] - DistRects, // Euclidean distance between bounding rectangles, [0, 1] - DistJaccard, // Intersection over Union, IoU, [0, 1] - DistHist, // Bhatacharia distance between histograms, [0, 1] - DistFeatureCos, // Cosine distance between embeddings, [0, 1] - DistMahalanobis, // Mahalanobis: https://ww2.mathworks.cn/help/vision/ug/motion-based-multiple-object-tracking.html - DistsCount -}; - -/// -/// \brief The FilterGoal enum -/// -enum FilterGoal -{ - FilterCenter, - FilterRect, - FilterRRect, - FiltersCount -}; - -/// -/// \brief The KalmanType enum -/// -enum KalmanType -{ - KalmanLinear, - KalmanUnscented, - KalmanAugmentedUnscented, - KalmanCount -}; - -/// -/// \brief The MatchType 
enum -/// -enum MatchType -{ - MatchHungrian, - MatchLAPJV, - MatchCount -}; - -/// -/// \brief The LostTrackType enum -/// -enum LostTrackType -{ - TrackNone, - TrackKCF, - TrackCSRT, - TrackDaSiamRPN, - TrackNano, - TrackVit, - SingleTracksCount -}; -} +#pragma once + +#include +#include +#include + +#ifdef HAVE_EXPERIMENTAL_FILESYSTEM +#include +namespace fs = std::experimental::filesystem; +#else +#include +namespace fs = std::filesystem; +#endif + +#include +#include "object_types.h" + +// --------------------------------------------------------------------------- +// +// --------------------------------------------------------------------------- +typedef float track_t; +typedef cv::Point_ Point_t; +#define El_t CV_32F +#define Mat_t CV_32FC + +typedef std::vector assignments_t; +typedef std::vector distMatrix_t; + +typedef std::chrono::time_point time_point_t; + +/// +template +class TrackID +{ +public: + typedef T value_type; + + TrackID() = default; + TrackID(value_type val) + : m_val(val) + { + } + + bool operator==(const TrackID& id) const + { + return m_val == id.m_val; + } + + std::string ID2Str() const + { + return std::to_string(m_val); + } + static TrackID Str2ID(const std::string& id) + { + return TrackID(std::stoi(id)); + } + TrackID NextID() const + { + return TrackID(m_val + 1); + } + size_t ID2Module(size_t module) const + { + return m_val % module; + } + + value_type m_val{ 0 }; +}; + +typedef TrackID track_id_t; +namespace std +{ + template <> + struct hash + { + std::size_t operator()(const track_id_t& k) const + { + return std::hash()(k.m_val); + } + }; + +} + +/// +/// \brief config_t +/// +typedef std::multimap config_t; + +/// +/// \brief The CRegion class +/// +class CRegion +{ +public: + /// + CRegion() = default; + + /// + CRegion(const cv::Rect& rect) noexcept + : m_brect(rect) + { + B2RRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect) noexcept + : m_rrect(rrect) + { + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + 
if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + R2BRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect, objtype_t type, float confidence) noexcept + : m_type(type), m_rrect(rrect), m_confidence(confidence) + { + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + R2BRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect, const cv::Rect& brect, objtype_t type, float confidence, const cv::Mat& boxMask) noexcept + : m_type(type), m_rrect(rrect), m_brect(brect), m_confidence(confidence) + { + m_boxMask = boxMask; + + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + + if (!m_boxMask.empty() && m_boxMask.size() != m_brect.size()) + { + m_brect.width = m_boxMask.cols; + m_brect.height = m_boxMask.rows; + } + } + + /// + CRegion(const cv::Rect& brect, objtype_t type, float confidence) noexcept + : m_type(type), m_brect(brect), m_confidence(confidence) + { + B2RRect(); + } + + objtype_t m_type = bad_type; + cv::RotatedRect m_rrect; + cv::Rect m_brect; + track_t m_confidence = -1; + cv::Mat m_boxMask; + +private: + /// + /// \brief R2BRect + /// \return + /// + cv::Rect R2BRect() noexcept + { + m_brect = m_rrect.boundingRect(); + return m_brect; + } + /// + /// \brief B2RRect + /// \return + /// + cv::RotatedRect B2RRect() noexcept + { + m_rrect = cv::RotatedRect(m_brect.tl(), cv::Point2f(static_cast(m_brect.x + m_brect.width), static_cast(m_brect.y)), m_brect.br()); + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + return m_rrect; + } +}; + +typedef std::vector regions_t; + +/// +/// \brief sqr +/// \param val +/// \return +/// +template inline +T sqr(T val) +{ + return val * val; +} + +/// +/// \brief get_lin_regress_params +/// \param in_data +/// \param start_pos +/// \param in_data_size +/// \param kx +/// \param bx +/// \param ky +/// \param by +/// +template 
+void get_lin_regress_params( + const CONT& in_data, + size_t start_pos, + size_t in_data_size, + T& kx, T& bx, T& ky, T& by) +{ + T m1(0.), m2(0.); + T m3_x(0.), m4_x(0.); + T m3_y(0.), m4_y(0.); + + const T el_count = static_cast(in_data_size - start_pos); + for (size_t i = start_pos; i < in_data_size; ++i) + { + m1 += i; + m2 += sqr(i); + + m3_x += in_data[i].x; + m4_x += i * in_data[i].x; + + m3_y += in_data[i].y; + m4_y += i * in_data[i].y; + } + T det_1 = 1 / (el_count * m2 - sqr(m1)); + + m1 *= -1; + + kx = det_1 * (m1 * m3_x + el_count * m4_x); + bx = det_1 * (m2 * m3_x + m1 * m4_x); + + ky = det_1 * (m1 * m3_y + el_count * m4_y); + by = det_1 * (m2 * m3_y + m1 * m4_y); +} + +/// +/// \brief sqr: Euclid distance between two points +/// \param val +/// \return +/// +template inline +T distance(const POINT_TYPE& p1, const POINT_TYPE& p2) +{ + return sqrt((T)(sqr(p2.x - p1.x) + sqr(p2.y - p1.y))); +} + +/// +/// \brief Clamp: Fit rectangle to frame +/// \param rect +/// \param size +/// \return +/// +inline cv::Rect Clamp(cv::Rect rect, const cv::Size& size) +{ + if (rect.x < 0) + { + rect.width = std::min(rect.width, size.width - 1); + rect.x = 0; + } + else if (rect.x + rect.width >= size.width) + { + rect.x = std::max(0, size.width - rect.width - 1); + rect.width = std::min(rect.width, size.width - 1); + } + if (rect.y < 0) + { + rect.height = std::min(rect.height, size.height - 1); + rect.y = 0; + } + else if (rect.y + rect.height >= size.height) + { + rect.y = std::max(0, size.height - rect.height - 1); + rect.height = std::min(rect.height, size.height - 1); + } + return rect; +} + +/// +/// \brief SaveMat +/// \param m +/// \param name +/// \param path +/// +inline bool SaveMat(const cv::Mat& m, std::string prefix, const std::string& ext, const std::string& savePath, bool compressToImage) +{ + bool res = true; + + std::map depthDict; + depthDict.emplace(CV_8U, "uint8"); + depthDict.emplace(CV_8S, "int8"); + depthDict.emplace(CV_16U, "uint16"); + 
depthDict.emplace(CV_16S, "int16"); + depthDict.emplace(CV_32S, "int32"); + depthDict.emplace(CV_32F, "float32"); + depthDict.emplace(CV_64F, "float64"); + depthDict.emplace(CV_16F, "float16"); + + auto depth = depthDict.find(m.depth()); + if (depth == std::end(depthDict)) + { + std::cout << "File " << prefix << " has a unknown depth: " << m.depth() << std::endl; + res = false; + return res; + } + assert(depth != std::end(depthDict)); + + fs::path fullPath(savePath); + fullPath.append(prefix + "_" + std::to_string(m.cols) + "x" + std::to_string(m.rows) + "_" + depth->second + "_C" + std::to_string(m.channels()) + ext); + prefix = fullPath.generic_string(); + + if (compressToImage) + { + res = cv::imwrite(prefix, m); + } + else + { + FILE* f = 0; +#ifdef _WIN32 + fopen_s(&f, prefix.c_str(), "wb"); +#else + f = fopen(prefix.c_str(), "wb"); +#endif // _WIN32 + res = f != 0; + if (res) + { + for (int y = 0; y < m.rows; ++y) + { + fwrite(m.ptr(y), 1, m.cols * m.elemSize(), f); + } + fclose(f); + std::cout << "File " << prefix << " was writed" << std::endl; + } + } + if (res) + std::cout << "File " << prefix << " was writed" << std::endl; + else + std::cout << "File " << prefix << " can not be opened!" 
<< std::endl; + return res; +} + +/// +/// \brief DrawFilledRect +/// +inline void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, int alpha) +{ + if (alpha) + { + const int alpha_1 = 255 - alpha; + const int nchans = frame.channels(); + int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; + for (int y = std::max(0, rect.y); y < std::min(rect.y + rect.height, frame.rows - 1); ++y) + { + uchar* ptr = frame.ptr(y) + nchans * rect.x; + for (int x = std::max(0, rect.x); x < std::min(rect.x + rect.width, frame.cols - 1); ++x) + { + for (int i = 0; i < nchans; ++i) + { + ptr[i] = cv::saturate_cast((alpha_1 * ptr[i] + alpha * color[i]) / 255); + } + ptr += nchans; + } + } + } + else + { + cv::rectangle(frame, rect, cl, cv::FILLED); + } +} + +/// +/// +/// +namespace tracking +{ +/// +/// \brief The Detectors enum +/// +enum Detectors +{ + Motion_VIBE = 0, + Motion_MOG = 1, + Motion_GMG = 2, + Motion_CNT = 3, + Motion_MOG2 = 4, + ONNX_TensorRT = 5, + DNN_OCV = 6, + DetectorsCount +}; + +/// +/// \brief The TrackerTemplate enum +/// +enum TrackerTemplate +{ + UniversalTracker = 0, + ByteTrack = 1 +}; + +/// +/// \brief The DistType enum +/// +enum DistType +{ + DistCenters, // Euclidean distance between centers, [0, 1] + DistRects, // Euclidean distance between bounding rectangles, [0, 1] + DistJaccard, // Intersection over Union, IoU, [0, 1] + DistHist, // Bhatacharia distance between histograms, [0, 1] + DistFeatureCos, // Cosine distance between embeddings, [0, 1] + DistMahalanobis, // Mahalanobis: https://ww2.mathworks.cn/help/vision/ug/motion-based-multiple-object-tracking.html + DistsCount +}; + +/// +/// \brief The FilterGoal enum +/// +enum FilterGoal +{ + FilterCenter, + FilterRect, + FilterRRect, + FiltersCount +}; + +/// +/// \brief The KalmanType enum +/// +enum KalmanType +{ + KalmanLinear, + KalmanUnscented, + KalmanAugmentedUnscented, + KalmanCount +}; + +/// +/// \brief The MatchType enum +/// 
+enum MatchType +{ + MatchHungrian, + MatchLAPJV, + MatchCount +}; + +/// +/// \brief The LostTrackType enum +/// +enum LostTrackType +{ + TrackNone, + TrackKCF, + TrackCSRT, + TrackDaSiamRPN, + TrackNano, + TrackVit, + SingleTracksCount +}; +} diff --git a/src/common/nms.h b/src/mtracking/nms.h similarity index 100% rename from src/common/nms.h rename to src/mtracking/nms.h diff --git a/src/common/object_types.cpp b/src/mtracking/object_types.cpp similarity index 93% rename from src/common/object_types.cpp rename to src/mtracking/object_types.cpp index 0e2270c0..8484723c 100644 --- a/src/common/object_types.cpp +++ b/src/mtracking/object_types.cpp @@ -1,89 +1,89 @@ -#include "object_types.h" - -std::vector TypeConverter::m_typeNames = -{ - "person", - "bicycle", - "car", - "motorbike", - "aeroplane", - "bus", - "train", - "truck", - "boat", - "traffic_light", - "fire_hydrant", - "stop_sign", - "parking_meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports_ball", - "kite", - "baseball_bat", - "baseball_glove", - "skateboard", - "surfboard", - "tennis_racket", - "bottle", - "wine_glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot_dog", - "pizza", - "donut", - "cake", - "chair", - "sofa", - "pottedplant", - "bed", - "diningtable", - "toilet", - "tvmonitor", - "laptop", - "mouse", - "remote", - "keyboard", - "cell_phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy_bear", - "hair_drier", - "toothbrush", - "vehicle", - "face" -}; - -std::string TypeConverter::m_badTypeName = "unknown"; +#include "object_types.h" + +std::vector TypeConverter::m_typeNames = +{ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + 
"bus", + "train", + "truck", + "boat", + "traffic_light", + "fire_hydrant", + "stop_sign", + "parking_meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports_ball", + "kite", + "baseball_bat", + "baseball_glove", + "skateboard", + "surfboard", + "tennis_racket", + "bottle", + "wine_glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot_dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell_phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy_bear", + "hair_drier", + "toothbrush", + "vehicle", + "face" +}; + +std::string TypeConverter::m_badTypeName = "unknown"; diff --git a/src/common/object_types.h b/src/mtracking/object_types.h similarity index 91% rename from src/common/object_types.h rename to src/mtracking/object_types.h index 1ecf7977..25f78a5e 100644 --- a/src/common/object_types.h +++ b/src/mtracking/object_types.h @@ -1,58 +1,58 @@ -#pragma once -#include -#include -#include - -typedef int objtype_t; -constexpr objtype_t bad_type = -1; - -/// -class TypeConverter -{ -public: - /// - static std::string Type2Str(objtype_t type) - { - return (type == bad_type) ? 
m_badTypeName : m_typeNames[type]; - } - - /// - static objtype_t Str2Type(const std::string& typeName) - { - for (size_t i = 0; i < m_typeNames.size(); ++i) - { - if (typeName == m_typeNames[i]) - { - //std::cout << "Str2Type: " << typeName << " exist: " << i << std::endl; - return static_cast(i); - } - } - m_typeNames.emplace_back(typeName); - //std::cout << "Str2Type: " << typeName << " new: " << (m_typeNames.size()) - 1 << std::endl; - return static_cast(m_typeNames.size()) - 1; - } - - static bool AddNewType(const std::string& typeName) - { - for (size_t i = 0; i < m_typeNames.size(); ++i) - { - if (typeName == m_typeNames[i]) - { - //std::cout << "AddNewType: " << typeName << ": false" << std::endl; - return false; - } - } - m_typeNames.emplace_back(typeName); - //std::cout << "AddNewType: " << typeName << ": " << (m_typeNames.size() - 1) << std::endl; - return true; - } - - static size_t TypesCount() - { - return m_typeNames.size(); - } - -private: - static std::vector m_typeNames; - static std::string m_badTypeName; -}; +#pragma once +#include +#include +#include + +typedef int objtype_t; +constexpr objtype_t bad_type = -1; + +/// +class TypeConverter +{ +public: + /// + static std::string Type2Str(objtype_t type) + { + return (type == bad_type) ? 
m_badTypeName : m_typeNames[(size_t)type]; + } + + /// + static objtype_t Str2Type(const std::string& typeName) + { + for (size_t i = 0; i < m_typeNames.size(); ++i) + { + if (typeName == m_typeNames[i]) + { + //std::cout << "Str2Type: " << typeName << " exist: " << i << std::endl; + return static_cast(i); + } + } + m_typeNames.emplace_back(typeName); + //std::cout << "Str2Type: " << typeName << " new: " << (m_typeNames.size()) - 1 << std::endl; + return static_cast(m_typeNames.size()) - 1; + } + + static bool AddNewType(const std::string& typeName) + { + for (size_t i = 0; i < m_typeNames.size(); ++i) + { + if (typeName == m_typeNames[i]) + { + //std::cout << "AddNewType: " << typeName << ": false" << std::endl; + return false; + } + } + m_typeNames.emplace_back(typeName); + //std::cout << "AddNewType: " << typeName << ": " << (m_typeNames.size() - 1) << std::endl; + return true; + } + + static size_t TypesCount() + { + return m_typeNames.size(); + } + +private: + static std::vector m_typeNames; + static std::string m_badTypeName; +}; diff --git a/src/python_bind/mtracker.cpp b/src/python_bind/mtracker.cpp index b95657ca..c9aa731b 100644 --- a/src/python_bind/mtracker.cpp +++ b/src/python_bind/mtracker.cpp @@ -7,7 +7,7 @@ #include -#include "../common/defines.h" +#include "../mtracking/defines.h" #include "../Tracker/Ctracker.h" #include "../Detector/BaseDetector.h" #include "../Detector/MotionDetector.h" diff --git a/thirdparty/inih/CMakeLists.txt b/thirdparty/inih/CMakeLists.txt index bdb1a93b..a4b78575 100644 --- a/thirdparty/inih/CMakeLists.txt +++ b/thirdparty/inih/CMakeLists.txt @@ -12,9 +12,9 @@ set_target_properties(inih PROPERTIES FOLDER "libs") install(TARGETS ${PROJECT_NAME} EXPORT MTTrackingExports - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) + ARCHIVE DESTINATION lib + 
LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") \ No newline at end of file diff --git a/thirdparty/ruclip/CMakeLists.txt b/thirdparty/ruclip/CMakeLists.txt index ce2657a3..74f99136 100644 --- a/thirdparty/ruclip/CMakeLists.txt +++ b/thirdparty/ruclip/CMakeLists.txt @@ -40,9 +40,9 @@ target_link_libraries(${PROJECT_NAME} ${RUCLIP_LIBS}) install(TARGETS ${PROJECT_NAME} EXPORT MTTrackingExports - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/thirdparty/ruclip/ClipAPI.cpp b/thirdparty/ruclip/ClipAPI.cpp index 7c374cc8..663230ae 100644 --- a/thirdparty/ruclip/ClipAPI.cpp +++ b/thirdparty/ruclip/ClipAPI.cpp @@ -4,7 +4,7 @@ #include "RuCLIP.h" #include "RuCLIPProcessor.h" -#include "../../src/common/defines.h" +#include "../../src/mtracking/defines.h" /// class ClassificationCLIP::ClassificationCLIPImpl diff --git a/thirdparty/ruclip/RuCLIPProcessor.cpp b/thirdparty/ruclip/RuCLIPProcessor.cpp index 3bb7242d..1167df80 100644 --- a/thirdparty/ruclip/RuCLIPProcessor.cpp +++ b/thirdparty/ruclip/RuCLIPProcessor.cpp @@ -84,8 +84,8 @@ cv::Mat RuCLIPProcessor::ResizeToInput(const cv::Mat& img, bool saveAspectRatio) int xOffset = (ImageSize - newWidth) / 2; int yOffset = (ImageSize - newHeight) / 2; - assert(2 * m_XOffset + newWidth == ImageSize); - assert(2 * m_YOffset + newHeight == ImageSize); + assert(2 * xOffset + newWidth == ImageSize); + assert(2 * yOffset + newHeight == ImageSize); cv::resize(img, newImg(cv::Rect(xOffset, yOffset, newWidth, newHeight)), cv::Size(newWidth, 
newHeight), 0, 0, cv::INTER_CUBIC); }