Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
[![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml)

## Latest Features
- Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg)
- Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp)
- Big code cleanup from old-style algorithms and detectors: some bgfg detectors, some VOT trackers, Face and Pedestrian detectors, Darknet-based backend for old YOLO etc
- YOLOv13 detector works with TensorRT! Export pre-trained PyTorch models [here (iMoonLab/yolov13)](https://github.com/iMoonLab/yolov13) to ONNX format and run Multitarget-tracker with `-e=3` example
Expand Down
8 changes: 0 additions & 8 deletions src/Detector/BackgroundSubtract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,21 +206,13 @@ cv::UMat BackgroundSubtract::GetImg(const cv::UMat& image)
if (image.channels() == 1)
{
cv::UMat newImg;
#if (CV_VERSION_MAJOR < 4)
cv::cvtColor(image, newImg, CV_GRAY2BGR);
#else
cv::cvtColor(image, newImg, cv::COLOR_GRAY2BGR);
#endif
return newImg;
}
else if (image.channels() == 3)
{
cv::UMat newImg;
#if (CV_VERSION_MAJOR < 4)
cv::cvtColor(image, newImg, CV_BGR2GRAY);
#else
cv::cvtColor(image, newImg, cv::COLOR_BGR2GRAY);
#endif
return newImg;
}
}
Expand Down
4 changes: 0 additions & 4 deletions src/Detector/BaseDetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,7 @@ class BaseDetector
{
if (region.m_boxMask.empty())
{
#if (CV_VERSION_MAJOR < 4)
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED);
#else
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED);
#endif
}
else
{
Expand Down
7 changes: 3 additions & 4 deletions src/Detector/MotionDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@ void MotionDetector::DetectContour()
{
m_regions.clear();
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
#if (CV_VERSION_MAJOR < 4)
cv::findContours(m_fg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point());
#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9)))
cv::findContoursLinkRuns(m_fg, contours);
#else
cv::findContours(m_fg, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());
cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());
#endif
for (size_t i = 0; i < contours.size(); ++i)
{
Expand Down
16 changes: 16 additions & 0 deletions src/Detector/OCVDNNDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ bool OCVDNNDetector::Init(const config_t& config)
dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS;
dictNetType["DFINE"] = ModelType::DFINE;
dictNetType["YOLOV13"] = ModelType::YOLOV13;
dictNetType["DFINE_IS"] = ModelType::DFINE_IS;

auto netType = dictNetType.find(net_type->second);
if (netType != dictNetType.end())
Expand Down Expand Up @@ -428,6 +429,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr
ParseYOLOv11(crop, detections, tmpRegions);
break;

case ModelType::DFINE_IS:
ParseDFINE_IS(crop, detections, tmpRegions);
break;

default:
ParseOldYOLO(crop, detections, tmpRegions);
break;
Expand Down Expand Up @@ -1055,3 +1060,14 @@ void OCVDNNDetector::ParseDFINE(const cv::Rect& crop, std::vector<cv::Mat>& dete
}
}

///
/// \brief OCVDNNDetector::ParseDFINE_IS
/// Stub for decoding D-FINE instance-segmentation output with the OpenCV DNN
/// backend. Not implemented yet: selecting ModelType::DFINE_IS with this
/// detector hits assert(0) in debug builds and silently returns no regions in
/// release builds (assert compiles out). The TensorRT backend provides a
/// working implementation for this model.
/// \param crop        ROI of the frame the network ran on (unused)
/// \param detections  raw network output blobs (unused)
/// \param tmpRegions  output detections (left empty)
///
void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions)
{
    assert(0);
}

4 changes: 3 additions & 1 deletion src/Detector/OCVDNNDetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class OCVDNNDetector final : public BaseDetector
RFDETR,
RFDETR_IS,
DFINE,
YOLOV13
YOLOV13,
DFINE_IS
};

cv::dnn::Net m_net;
Expand Down Expand Up @@ -89,6 +90,7 @@ class OCVDNNDetector final : public BaseDetector
void ParseRFDETR(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
void ParseRFDETR_IS(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
void ParseDFINE(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
void ParseDFINE_IS(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
};

#endif
1 change: 1 addition & 0 deletions src/Detector/ONNXTensorRTDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config)
dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS;
dictNetType["DFINE"] = tensor_rt::DFINE;
dictNetType["YOLOV13"] = tensor_rt::YOLOV13;
dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS;

auto netType = dictNetType.find(net_type->second);
if (netType != dictNetType.end())
Expand Down
200 changes: 200 additions & 0 deletions src/Detector/tensorrt_onnx/DFINE_is.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#pragma once

#include "YoloONNX.hpp"

///
/// \brief The DFINE_is_onnx class
///
///
/// \brief The DFINE_is_onnx class
/// Wrapper for the D-FINE instance-segmentation ONNX model running through
/// TensorRT: binds the model's tensor names and decodes its raw outputs
/// (per-query class logits, normalized boxes, per-query mask probability
/// grids) into tensor_rt::Result objects with per-object binary masks.
///
class DFINE_is_onnx : public YoloONNX
{
public:
    ///
    /// \brief DFINE_is_onnx
    /// Registers the tensor names the engine must bind:
    /// one input ("input") and three outputs ("logits", "boxes", "mask_probs").
    ///
    DFINE_is_onnx(std::vector<std::string>& inputTensorNames, std::vector<std::string>& outputTensorNames)
    {
        inputTensorNames.push_back("input");
        outputTensorNames.push_back("logits");
        outputTensorNames.push_back("boxes");
        outputTensorNames.push_back("mask_probs");
    }

protected:
    ///
    /// \brief GetResult
    /// Decodes the raw network output for one image of the batch.
    /// \param imgIdx    index of the image inside the batch
    /// \param outputs   raw buffers in binding order: [logits, boxes, mask_probs]
    /// \param frameSize size of the original frame detections are mapped back to
    /// \return detections above m_params.m_confThreshold, each with a binary mask
    ///
    std::vector<tensor_rt::Result> GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector<float*>& outputs, cv::Size frameSize)
    {
        std::vector<tensor_rt::Result> resBoxes;

        // Expected tensor shapes (example for a 640x640 model):
        // 0: input,      1x3x640x640
        // 1: logits,     1x300x80   (per-query class logits)
        // 2: boxes,      1x300x4    (normalized cx, cy, w, h)
        // 3: mask_probs, 1x300x160x160 (per-query mask grid, NCHW: d[2]=H, d[3]=W)

        // Scale factors from the (letterboxed) network input back to the frame
        const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
        const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

        cv::Size inputSize(static_cast<int>(m_inputDims[0].d[3]), static_cast<int>(m_inputDims[0].d[2]));
        cv::Size2f inputSizef(static_cast<float>(inputSize.width), static_cast<float>(inputSize.height));

        constexpr int labelsInd = 0; // logits
        constexpr int detsInd = 1;   // boxes
        constexpr int segInd = 2;    // mask_probs

        const float* dets = outputs[detsInd];
        const float* labels = outputs[labelsInd];
        const float* masks = outputs[segInd];

        constexpr size_t ncInd = 2;
        constexpr size_t lenInd = 1;

        const size_t nc = static_cast<size_t>(m_outpuDims[labelsInd].d[ncInd]);
        const size_t len = static_cast<size_t>(m_outpuDims[detsInd].d[lenInd]) / m_params.m_explicitBatchSize;

        // Skip to the imgIdx-th image inside the batch
        dets += len * static_cast<size_t>(m_outpuDims[detsInd].d[ncInd]) * imgIdx;
        labels += len * static_cast<size_t>(m_outpuDims[labelsInd].d[ncInd]) * imgIdx;

        // NCHW: d[1] = number of query masks, d[2] = height, d[3] = width.
        // NOTE(review): the original code read width from d[2] and height from
        // d[3]; that only worked because D-FINE mask grids are square.
        const int numMasks = static_cast<int>(m_outpuDims[segInd].d[1]);
        const int segHeight = static_cast<int>(m_outpuDims[segInd].d[2]);
        const int segWidth = static_cast<int>(m_outpuDims[segInd].d[3]);
        masks += imgIdx * static_cast<size_t>(numMasks) * segWidth * segHeight;

        // Scratch buffer reused for every detection
        cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1);

        // Logistic sigmoid: raw logit -> [0..1] confidence
        auto Sigmoid = [](float v)
        {
            return 1.f / (1.f + std::exp(-v));
        };

        for (size_t i = 0; i < len; ++i)
        {
            // Pick the best-scoring class for this query
            float classConf = Sigmoid(labels[0]);
            size_t classId = 0;
            for (size_t cli = 1; cli < nc; ++cli)
            {
                float conf = Sigmoid(labels[cli]);
                if (classConf < conf)
                {
                    classConf = conf;
                    classId = cli;
                }
            }

            if (classConf >= m_params.m_confThreshold)
            {
                // Normalized center/size -> frame coordinates
                float d0 = dets[0];
                float d1 = dets[1];
                float d2 = dets[2];
                float d3 = dets[3];

                float x = fw * (inputSizef.width * (d0 - d2 / 2.f) - m_resizedROI.x);
                float y = fh * (inputSizef.height * (d1 - d3 / 2.f) - m_resizedROI.y);
                float width = fw * inputSizef.width * d2;
                float height = fh * inputSizef.height * d3;

                resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)));
                tensor_rt::Result& resObj = resBoxes.back();

                // Box footprint inside the low-resolution mask grid.
                // Fix over the original: x/width are scaled by the mask WIDTH and
                // y/height by the mask HEIGHT (the original used the height for
                // all four, correct only for square grids).
                cv::Rect smallRect;
                smallRect.x = cvRound(segWidth * (d0 - d2 / 2.f));
                smallRect.y = cvRound(segHeight * (d1 - d3 / 2.f));
                smallRect.width = cvRound(segWidth * d2);
                smallRect.height = cvRound(segHeight * d3);
                smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight));

                if (smallRect.area() > 0)
                {
                    // Binarize only the part of the grid the box covers: the
                    // rest of the mask is never read by the resize below.
                    constexpr float maskThreshold = 0.1f;
                    for (int row = smallRect.y; row < smallRect.y + smallRect.height; ++row)
                    {
                        const float* maskPtr = masks + row * segWidth;
                        uchar* binMaskPtr = binaryMask8U.ptr(row);
                        for (int col = smallRect.x; col < smallRect.x + smallRect.width; ++col)
                        {
                            binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 255 : 0;
                        }
                    }
                    // Upscale the cropped binary mask to the object's bounding box
                    cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST);
                }
                else
                {
                    // Degenerate box: fall back to a fully filled mask
                    resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255));
                }
            }

            // Advance to the next query in each output tensor
            dets += m_outpuDims[detsInd].d[ncInd];
            labels += m_outpuDims[labelsInd].d[ncInd];
            masks += segWidth * segHeight;
        }

        return resBoxes;
    }
};
14 changes: 9 additions & 5 deletions src/Detector/tensorrt_onnx/RFDETR_is.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class RFDETR_is_onnx : public YoloONNX
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

cv::Size2f inputSizef(m_inputDims[0].d[3], m_inputDims[0].d[2]);
cv::Size inputSize(m_inputDims[0].d[3], m_inputDims[0].d[2]);
cv::Size inputSize(static_cast<int>(m_inputDims[0].d[3]), static_cast<int>(m_inputDims[0].d[2]));
cv::Size2f inputSizef(static_cast<float>(inputSize.width), static_cast<float>(inputSize.height));

//std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl;

Expand Down Expand Up @@ -134,10 +134,13 @@ class RFDETR_is_onnx : public YoloONNX
std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl;
#endif

#if 0
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(resObj.m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());

#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9)))
cv::findContoursLinkRuns(resObj.m_boxMask, contours);
#else
cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());
#endif
for (const auto& contour : contours)
{
cv::Rect br = cv::boundingRect(contour);
Expand Down Expand Up @@ -167,6 +170,7 @@ class RFDETR_is_onnx : public YoloONNX
break;
}
}
#endif
}
else
{
Expand Down
9 changes: 5 additions & 4 deletions src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,12 @@ class YOLOv11_instance_onnx : public YoloONNX
SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true);
#endif

#if 0
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
#if (CV_VERSION_MAJOR < 4)
cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point());
#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9)))
cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours);
#else
cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());
cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point());
#endif
for (const auto& contour : contours)
{
Expand Down Expand Up @@ -300,6 +300,7 @@ class YOLOv11_instance_onnx : public YoloONNX
break;
}
}
#endif
}
}
return resBoxes;
Expand Down
Loading
Loading