Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/Detector/tensorrt_yolo/YoloONNX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,9 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
}
}

#if 0
m_resizedROI = cv::Rect(0, 0, inputW, inputH);

#if 1
// resize the DsImage with scale
const float imgHeight = static_cast<float>(sampleImages[0].rows);
const float imgWidth = static_cast<float>(sampleImages[0].cols);
Expand All @@ -351,7 +352,7 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
assert(2 * yOffset + resizeH == inputH);

cv::Size scaleSize(inputW, inputH);
cv::Rect roiRect(xOffset, yOffset, resizeW, resizeH);
m_resizedROI = cv::Rect(xOffset, yOffset, resizeW, resizeH);

if (m_resizedBatch.size() < sampleImages.size())
m_resizedBatch.resize(sampleImages.size());
Expand All @@ -361,7 +362,7 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
{
if (m_resizedBatch[b].size() != scaleSize)
m_resizedBatch[b] = cv::Mat(scaleSize, sampleImages[b].type(), cv::Scalar::all(128));
cv::resize(sampleImages[b], cv::Mat(m_resizedBatch[b], roiRect), roiRect.size(), 0, 0, cv::INTER_LINEAR);
cv::resize(sampleImages[b], cv::Mat(m_resizedBatch[b], m_resizedROI), m_resizedROI.size(), 0, 0, cv::INTER_LINEAR);
cv::split(m_resizedBatch[b], m_inputChannels[b]);
std::swap(m_inputChannels[b][0], m_inputChannels[b][2]);
}
Expand Down
7 changes: 4 additions & 3 deletions src/Detector/tensorrt_yolo/YoloONNX.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ class YoloONNX
size_t GetNumClasses() const;

protected:
SampleYoloParams m_params; //!< The parameters for the sample.
nvinfer1::Dims m_inputDims; //!< The dimensions of the input to the network.
std::vector<nvinfer1::Dims> m_outpuDims; //!< The dimensions of the input to the network.
SampleYoloParams m_params; //!< The parameters for the sample
nvinfer1::Dims m_inputDims; //!< The dimensions of the input to the network
std::vector<nvinfer1::Dims> m_outpuDims; //!< The dimensions of the outputs of the network
cv::Rect m_resizedROI; //!< Input frame resized into input dimensions with the frame aspect ratio

virtual std::vector<tensor_rt::Result> GetResult(size_t imgIdx, int keep_topk, const std::vector<float*>& outputs, cv::Size frameSize) = 0;

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv10_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv10_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x300x6

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -51,8 +51,8 @@ class YOLOv10_bb_onnx : public YoloONNX
//if (i == 0)
// std::cout << i << ": " << output[k + 0] << " " << output[k + 1] << " " << output[k + 2] << " " << output[k + 3] << " " << output[k + 4] << " " << output[k + 5] << std::endl;

float x = fw * output[k + 0];
float y = fh * output[k + 1];
float x = fw * (output[k + 0] - m_resizedROI.x);
float y = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * (output[k + 2] - output[k + 0]);
float height = fh * (output[k + 3] - output[k + 1]);
float objectConf = output[k + 4];
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv11_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x84x8400

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -88,8 +88,8 @@ class YOLOv11_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class YOLOv11_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 1 : 0;
size_t segInd = (outputs.size() == 0) ? 0 : 1;
Expand Down Expand Up @@ -155,8 +155,8 @@ class YOLOv11_instance_onnx : public YoloONNX
if (objectConf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv11_obb_onnx : public YoloONNX
//20: 15 DOTA classes + x + y + w + h + a
constexpr int shapeDataSize = 5;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -96,8 +96,8 @@ class YOLOv11_obb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height)
float cx = fw * output[k];
float cy = fh * output[k + 1];
float cx = fw * (output[k] - m_resizedROI.x);
float cy = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI;
Expand Down
21 changes: 9 additions & 12 deletions src/Detector/tensorrt_yolo/YoloONNXv6_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class YOLOv6_bb_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

if (outputs.size() == 4)
{
auto dets = reinterpret_cast<int*>(outputs[0]);
Expand All @@ -26,9 +29,6 @@ class YOLOv6_bb_onnx : public YoloONNX

int objectsCount = m_outpuDims[1].d[1];

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

//std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl;

const size_t step1 = imgIdx * objectsCount;
Expand All @@ -41,8 +41,8 @@ class YOLOv6_bb_onnx : public YoloONNX
int classId = classes[i + step1];
if (class_conf >= m_params.confThreshold)
{
float x = fw * boxes[k + 0 + step2];
float y = fh * boxes[k + 1 + step2];
float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x);
float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y);
float width = fw * boxes[k + 2 + step2] - x;
float height = fh * boxes[k + 3 + step2] - y;

Expand All @@ -57,9 +57,6 @@ class YOLOv6_bb_onnx : public YoloONNX
}
else if (outputs.size() == 1)
{
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

auto output = outputs[0];

size_t ncInd = 2;
Expand Down Expand Up @@ -96,8 +93,8 @@ class YOLOv6_bb_onnx : public YoloONNX
int classId = cvRound(output[k + 5]);
if (class_conf >= m_params.confThreshold)
{
float x = fw * output[k + 1];
float y = fh * output[k + 2];
float x = fw * (output[k + 1] - m_resizedROI.x);
float y = fh * (output[k + 2] - m_resizedROI.y);
float width = fw * (output[k + 3] - output[k + 1]);
float height = fh * (output[k + 4] - output[k + 2]);

Expand Down Expand Up @@ -150,8 +147,8 @@ class YOLOv6_bb_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
21 changes: 9 additions & 12 deletions src/Detector/tensorrt_yolo/YoloONNXv7_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class YOLOv7_bb_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

if (outputs.size() == 4)
{
auto dets = reinterpret_cast<int*>(outputs[0]);
Expand All @@ -26,9 +29,6 @@ class YOLOv7_bb_onnx : public YoloONNX

int objectsCount = m_outpuDims[1].d[1];

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

//std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl;

const size_t step1 = imgIdx * objectsCount;
Expand All @@ -41,8 +41,8 @@ class YOLOv7_bb_onnx : public YoloONNX
int classId = classes[i + step1];
if (class_conf >= m_params.confThreshold)
{
float x = fw * boxes[k + 0 + step2];
float y = fh * boxes[k + 1 + step2];
float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x);
float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y);
float width = fw * boxes[k + 2 + step2] - x;
float height = fh * boxes[k + 3 + step2] - y;

Expand All @@ -57,9 +57,6 @@ class YOLOv7_bb_onnx : public YoloONNX
}
else if (outputs.size() == 1)
{
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

auto output = outputs[0];

size_t ncInd = 2;
Expand Down Expand Up @@ -96,8 +93,8 @@ class YOLOv7_bb_onnx : public YoloONNX
int classId = cvRound(output[k + 5]);
if (class_conf >= m_params.confThreshold)
{
float x = fw * output[k + 1];
float y = fh * output[k + 2];
float x = fw * (output[k + 1] - m_resizedROI.x);
float y = fh * (output[k + 2] - m_resizedROI.y);
float width = fw * (output[k + 3] - output[k + 1]);
float height = fh * (output[k + 4] - output[k + 2]);

Expand Down Expand Up @@ -150,8 +147,8 @@ class YOLOv7_bb_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv7_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class YOLOv7_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 0 : 1;
size_t segInd = (outputs.size() == 0) ? 1 : 0;
Expand Down Expand Up @@ -123,8 +123,8 @@ class YOLOv7_instance_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv8_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x84x8400

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -88,8 +88,8 @@ class YOLOv8_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class YOLOv8_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 0 : 1;
size_t segInd = (outputs.size() == 0) ? 1 : 0;
Expand Down Expand Up @@ -155,8 +155,8 @@ class YOLOv8_instance_onnx : public YoloONNX
if (objectConf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv8_obb_onnx : public YoloONNX
//20: 15 DOTA classes + x + y + w + h + a
constexpr int shapeDataSize = 5;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -96,8 +96,8 @@ class YOLOv8_obb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height)
float cx = fw * output[k];
float cy = fh * output[k + 1];
float cx = fw * (output[k] - m_resizedROI.x);
float cy = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI;
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv9_bb_onnx : public YoloONNX
//84: 80 COCO classes + x + y + w + h
constexpr int shapeDataSize = 4;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -90,8 +90,8 @@ class YOLOv9_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down
24 changes: 24 additions & 0 deletions thirdparty/ruclip/RuCLIPProcessor.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
#include "RuCLIPProcessor.h"

///
/// Converts an OpenCV image into a float tensor with a leading batch dimension.
///
/// The pixel bytes are read as unsigned 8-bit values laid out as
/// rows x cols x channels, converted to float and divided by 255.
///
/// \param img  Source image (assumed to have 8-bit depth - the data is read as at::kByte).
/// \param perm When true, the axes are permuted {2, 0, 1} (HWC -> CHW) before the
///             batch dimension is prepended.
/// \return Float tensor with one extra leading dimension of size 1.
///
torch::Tensor CVMatToTorchTensor(const cv::Mat img, const bool perm = true)
{
    // torch::from_blob() is only given sizes here, so it treats the buffer as densely
    // packed. A cv::Mat that is a ROI view (or has padded rows) is not packed, so take
    // a continuous copy in that case; continuous inputs are used as-is with no copy.
    const cv::Mat packed = img.isContinuous() ? img : img.clone();

    auto tensor_image = torch::from_blob(packed.data, { packed.rows, packed.cols, packed.channels() }, at::kByte);
    if (perm)
        tensor_image = tensor_image.permute({ 2, 0, 1 });
    tensor_image.unsqueeze_(0);

    // div() is out-of-place, so the returned tensor has its own storage and does not
    // keep pointing into `packed` after this function returns.
    tensor_image = tensor_image.toType(c10::kFloat).div(255);
    return tensor_image;
}

///
/// Converts a tensor into an 8-bit OpenCV image.
///
/// The tensor is detached, squeezed (all size-1 dimensions removed), moved to the CPU,
/// multiplied by 255, clamped to [0, 255] and converted to unsigned 8-bit.
///
/// \param tensor_image Source tensor.
/// \param perm         When true, the axes are permuted {1, 2, 0} (CHW -> HWC). The permute
///                     is skipped when squeezing left a 2-D tensor (single channel),
///                     because there is no channel axis left to move.
/// \return A cv::Mat of depth CV_8U that owns its own copy of the pixels. The channel count
///         is the size of the third dimension, or 1 for a 2-D tensor.
///
cv::Mat TorchTensorToCVMat(const torch::Tensor tensor_image, const bool perm = true)
{
    auto t = tensor_image.detach().squeeze().cpu();
    if (perm && t.dim() != 2)
        t = t.permute({ 1, 2, 0 });

    // permute() only changes strides, and the element-wise ops below keep that layout.
    // contiguous() forces the memory into the logical (rows, cols, channels) order that
    // the cv::Mat header below assumes when it reads the raw pointer.
    t = t.mul(255).clamp(0, 255).to(torch::kU8).contiguous();

    const int rows = static_cast<int>(t.size(0));
    const int cols = static_cast<int>(t.size(1));
    const int channels = (t.dim() >= 3) ? static_cast<int>(t.size(2)) : 1;

    // The header only wraps the tensor memory; clone() makes the result independent of
    // `t`, which is destroyed when this function returns.
    const cv::Mat wrapped(rows, cols, CV_MAKETYPE(CV_8U, channels), t.data_ptr());
    return wrapped.clone();
}

///
RuCLIPProcessor :: RuCLIPProcessor(
const std::string& tokenizer_path,
const int image_size /*= 224*/,
Expand Down
Loading