Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data/settings_yolov26m.ini
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ net_type = YOLOV26
# INT8
# FP16
# FP32
# FP8
inference_precision = FP16


Expand Down
7 changes: 3 additions & 4 deletions example/MotionDetectorExample.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample
{
public:
MotionDetectorExample(const cv::CommandLineParser& parser)
: VideoExample(parser), m_minObjWidth(10)
: VideoExample(parser)
{
#ifdef USE_CLIP
std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336";
Expand All @@ -38,8 +38,7 @@ class MotionDetectorExample final : public VideoExample
{
m_logger->info("MotionDetectorExample::InitDetector");

//m_minObjWidth = frame.cols / 20;
m_minObjWidth = 4;
m_minObjWidth = 2;

config_t config;
config.emplace("useRotatedRect", "0");
Expand Down Expand Up @@ -97,7 +96,7 @@ class MotionDetectorExample final : public VideoExample

if (!m_trackerSettingsLoaded)
{
m_trackerSettings.SetDistance(tracking::DistJaccard);
m_trackerSettings.SetDistance(tracking::DistCenters);
m_trackerSettings.m_kalmanType = tracking::KalmanLinear;
m_trackerSettings.m_filterGoal = tracking::FilterCenter;
m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect
Expand Down
1 change: 1 addition & 0 deletions src/Detector/ONNXTensorRTDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config)
dictPrecision["INT8"] = tensor_rt::INT8;
dictPrecision["FP16"] = tensor_rt::FP16;
dictPrecision["FP32"] = tensor_rt::FP32;
dictPrecision["FP8"] = tensor_rt::FP8;
auto precision = dictPrecision.find(inference_precision->second);
if (precision != dictPrecision.end())
m_localConfig.m_inferencePrecision = precision->second;
Expand Down
19 changes: 14 additions & 5 deletions src/Detector/tensorrt_onnx/YoloONNX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params)

m_params = params;

sample::setReportableSeverity(sample::Logger::Severity::kINFO);
initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), "");

auto GetBindings = [&]()
Expand Down Expand Up @@ -79,15 +80,16 @@ bool YoloONNX::Init(const SampleYoloParams& params)
file.close();
}

nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger);
m_inferRuntime = std::shared_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger));
if (m_params.m_dlaCore >= 0)
infer->setDLACore(m_params.m_dlaCore);
m_inferRuntime->setDLACore(m_params.m_dlaCore);

m_engine = std::shared_ptr<nvinfer1::ICudaEngine>(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter());
m_engine = std::shared_ptr<nvinfer1::ICudaEngine>(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter());
#if (NV_TENSORRT_MAJOR < 8)
infer->destroy();
m_inferRuntime->destroy();
m_inferRuntime.reset();
#else
//delete infer;
//m_inferRuntime.reset();
#endif

if (m_engine)
Expand Down Expand Up @@ -233,6 +235,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr<nvinfer1::IBuilder>& builder,
{
case tensor_rt::Precision::FP16:
config->setFlag(nvinfer1::BuilderFlag::kFP16);
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl;
break;

case tensor_rt::Precision::FP8:
config->setFlag(nvinfer1::BuilderFlag::kFP8);
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl;
break;

case tensor_rt::Precision::INT8:
Expand All @@ -243,6 +251,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr<nvinfer1::IBuilder>& builder,
BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs);
calibrator.reset(new Int8EntropyCalibrator2<BatchStream>(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str()));
config->setFlag(nvinfer1::BuilderFlag::kINT8);
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl;
config->setInt8Calibrator(calibrator.get());
}
break;
Expand Down
1 change: 1 addition & 0 deletions src/Detector/tensorrt_onnx/YoloONNX.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class YoloONNX

private:
std::shared_ptr<nvinfer1::ICudaEngine> m_engine; //!< The TensorRT engine used to run the network
std::shared_ptr<nvinfer1::IRuntime> m_inferRuntime;

cv::Mat m_resized;
std::vector<cv::Mat> m_resizedBatch;
Expand Down
2 changes: 2 additions & 0 deletions src/Detector/tensorrt_onnx/class_detector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ namespace tensor_rt
dictprecision[tensor_rt::INT8] = "kINT8";
dictprecision[tensor_rt::FP16] = "kHALF";
dictprecision[tensor_rt::FP32] = "kFLOAT";
dictprecision[tensor_rt::FP8] = "kFP8";

auto precision = dictprecision.find(m_params.m_precision);
if (precision != dictprecision.end())
precisionStr = precision->second;
Expand Down
3 changes: 2 additions & 1 deletion src/Detector/tensorrt_onnx/class_detector.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ namespace tensor_rt
{
INT8 = 0,
FP16,
FP32
FP32,
FP8
};

///
Expand Down
21 changes: 19 additions & 2 deletions src/Detector/tensorrt_onnx/common/sampleInference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <cuda_fp8.h>
using namespace nvinfer1;
namespace sample
{
Expand Down Expand Up @@ -1320,7 +1321,15 @@ void Binding::fill()
fillBuffer<uint8_t>(buffer->getHostBuffer(), volume, 0, 255);
break;
}
case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported");
case nvinfer1::DataType::kFP8:
{
#if 0
ASSERT(false && "FP8 is not supported");
#else
fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f));
#endif
break;
}
#if (NV_TENSORRT_MAJOR > 8)
case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported");
#endif
Expand Down Expand Up @@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
break;
}
#endif
case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported");
case nvinfer1::DataType::kFP8:
{
#if 0
ASSERT(false && "FP8 is not supported");
#else
dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
#endif
break;
}
#if (NV_TENSORRT_MAJOR > 8)
case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported");
#endif
Expand Down
11 changes: 10 additions & 1 deletion src/Detector/tensorrt_onnx/common/sampleUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "sampleUtils.h"
#include "bfloat16.h"
#include "half.h"
#include <cuda_fp8.h>

using namespace nvinfer1;

Expand Down Expand Up @@ -433,6 +434,11 @@ void print(std::ostream& os, __half v)
os << static_cast<float>(v);
}

//! Stream an 8-bit floating point value (__nv_fp8_e4m3) in human-readable form.
//! The value is converted to float first and the float is what gets written to \p os.
void print(std::ostream& os, __nv_fp8_e4m3 v)
{
    float const widened = static_cast<float>(v);
    os << widened;
}

template <typename T>
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv)
Expand Down Expand Up @@ -482,6 +488,8 @@ template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separat
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<int64_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
// Explicit instantiation of dumpBuffer for the __nv_fp8_e4m3 element type.
template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);

template <typename T>
void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
Expand Down Expand Up @@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
std::uniform_real_distribution<float> distribution((float)min, (float)max);
auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
std::generate(typedBuffer, typedBuffer + volume, generator);
}
Expand All @@ -580,6 +588,7 @@ template void fillBuffer<int8_t>(void* buffer, int64_t volume, int8_t min, int8_
template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max);
template void fillBuffer<BFloat16>(void* buffer, int64_t volume, BFloat16 min, BFloat16 max);
template void fillBuffer<uint8_t>(void* buffer, int64_t volume, uint8_t min, uint8_t max);
// Explicit instantiation of fillBuffer for the __nv_fp8_e4m3 element type.
template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max);

bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target)
{
Expand Down
Loading