diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini index 625ce893..23dd24f4 100644 --- a/data/settings_yolov26m.ini +++ b/data/settings_yolov26m.ini @@ -47,6 +47,7 @@ net_type = YOLOV26 # INT8 # FP16 # FP32 +# FP8 inference_precision = FP16 diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h index 7136b758..76afa05e 100644 --- a/example/MotionDetectorExample.h +++ b/example/MotionDetectorExample.h @@ -17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample { public: MotionDetectorExample(const cv::CommandLineParser& parser) - : VideoExample(parser), m_minObjWidth(10) + : VideoExample(parser) { #ifdef USE_CLIP std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336"; @@ -38,8 +38,7 @@ class MotionDetectorExample final : public VideoExample { m_logger->info("MotionDetectorExample::InitDetector"); - //m_minObjWidth = frame.cols / 20; - m_minObjWidth = 4; + m_minObjWidth = 2; config_t config; config.emplace("useRotatedRect", "0"); @@ -97,7 +96,7 @@ class MotionDetectorExample final : public VideoExample if (!m_trackerSettingsLoaded) { - m_trackerSettings.SetDistance(tracking::DistJaccard); + m_trackerSettings.SetDistance(tracking::DistCenters); m_trackerSettings.m_kalmanType = tracking::KalmanLinear; m_trackerSettings.m_filterGoal = tracking::FilterCenter; m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp index 9d28da54..b0a734a7 100644 --- a/src/Detector/ONNXTensorRTDetector.cpp +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config) dictPrecision["INT8"] = tensor_rt::INT8; dictPrecision["FP16"] = tensor_rt::FP16; dictPrecision["FP32"] = tensor_rt::FP32; + dictPrecision["FP8"] = tensor_rt::FP8; auto precision = dictPrecision.find(inference_precision->second); if (precision != dictPrecision.end()) m_localConfig.m_inferencePrecision = precision->second; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.cpp b/src/Detector/tensorrt_onnx/YoloONNX.cpp index aa4d23a6..bec31df9 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.cpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.cpp @@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params) m_params = params; + sample::setReportableSeverity(sample::Logger::Severity::kINFO); initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), ""); auto GetBindings = [&]() @@ -79,15 +80,16 @@ bool YoloONNX::Init(const SampleYoloParams& params) file.close(); } - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger); + m_inferRuntime = std::shared_ptr(nvinfer1::createInferRuntime(sample::gLogger)); if (m_params.m_dlaCore >= 0) - infer->setDLACore(m_params.m_dlaCore); + m_inferRuntime->setDLACore(m_params.m_dlaCore); - m_engine = std::shared_ptr(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); + m_engine = std::shared_ptr(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); #if (NV_TENSORRT_MAJOR < 8) - infer->destroy(); + m_inferRuntime->destroy(); + m_inferRuntime.reset(); #else - //delete infer; + //m_inferRuntime.reset(); #endif if (m_engine) @@ -233,6 +235,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, { case tensor_rt::Precision::FP16: config->setFlag(nvinfer1::BuilderFlag::kFP16); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl; + break; + + case tensor_rt::Precision::FP8: + config->setFlag(nvinfer1::BuilderFlag::kFP8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl; break; case tensor_rt::Precision::INT8: @@ -243,6 +251,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs); calibrator.reset(new Int8EntropyCalibrator2(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str())); config->setFlag(nvinfer1::BuilderFlag::kINT8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl; config->setInt8Calibrator(calibrator.get()); } break; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.hpp b/src/Detector/tensorrt_onnx/YoloONNX.hpp index 2452f61d..cae188c5 100644 --- a/src/Detector/tensorrt_onnx/YoloONNX.hpp +++ b/src/Detector/tensorrt_onnx/YoloONNX.hpp @@ -86,6 +86,7 @@ class YoloONNX private: std::shared_ptr m_engine; //!< The TensorRT engine used to run the network + std::shared_ptr m_inferRuntime; cv::Mat m_resized; std::vector m_resizedBatch; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp index 70f2aa48..f5f4fb66 100644 --- a/src/Detector/tensorrt_onnx/class_detector.cpp +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -134,6 +134,8 @@ namespace tensor_rt dictprecision[tensor_rt::INT8] = "kINT8"; dictprecision[tensor_rt::FP16] = "kHALF"; dictprecision[tensor_rt::FP32] = "kFLOAT"; + dictprecision[tensor_rt::FP8] = "kFP8"; + auto precision = dictprecision.find(m_params.m_precision); if (precision != dictprecision.end()) precisionStr = precision->second; diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h index 29780685..7ea989bc 100644 --- a/src/Detector/tensorrt_onnx/class_detector.h +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -76,7 +76,8 @@ namespace tensor_rt { INT8 = 0, FP16, - FP32 + FP32, + FP8 }; /// diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.cpp b/src/Detector/tensorrt_onnx/common/sampleInference.cpp index f0470bf7..b131ca32 100644 --- a/src/Detector/tensorrt_onnx/common/sampleInference.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleInference.cpp @@ -46,6 +46,7 @@ #include "sampleOptions.h" #include "sampleReporting.h" #include "sampleUtils.h" +#include using namespace nvinfer1; namespace sample { @@ -1320,7 +1321,15 @@ void Binding::fill() fillBuffer(buffer->getHostBuffer(), volume, 0, 255); break; } - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f)); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif @@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, break; } #endif - case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv); +#endif + break; + } #if (NV_TENSORRT_MAJOR > 8) case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); #endif diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp index 8f172afe..89a128ee 100644 --- a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp @@ -18,6 +18,7 @@ #include "sampleUtils.h" #include "bfloat16.h" #include "half.h" +#include using namespace nvinfer1; @@ -433,6 +434,11 @@ void print(std::ostream& os, __half v) os << static_cast(v); } +void print(std::ostream& os, __nv_fp8_e4m3 v) +{ + os << static_cast(v); +} + template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv) @@ -482,6 +488,8 @@ template void dumpBuffer(void const* buffer, std::string const& separat Dims const& strides, int32_t vectorDim, int32_t spv); template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) @@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max) { T* typedBuffer = static_cast(buffer); std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); + std::uniform_real_distribution distribution((float)min, (float)max); auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; std::generate(typedBuffer, typedBuffer + volume, generator); } @@ -580,6 +588,7 @@ template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_ template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); +template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max); bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) {